00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <iostream>
00020 #include <cassert>
00021 #include "tpt_tightindex.h"
00022
00023 namespace tpt
00024 {
00025
00026
00027
00028
00029
00030 void tightwrite(std::ostream& out, uint64_t data, bool flag)
00031 {
00032
00033 #ifdef LOG_WRITE_ACTIVITY
00034 size_t bytes_written=1;
00035 std::cerr << "starting at file position " << out.tellp()
00036 << ": tightwrite " << data;
00037 #endif
00038 if (flag)
00039 {
00040 #ifdef LOG_WRITE_ACTIVITY
00041 std::cerr << " with flag 1 ";
00042 #endif
00043 while (data >= 128)
00044 {
00045 char c = char(data%128)|char(-128);
00046 out.put(c);
00047 data >>= 7;
00048 #ifdef LOG_WRITE_ACTIVITY
00049 bytes_written++;
00050 #endif
00051 }
00052 char c = char(data%128)|char(-128);
00053 out.put(c);
00054 }
00055 else
00056 {
00057 #ifdef LOG_WRITE_ACTIVITY
00058 std::cerr << " with flag 0 ";
00059 #endif
00060 while (data >= 128)
00061 {
00062 char c = data&127;
00063 out.put(c);
00064 data >>= 7;
00065 #ifdef LOG_WRITE_ACTIVITY
00066 bytes_written++;
00067 #endif
00068 }
00069 char c = (data&127);
00070 out.put(c);
00071 }
00072 #ifdef LOG_WRITE_ACTIVITY
00073 std::cerr << " in " << bytes_written << " bytes" << std::endl;
00074 #endif
00075 }
00076
00077
00078
00079
00080
00081 #define DEBUG_TIGHTREAD 0
00082
00083
00084
00085 filepos_type
00086 tightread(std::istream& in, std::ios::pos_type stop)
00087 {
00088
00089
00090 assert(in.rdbuf()->in_avail() > 0);
00091 filepos_type data = 0;
00092 short int bitshift = 7;
00093 int pos = in.tellg();
00094 #if DEBUG_TIGHTREAD
00095 if (debug)
00096 cerr << bitpattern(uint(in.peek())) << " " << in.peek()
00097 << " pos=" << in.tellg() << "\n";
00098 #endif
00099 int buf = in.get();
00100 if (stop == std::ios::pos_type(0))
00101 stop = size_t(in.tellg())+in.rdbuf()->in_avail();
00102 else
00103 stop = std::min(size_t(stop),size_t(in.tellg())+in.rdbuf()->in_avail());
00104 if (buf < 0)
00105 std::cerr << "number read: " << buf << " " << pos << " "
00106 << in.tellg() << std::endl;
00107 assert (buf>=0);
00108
00109 if (buf >= 128)
00110 {
00111 data = buf-128;
00112 while (in.tellg() < stop && in.peek() >= 128)
00113 {
00114 #if DEBUG_TIGHTREAD
00115 if (debug)
00116 cerr << bitpattern(uint(in.peek())) << " " << in.peek();
00117 #endif
00118
00119 data += size_t(in.get()-128)<<bitshift;
00120 bitshift += 7;
00121 #if DEBUG_TIGHTREAD
00122 if (debug)
00123 cerr << " " << data << " pos=" << in.tellg() << std::endl;
00124 #endif
00125 }
00126 }
00127 else
00128 {
00129 data = buf;
00130 while (in.tellg() < stop && in.peek() < 128)
00131 {
00132
00133 #if DEBUG_TIGHTREAD
00134 if (debug)
00135 cerr << bitpattern(uint(in.peek())) << " " << in.peek();
00136
00137 #endif
00138 data += size_t(in.get())<<bitshift;
00139 bitshift += 7;
00140 #if DEBUG_TIGHTREAD
00141 if (debug)
00142 cerr << " " << data << " pos=" << in.tellg() << "\n";
00143 #endif
00144 }
00145 }
00146 return data;
00147 }
00148
00149 #define DEBUG_TIGHTFIND 0
00150 #if DEBUG_TIGHTFIND
00151 bool debug=true;
00152 #endif
00153 bool
00154 tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop)
00155 {
00156 in.seekg((start+stop)/2);
00157
00158
00159
00160
00161
00162
00163
00164 while (static_cast<filepos_type>(in.tellg()) < stop && in.get() < 128)
00165 {
00166 #if DEBUG_TIGHTFIND
00167 if (debug)
00168 {
00169 in.unget();
00170 char c = in.get();
00171 std::cerr << in.tellg() << " skipped key byte " << c << std::endl;
00172 }
00173 #endif
00174 if (in.eof()) return false;
00175 }
00176
00177 while (static_cast<filepos_type>(in.tellg()) < stop && in.peek() >= 128)
00178 {
00179 #if DEBUG_TIGHTFIND
00180 int r = in.get();
00181 if (debug)
00182 std::cerr << in.tellg() << " skipped value byte " << r
00183 << " next is " << in.peek()
00184 << std::endl;
00185 #else
00186 in.get();
00187 #endif
00188 }
00189 return true;
00190 }
00191
00192 char const*
00193 tightfind_midpoint(char const* const start,
00194 char const* const stop)
00195 {
00196 char const* mp = start + (stop - start)/2;
00197 while (*mp < 0 && mp > start) mp--;
00198 while (*mp >= 0 && mp > start) mp--;
00199 return (*mp < 0) ? ++mp : mp;
00200 }
00201
00202 bool
00203 linear_search(std::istream& in, filepos_type start, filepos_type stop,
00204 id_type key, unsigned char& flags)
00205 {
00206 in.seekg(start);
00207
00208 #if DEBUG_TIGHTFIND
00209 if (debug) std::cerr << in.tellg() << " ";
00210 #endif
00211
00212
00213
00214
00215
00216 id_type foo;
00217 for(foo = tightread(in,stop);
00218 (foo>>FLAGBITS) < key;
00219 foo = tightread(in,stop))
00220 {
00221
00222 while (static_cast<filepos_type>(in.tellg()) < stop
00223 && in.peek() >= 128) in.get();
00224
00225 #if DEBUG_TIGHTFIND
00226 if (debug)
00227 std::cerr << (foo>>FLAGBITS) << " [" << key << "] "
00228 << in.tellg() << std::endl;
00229 #endif
00230
00231 if (in.tellg() == std::ios::pos_type(stop))
00232 return false;
00233 }
00234
00235 #if DEBUG_TIGHTFIND
00236 if (debug && (foo>>FLAGBITS)==key)
00237 std::cerr << "found entry for " << key << std::endl;
00238 std::cerr << "current file position is " << in.tellg()
00239 << " (value read: " << key << std::endl;
00240 #endif
00241
00242 assert(static_cast<filepos_type>(in.tellg()) < stop);
00243 if ((foo>>FLAGBITS)==key)
00244 {
00245 flags = (foo%256);
00246 flags &= FLAGMASK;
00247 return true;
00248 }
00249 else
00250 return false;
00251 }
00252
00253 bool
00254 tightfind(std::istream& in, filepos_type start, filepos_type stop,
00255 id_type key, unsigned char& flags)
00256 {
00257
00258 #if DEBUG_TIGHTFIND
00259 if (debug)
00260 std::cerr << "looking for " << key
00261 << " in range [" << start << ":" << stop << "]" << std::endl;
00262 #endif
00263 if (start==stop) return false;
00264 assert(stop>start);
00265 if ((start+1)==stop) return false;
00266
00267 unsigned int const granularity = sizeof(filepos_type)*5;
00268
00269
00270
00271
00272 if (stop > start + granularity)
00273 if (!tightfind_midpoint(in,start,stop))
00274 return false;
00275
00276 if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
00277 {
00278
00279
00280 return linear_search(in,start,stop,key,flags);
00281 }
00282
00283
00284 filepos_type curpos = in.tellg();
00285 id_type foo = tightread(in,stop);
00286 id_type tmpid = foo>>FLAGBITS;
00287 if (tmpid == key)
00288 {
00289 flags = foo%256;
00290 flags &= FLAGMASK;
00291 #if DEBUG_TIGHTFIND
00292 if (debug) std::cerr << "found entry for " << key << std::endl;
00293 #endif
00294 return true;
00295 }
00296 else if (tmpid > key)
00297 {
00298 #if DEBUG_TIGHTFIND
00299 if (debug) std::cerr << foo << " > " << key << std::endl;
00300 #endif
00301 return tightfind(in,start,curpos,key,flags);
00302 }
00303 else
00304 {
00305 while (static_cast<filepos_type>(in.tellg()) < stop
00306 && in.rdbuf()->in_avail() > 0
00307 && in.peek() >= 128)
00308 in.get();
00309 if (in.rdbuf()->in_avail() == 0 || in.tellg() == std::ios::pos_type(stop))
00310 return false;
00311 #if DEBUG_TIGHTFIND
00312 if (debug) std::cerr << foo << " < " << key << std::endl;
00313 #endif
00314 return tightfind(in,in.tellg(),stop,key,flags);
00315 }
00316 }
00317
00318
00319 char const*
00320 tightfind(char const* const start,
00321 char const* const stop,
00322 id_type key,
00323 unsigned char& flags)
00324 {
00325
00326
00327 if (start==stop) return NULL;
00328 assert(stop>start);
00329 if ((start+1)==stop) return NULL;
00330 char const* p = tightfind_midpoint(start,stop);
00331
00332
00333 size_t foo;
00334 char const* after = tightread(p,stop,foo);
00335 id_type tmpId = foo>>FLAGBITS;
00336 if (tmpId == key)
00337 {
00338 flags = foo%256;
00339 flags &= FLAGMASK;
00340 return after;
00341 }
00342 else if (tmpId > key)
00343 {
00344 return tightfind(start,p,key,flags);
00345 }
00346 else
00347 {
00348 while (*after<0 && ++after < stop);
00349 if (after == stop) return NULL;
00350 return tightfind(after,stop,key,flags);
00351 }
00352 }
00353
00354 char const*
00355 tightfind_noflags(char const* const start,
00356 char const* const stop,
00357 id_type key)
00358 {
00359
00360
00361 if (start==stop) return NULL;
00362 assert(stop>start);
00363 if ((start+1)==stop) return NULL;
00364 char const* p = tightfind_midpoint(start,stop);
00365
00366
00367 size_t foo;
00368 char const* after = tightread(p,stop,foo);
00369 if (foo == key)
00370 return after;
00371 else if (foo > key)
00372 {
00373 return tightfind_noflags(start,p,key);
00374 }
00375 else
00376 {
00377 while (*after<0 && ++after < stop);
00378 if (after == stop) return NULL;
00379 return tightfind_noflags(after,stop,key);
00380 }
00381 }
00382
00383 bool
00384 linear_search_noflags(std::istream& in, filepos_type start,
00385 filepos_type stop, id_type key)
00386 {
00387 std::ios::pos_type mystop = stop;
00388
00389 in.seekg(start);
00390 id_type foo;
00391 for(foo = tightread(in,stop); foo < key; foo = tightread(in,stop))
00392 {
00393
00394 while (in.tellg() < mystop && in.peek() >= 128)
00395 in.get();
00396 if (in.tellg() == mystop)
00397 return false;
00398 }
00399 assert(in.tellg() < mystop);
00400 return (foo==key);
00401 }
00402
00403
00404 bool
00405 tightfind_noflags(std::istream& in, filepos_type start,
00406 filepos_type stop, id_type key)
00407 {
00408
00409 if (start==stop) return false;
00410 assert(stop>start);
00411 if ((start+1)==stop) return false;
00412
00413
00414
00415
00416 unsigned int const granularity = sizeof(filepos_type)*5;
00417
00418
00419 if (stop > start + granularity)
00420 if (!tightfind_midpoint(in,start,stop))
00421 return false;
00422
00423
00424
00425
00426 if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
00427 return linear_search_noflags(in,start,stop,key);
00428
00429
00430 filepos_type curpos = in.tellg();
00431 id_type foo = tightread(in,stop);
00432 if (foo == key)
00433 return true;
00434
00435 else if (foo > key)
00436 return tightfind_noflags(in,start,curpos,key);
00437
00438 else
00439 {
00440 std::ios::pos_type mystop = stop;
00441 while (in.tellg() < mystop
00442 && in.rdbuf()->in_avail() > 0
00443 && in.peek() >= 128)
00444 in.get();
00445 if (in.rdbuf()->in_avail() == 0 || in.tellg() == mystop)
00446 return false;
00447 return tightfind_noflags(in,in.tellg(),stop,key);
00448 }
00449 }
00450
00451 void tightwrite2(std::ostream& out, size_t data, bool flag)
00452 {
00453
00454
00455 short int foo = (data%32768);
00456 if (flag)
00457 {
00458 foo += 32768;
00459 while (data >= 32768)
00460 {
00461 out.write(reinterpret_cast<char*>(&foo),2);
00462 data >>= 15;
00463 foo = (data%32768)+32768;
00464 }
00465 }
00466 else
00467 {
00468 while (data >= 32768)
00469 {
00470 out.write(reinterpret_cast<char*>(&foo),2);
00471 data >>= 15;
00472 foo = data%32768;
00473 }
00474 }
00475 out.write(reinterpret_cast<char*>(&foo),2);
00476 }
00477
00478 char const*
00479 tightread8(char const* start,
00480 char const* stop,
00481 uint64_t& dest)
00482 {
00483 static char bitmask=127;
00484 dest = 0;
00485 if (*start < 0)
00486 {
00487 dest = (*start)&bitmask;
00488 if (++start==stop || *start >= 0) return start;
00489 dest += uint64_t((*start)&bitmask)<<7;
00490 if (++start==stop || *start >= 0) return start;
00491 dest += uint64_t((*start)&bitmask)<<14;
00492 if (++start==stop || *start >= 0) return start;
00493 dest += uint64_t((*start)&bitmask)<<21;
00494 if (++start==stop || *start >= 0) return start;
00495 dest += uint64_t((*start)&bitmask)<<28;
00496 if (++start==stop || *start >= 0) return start;
00497 dest += uint64_t((*start)&bitmask)<<35;
00498 if (++start==stop || *start >= 0) return start;
00499 dest += uint64_t((*start)&bitmask)<<42;
00500 if (++start==stop || *start >= 0) return start;
00501 dest += uint64_t((*start)&bitmask)<<49;
00502 if (++start==stop || *start >= 0) return start;
00503 dest += uint64_t((*start)&bitmask)<<56;
00504 if (++start==stop || *start >= 0) return start;
00505 dest += uint64_t((*start)&bitmask)<<63;
00506 }
00507 else
00508 {
00509 dest = *start;
00510 if (++start==stop || *start < 0) return start;
00511 dest += uint64_t(*start)<<7;
00512 if (++start==stop || *start < 0) return start;
00513 dest += uint64_t(*start)<<14;
00514 if (++start==stop || *start < 0) return start;
00515 dest += uint64_t(*start)<<21;
00516 if (++start==stop || *start < 0) return start;
00517 dest += uint64_t(*start)<<28;
00518 if (++start==stop || *start < 0) return start;
00519 dest += uint64_t(*start)<<35;
00520 if (++start==stop || *start < 0) return start;
00521 dest += uint64_t(*start)<<42;
00522 if (++start==stop || *start < 0) return start;
00523 dest += uint64_t(*start)<<49;
00524 if (++start==stop || *start < 0) return start;
00525 dest += uint64_t(*start)<<56;
00526 if (++start==stop || *start < 0) return start;
00527 dest += uint64_t(*start)<<63;
00528 }
00529 assert(start<stop);
00530 return ++start;
00531 }
00532
00533 char const*
00534 tightread4(char const* start,
00535 char const* stop,
00536 uint32_t& dest)
00537 {
00538 static char bitmask=127;
00539 dest = 0;
00540 if (*start < 0)
00541 {
00542 dest = (*start)&bitmask;
00543 if (++start==stop || *start >= 0) return start;
00544 dest += uint32_t((*start)&bitmask)<<7;
00545 if (++start==stop || *start >= 0) return start;
00546 dest += uint32_t((*start)&bitmask)<<14;
00547 if (++start==stop || *start >= 0) return start;
00548 dest += uint32_t((*start)&bitmask)<<21;
00549 if (++start==stop || *start >= 0) return start;
00550 dest += uint32_t((*start)&bitmask)<<28;
00551 }
00552 else
00553 {
00554 dest = *start;
00555 if (++start==stop || *start < 0) return start;
00556 dest += uint32_t(*start)<<7;
00557 if (++start==stop || *start < 0) return start;
00558 dest += uint32_t(*start)<<14;
00559 if (++start==stop || *start < 0) return start;
00560 dest += uint32_t(*start)<<21;
00561 if (++start==stop || *start < 0) return start;
00562 dest += uint32_t(*start)<<28;
00563 }
00564 assert(start<stop);
00565 return ++start;
00566 }
00567
00568 char const*
00569 tightread2(char const* start,
00570 char const* stop,
00571 uint16_t& dest)
00572 {
00573 static char bitmask=127;
00574 dest = 0;
00575 if (*start < 0)
00576 {
00577 dest = (*start)&bitmask;
00578 if (++start==stop || *start >= 0) return start;
00579 dest += uint32_t((*start)&bitmask)<<7;
00580 if (++start==stop || *start >= 0) return start;
00581 dest += uint32_t((*start)&bitmask)<<14;
00582 }
00583 else
00584 {
00585 dest = *start;
00586 if (++start==stop || *start < 0) return start;
00587 dest += uint32_t(*start)<<7;
00588 if (++start==stop || *start < 0) return start;
00589 dest += uint32_t(*start)<<14;
00590 }
00591 assert(start<stop);
00592 return ++start;
00593 }
00594 }