00001 #include "lm/bhiksha.hh"
00002
00003 #include "lm/binary_format.hh"
00004 #include "lm/config.hh"
00005 #include "util/file.hh"
00006 #include "util/exception.hh"
00007
00008 #include <limits>
00009
00010 namespace lm {
00011 namespace ngram {
00012 namespace trie {
00013
00014 DontBhiksha::DontBhiksha(const void * , uint64_t , uint64_t max_next, const Config &) :
00015 next_(util::BitsMask::ByMax(max_next)) {}
00016
00017 const uint8_t kArrayBhikshaVersion = 0;
00018
00019
00020 void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
00021 uint8_t buffer[2];
00022 file.ReadForConfig(buffer, 2, offset);
00023 uint8_t version = buffer[0];
00024 uint8_t configured_bits = buffer[1];
00025 if (version != kArrayBhikshaVersion) UTIL_THROW(FormatLoadException, "This file has sorted array compression version " << (unsigned) version << " but the code expects version " << (unsigned)kArrayBhikshaVersion);
00026 config.pointer_bhiksha_bits = configured_bits;
00027 }
00028
00029 namespace {
00030
00031
00032 uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
00033 uint8_t required = util::RequiredBits(max_next);
00034 uint8_t best_chop = 0;
00035 int64_t lowest_change = std::numeric_limits<int64_t>::max();
00036
00037 for (uint8_t chop = 0; chop <= std::min(required, config.pointer_bhiksha_bits); ++chop) {
00038 int64_t change = (max_next >> (required - chop)) * 64
00039 - max_offset * static_cast<int64_t>(chop);
00040 if (change < lowest_change) {
00041 lowest_change = change;
00042 best_chop = chop;
00043 }
00044 }
00045 return best_chop;
00046 }
00047
00048 std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &config) {
00049 uint8_t required = util::RequiredBits(max_next);
00050 uint8_t chopping = ChopBits(max_offset, max_next, config);
00051 return (max_next >> (required - chopping)) + 1 ;
00052 }
00053 }
00054
00055 uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) {
00056 return sizeof(uint64_t) * (1 + ArrayCount(max_offset, max_next, config)) + 7 ;
00057 }
00058
00059 uint8_t ArrayBhiksha::InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
00060 return util::RequiredBits(max_next) - ChopBits(max_offset, max_next, config);
00061 }
00062
namespace {

// Rounds `from` up to the nearest address that is a multiple of 8.  An
// address that is already 8-byte aligned is returned unchanged; otherwise
// the result is between 1 and 7 bytes past `from`.
void *AlignTo8(void *from) {
  uint8_t *const bytes = reinterpret_cast<uint8_t*>(from);
  const std::size_t misalignment = reinterpret_cast<std::size_t>(bytes) & 7;
  return misalignment ? bytes + (8 - misalignment) : bytes;
}

} // namespace
00073
00074 ArrayBhiksha::ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_next, const Config &config)
00075 : next_inline_(util::BitsMask::ByBits(InlineBits(max_offset, max_next, config))),
00076 offset_begin_(reinterpret_cast<const uint64_t*>(AlignTo8(base)) + 1 ),
00077 offset_end_(offset_begin_ + ArrayCount(max_offset, max_next, config)),
00078 write_to_(reinterpret_cast<uint64_t*>(AlignTo8(base)) + 1 + 1 ),
00079 original_base_(base) {}
00080
00081 void ArrayBhiksha::FinishedLoading(const Config &config) {
00082
00083 *(write_to_ - (write_to_ - offset_begin_)) = 0;
00084
00085 if (write_to_ != offset_end_) UTIL_THROW(util::Exception, "Did not get all the array entries that were expected.");
00086
00087 uint8_t *head_write = reinterpret_cast<uint8_t*>(original_base_);
00088 *(head_write++) = kArrayBhikshaVersion;
00089 *(head_write++) = config.pointer_bhiksha_bits;
00090 }
00091
00092 }
00093 }
00094 }