00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef LM_BHIKSHA_H
00014 #define LM_BHIKSHA_H
00015
00016 #include "lm/model_type.hh"
00017 #include "lm/trie.hh"
00018 #include "util/bit_packing.hh"
00019 #include "util/sorted_uniform.hh"
00020
00021 #include <algorithm>
00022 #include <stdint.h>
00023 #include <cassert>
00024
00025 namespace lm {
00026 namespace ngram {
00027 struct Config;
00028 class BinaryFormat;
00029
00030 namespace trie {
00031
00032 class DontBhiksha {
00033 public:
00034 static const ModelType kModelTypeAdd = static_cast<ModelType>(0);
00035
00036 static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &) {}
00037
00038 static uint64_t Size(uint64_t , uint64_t , const Config &) { return 0; }
00039
00040 static uint8_t InlineBits(uint64_t , uint64_t max_next, const Config &) {
00041 return util::RequiredBits(max_next);
00042 }
00043
00044 DontBhiksha(const void *base, uint64_t max_offset, uint64_t max_next, const Config &config);
00045
00046 void ReadNext(const void *base, uint64_t bit_offset, uint64_t , uint8_t total_bits, NodeRange &out) const {
00047 out.begin = util::ReadInt57(base, bit_offset, next_.bits, next_.mask);
00048 out.end = util::ReadInt57(base, bit_offset + total_bits, next_.bits, next_.mask);
00049
00050 }
00051
00052 void WriteNext(void *base, uint64_t bit_offset, uint64_t , uint64_t value) {
00053 util::WriteInt57(base, bit_offset, next_.bits, value);
00054 }
00055
00056 void FinishedLoading(const Config &) {}
00057
00058 uint8_t InlineBits() const { return next_.bits; }
00059
00060 private:
00061 util::BitsMask next_;
00062 };
00063
00064 class ArrayBhiksha {
00065 public:
00066 static const ModelType kModelTypeAdd = kArrayAdd;
00067
00068 static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config);
00069
00070 static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config);
00071
00072 static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config);
00073
00074 ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_value, const Config &config);
00075
00076 void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const {
00077
00078
00079
00080
00081 const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1;
00082
00083
00084 const uint64_t *end_it;
00085 for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {}
00086
00087 --end_it;
00088
00089 out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
00090 util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask);
00091 out.end = ((end_it - offset_begin_) << next_inline_.bits) |
00092 util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask);
00093
00094 assert(out.end >= out.begin);
00095 }
00096
00097 void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) {
00098 uint64_t encode = value >> next_inline_.bits;
00099 for (; write_to_ <= offset_begin_ + encode; ++write_to_) *write_to_ = index;
00100 util::WriteInt57(base, bit_offset, next_inline_.bits, value & next_inline_.mask);
00101 }
00102
00103 void FinishedLoading(const Config &config);
00104
00105 uint8_t InlineBits() const { return next_inline_.bits; }
00106
00107 private:
00108 const util::BitsMask next_inline_;
00109
00110 const uint64_t *const offset_begin_;
00111 const uint64_t *const offset_end_;
00112
00113 uint64_t *write_to_;
00114
00115 void *original_base_;
00116 };
00117
00118 }
00119 }
00120 }
00121
00122 #endif // LM_BHIKSHA_H