00001 #include "lm/sizes.hh"
00002 #include "lm/model.hh"
00003 #include "util/file_piece.hh"
00004
00005 #include <vector>
00006 #include <iomanip>
00007
00008 namespace lm {
00009 namespace ngram {
00010
00011 void ShowSizes(const std::vector<uint64_t> &counts, const lm::ngram::Config &config) {
00012 uint64_t sizes[6];
00013 sizes[0] = ProbingModel::Size(counts, config);
00014 sizes[1] = RestProbingModel::Size(counts, config);
00015 sizes[2] = TrieModel::Size(counts, config);
00016 sizes[3] = QuantTrieModel::Size(counts, config);
00017 sizes[4] = ArrayTrieModel::Size(counts, config);
00018 sizes[5] = QuantArrayTrieModel::Size(counts, config);
00019 uint64_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t));
00020 uint64_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t));
00021 uint64_t divide;
00022 char prefix;
00023 if (min_length < (1 << 10) * 10) {
00024 prefix = ' ';
00025 divide = 1;
00026 } else if (min_length < (1 << 20) * 10) {
00027 prefix = 'k';
00028 divide = 1 << 10;
00029 } else if (min_length < (1ULL << 30) * 10) {
00030 prefix = 'M';
00031 divide = 1 << 20;
00032 } else {
00033 prefix = 'G';
00034 divide = 1 << 30;
00035 }
00036 long int length = std::max<long int>(2, static_cast<long int>(ceil(log10((double) max_length / divide))));
00037 std::cerr << "Memory estimate for binary LM:\ntype ";
00038
00039
00040 for (long int i = 0; i < length - 2; ++i) std::cerr << ' ';
00041
00042 std::cerr << prefix << "B\n"
00043 "probing " << std::setw(length) << (sizes[0] / divide) << " assuming -p " << config.probing_multiplier << "\n"
00044 "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r models -p " << config.probing_multiplier << "\n"
00045 "trie " << std::setw(length) << (sizes[2] / divide) << " without quantization\n"
00046 "trie " << std::setw(length) << (sizes[3] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n"
00047 "trie " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n"
00048 "trie " << std::setw(length) << (sizes[5] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits<< " array pointer compression and quantization\n";
00049 }
00050
00051 void ShowSizes(const std::vector<uint64_t> &counts) {
00052 lm::ngram::Config config;
00053 ShowSizes(counts, config);
00054 }
00055
00056 void ShowSizes(const char *file, const lm::ngram::Config &config) {
00057 std::vector<uint64_t> counts;
00058 util::FilePiece f(file);
00059 lm::ReadARPACounts(f, counts);
00060 ShowSizes(counts, config);
00061 }
00062
00063 }}