#include <corpus_count.hh>

Public Member Functions | |
| CorpusCount (util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::vector< bool > &prune_words, const std::string &prune_vocab_filename, std::size_t entries_per_block, WarningAction disallowed_symbol) | |
| void | Run (const util::stream::ChainPosition &position) |
Static Public Member Functions | |
| static float | DedupeMultiplier (std::size_t order) |
| static std::size_t | VocabUsage (std::size_t vocab_estimate) |
Definition at line 23 of file corpus_count.hh.
lm::builder::CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::vector< bool > &prune_words, const std::string &prune_vocab_filename, std::size_t entries_per_block, WarningAction disallowed_symbol)
Definition at line 162 of file corpus_count.cc.
float lm::builder::CorpusCount::DedupeMultiplier(std::size_t order) [static]
Definition at line 154 of file corpus_count.cc.
void lm::builder::CorpusCount::Run(const util::stream::ChainPosition &position)
Definition at line 185 of file corpus_count.cc.
References util::BoolCharacter::Build(), count, lm::ngram::GrowableVocab< NewWordAction >::FindOrInsert(), util::scoped_base< T, Closer >::get(), util::stream::ChainPosition::GetChain(), lm::ngram::GrowableVocab< NewWordAction >::Index(), lm::builder::kBOS, lm::builder::kEOS, lm::kUNK, util::FilePiece::ReadLine(), lm::ngram::GrowableVocab< NewWordAction >::Size(), and util::Exception::what().

std::size_t lm::builder::CorpusCount::VocabUsage(std::size_t vocab_estimate) [static]
Definition at line 158 of file corpus_count.cc.
1.5.9