#include <vocab.hh>
Public Member Functions
SortedVocabulary ()
WordIndex Index (const StringPiece &str) const
WordIndex Bound () const
void SetupMemory (void *start, std::size_t allocated, std::size_t entries, const Config &config)
void Relocate (void *new_start)
void ConfigureEnumerate (EnumerateVocab *to, std::size_t max_entries)
WordIndex Insert (const StringPiece &str)
void FinishedLoading (ProbBackoff *reorder_vocab)
std::size_t UnkCountChangePadding () const
bool SawUnk () const
void LoadedBinary (bool have_words, int fd, EnumerateVocab *to, uint64_t offset)
uint64_t *& EndHack ()
void Populated ()
Static Public Member Functions
static uint64_t Size (uint64_t entries, const Config &config)
static void ComputeRenumbering (WordIndex types, int from_words, int to_words, std::vector< WordIndex > &mapping)
Definition at line 67 of file vocab.hh.
WordIndex lm::ngram::SortedVocabulary::Bound () const [inline]
void lm::ngram::SortedVocabulary::ComputeRenumbering (WordIndex types, int from_words, int to_words, std::vector< WordIndex > &mapping) [static]
Definition at line 164 of file vocab.cc.
References util::scoped_memory::get(), lm::ngram::detail::HashForVocab(), util::MapRead(), util::POPULATE_OR_READ, util::scoped_memory::reset(), StringPiece::size(), util::SizeOrThrow(), sort(), str, UTIL_THROW_IF, and UTIL_THROW_IF2.
void lm::ngram::SortedVocabulary::ConfigureEnumerate (EnumerateVocab *to, std::size_t max_entries)
Definition at line 121 of file vocab.cc.
References lm::EnumerateVocab::Add().
uint64_t*& lm::ngram::SortedVocabulary::EndHack () [inline]
void lm::ngram::SortedVocabulary::FinishedLoading (ProbBackoff *reorder_vocab)
WordIndex lm::ngram::SortedVocabulary::Index (const StringPiece &str) const [inline, virtual]
Implements lm::base::Vocabulary.
Definition at line 71 of file vocab.hh.
References util::BoundedSortedUniformFind() and lm::ngram::detail::HashForVocab().
Referenced by LoadedBinary() and Populated().
WordIndex lm::ngram::SortedVocabulary::Insert (const StringPiece &str)
Definition at line 129 of file vocab.cc.
References util::Pool::Allocate(), StringPiece::data(), lm::ngram::detail::HashForVocab(), and StringPiece::size().
void lm::ngram::SortedVocabulary::LoadedBinary (bool have_words, int fd, EnumerateVocab *to, uint64_t offset)
Definition at line 207 of file vocab.cc.
References Index() and lm::base::Vocabulary::SetSpecial().
void lm::ngram::SortedVocabulary::Populated ()
Definition at line 200 of file vocab.cc.
References Index() and lm::base::Vocabulary::SetSpecial().
void lm::ngram::SortedVocabulary::Relocate (void *new_start)
Definition at line 115 of file vocab.cc.
Referenced by lm::ngram::trie::BuildTrie().
bool lm::ngram::SortedVocabulary::SawUnk () const [inline]
Definition at line 111 of file vocab.hh.
Referenced by UnkCountChangePadding().
void lm::ngram::SortedVocabulary::SetupMemory (void *start, std::size_t allocated, std::size_t entries, const Config &config)
uint64_t lm::ngram::SortedVocabulary::Size (uint64_t entries, const Config &config) [static]
Definition at line 102 of file vocab.cc.
Referenced by SetupMemory().
std::size_t lm::ngram::SortedVocabulary::UnkCountChangePadding () const [inline]
Definition at line 109 of file vocab.hh.
References SawUnk().
Referenced by lm::ngram::trie::BuildTrie().