#include <dictionary.h>
Public Member Functions
int | dub () |
int | dub (int value) |
const char * | OOV () |
const char * | BoS () |
const char * | EoS () |
const char * | BoD () |
const char * | EoD () |
int | oovcode (int v=-1) |
int | incflag () |
int | incflag (int v) |
int | getword (fstream &inp, char *buffer) |
int | isprintable (char *w) |
void | genoovcode () |
void | genBoScode () |
void | genEoScode () |
int | setoovrate (double oovrate) |
long long | incfreq (int code, long long value) |
long long | multfreq (int code, double value) |
long | freq (int code, long long value=-1) |
long long | totfreq () |
float | set_load_factor (float value) |
void | grow () |
void | sort () |
| dictionary (char *filename, int size=DICT_INITSIZE, float lf=DICTIONARY_LOAD_FACTOR) |
| dictionary (dictionary *d, bool prune=false, int prunethresh=0) |
| ~dictionary () |
void | generate (char *filename) |
void | load (char *filename) |
void | save (char *filename, int freqflag=0) |
void | load (std::istream &fd) |
void | save (std::ostream &fd) |
void | augment (dictionary *d) |
int | size () |
int | getcode (const char *w) |
int | encode (const char *w) |
const char * | decode (int c) |
void | stat () |
void | print_curve (int curvesize, float *testOOV=NULL) |
float * | test (int curvesize, const char *filename, int listflag=0) |
void | cleanfreq () |
dict_entry * | scan (HT_ACTION action) |
Public Attributes
dictionary * | oovlex |
Friends
class | dictionary_iter |
Definition at line 81 of file dictionary.h.
dictionary::dictionary (char *filename, int size = DICT_INITSIZE, float lf = DICTIONARY_LOAD_FACTOR)
Definition at line 37 of file dictionary.cpp.
References mfstream::close(), DICTIONARY_LOAD_FACTOR, freq(), generate(), load(), and NULL.
dictionary::dictionary (dictionary *d, bool prune = false, int prunethresh = 0)
Definition at line 373 of file dictionary.cpp.
References dict_entry::code, dict_entry::freq, htable< T >::insert(), NULL, strstack::push(), and dict_entry::word.
dictionary::~dictionary ()
Definition at line 427 of file dictionary.cpp.
void dictionary::augment (dictionary *d)
Definition at line 143 of file dictionary.cpp.
References decode(), encode(), incflag(), and OOV().
Referenced by mixture::mixture().
const char* dictionary::BoD () [inline]
Definition at line 121 of file dictionary.h.
References BOD_.
Referenced by main(), and doc::read().
const char* dictionary::BoS () [inline]
Definition at line 113 of file dictionary.h.
References BOS_.
Referenced by genBoScode(), interplm::gencorrcounts(), ngramtable::generate(), interplm::interplm(), main(), test(), interplm::test_ngt(), interplm::test_txt(), mshiftbeta::train(), and shiftbeta::train().
void dictionary::cleanfreq () [inline]
Definition at line 222 of file dictionary.h.
References dict_entry::freq, and n.
Referenced by main().
const char * dictionary::decode (int c)
Definition at line 541 of file dictionary.cpp.
References OOV(), and dict_entry::word.
Referenced by ngramtable::augment(), augment(), lmtable::cpsublm(), mshiftbeta::discount(), ngramtable::generate(), interplm::interplm(), lmInterpolation::load(), ngramtable::loadtxt(), main(), lmmacro::Micro2MacroMapping(), lmmacro::One2OneMapping(), operator<<(), lmtable::printTable(), mdiadaptlm::saveARPA_per_level(), mdiadaptlm::saveARPA_per_word(), mdiadaptlm::saveASR(), mdiadaptlm::savescalefactor(), plsa::saveWtxt(), and ngram::trans().
int dictionary::dub (int value) [inline]
Definition at line 105 of file dictionary.h.
int dictionary::dub () [inline]
Definition at line 101 of file dictionary.h.
Referenced by mixture::discount(), mixture::dub(), interplm::dub(), interplm::test_ngt(), interplm::test_txt(), mixture::train(), interplm::trainunigr(), and mdiadaptlm::txclprob().
int dictionary::encode (const char *w)
Definition at line 503 of file dictionary.cpp.
References dict_entry::code, htable< T >::find(), dict_entry::freq, grow(), htable< T >::insert(), NULL, OOV(), strstack::push(), and dict_entry::word.
Referenced by ngramtable::augment(), augment(), ngram::containsWord(), genBoScode(), interplm::gencorrcounts(), genEoScode(), ngramtable::generate(), generate(), genoovcode(), Moses::LanguageModelIRST::GetLmID(), interplm::interplm(), lmInterpolation::load(), lmclass::loadMapElement(), ngramtable::loadtxt(), main(), ngramtable::ngramtable(), operator>>(), ngram::pushw(), doc::read(), lmtable::reload(), mdiadaptlm::saveMT(), setoovrate(), interplm::test_ngt(), interplm::test_txt(), mixture::train(), interplm::trainunigr(), and ngram::trans().
const char* dictionary::EoD () [inline]
Definition at line 125 of file dictionary.h.
References EOD_.
Referenced by main(), and doc::read().
const char* dictionary::EoS () [inline]
Definition at line 117 of file dictionary.h.
References EOS_.
Referenced by genEoScode(), main(), mdiadaptlm::saveARPA_per_level(), mdiadaptlm::saveARPA_per_word(), mdiadaptlm::saveBIN_per_level(), mdiadaptlm::saveBIN_per_word(), mshiftbeta::train(), and shiftbeta::train().
long dictionary::freq (int code, long long value = -1) [inline]
Definition at line 181 of file dictionary.h.
References dict_entry::freq.
Referenced by ngramtable::augment(), dictionary(), lmclass::getMap(), grow(), plsa::initW(), interplm::interplm(), lmclass::loadMapElement(), main(), multfreq(), print_curve(), save(), mdiadaptlm::saveMT(), plsa::saveWtxt(), setoovrate(), test(), interplm::trainunigr(), and interplm::unigr().
void dictionary::genBoScode () [inline]
Definition at line 153 of file dictionary.h.
References BoS(), and encode().
void dictionary::genEoScode () [inline]
Definition at line 158 of file dictionary.h.
References encode(), and EoS().
void dictionary::generate (char *filename)
Definition at line 111 of file dictionary.cpp.
References mfstream::close(), encode(), getword(), incfreq(), and MAX_WORD.
Referenced by dictionary().
void dictionary::genoovcode () [inline]
Definition at line 147 of file dictionary.h.
References encode(), OOV(), and oovcode().
Referenced by lmtable::cpsublm(), ngramtable::generate(), lmmacro::load(), lmInterpolation::load(), lmclass::load(), main(), and mixture::mixture().
int dictionary::getcode (const char *w)
Definition at line 496 of file dictionary.cpp.
References dict_entry::code, htable< T >::find(), and NULL.
Referenced by ngramtable::augment(), main(), test(), interplm::trainunigr(), and lmtable::wdprune().
int dictionary::getword (fstream &inp, char *buffer)
Definition at line 83 of file dictionary.cpp.
References MAX_WORD.
Referenced by generate(), load(), and test().
void dictionary::grow ()
Definition at line 444 of file dictionary.cpp.
References freq(), GROWTH_STEP, and htable< T >::insert().
Referenced by encode(), and load().
int dictionary::incflag (int v) [inline]
Definition at line 136 of file dictionary.h.
int dictionary::incflag () [inline]
Definition at line 133 of file dictionary.h.
Referenced by ngramtable::augment(), augment(), lmmacro::dictionary_incflag(), lmInterpolation::dictionary_incflag(), lmclass::dictionary_incflag(), ngramtable::generate(), ngramtable::generate_dstco(), ngramtable::generate_hmask(), Moses::LanguageModelIRST::GetLmID(), lmmacro::lmmacro(), Moses::LanguageModelIRST::Load(), lmtable::load(), lmmacro::load(), lmInterpolation::load(), lmclass::load(), ngramtable::loadtxt(), main(), lmtable::reload(), plsa::saveFeat(), and mdiadaptlm::saveMT().
long long dictionary::incfreq (int code, long long value) [inline]
Definition at line 171 of file dictionary.h.
References dict_entry::freq.
Referenced by ngramtable::augment(), ngramtable::generate(), generate(), ngramtable::generate_dstco(), ngramtable::generate_hmask(), ngramtable::loadtxt(), main(), plsa::saveFeat(), and interplm::trainunigr().
int dictionary::isprintable (char *w) [inline]
void dictionary::load (std::istream &fd)
Definition at line 317 of file dictionary.cpp.
References dict_entry::code, dict_entry::freq, grow(), htable< T >::insert(), MAX_WORD, OOV(), strstack::push(), size(), and dict_entry::word.
void dictionary::load (char *filename)
Definition at line 260 of file dictionary.cpp.
References mfstream::close(), dict_entry::code, dict_entry::freq, getword(), grow(), htable< T >::insert(), MAX_WORD, OOV(), strstack::push(), and dict_entry::word.
Referenced by dictionary(), ngramtable::loadbin(), and ngramtable::loadtxt().
long long dictionary::multfreq (int code, double value) [inline]
Definition at line 176 of file dictionary.h.
References dict_entry::freq, and freq().
const char* dictionary::OOV () [inline]
Definition at line 109 of file dictionary.h.
References OOV_.
Referenced by ngramtable::augment(), augment(), decode(), encode(), genoovcode(), load(), main(), parseline(), mdiadaptlm::saveARPA_per_level(), mdiadaptlm::saveARPA_per_word(), mdiadaptlm::saveASR(), mdiadaptlm::saveBIN_per_level(), mdiadaptlm::saveBIN_per_word(), mdiadaptlm::saveMT(), setoovrate(), mshiftbeta::train(), shiftbeta::train(), mixture::train(), and interplm::trainunigr().
int dictionary::oovcode (int v = -1) [inline]
Definition at line 129 of file dictionary.h.
Referenced by ngramtable::augment(), ngram::ckhisto(), computePP(), lmtable::cpsublm(), mshiftbeta::discount(), shiftbeta::discount(), shiftone::discount(), mixture::discount(), linearwb::discount(), interplm::gencorrcounts(), ngramtable::generate(), genoovcode(), lmclass::getMap(), lmclass::getMapScore(), interplm::interplm(), lmtable::is_OOV(), Moses::LanguageModelIRST::Load(), ngramtable::loadtxt(), lmtable::lprob(), main(), prob(), mdiadaptlm::saveARPA_per_level(), mdiadaptlm::saveARPA_per_word(), mdiadaptlm::saveASR(), ngramtable::savebin(), mdiadaptlm::saveBIN_per_level(), mdiadaptlm::saveMT(), mdiadaptlm::savescalefactor(), mdiadaptlm::scalefact(), setoovrate(), interplm::test_ngt(), interplm::test_txt(), mixture::train(), interplm::trainunigr(), mdiadaptlm::txclprob(), and interplm::zerofreq().
void dictionary::print_curve (int curvesize, float *testOOV = NULL)
Definition at line 158 of file dictionary.cpp.
References dict_entry::freq, freq(), and NULL.
Referenced by main().
void dictionary::save (std::ostream &fd)
Definition at line 353 of file dictionary.cpp.
References freq().
void dictionary::save (char *filename, int freqflag = 0)
Definition at line 469 of file dictionary.cpp.
References dict_entry::freq, freq(), and dict_entry::word.
Referenced by main(), ngramtable::savebin(), lmtable::savebin(), lmtable::savebin_dict(), mdiadaptlm::saveBIN_per_level(), mdiadaptlm::saveBIN_per_word(), mdiadaptlm::saveMT(), and ngramtable::savetxt().
dict_entry* dictionary::scan (HT_ACTION action) [inline]
Definition at line 227 of file dictionary.h.
References htable< T >::scan().
Referenced by dictionary_iter::dictionary_iter(), and dictionary_iter::next().
float dictionary::set_load_factor (float value) [inline]
Definition at line 192 of file dictionary.h.
int dictionary::setoovrate (double oovrate) [inline]
int dictionary::size () [inline]
Definition at line 211 of file dictionary.h.
References n.
Referenced by mdiadaptlm::adapt(), ngramtable::augment(), ngramtable::check_dictsize_bound(), plsa::combineT(), lmtable::cpsublm(), mixture::discount(), doc::doc(), interplm::gencorrcounts(), init(), plsa::initW(), lmInterpolation::load(), load(), plsa::loadW(), main(), mixture::mixture(), mdiadaptlm::netsize(), normcache::normcache(), interplm::obswrd(), plsa::plsa(), prob(), doc::read(), mdiadaptlm::saveARPA_per_level(), mdiadaptlm::saveARPA_per_word(), mdiadaptlm::saveASR(), mdiadaptlm::saveBIN_per_level(), mdiadaptlm::saveBIN_per_word(), plsa::saveFeat(), mdiadaptlm::savescalefactor(), plsa::saveT(), plsa::saveW(), plsa::saveWtxt(), mdiadaptlm::scalefact(), lmtable::setlogOOVpenalty(), lmInterpolation::setlogOOVpenalty(), interplm::test_ngt(), interplm::test_txt(), mixture::train(), plsa::train(), interplm::trainunigr(), mdiadaptlm::txclprob(), interplm::unigr(), and plsa::~plsa().
void dictionary::sort ()
Definition at line 407 of file dictionary.cpp.
References cmpdictentry(), dict_entry::code, htable< T >::insert(), and NULL.
Referenced by main().
void dictionary::stat ()
Definition at line 434 of file dictionary.cpp.
References strstack::used(), and htable< T >::used().
Referenced by ngramtable::stat(), and lmtable::stat().
float * dictionary::test (int curvesize, const char *filename, int listflag = 0)
Definition at line 202 of file dictionary.cpp.
References BoS(), mfstream::close(), dict_entry::freq, freq(), getcode(), getword(), k, MAX_WORD, and NULL.
Referenced by main().
long long dictionary::totfreq () [inline]
Definition at line 189 of file dictionary.h.
Referenced by main(), setoovrate(), interplm::trainunigr(), and interplm::unigr().
friend class dictionary_iter [friend]
Definition at line 97 of file dictionary.h.
dictionary* dictionary::oovlex
Definition at line 99 of file dictionary.h.