#include <boost/algorithm/string/predicate.hpp>
#include <boost/program_options.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
#include <boost/program_options/variables_map.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <string>
#include <sys/types.h>
#include <sys/wait.h>
#include "ug_conll_record.h"
#include "tpt_tokenindex.h"
#include "ug_mm_ttrack.h"
#include "tpt_pickler.h"
#include "ug_deptree.h"
#include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"
#include "moses/TranslationModel/UG/mm/ug_im_tsa.h"

Go to the source code of this file.
Functions | |
| void | interpret_args (int ac, char *av[]) |
| uchar | rangeCheck (int p, int limit) |
| id_type | get_id (TokenIndex const &T, string const &w) |
| void | open_vocab (TokenIndex &T, string fname) |
| void | ini_cnt_vec (TokenIndex const &T, vector< pair< string, size_t > > &v) |
| void | write_tokenindex (string fname, TokenIndex &T, vector< id_type > const &n2o) |
| void | init (int argc, char *argv[]) |
| void | fill_rec (Conll_Record &rec, vector< string > const &w) |
| void | log_progress (size_t ctr) |
| size_t | process_plain_input (ostream &out, vector< id_type > &s_index) |
| size_t | process_tagged_input (ostream &out, vector< id_type > &s_index, vector< id_type > &p_index) |
| size_t | numberize () |
| void | invert (vector< id_type > const &from, vector< id_type > &to) |
| void | conservative_sort (TokenIndex const &V, vector< size_t > const &cnt, vector< id_type > &xmap) |
| void | remap () |
| void | save_vocabs () |
| template<typename Token > void | build_mmTSA (string infile, string outfile) |
| bool | build_plaintext_tsas () |
| void | build_conll_tsas () |
| int | main (int argc, char *argv[]) |
Variables | |
| int | with_pfas |
| int | with_dcas |
| int | with_sfas |
| bool | incremental = false |
| bool | is_conll = false |
| bool | quiet = false |
| string | vocabBase |
| string | baseName |
| string | tmpFile |
| string | mttFile |
| string | UNK |
| TokenIndex | SF |
| TokenIndex | LM |
| TokenIndex | PS |
| TokenIndex | DT |
| vector< id_type > | smap |
| vector< id_type > | lmap |
| vector< id_type > | pmap |
| vector< id_type > | dmap |
void build_conll_tsas ()
template<typename Token > void build_mmTSA (string infile, string outfile) [inline]
Definition at line 363 of file mtt-build.cc.
References NULL, quiet, sapt::imTSA< TOKEN >::save_as_mm_tsa(), and T.

bool build_plaintext_tsas ()
void conservative_sort (TokenIndex const &V, vector< size_t > const &cnt, vector< id_type > &xmap)
Definition at line 276 of file mtt-build.cc.
References sapt::TokenIndex::knownVocabSize(), sort(), sorter, and sapt::TokenIndex::totalVocabSize().
Referenced by remap().


void fill_rec (Conll_Record &rec, vector< string > const &w)
Definition at line 131 of file mtt-build.cc.
References sapt::Conll_Record::dtype, get_id(), sapt::Conll_Record::lemma, sapt::Conll_Record::majpos, sapt::Conll_Record::minpos, sapt::Conll_Record::parent, rangeCheck(), and sapt::Conll_Record::sform.
Referenced by process_tagged_input().


id_type get_id (TokenIndex const &T, string const &w)
Definition at line 64 of file mtt-build.cc.
References UNK.
Referenced by fill_rec(), Moses::PhraseDictionaryMultiModel::GetPhraseCache(), Moses::PhraseDictionaryGroup::GetPhraseCache(), Moses::PhraseDictionaryMultiModel::GetTemporaryMultiModelWeightsVector(), process_plain_input(), and Moses::PhraseDictionaryMultiModel::SetTemporaryMultiModelWeightsVector().

void ini_cnt_vec (TokenIndex const &T, vector< pair< string, size_t > > &v)
Definition at line 91 of file mtt-build.cc.
References sapt::TokenIndex::totalVocabSize().

void init (int argc, char *argv[])
Definition at line 118 of file mtt-build.cc.
Referenced by sapt::BitSetCache< TSA >::BitSetCache(), BOOST_AUTO_TEST_CASE(), sapt::DocumentBias::init_from_json(), Moses::InputFileStream::InputFileStream(), inputfilestream::inputfilestream(), Moses::SoftSourceSyntacticConstraintsFeature::LoadTargetSourceLeftHandSideJointCountFile(), main(), Moses::InputFileStream::Open(), sapt::PScoreProvenance< Token >::PScoreProvenance(), sapt::PScoreRareness< Token >::PScoreRareness(), and MosesTuning::Viterbi().

void interpret_args (int ac, char *av[])
Definition at line 26 of file mam2symal.cc.
void invert (vector< id_type > const &from, vector< id_type > &to)
Definition at line 266 of file mtt-build.cc.
Referenced by main(), and remap().

void log_progress (size_t ctr)
Definition at line 155 of file mtt-build.cc.
Referenced by process_plain_input(), and process_tagged_input().

int main (int argc, char *argv[])
Definition at line 416 of file mtt-build.cc.
References build_conll_tsas(), build_plaintext_tsas(), init(), is_conll, sapt::TokenIndex::knownVocabSize(), mttFile, numberize(), quiet, remap(), save_vocabs(), tmpFile, and sapt::TokenIndex::totalVocabSize().

size_t numberize ()
Definition at line 224 of file mtt-build.cc.
References index, is_conll, tpt::numwrite(), process_plain_input(), process_tagged_input(), quiet, and tmpFile.
Referenced by main().


void open_vocab (TokenIndex &T, string fname)
Definition at line 77 of file mtt-build.cc.
References incremental, sapt::TokenIndex::open(), sapt::TokenIndex::setDynamic(), sapt::TokenIndex::setUnkLabel(), and UNK.
Referenced by init().


size_t process_plain_input (ostream &out, vector< id_type > &s_index)
Definition at line 170 of file mtt-build.cc.
References get_id(), log_progress(), tpt::numwrite(), and quiet.
Referenced by numberize().


size_t process_tagged_input (ostream &out, vector< id_type > &s_index, vector< id_type > &p_index)
Definition at line 190 of file mtt-build.cc.
References fill_rec(), log_progress(), quiet, and starts_with().
Referenced by numberize().


uchar rangeCheck (int p, int limit) [inline]
Definition at line 61 of file mtt-build.cc.
Referenced by fill_rec().

void remap ()
Definition at line 288 of file mtt-build.cc.
References conservative_sort(), dmap, invert(), is_conll, lmap, mtt, tpt::numread(), pmap, quiet, sform, smap, tmpFile, and sapt::TokenIndex::totalVocabSize().
Referenced by main().


void save_vocabs ()
Definition at line 343 of file mtt-build.cc.
References baseName, dmap, is_conll, sapt::TokenIndex::knownVocabSize(), lmap, pmap, smap, sapt::TokenIndex::totalVocabSize(), and write_tokenindex().
Referenced by main().


void write_tokenindex (string fname, TokenIndex &T, vector< id_type > const &n2o)
Definition at line 102 of file mtt-build.cc.
References sapt::TokenIndex::close(), quiet, sort(), UNK, and sapt::write_tokenindex_to_disk().
Referenced by save_vocabs().


| string baseName |
Definition at line 48 of file mtt-build.cc.
Referenced by build_conll_tsas(), build_plaintext_tsas(), and save_vocabs().
| vector<id_type> dmap |
Definition at line 57 of file mtt-build.cc.
| bool incremental = false |
| bool is_conll = false |
Definition at line 44 of file mtt-build.cc.
Referenced by init(), main(), numberize(), remap(), and save_vocabs().
| vector<id_type> lmap |
| string mttFile |
| vector<id_type> pmap |
Definition at line 56 of file mtt-build.cc.
| bool quiet = false |
Definition at line 45 of file mtt-build.cc.
Referenced by build_mmTSA(), main(), numberize(), process_plain_input(), process_tagged_input(), remap(), and write_tokenindex().
Definition at line 54 of file mtt-build.cc.
| vector<id_type> smap |
| string tmpFile |
Definition at line 49 of file mtt-build.cc.
Referenced by build_conll_tsas(), build_plaintext_tsas(), main(), numberize(), and remap().
| string UNK |
Definition at line 52 of file mtt-build.cc.
Referenced by get_id(), open_vocab(), and write_tokenindex().
| string vocabBase |
| int with_dcas |
| int with_pfas |
Definition at line 39 of file mtt-build.cc.
Referenced by build_conll_tsas(), and build_plaintext_tsas().
| int with_sfas |
Definition at line 41 of file mtt-build.cc.
Referenced by build_conll_tsas(), and build_plaintext_tsas().
1.5.9