00001
00002
00003 #include "util/tokenize.hh"
00004 #include "tables-core.h"
00005
00006 #define TABLE_LINE_MAX_LENGTH 1000
00007 #define UNKNOWNSTR "UNK"
00008
00009 using namespace std;
00010
00011 namespace MosesTraining
00012 {
00013
00014 WORD_ID Vocabulary::storeIfNew( const WORD& word )
00015 {
00016 map<WORD, WORD_ID>::iterator i = lookup.find( word );
00017
00018 if( i != lookup.end() )
00019 return i->second;
00020
00021 WORD_ID id = vocab.size();
00022 vocab.push_back( word );
00023 lookup[ word ] = id;
00024 return id;
00025 }
00026
00027 WORD_ID Vocabulary::getWordID( const WORD& word )
00028 {
00029 map<WORD, WORD_ID>::iterator i = lookup.find( word );
00030 if( i == lookup.end() )
00031 return 0;
00032 return i->second;
00033 }
00034
00035 PHRASE_ID PhraseTable::storeIfNew( const PHRASE& phrase )
00036 {
00037 map< PHRASE, PHRASE_ID >::iterator i = lookup.find( phrase );
00038 if( i != lookup.end() )
00039 return i->second;
00040
00041 PHRASE_ID id = phraseTable.size();
00042 phraseTable.push_back( phrase );
00043 lookup[ phrase ] = id;
00044 return id;
00045 }
00046
00047 PHRASE_ID PhraseTable::getPhraseID( const PHRASE& phrase )
00048 {
00049 map< PHRASE, PHRASE_ID >::iterator i = lookup.find( phrase );
00050 if( i == lookup.end() )
00051 return 0;
00052 return i->second;
00053 }
00054
00055 void PhraseTable::clear()
00056 {
00057 lookup.clear();
00058 phraseTable.clear();
00059 }
00060
00061 void DTable::init()
00062 {
00063 for(int i = -10; i<10; i++)
00064 dtable[i] = -abs( i );
00065 }
00066
00067 void DTable::load( const string& fileName )
00068 {
00069 ifstream inFile;
00070 inFile.open(fileName.c_str());
00071
00072 std::string line;
00073 int i=0;
00074 while(true) {
00075 i++;
00076 getline(inFile, line);
00077 if (inFile.eof()) break;
00078 if (!inFile) {
00079 std::cerr << "Error reading from " << fileName << std::endl;
00080 abort();
00081 }
00082
00083 const vector<string> token = util::tokenize(line);
00084 if (token.size() < 2) {
00085 cerr << "line " << i << " in " << fileName << " too short, skipping\n";
00086 continue;
00087 }
00088
00089 int d = atoi( token[0].c_str() );
00090 double prob = log( atof( token[1].c_str() ) );
00091 dtable[ d ] = prob;
00092 }
00093 }
00094
00095 double DTable::get( int distortion )
00096 {
00097 if (dtable.find( distortion ) == dtable.end())
00098 return log( 0.00001 );
00099 return dtable[ distortion ];
00100 }
00101
00102 }
00103