00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <fstream>
00023 #include <string>
00024 #include "GenerationDictionary.h"
00025 #include "FactorCollection.h"
00026 #include "Word.h"
00027 #include "Util.h"
00028 #include "InputFileStream.h"
00029 #include "StaticData.h"
00030 #include "util/exception.hh"
00031 #include "util/string_stream.hh"
00032
00033 using namespace std;
00034
00035 namespace Moses
00036 {
// Definition of the class-wide list of GenerationDictionary pointers.
// The constructor in this file appends `this` to it; nothing in this file
// removes entries again.
std::vector<GenerationDictionary*> GenerationDictionary::s_staticColl;
00038
00039 GenerationDictionary::GenerationDictionary(const std::string &line)
00040 : DecodeFeature(line, true)
00041 {
00042 s_staticColl.push_back(this);
00043
00044 ReadParameters();
00045 }
00046
00047 void GenerationDictionary::Load(AllOptions::ptr const& opts)
00048 {
00049 m_options = opts;
00050 FactorCollection &factorCollection = FactorCollection::Instance();
00051
00052 const size_t numFeatureValuesInConfig = this->GetNumScoreComponents();
00053
00054
00055
00056 InputFileStream inFile(m_filePath);
00057 UTIL_THROW_IF2(!inFile.good(), "Couldn't read " << m_filePath);
00058
00059 string line;
00060 size_t lineNum = 0;
00061 while(getline(inFile, line)) {
00062 ++lineNum;
00063 vector<string> token = Tokenize( line );
00064
00065
00066 Word *inputWord = new Word();
00067 Word outputWord;
00068
00069
00070
00071
00072 vector<string> factorString = Tokenize( token[0], "|" );
00073 for (size_t i = 0 ; i < GetInput().size() ; i++) {
00074 FactorType factorType = GetInput()[i];
00075 const Factor *factor = factorCollection.AddFactor( Output, factorType, factorString[i]);
00076 inputWord->SetFactor(factorType, factor);
00077 }
00078
00079 factorString = Tokenize( token[1], "|" );
00080 for (size_t i = 0 ; i < GetOutput().size() ; i++) {
00081 FactorType factorType = GetOutput()[i];
00082
00083 const Factor *factor = factorCollection.AddFactor( Output, factorType, factorString[i]);
00084 outputWord.SetFactor(factorType, factor);
00085 }
00086
00087 size_t numFeaturesInFile = token.size() - 2;
00088 if (numFeaturesInFile < numFeatureValuesInConfig) {
00089 util::StringStream strme;
00090 strme << m_filePath << ":" << lineNum << ": expected " << numFeatureValuesInConfig
00091 << " feature values, but found " << numFeaturesInFile << "\n";
00092 throw strme.str();
00093 }
00094 std::vector<float> scores(numFeatureValuesInConfig, 0.0f);
00095 for (size_t i = 0; i < numFeatureValuesInConfig; i++)
00096 scores[i] = FloorScore(TransformScore(Scan<float>(token[2+i])));
00097
00098 Collection::iterator iterWord = m_collection.find(inputWord);
00099 if (iterWord == m_collection.end()) {
00100 m_collection[inputWord][outputWord].Assign(this, scores);
00101 } else {
00102
00103 (iterWord->second)[outputWord].Assign(this, scores);
00104 delete inputWord;
00105 }
00106 }
00107
00108 inFile.Close();
00109 }
00110
00111 GenerationDictionary::~GenerationDictionary()
00112 {
00113 Collection::const_iterator iter;
00114 for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter) {
00115 delete iter->first;
00116 }
00117 }
00118
00119 const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const
00120 {
00121 const OutputWordCollection *ret;
00122
00123 Word wordInput;
00124 const std::vector<FactorType> &inputFactors = GetInput();
00125 for (size_t i = 0; i < inputFactors.size(); ++i) {
00126 FactorType factorType = inputFactors[i];
00127 wordInput[factorType] = word[factorType];
00128 }
00129
00130 Collection::const_iterator iter = m_collection.find(&wordInput);
00131 if (iter == m_collection.end()) {
00132
00133 ret = NULL;
00134 } else {
00135 ret = &iter->second;
00136 }
00137 return ret;
00138 }
00139
00140 void GenerationDictionary::SetParameter(const std::string& key, const std::string& value)
00141 {
00142 if (key == "path") {
00143 m_filePath = value;
00144 } else {
00145 DecodeFeature::SetParameter(key, value);
00146 }
00147 }
00148
00149 }
00150