00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 #include <fstream>
00023 #include <string>
00024 #include "GenerationDictionary.h"
00025 #include "FactorCollection.h"
00026 #include "Word.h"
00027 #include "Util.h"
00028 #include "InputFileStream.h"
00029 #include "StaticData.h"
00030 #include "util/exception.hh"
00031 #include "util/string_stream.hh"
00032 
00033 using namespace std;
00034 
00035 namespace Moses
00036 {
00037 std::vector<GenerationDictionary*> GenerationDictionary::s_staticColl;
00038 
00039 GenerationDictionary::GenerationDictionary(const std::string &line)
00040   : DecodeFeature(line, true)
00041 {
00042   s_staticColl.push_back(this);
00043 
00044   ReadParameters();
00045 }
00046 
00047 void GenerationDictionary::Load(AllOptions::ptr const& opts)
00048 {
00049   m_options = opts;
00050   FactorCollection &factorCollection = FactorCollection::Instance();
00051 
00052   const size_t numFeatureValuesInConfig = this->GetNumScoreComponents();
00053 
00054 
00055   
00056   InputFileStream inFile(m_filePath);
00057   UTIL_THROW_IF2(!inFile.good(), "Couldn't read " << m_filePath);
00058 
00059   string line;
00060   size_t lineNum = 0;
00061   while(getline(inFile, line)) {
00062     ++lineNum;
00063     vector<string> token = Tokenize( line );
00064 
00065     
00066     Word *inputWord = new Word();  
00067     Word outputWord;
00068 
00069     
00070 
00071     
00072     vector<string> factorString = Tokenize( token[0], "|" );
00073     for (size_t i = 0 ; i < GetInput().size() ; i++) {
00074       FactorType factorType = GetInput()[i];
00075       const Factor *factor = factorCollection.AddFactor( Output, factorType, factorString[i]);
00076       inputWord->SetFactor(factorType, factor);
00077     }
00078 
00079     factorString = Tokenize( token[1], "|" );
00080     for (size_t i = 0 ; i < GetOutput().size() ; i++) {
00081       FactorType factorType = GetOutput()[i];
00082 
00083       const Factor *factor = factorCollection.AddFactor( Output, factorType, factorString[i]);
00084       outputWord.SetFactor(factorType, factor);
00085     }
00086 
00087     size_t numFeaturesInFile = token.size() - 2;
00088     if (numFeaturesInFile < numFeatureValuesInConfig) {
00089       util::StringStream strme;
00090       strme << m_filePath << ":" << lineNum << ": expected " << numFeatureValuesInConfig
00091             << " feature values, but found " << numFeaturesInFile << "\n";
00092       throw strme.str();
00093     }
00094     std::vector<float> scores(numFeatureValuesInConfig, 0.0f);
00095     for (size_t i = 0; i < numFeatureValuesInConfig; i++)
00096       scores[i] = FloorScore(TransformScore(Scan<float>(token[2+i])));
00097 
00098     Collection::iterator iterWord = m_collection.find(inputWord);
00099     if (iterWord == m_collection.end()) {
00100       m_collection[inputWord][outputWord].Assign(this, scores);
00101     } else {
00102       
00103       (iterWord->second)[outputWord].Assign(this, scores);
00104       delete inputWord;
00105     }
00106   }
00107 
00108   inFile.Close();
00109 }
00110 
00111 GenerationDictionary::~GenerationDictionary()
00112 {
00113   Collection::const_iterator iter;
00114   for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter) {
00115     delete iter->first;
00116   }
00117 }
00118 
00119 const OutputWordCollection *GenerationDictionary::FindWord(const Word &word) const
00120 {
00121   const OutputWordCollection *ret;
00122 
00123   Word wordInput;
00124   const std::vector<FactorType> &inputFactors = GetInput();
00125   for (size_t i = 0; i < inputFactors.size(); ++i) {
00126     FactorType factorType = inputFactors[i];
00127     wordInput[factorType] = word[factorType];
00128   }
00129 
00130   Collection::const_iterator iter = m_collection.find(&wordInput);
00131   if (iter == m_collection.end()) {
00132     
00133     ret = NULL;
00134   } else {
00135     ret = &iter->second;
00136   }
00137   return ret;
00138 }
00139 
00140 void GenerationDictionary::SetParameter(const std::string& key, const std::string& value)
00141 {
00142   if (key == "path") {
00143     m_filePath = value;
00144   } else {
00145     DecodeFeature::SetParameter(key, value);
00146   }
00147 }
00148 
00149 }
00150