00001
00002 #include <cstdlib>
00003
00004 #include "PhraseDictionaryTransliteration.h"
00005 #include "moses/DecodeGraph.h"
00006 #include "moses/DecodeStep.h"
00007 #include "util/tempfile.hh"
00008
00009 using namespace std;
00010
00011 namespace Moses
00012 {
00013 PhraseDictionaryTransliteration::PhraseDictionaryTransliteration(const std::string &line)
00014 : PhraseDictionary(line, true)
00015 {
00016 ReadParameters();
00017 UTIL_THROW_IF2(m_mosesDir.empty() ||
00018 m_scriptDir.empty() ||
00019 m_externalDir.empty() ||
00020 m_inputLang.empty() ||
00021 m_outputLang.empty(), "Must specify all arguments");
00022 }
00023
00024 void PhraseDictionaryTransliteration::Load(AllOptions::ptr const& opts)
00025 {
00026 m_options = opts;
00027 SetFeaturesToApply();
00028 }
00029
00030 void PhraseDictionaryTransliteration::CleanUpAfterSentenceProcessing(const InputType& source)
00031 {
00032 ReduceCache();
00033 }
00034
00035 void PhraseDictionaryTransliteration::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
00036 {
00037
00038 InputPathList::const_iterator iter;
00039 for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
00040 InputPath &inputPath = **iter;
00041
00042 if (!SatisfyBackoff(inputPath)) {
00043 continue;
00044 }
00045
00046 const Phrase &sourcePhrase = inputPath.GetPhrase();
00047
00048 if (sourcePhrase.GetSize() != 1) {
00049
00050 continue;
00051 }
00052
00053 GetTargetPhraseCollection(inputPath);
00054 }
00055 }
00056
00057 void
00058 PhraseDictionaryTransliteration::
00059 GetTargetPhraseCollection(InputPath &inputPath) const
00060 {
00061 const Phrase &sourcePhrase = inputPath.GetPhrase();
00062 size_t hash = hash_value(sourcePhrase);
00063
00064 CacheColl &cache = GetCache();
00065
00066 CacheColl::iterator iter;
00067 iter = cache.find(hash);
00068
00069 if (iter != cache.end()) {
00070
00071 TargetPhraseCollection::shared_ptr tpColl = iter->second.first;
00072 inputPath.SetTargetPhrases(*this, tpColl, NULL);
00073 } else {
00074
00075 const util::temp_file inFile;
00076 const util::temp_dir outDir;
00077
00078 ofstream inStream(inFile.path().c_str());
00079 inStream << sourcePhrase.ToString() << endl;
00080 inStream.close();
00081
00082 string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
00083 " --transliteration-model-dir " + m_filePath +
00084 " --moses-src-dir " + m_mosesDir +
00085 " --external-bin-dir " + m_externalDir +
00086 " --input-extension " + m_inputLang +
00087 " --output-extension " + m_outputLang +
00088 " --oov-file " + inFile.path() +
00089 " --out-dir " + outDir.path();
00090
00091 int ret = system(cmd.c_str());
00092 UTIL_THROW_IF2(ret != 0, "Transliteration script error");
00093
00094 TargetPhraseCollection::shared_ptr tpColl(new TargetPhraseCollection);
00095 vector<TargetPhrase*> targetPhrases
00096 = CreateTargetPhrases(sourcePhrase, outDir.path());
00097 vector<TargetPhrase*>::const_iterator iter;
00098 for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
00099 TargetPhrase *tp = *iter;
00100 tpColl->Add(tp);
00101 }
00102 cache[hash] = CacheCollEntry(tpColl, clock());
00103 inputPath.SetTargetPhrases(*this, tpColl, NULL);
00104 }
00105 }
00106
00107 std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(const Phrase &sourcePhrase, const string &outDir) const
00108 {
00109 std::vector<TargetPhrase*> ret;
00110
00111 string outPath = outDir + "/out.txt";
00112 ifstream outStream(outPath.c_str());
00113
00114 string line;
00115 while (getline(outStream, line)) {
00116 vector<string> toks;
00117 Tokenize(toks, line, "\t");
00118 UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
00119
00120 TargetPhrase *tp = new TargetPhrase(this);
00121 Word &word = tp->AddWord();
00122 word.CreateFromString(Output, m_output, toks[0], false);
00123
00124 float score = Scan<float>(toks[1]);
00125 tp->GetScoreBreakdown().PlusEquals(this, score);
00126
00127
00128 tp->EvaluateInIsolation(sourcePhrase, GetFeaturesToApply());
00129
00130 ret.push_back(tp);
00131 }
00132
00133 outStream.close();
00134
00135 return ret;
00136 }
00137
00138 ChartRuleLookupManager* PhraseDictionaryTransliteration::CreateRuleLookupManager(const ChartParser &parser,
00139 const ChartCellCollectionBase &cellCollection,
00140 std::size_t )
00141 {
00142 return NULL;
00143
00144 }
00145
00146 void
00147 PhraseDictionaryTransliteration::
00148 SetParameter(const std::string& key, const std::string& value)
00149 {
00150 if (key == "moses-dir") {
00151 m_mosesDir = value;
00152 } else if (key == "script-dir") {
00153 m_scriptDir = value;
00154 } else if (key == "external-dir") {
00155 m_externalDir = value;
00156 } else if (key == "input-lang") {
00157 m_inputLang = value;
00158 } else if (key == "output-lang") {
00159 m_outputLang = value;
00160 } else {
00161 PhraseDictionary::SetParameter(key, value);
00162 }
00163 }
00164
00165 TO_STRING_BODY(PhraseDictionaryTransliteration);
00166
00167
00168 ostream& operator<<(ostream& out, const PhraseDictionaryTransliteration& phraseDict)
00169 {
00170 return out;
00171 }
00172
00173 }