00001
00002
00003 #include <list>
00004 #include "TranslationOptionCollectionLattice.h"
00005 #include "ConfusionNet.h"
00006 #include "WordLattice.h"
00007 #include "DecodeGraph.h"
00008 #include "DecodeStepTranslation.h"
00009 #include "DecodeStepGeneration.h"
00010 #include "FactorCollection.h"
00011 #include "FF/InputFeature.h"
00012 #include "TranslationModel/PhraseDictionaryTreeAdaptor.h"
00013 #include "util/exception.hh"
00014 #include "TranslationTask.h"
00015
00016 using namespace std;
00017
00018 namespace Moses
00019 {
00020
00022 TranslationOptionCollectionLattice
00023 ::TranslationOptionCollectionLattice
00024 ( ttasksptr const& ttask, const WordLattice &input)
00025
00026 : TranslationOptionCollection(ttask, input)
00027
00028 {
00029 UTIL_THROW_IF2(StaticData::Instance().GetUseLegacyPT(),
00030 "Not for models using the legqacy binary phrase table");
00031
00032 size_t maxNoTransOptPerCoverage = ttask->options()->search.max_trans_opt_per_cov;
00033 float translationOptionThreshold = ttask->options()->search.trans_opt_threshold;
00034 const InputFeature *inputFeature = InputFeature::InstancePtr();
00035 UTIL_THROW_IF2(inputFeature == NULL, "Input feature must be specified");
00036
00037 size_t maxPhraseLength = ttask->options()->search.max_phrase_length;
00038 size_t size = input.GetSize();
00039
00040
00041 for (size_t startPos = 0; startPos < size; ++startPos) {
00042
00043 const std::vector<size_t> &nextNodes = input.GetNextNodes(startPos);
00044
00045 const ConfusionNet::Column &col = input.GetColumn(startPos);
00046 for (size_t i = 0; i < col.size(); ++i) {
00047 const Word &word = col[i].first;
00048 UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
00049
00050 size_t nextNode = nextNodes[i];
00051 size_t endPos = startPos + nextNode - 1;
00052
00053 Range range(startPos, endPos);
00054
00055 if (range.GetNumWordsCovered() > maxPhraseLength) {
00056 continue;
00057 }
00058
00059 const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
00060
00061 Phrase subphrase;
00062 subphrase.AddWord(word);
00063
00064 const ScorePair &scores = col[i].second;
00065 ScorePair *inputScore = new ScorePair(scores);
00066
00067 InputPath *path
00068 = new InputPath(ttask.get(), subphrase, labels, range, NULL, inputScore);
00069
00070 path->SetNextNode(nextNode);
00071 m_inputPathQueue.push_back(path);
00072
00073
00074 Extend(*path, input, ttask->options()->search.max_phrase_length);
00075
00076 }
00077 }
00078 }
00079
00080 void
00081 TranslationOptionCollectionLattice::
00082 Extend(const InputPath &prevPath, const WordLattice &input,
00083 size_t const maxPhraseLength)
00084 {
00085 size_t nextPos = prevPath.GetWordsRange().GetEndPos() + 1;
00086 if (nextPos >= input.GetSize()) {
00087 return;
00088 }
00089
00090 size_t startPos = prevPath.GetWordsRange().GetStartPos();
00091 const Phrase &prevPhrase = prevPath.GetPhrase();
00092 const ScorePair *prevInputScore = prevPath.GetInputScore();
00093 UTIL_THROW_IF2(prevInputScore == NULL,
00094 "Null previous score");
00095
00096
00097 const std::vector<size_t> &nextNodes = input.GetNextNodes(nextPos);
00098
00099 const ConfusionNet::Column &col = input.GetColumn(nextPos);
00100 for (size_t i = 0; i < col.size(); ++i) {
00101 const Word &word = col[i].first;
00102 UTIL_THROW_IF2(word.IsEpsilon(), "Epsilon not supported");
00103
00104 size_t nextNode = nextNodes[i];
00105 size_t endPos = nextPos + nextNode - 1;
00106
00107 Range range(startPos, endPos);
00108
00109
00110 if (range.GetNumWordsCovered() > maxPhraseLength) {
00111 continue;
00112 }
00113
00114 const NonTerminalSet &labels = input.GetLabelSet(startPos, endPos);
00115
00116 Phrase subphrase(prevPhrase);
00117 subphrase.AddWord(word);
00118
00119 const ScorePair &scores = col[i].second;
00120 ScorePair *inputScore = new ScorePair(*prevInputScore);
00121 inputScore->PlusEquals(scores);
00122
00123 InputPath *path = new InputPath(prevPath.ttask, subphrase, labels,
00124 range, &prevPath, inputScore);
00125
00126 path->SetNextNode(nextNode);
00127 m_inputPathQueue.push_back(path);
00128
00129
00130 Extend(*path, input, maxPhraseLength);
00131
00132 }
00133 }
00134
00135 void TranslationOptionCollectionLattice::CreateTranslationOptions()
00136 {
00137 GetTargetPhraseCollectionBatch();
00138
00139 VERBOSE(2,"Translation Option Collection\n " << *this << endl);
00140 const vector <DecodeGraph*> &decodeGraphs = StaticData::Instance().GetDecodeGraphs();
00141 UTIL_THROW_IF2(decodeGraphs.size() != 1, "Multiple decoder graphs not supported yet");
00142 const DecodeGraph &decodeGraph = *decodeGraphs[0];
00143 UTIL_THROW_IF2(decodeGraph.GetSize() != 1, "Factored decomposition not supported yet");
00144
00145 const DecodeStep &decodeStep = **decodeGraph.begin();
00146 const PhraseDictionary &phraseDictionary = *decodeStep.GetPhraseDictionaryFeature();
00147
00148 for (size_t i = 0; i < m_inputPathQueue.size(); ++i) {
00149 const InputPath &path = *m_inputPathQueue[i];
00150
00151 TargetPhraseCollection::shared_ptr tpColl
00152 = path.GetTargetPhrases(phraseDictionary);
00153 const Range &range = path.GetWordsRange();
00154
00155 if (tpColl && tpColl->GetSize()) {
00156 TargetPhraseCollection::const_iterator iter;
00157 for (iter = tpColl->begin(); iter != tpColl->end(); ++iter) {
00158 const TargetPhrase &tp = **iter;
00159 TranslationOption *transOpt = new TranslationOption(range, tp);
00160 transOpt->SetInputPath(path);
00161 transOpt->EvaluateWithSourceContext(m_source);
00162
00163 Add(transOpt);
00164 }
00165 } else if (path.GetPhrase().GetSize() == 1) {
00166
00167 ProcessOneUnknownWord(path, path.GetWordsRange().GetStartPos(), path.GetWordsRange().GetNumWordsCovered() , path.GetInputScore());
00168 }
00169 }
00170
00171
00172 Prune();
00173
00174 Sort();
00175
00176
00177 CalcEstimatedScore();
00178
00179
00180 CacheLexReordering();
00181
00182 }
00183
00184 void
00185 TranslationOptionCollectionLattice::
00186 ProcessUnknownWord(size_t sourcePos)
00187 {
00188 UTIL_THROW(util::Exception, "ProcessUnknownWord() not implemented for lattice");
00189
00190 }
00191
00192 bool
00193 TranslationOptionCollectionLattice::
00194 CreateTranslationOptionsForRange
00195 (const DecodeGraph &decodeStepList, size_t startPosition, size_t endPosition,
00196 bool adhereTableLimit, size_t graphInd)
00197 {
00198 UTIL_THROW(util::Exception,
00199 "CreateTranslationOptionsForRange() not implemented for lattice");
00200 }
00201
00202 }
00203
00204