00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "ChartRuleLookupManagerOnDisk.h"
00021
00022 #include <algorithm>
00023
00024 #include "moses/ChartParser.h"
00025 #include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
00026 #include "moses/StaticData.h"
00027 #include "moses/ChartParserCallback.h"
00028 #include "DotChartOnDisk.h"
00029 #include "OnDiskPt/TargetPhraseCollection.h"
00030
00031 using namespace std;
00032
00033 namespace Moses
00034 {
00035
00036 ChartRuleLookupManagerOnDisk::ChartRuleLookupManagerOnDisk(
00037 const ChartParser &parser,
00038 const ChartCellCollectionBase &cellColl,
00039 const PhraseDictionaryOnDisk &dictionary,
00040 OnDiskPt::OnDiskWrapper &dbWrapper,
00041 const std::vector<FactorType> &inputFactorsVec,
00042 const std::vector<FactorType> &outputFactorsVec)
00043 : ChartRuleLookupManagerCYKPlus(parser, cellColl)
00044 , m_dictionary(dictionary)
00045 , m_dbWrapper(dbWrapper)
00046 , m_inputFactorsVec(inputFactorsVec)
00047 , m_outputFactorsVec(outputFactorsVec)
00048 {
00049 UTIL_THROW_IF2(m_expandableDottedRuleListVec.size() != 0,
00050 "Dotted rule collection not correctly initialized");
00051
00052 size_t sourceSize = parser.GetSize();
00053 m_expandableDottedRuleListVec.resize(sourceSize);
00054 m_input_default_nonterminal = parser.options()->syntax.input_default_non_terminal;
00055
00056 for (size_t ind = 0; ind < m_expandableDottedRuleListVec.size(); ++ind) {
00057 DottedRuleOnDisk *initDottedRule = new DottedRuleOnDisk(m_dbWrapper.GetRootSourceNode());
00058
00059 DottedRuleStackOnDisk *processedStack = new DottedRuleStackOnDisk(sourceSize - ind + 1);
00060 processedStack->Add(0, initDottedRule);
00061
00062 m_expandableDottedRuleListVec[ind] = processedStack;
00063 }
00064 }
00065
00066 ChartRuleLookupManagerOnDisk::~ChartRuleLookupManagerOnDisk()
00067 {
00068
00069
00070
00071
00072
00073
00074
00075 RemoveAllInColl(m_expandableDottedRuleListVec);
00076 RemoveAllInColl(m_sourcePhraseNode);
00077 }
00078
00079 void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
00080 const InputPath &inputPath,
00081 size_t lastPos,
00082 ChartParserCallback &outColl)
00083 {
00084 const StaticData &staticData = StaticData::Instance();
00085
00086 const Range &range = inputPath.GetWordsRange();
00087
00088 size_t relEndPos = range.GetEndPos() - range.GetStartPos();
00089 size_t absEndPos = range.GetEndPos();
00090
00091
00092 DottedRuleStackOnDisk &expandableDottedRuleList = *m_expandableDottedRuleListVec[range.GetStartPos()];
00093
00094
00095 expandableDottedRuleList.SortSavedNodes();
00096
00097 const DottedRuleStackOnDisk::SavedNodeColl &savedNodeColl = expandableDottedRuleList.GetSavedNodeColl();
00098
00099
00100 const ChartCellLabel &sourceWordLabel = GetSourceAt(absEndPos);
00101
00102 for (size_t ind = 0; ind < (savedNodeColl.size()) ; ++ind) {
00103 const SavedNodeOnDisk &savedNode = *savedNodeColl[ind];
00104
00105 const DottedRuleOnDisk &prevDottedRule = savedNode.GetDottedRule();
00106 const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode();
00107 size_t startPos = prevDottedRule.IsRoot() ? range.GetStartPos() : prevDottedRule.GetWordsRange().GetEndPos() + 1;
00108
00109
00110 if (startPos == absEndPos) {
00111 OnDiskPt::Word *sourceWordBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceWordLabel.GetLabel());
00112
00113 if (sourceWordBerkeleyDb != NULL) {
00114 const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper);
00115 if (node != NULL) {
00116
00117
00118
00119 DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, sourceWordLabel, prevDottedRule);
00120 expandableDottedRuleList.Add(relEndPos+1, dottedRule);
00121
00122
00123 m_sourcePhraseNode.push_back(node);
00124 }
00125
00126 delete sourceWordBerkeleyDb;
00127 }
00128 }
00129
00130
00131 size_t endPos, stackInd;
00132 if (startPos > absEndPos)
00133 continue;
00134 else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
00135
00136 endPos = absEndPos - 1;
00137 stackInd = relEndPos;
00138 } else {
00139 endPos = absEndPos;
00140 stackInd = relEndPos + 1;
00141 }
00142
00143
00144 const ChartCellLabelSet &chartNonTermSet =
00145 GetTargetLabelSet(startPos, endPos);
00146
00147
00148
00149
00150
00151 const NonTerminalSet &sourceLHSSet = GetParser().GetInputPath(startPos, endPos).GetNonTerminalSet();
00152
00153 NonTerminalSet::const_iterator iterSourceLHS;
00154 for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) {
00155 const Word &sourceLHS = *iterSourceLHS;
00156
00157 OnDiskPt::Word *sourceLHSBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceLHS);
00158
00159 if (sourceLHSBerkeleyDb == NULL) {
00160 delete sourceLHSBerkeleyDb;
00161 continue;
00162 }
00163
00164 const OnDiskPt::PhraseNode *sourceNode = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
00165 delete sourceLHSBerkeleyDb;
00166
00167 if (sourceNode == NULL)
00168 continue;
00169
00170
00171 ChartCellLabelSet::const_iterator iterChartNonTerm;
00172 for (iterChartNonTerm = chartNonTermSet.begin(); iterChartNonTerm != chartNonTermSet.end(); ++iterChartNonTerm) {
00173 if (*iterChartNonTerm == NULL) {
00174 continue;
00175 }
00176 const ChartCellLabel &cellLabel = **iterChartNonTerm;
00177
00178 bool doSearch = true;
00179 if (m_dictionary.m_maxSpanDefault != NOT_FOUND) {
00180
00181
00182 bool isSourceSyntaxNonTerm = sourceLHS != m_input_default_nonterminal;
00183 size_t nonTermNumWordsCovered = endPos - startPos + 1;
00184
00185 doSearch = isSourceSyntaxNonTerm ?
00186 nonTermNumWordsCovered <= m_dictionary.m_maxSpanLabelled :
00187 nonTermNumWordsCovered <= m_dictionary.m_maxSpanDefault;
00188
00189 }
00190
00191 if (doSearch) {
00192
00193 OnDiskPt::Word *chartNonTermBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_outputFactorsVec, cellLabel.GetLabel());
00194
00195 if (chartNonTermBerkeleyDb == NULL)
00196 continue;
00197
00198 const OnDiskPt::PhraseNode *node = sourceNode->GetChild(*chartNonTermBerkeleyDb, m_dbWrapper);
00199 delete chartNonTermBerkeleyDb;
00200
00201 if (node == NULL)
00202 continue;
00203
00204
00205
00206 DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, cellLabel, prevDottedRule);
00207 expandableDottedRuleList.Add(stackInd, dottedRule);
00208
00209 m_sourcePhraseNode.push_back(node);
00210 }
00211 }
00212
00213 delete sourceNode;
00214
00215 }
00216
00217
00218 DottedRuleCollOnDisk &nodes = expandableDottedRuleList.Get(relEndPos + 1);
00219
00220
00221 DottedRuleCollOnDisk::const_iterator iterDottedRuleColl;
00222 for (iterDottedRuleColl = nodes.begin(); iterDottedRuleColl != nodes.end(); ++iterDottedRuleColl) {
00223
00224 const DottedRuleOnDisk &prevDottedRule = **iterDottedRuleColl;
00225 if (prevDottedRule.Done())
00226 continue;
00227 prevDottedRule.Done(true);
00228
00229 const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode();
00230
00231
00232 const NonTerminalSet &lhsSet = GetParser().GetInputPath(range.GetStartPos(), range.GetEndPos()).GetNonTerminalSet();
00233 NonTerminalSet::const_iterator iterLabelSet;
00234 for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) {
00235 const Word &sourceLHS = *iterLabelSet;
00236
00237 OnDiskPt::Word *sourceLHSBerkeleyDb = m_dictionary.ConvertFromMoses(m_dbWrapper, m_inputFactorsVec, sourceLHS);
00238 if (sourceLHSBerkeleyDb == NULL)
00239 continue;
00240
00241 TargetPhraseCollection::shared_ptr targetPhraseCollection;
00242 const OnDiskPt::PhraseNode *node
00243 = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
00244 if (node) {
00245 uint64_t tpCollFilePos = node->GetValue();
00246 std::map<uint64_t, TargetPhraseCollection::shared_ptr >::const_iterator iterCache = m_cache.find(tpCollFilePos);
00247 if (iterCache == m_cache.end()) {
00248
00249 OnDiskPt::TargetPhraseCollection::shared_ptr tpcollBerkeleyDb
00250 = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
00251
00252 std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
00253 targetPhraseCollection
00254 = m_dictionary.ConvertToMoses(tpcollBerkeleyDb
00255 ,m_inputFactorsVec
00256 ,m_outputFactorsVec
00257 ,m_dictionary
00258 ,weightT
00259 ,m_dbWrapper.GetVocab()
00260 ,true);
00261
00262 tpcollBerkeleyDb.reset();
00263 m_cache[tpCollFilePos] = targetPhraseCollection;
00264 } else {
00265
00266 targetPhraseCollection = iterCache->second;
00267 }
00268
00269 UTIL_THROW_IF2(targetPhraseCollection == NULL, "Error");
00270 if (!targetPhraseCollection->IsEmpty()) {
00271 AddCompletedRule(prevDottedRule, *targetPhraseCollection,
00272 range, outColl);
00273 }
00274
00275 }
00276
00277 delete node;
00278 delete sourceLHSBerkeleyDb;
00279 }
00280 }
00281 }
00282
00283
00284 }
00285
00286 }