00001 #include <vector>
00002 #include <limits>
00003 #include <boost/math/special_functions/fpclassify.hpp>
00004 #include <assert.h>
00005 #include "TargetPreferencesFeature.h"
00006 #include "moses/StaticData.h"
00007 #include "moses/InputFileStream.h"
00008 #include "moses/ScoreComponentCollection.h"
00009 #include "moses/Hypothesis.h"
00010 #include "moses/ChartHypothesis.h"
00011 #include "moses/ChartManager.h"
00012 #include "moses/FactorCollection.h"
00013 #include "moses/TreeInput.h"
00014 #include "moses/PP/TargetPreferencesPhraseProperty.h"
00015
00016
00017 using namespace std;
00018
00019 namespace Moses
00020 {
00021
00022 void TargetPreferencesFeatureState::AddProbabilityForLHSLabel(size_t label, double cost)
00023 {
00024 std::pair< std::map<size_t,double>::iterator, bool > inserted =
00025 m_probabilitiesForLHSLabels.insert(std::pair<size_t,double>(label,cost));
00026 if ( !inserted.second ) {
00027 (inserted.first)->second += cost;
00028 }
00029 }
00030
00031 void TargetPreferencesFeatureState::NormalizeProbabilitiesForLHSLabels(double denominator)
00032 {
00033 for ( std::map<size_t,double>::iterator iter=m_probabilitiesForLHSLabels.begin();
00034 iter!=m_probabilitiesForLHSLabels.end(); ++iter ) {
00035 (iter->second) /= denominator;
00036 }
00037 }
00038
00039 double TargetPreferencesFeatureState::GetProbabilityForLHSLabel(size_t label, bool &isMatch) const
00040 {
00041 std::map<size_t,double>::const_iterator iter = m_probabilitiesForLHSLabels.find(label);
00042 if ( iter != m_probabilitiesForLHSLabels.end() ) {
00043 isMatch = true;
00044 return iter->second;
00045 }
00046 isMatch = false;
00047 return 0;
00048 }
00049
00050 size_t TargetPreferencesFeatureState::hash() const
00051 {
00052 if (!m_distinguishStates) {
00053 return 0;
00054 }
00055 size_t ret = 0;
00056 boost::hash_combine(ret, m_probabilitiesForLHSLabels.size());
00057 for (std::map<size_t,double>::const_iterator it=m_probabilitiesForLHSLabels.begin();
00058 it!=m_probabilitiesForLHSLabels.end(); ++it) {
00059 boost::hash_combine(ret, it->first);
00060 }
00061 return ret;
00062 };
00063
00064 bool TargetPreferencesFeatureState::operator==(const FFState& other) const
00065 {
00066 if (!m_distinguishStates) {
00067 return true;
00068 }
00069
00070 if (this == &other) {
00071 return true;
00072 }
00073
00074 const TargetPreferencesFeatureState* otherState =
00075 dynamic_cast<const TargetPreferencesFeatureState*>(&other);
00076 UTIL_THROW_IF2(otherState == NULL, "Wrong state type");
00077
00078 if (m_probabilitiesForLHSLabels.size() != (otherState->m_probabilitiesForLHSLabels).size()) {
00079 return false;
00080 }
00081 std::map<size_t,double>::const_iterator thisIt, otherIt;
00082 for (thisIt=m_probabilitiesForLHSLabels.begin(), otherIt=(otherState->m_probabilitiesForLHSLabels).begin();
00083 thisIt!=m_probabilitiesForLHSLabels.end(); ++thisIt, ++otherIt) {
00084 if (thisIt->first != otherIt->first) {
00085 return false;
00086 }
00087 }
00088 return true;
00089 };
00090
00091
00092 TargetPreferencesFeature::TargetPreferencesFeature(const std::string &line)
00093 : StatefulFeatureFunction(2, line)
00094 , m_featureVariant(0)
00095 , m_distinguishStates(false)
00096 , m_noMismatches(false)
00097 {
00098 VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
00099 ReadParameters();
00100 VERBOSE(1, " Done." << std::endl);
00101 VERBOSE(1, " Feature variant: " << m_featureVariant << "." << std::endl);
00102 }
00103
00104 TargetPreferencesFeature::~TargetPreferencesFeature()
00105 {}
00106
00107 void TargetPreferencesFeature::SetParameter(const std::string& key, const std::string& value)
00108 {
00109 if (key == "label-set-file") {
00110 m_labelSetFile = value;
00111 } else if (key == "unknown-word-labels-file") {
00112 m_unknownLeftHandSideFile = value;
00113 } else if (key == "variant") {
00114 m_featureVariant = Scan<size_t>(value);
00115 } else if (key == "distinguish-states") {
00116 m_distinguishStates = Scan<bool>(value);
00117 } else if (key == "no-mismatches") {
00118 m_noMismatches = Scan<bool>(value);
00119 } else {
00120 StatefulFeatureFunction::SetParameter(key, value);
00121 }
00122 }
00123
00124
00125 void TargetPreferencesFeature::Load(AllOptions::ptr const& opts)
00126 {
00127
00128 LoadLabelSet();
00129 LoadUnknownLeftHandSideFile();
00130 }
00131
00132 void TargetPreferencesFeature::LoadLabelSet()
00133 {
00134 FEATUREVERBOSE(2, "Loading label set from file " << m_labelSetFile << " ...");
00135 InputFileStream inFile(m_labelSetFile);
00136
00137
00138 std::string line;
00139 m_labels.clear();
00140 m_labelsByIndex.clear();
00141 while (getline(inFile, line)) {
00142 std::istringstream tokenizer(line);
00143 std::string label;
00144 size_t index;
00145 try {
00146 tokenizer >> label >> index;
00147 } catch (const std::exception &e) {
00148 UTIL_THROW2(GetScoreProducerDescription()
00149 << ": Error reading label set file " << m_labelSetFile << " .");
00150 }
00151 std::pair< boost::unordered_map<std::string,size_t>::iterator, bool > inserted = m_labels.insert( std::pair<std::string,size_t>(label,index) );
00152 UTIL_THROW_IF2(!inserted.second, GetScoreProducerDescription()
00153 << ": Label set file " << m_labelSetFile << " should contain each label only once.");
00154
00155 if (index >= m_labelsByIndex.size()) {
00156 m_labelsByIndex.resize(index+1);
00157 }
00158 m_labelsByIndex[index] = label;
00159 }
00160
00161 inFile.Close();
00162
00163 std::list<std::string> specialLabels;
00164 specialLabels.push_back("GlueTop");
00165 for (std::list<std::string>::const_iterator iter=specialLabels.begin();
00166 iter!=specialLabels.end(); ++iter) {
00167 boost::unordered_map<std::string,size_t>::iterator found = m_labels.find(*iter);
00168 UTIL_THROW_IF2(found == m_labels.end(), GetScoreProducerDescription()
00169 << ": Label set file " << m_labelSetFile << " should contain an entry for the special label \"" << *iter << "\".");
00170 if (!(found->first).compare("GlueTop")) {
00171 m_GlueTopLabel = found->second;
00172 }
00173 }
00174 FEATUREVERBOSE2(2, " Done." << std::endl);
00175 }
00176
00177
00178 void TargetPreferencesFeature::LoadUnknownLeftHandSideFile()
00179 {
00180 FEATUREVERBOSE(2, "Loading left-hand side labels for unknowns from file " << m_unknownLeftHandSideFile << std::endl);
00181 InputFileStream inFile(m_unknownLeftHandSideFile);
00182
00183
00184 std::string line;
00185 m_unknownLHSProbabilities.clear();
00186 double countsSum = 0.0;
00187 while (getline(inFile, line)) {
00188 istringstream tokenizer(line);
00189 std::string label;
00190 double count;
00191 tokenizer >> label;
00192 tokenizer >> count;
00193 boost::unordered_map<std::string,size_t>::iterator found = m_labels.find( label );
00194 if ( found != m_labels.end() ) {
00195 std::pair< std::map<size_t,double>::iterator, bool > inserted =
00196 m_unknownLHSProbabilities.insert( std::pair<size_t,double>(found->second,count) );
00197 if ( !inserted.second ) {
00198 (inserted.first)->second += count;
00199 }
00200 countsSum += count;
00201 } else {
00202 FEATUREVERBOSE(1, "WARNING: undefined label \"" << label << "\" in file " << m_unknownLeftHandSideFile << std::endl);
00203 }
00204 }
00205
00206 countsSum += (double)m_labels.size();
00207 for (std::map<size_t,double>::iterator iter=m_unknownLHSProbabilities.begin();
00208 iter!=m_unknownLHSProbabilities.end(); ++iter) {
00209 iter->second /= countsSum;
00210 }
00211
00212 IFFEATUREVERBOSE(3) {
00213 for (std::map<size_t,double>::iterator iter=m_unknownLHSProbabilities.begin();
00214 iter!=m_unknownLHSProbabilities.end(); ++iter) {
00215 FEATUREVERBOSE(3, GetScoreProducerDescription() << "::LoadUnknownLeftHandSideFile(): " << iter->first << " " << iter->second << std::endl);
00216 }
00217 }
00218
00219 inFile.Close();
00220 }
00221
00222 FFState* TargetPreferencesFeature::EvaluateWhenApplied(
00223 const ChartHypothesis& hypo,
00224 int featureID,
00225 ScoreComponentCollection* accumulator) const
00226 {
00227 streamsize cerr_precision = std::cerr.precision();
00228 std::cerr.precision(20);
00229
00230
00231 std::vector<float> newScores(m_numScoreComponents,0);
00232
00233
00234
00235 TargetPreferencesFeatureState *state = new TargetPreferencesFeatureState(m_distinguishStates);
00236
00237 size_t nNTs = 1;
00238 double overallTreeProbability = 0.0;
00239 bool isGlueGrammarRule = false;
00240
00241
00242 const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
00243
00244 FEATUREVERBOSE(2, "Phrase: " << currTarPhr << std::endl);
00245
00246 if (const PhraseProperty *property = currTarPhr.GetProperty("TargetPreferences")) {
00247
00248 const TargetPreferencesPhraseProperty *targetPreferencesPhraseProperty = static_cast<const TargetPreferencesPhraseProperty*>(property);
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259 nNTs = targetPreferencesPhraseProperty->GetNumberOfNonTerminals();
00260 double totalCount = targetPreferencesPhraseProperty->GetTotalCount();
00261
00262
00263 const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
00264 currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
00265
00266
00267 std::vector< const TargetPreferencesFeatureState* > prevStatesByNonTerminal(nNTs-1);
00268
00269 if (nNTs > 1) {
00270 size_t nonTerminalNumber = 0;
00271
00272 for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
00273
00274 const Word &word = currTarPhr.GetWord(phrasePos);
00275 if ( word.IsNonTerminal() ) {
00276
00277 size_t nonTermIndex = nonTermIndexMap[phrasePos];
00278 const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
00279 const TargetPreferencesFeatureState* prevState =
00280 static_cast<const TargetPreferencesFeatureState*>(prevHypo->GetFFState(featureID));
00281 prevStatesByNonTerminal[nonTerminalNumber] = prevState;
00282
00283 IFFEATUREVERBOSE(2) {
00284
00285 const std::map<size_t,double> &prevHypoTreeProbabilities =
00286 prevStatesByNonTerminal[nonTerminalNumber]->GetProbabilitiesForLHSLabels();
00287 FEATUREVERBOSE(2, "Previous tree probs:");
00288 for (std::map<size_t,double>::const_iterator iter=prevHypoTreeProbabilities.begin();
00289 iter!=prevHypoTreeProbabilities.end(); ++iter) {
00290 FEATUREVERBOSE2(2, " " << m_labelsByIndex[iter->first] << " " << iter->second);
00291 }
00292 FEATUREVERBOSE2(2, std::endl);
00293 }
00294
00295 ++nonTerminalNumber;
00296 }
00297 }
00298 }
00299
00300
00301
00302 overallTreeProbability = 0.0;
00303
00304 const std::list<TargetPreferencesPhrasePropertyItem> &targetPreferencesItems = targetPreferencesPhraseProperty->GetTargetPreferencesItems();
00305
00306 for (std::list<TargetPreferencesPhrasePropertyItem>::const_iterator targetPreferencesItem = targetPreferencesItems.begin();
00307 targetPreferencesItem != targetPreferencesItems.end(); ++targetPreferencesItem) {
00308
00309 const std::list<size_t> &targetPreferencesRHS = targetPreferencesItem->GetTargetPreferencesRHS();
00310 const std::list< std::pair<size_t,float> > &targetPreferencesLHSList = targetPreferencesItem->GetTargetPreferencesLHSList();
00311
00312 assert(targetPreferencesRHS.size() == nNTs-1);
00313
00314 size_t currentTargetLabelsMismatches = nNTs - 1;
00315 double matchingLabelsProbabilityProduct = 1.0;
00316
00317 size_t nonTerminalNumber=0;
00318 for (std::list<size_t>::const_iterator targetPreferencesRHSIt = targetPreferencesRHS.begin();
00319 targetPreferencesRHSIt != targetPreferencesRHS.end(); ++targetPreferencesRHSIt, ++nonTerminalNumber) {
00320
00321 bool isLabelMatch = false;
00322 double matchingLabelsProbability =
00323 prevStatesByNonTerminal[nonTerminalNumber]->GetProbabilityForLHSLabel(*targetPreferencesRHSIt,
00324 isLabelMatch);
00325 matchingLabelsProbabilityProduct *= matchingLabelsProbability;
00326
00327 if ( isLabelMatch ) {
00328 currentTargetLabelsMismatches -= 1;
00329 }
00330 }
00331
00332 FEATUREVERBOSE(2, "matchingLabelsProbabilityProduct = " << matchingLabelsProbabilityProduct << std::endl);
00333
00334
00335 for (std::list< std::pair<size_t,float> >::const_iterator targetPreferencesLHSIt = targetPreferencesLHSList.begin();
00336 targetPreferencesLHSIt != targetPreferencesLHSList.end(); ++targetPreferencesLHSIt) {
00337
00338 size_t targetPreferenceLHS = targetPreferencesLHSIt->first;
00339
00340 if ( targetPreferenceLHS == m_GlueTopLabel ) {
00341 isGlueGrammarRule = true;
00342 }
00343
00344
00345 double ruleTargetPreferenceCount = targetPreferencesLHSIt->second;
00346 double ruleTargetPreferenceProbability = ruleTargetPreferenceCount / totalCount;
00347
00348 FEATUREVERBOSE(2, " ruleTargetPreferenceProbability = " << ruleTargetPreferenceProbability << std::endl);
00349
00350 double weightedTargetPreferenceRuleProbability = ruleTargetPreferenceProbability * matchingLabelsProbabilityProduct;
00351 if ( weightedTargetPreferenceRuleProbability != 0 ) {
00352 state->AddProbabilityForLHSLabel(targetPreferenceLHS, weightedTargetPreferenceRuleProbability);
00353 }
00354 overallTreeProbability += weightedTargetPreferenceRuleProbability;
00355 }
00356 }
00357
00358 IFFEATUREVERBOSE(2) {
00359 FEATUREVERBOSE(2, "overallTreeProbability = " << overallTreeProbability);
00360 if ( overallTreeProbability > 1.0001 ) {
00361 FEATUREVERBOSE2(2, " -- WARNING: overallTreeProbability > 1");
00362 }
00363 FEATUREVERBOSE2(2, std::endl);
00364 }
00365
00366 if ( overallTreeProbability != 0 ) {
00367 UTIL_THROW_IF2(!boost::math::isnormal(overallTreeProbability), GetScoreProducerDescription()
00368 << ": Oops. Numerical precision issues.");
00369 state->NormalizeProbabilitiesForLHSLabels(overallTreeProbability);
00370 }
00371
00372 } else {
00373
00374
00375 UTIL_THROW_IF2(!currTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
00376 << ": Missing TargetPreferences property. Please check phrase table and glue rules.");
00377
00378
00379 overallTreeProbability = 1.0;
00380
00381 for (std::map<size_t,double>::const_iterator iter=m_unknownLHSProbabilities.begin();
00382 iter!=m_unknownLHSProbabilities.end(); ++iter) {
00383
00384 state->AddProbabilityForLHSLabel(iter->first, iter->second);
00385 }
00386 }
00387
00388 FEATUREVERBOSE(2, "-> OVERALLTREEPROB = " << overallTreeProbability << std::endl);
00389
00390
00391
00392
00393 newScores[0] = (overallTreeProbability == 0 ? 0 : std::log(overallTreeProbability) );
00394 if ( m_noMismatches && (overallTreeProbability == 0) && !isGlueGrammarRule ) {
00395 newScores[0] = -std::numeric_limits<float>::infinity();
00396 }
00397
00398
00399 newScores[1] = (overallTreeProbability == 0 ? 1 : 0 );
00400
00401 accumulator->PlusEquals(this, newScores);
00402
00403 std::cerr.precision(cerr_precision);
00404 return state;
00405 }
00406
00407 }
00408