00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <algorithm>
00022 #include <cstdlib>
00023 #include "util/exception.hh"
00024 #include "util/tokenize_piece.hh"
00025
00026 #include "TargetPhrase.h"
00027 #include "GenerationDictionary.h"
00028 #include "LM/Base.h"
00029 #include "StaticData.h"
00030 #include "ScoreComponentCollection.h"
00031 #include "Util.h"
00032 #include "AlignmentInfoCollection.h"
00033 #include "InputPath.h"
00034 #include "TranslationTask.h"
00035 #include "moses/TranslationModel/PhraseDictionary.h"
00036 #include <boost/foreach.hpp>
00037
00038 using namespace std;
00039
00040 namespace Moses
00041 {
00042 TargetPhrase::TargetPhrase( std::string out_string, const PhraseDictionary *pt)
00043 :Phrase(0)
00044 , m_futureScore(0.0)
00045 , m_estimatedScore(0.0)
00046 , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00047 , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00048 , m_lhsTarget(NULL)
00049 , m_ruleSource(NULL)
00050 , m_container(pt)
00051 {
00052
00053 const StaticData &staticData = StaticData::Instance();
00054
00055 CreateFromString(Output, staticData.options()->input.factor_order, out_string,
00056 NULL);
00057 }
00058
00059 TargetPhrase::TargetPhrase(ttasksptr& ttask, std::string out_string, const PhraseDictionary *pt)
00060 :Phrase(0)
00061 , m_futureScore(0.0)
00062 , m_estimatedScore(0.0)
00063 , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00064 , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00065 , m_lhsTarget(NULL)
00066 , m_ruleSource(NULL)
00067 , m_container(pt)
00068 {
00069 if (ttask) m_scope = ttask->GetScope();
00070
00071
00072 CreateFromString(Output, ttask->options()->input.factor_order, out_string,
00073 NULL);
00074 }
00075
00076 TargetPhrase::TargetPhrase(ttasksptr& ttask, const PhraseDictionary *pt)
00077 : Phrase()
00078 , m_futureScore(0.0)
00079 , m_estimatedScore(0.0)
00080 , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00081 , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00082 , m_lhsTarget(NULL)
00083 , m_ruleSource(NULL)
00084 , m_container(pt)
00085 {
00086 if (ttask) m_scope = ttask->GetScope();
00087 }
00088
00089 TargetPhrase::TargetPhrase(ttasksptr& ttask, const Phrase &phrase, const PhraseDictionary *pt)
00090 : Phrase(phrase)
00091 , m_futureScore(0.0)
00092 , m_estimatedScore(0.0)
00093 , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00094 , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00095 , m_lhsTarget(NULL)
00096 , m_ruleSource(NULL)
00097 , m_container(pt)
00098 {
00099 if (ttask) m_scope = ttask->GetScope();
00100 }
00101
00102 TargetPhrase::TargetPhrase(const PhraseDictionary *pt)
00103 :Phrase()
00104 , m_futureScore(0.0)
00105 , m_estimatedScore(0.0)
00106 , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00107 , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00108 , m_lhsTarget(NULL)
00109 , m_ruleSource(NULL)
00110 , m_container(pt)
00111 {
00112 }
00113
00114 TargetPhrase::TargetPhrase(const Phrase &phrase, const PhraseDictionary *pt)
00115 : Phrase(phrase)
00116 , m_futureScore(0.0)
00117 , m_estimatedScore(0.0)
00118 , m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00119 , m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
00120 , m_lhsTarget(NULL)
00121 , m_ruleSource(NULL)
00122 , m_container(pt)
00123 {
00124 }
00125
00126 TargetPhrase::TargetPhrase(const TargetPhrase ©)
00127 : Phrase(copy)
00128 , m_cached_coord(copy.m_cached_coord)
00129 , m_cached_scores(copy.m_cached_scores)
00130 , m_scope(copy.m_scope)
00131 , m_futureScore(copy.m_futureScore)
00132 , m_estimatedScore(copy.m_estimatedScore)
00133 , m_scoreBreakdown(copy.m_scoreBreakdown)
00134 , m_alignTerm(copy.m_alignTerm)
00135 , m_alignNonTerm(copy.m_alignNonTerm)
00136 , m_properties(copy.m_properties)
00137 , m_container(copy.m_container)
00138 {
00139 if (copy.m_lhsTarget) {
00140 m_lhsTarget = new Word(*copy.m_lhsTarget);
00141 } else {
00142 m_lhsTarget = NULL;
00143 }
00144
00145 if (copy.m_ruleSource) {
00146 m_ruleSource = new Phrase(*copy.m_ruleSource);
00147 } else {
00148 m_ruleSource = NULL;
00149 }
00150 }
00151
00152 TargetPhrase::~TargetPhrase()
00153 {
00154
00155
00156 delete m_lhsTarget;
00157 delete m_ruleSource;
00158 }
00159
#ifdef HAVE_PROTOBUF
/**
 * Append this phrase's target side to a protobuf rule.
 *
 * The literal "[X,1]" is added first, followed by the string of factor 0 of
 * every word in order.
 *
 * @param pb rule message that receives the target words
 */
void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const
{
  pb->add_trg_words("[X,1]");

  size_t idx = 0;
  while (idx < GetSize()) {
    pb->add_trg_words(GetWord(idx)[0]->GetString());
    ++idx;
  }
}
#endif
00168
00169 bool TargetPhrase::HasScope() const
00170 {
00171 return !m_scope.expired();
00172 }
00173
00174 SPTR<ContextScope> TargetPhrase::GetScope() const
00175 {
00176 return m_scope.lock();
00177 }
00178
00179 void TargetPhrase::EvaluateInIsolation(const Phrase &source)
00180 {
00181 const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
00182 EvaluateInIsolation(source, ffs);
00183 }
00184
00185 void TargetPhrase::EvaluateInIsolation(const Phrase &source, const std::vector<FeatureFunction*> &ffs)
00186 {
00187 if (ffs.size()) {
00188 const StaticData &staticData = StaticData::Instance();
00189 ScoreComponentCollection estimatedScores;
00190 for (size_t i = 0; i < ffs.size(); ++i) {
00191 const FeatureFunction &ff = *ffs[i];
00192 if (! staticData.IsFeatureFunctionIgnored( ff )) {
00193 ff.EvaluateInIsolation(source, *this, m_scoreBreakdown, estimatedScores);
00194 }
00195 }
00196
00197 float weightedScore = m_scoreBreakdown.GetWeightedScore();
00198 m_estimatedScore += estimatedScores.GetWeightedScore();
00199 m_futureScore = weightedScore + m_estimatedScore;
00200 }
00201 }
00202
00203 void TargetPhrase::EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath)
00204 {
00205 const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
00206 const StaticData &staticData = StaticData::Instance();
00207 ScoreComponentCollection futureScoreBreakdown;
00208 for (size_t i = 0; i < ffs.size(); ++i) {
00209 const FeatureFunction &ff = *ffs[i];
00210 if (! staticData.IsFeatureFunctionIgnored( ff )) {
00211 ff.EvaluateWithSourceContext(input, inputPath, *this, NULL, m_scoreBreakdown, &futureScoreBreakdown);
00212 }
00213 }
00214 float weightedScore = m_scoreBreakdown.GetWeightedScore();
00215 m_estimatedScore += futureScoreBreakdown.GetWeightedScore();
00216 m_futureScore = weightedScore + m_estimatedScore;
00217 }
00218
00219 void TargetPhrase::UpdateScore(ScoreComponentCollection* futureScoreBreakdown)
00220 {
00221 float weightedScore = m_scoreBreakdown.GetWeightedScore();
00222 if(futureScoreBreakdown)
00223 m_estimatedScore += futureScoreBreakdown->GetWeightedScore();
00224 m_futureScore = weightedScore + m_estimatedScore;
00225 }
00226
00227 void TargetPhrase::SetXMLScore(float score)
00228 {
00229 const FeatureFunction* prod = PhraseDictionary::GetColl()[0];
00230 size_t numScores = prod->GetNumScoreComponents();
00231 vector <float> scoreVector(numScores,score/numScores);
00232
00233 m_scoreBreakdown.Assign(prod, scoreVector);
00234 }
00235
00236 void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
00237 {
00238 AlignmentInfo::CollType alignTerm, alignNonTerm;
00239 for (util::TokenIter<util::AnyCharacter, true> token(alignString, util::AnyCharacter(" \t")); token; ++token) {
00240 util::TokenIter<util::SingleCharacter, false> dash(*token, util::SingleCharacter('-'));
00241
00242 char *endptr;
00243 size_t sourcePos = strtoul(dash->data(), &endptr, 10);
00244 UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
00245 ++dash;
00246 size_t targetPos = strtoul(dash->data(), &endptr, 10);
00247 UTIL_THROW_IF(endptr != dash->data() + dash->size(), util::ErrnoException, "Error parsing alignment" << *dash);
00248 UTIL_THROW_IF2(++dash, "Extra gunk in alignment " << *token);
00249
00250 if (GetWord(targetPos).IsNonTerminal()) {
00251 alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
00252 } else {
00253 alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
00254 }
00255 }
00256 SetAlignTerm(alignTerm);
00257 SetAlignNonTerm(alignNonTerm);
00258
00259 }
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274 void TargetPhrase::SetSparseScore(const FeatureFunction* translationScoreProducer, const StringPiece &sparseString)
00275 {
00276 m_scoreBreakdown.Assign(translationScoreProducer, sparseString.as_string());
00277 }
00278
00279 boost::shared_ptr<Scores>
00280 mergescores(boost::shared_ptr<Scores> const& a,
00281 boost::shared_ptr<Scores> const& b)
00282 {
00283 boost::shared_ptr<Scores> ret;
00284 if (!a) return b ? b : ret;
00285 if (!b) return a;
00286 if (a->size() != b->size()) return ret;
00287 ret.reset(new Scores(*a));
00288 for (size_t i = 0; i < a->size(); ++i) {
00289 if ((*a)[i] == 0) (*a)[i] = (*b)[i];
00290 else if ((*b)[i]) {
00291 UTIL_THROW_IF2((*a)[i] != (*b)[i], "can't merge feature vectors");
00292 }
00293 }
00294 return ret;
00295 }
00296
00297 void
00298 TargetPhrase::
00299 Merge(const TargetPhrase ©, const std::vector<FactorType>& factorVec)
00300 {
00301 Phrase::MergeFactors(copy, factorVec);
00302 m_scoreBreakdown.Merge(copy.GetScoreBreakdown());
00303 m_estimatedScore += copy.m_estimatedScore;
00304 m_futureScore += copy.m_futureScore;
00305 typedef ScoreCache_t::iterator iter;
00306 typedef ScoreCache_t::value_type item;
00307 BOOST_FOREACH(item const& s, copy.m_cached_scores) {
00308 pair<iter,bool> foo = m_cached_scores.insert(s);
00309 if (foo.second == false)
00310 foo.first->second = mergescores(foo.first->second, s.second);
00311 }
00312 }
00313
00314 TargetPhrase::ScoreCache_t const&
00315 TargetPhrase::
00316 GetExtraScores() const
00317 {
00318 return m_cached_scores;
00319 }
00320
00321 Scores const*
00322 TargetPhrase::
00323 GetExtraScores(FeatureFunction const* ff) const
00324 {
00325 ScoreCache_t::const_iterator m = m_cached_scores.find(ff);
00326 return m != m_cached_scores.end() ? m->second.get() : NULL;
00327 }
00328
00329 void
00330 TargetPhrase::
00331 SetExtraScores(FeatureFunction const* ff,
00332 boost::shared_ptr<Scores> const& s)
00333 {
00334 m_cached_scores[ff] = s;
00335 }
00336
00337 vector<SPTR<vector<float> > > const*
00338 TargetPhrase::
00339 GetCoordList(size_t const spaceID) const
00340 {
00341 if(!m_cached_coord) {
00342 return NULL;
00343 }
00344 CoordCache_t::const_iterator m = m_cached_coord->find(spaceID);
00345 if(m == m_cached_coord->end()) {
00346 return NULL;
00347 }
00348 return &m->second;
00349 }
00350
00351 void
00352 TargetPhrase::
00353 PushCoord(size_t const spaceID,
00354 SPTR<vector<float> > const coord)
00355 {
00356 if (!m_cached_coord) {
00357 m_cached_coord.reset(new CoordCache_t);
00358 }
00359 vector<SPTR<vector<float> > >& coordList = (*m_cached_coord)[spaceID];
00360 coordList.push_back(coord);
00361 }
00362
00363 void TargetPhrase::SetProperties(const StringPiece &str)
00364 {
00365 if (str.size() == 0) {
00366 return;
00367 }
00368
00369 vector<string> toks;
00370 TokenizeMultiCharSeparator(toks, str.as_string(), "{{");
00371 for (size_t i = 0; i < toks.size(); ++i) {
00372 string &tok = toks[i];
00373 if (tok.empty()) {
00374 continue;
00375 }
00376 size_t endPos = tok.rfind("}");
00377
00378 tok = tok.substr(0, endPos - 1);
00379
00380 vector<string> keyValue = TokenizeFirstOnly(tok, " ");
00381 UTIL_THROW_IF2(keyValue.size() != 2,
00382 "Incorrect format of property: " << str);
00383 SetProperty(keyValue[0], keyValue[1]);
00384 }
00385 }
00386
00387 void TargetPhrase::SetProperty(const std::string &key, const std::string &value)
00388 {
00389 const StaticData &staticData = StaticData::Instance();
00390 const PhrasePropertyFactory& phrasePropertyFactory = staticData.GetPhrasePropertyFactory();
00391 m_properties[key] = phrasePropertyFactory.ProduceProperty(key,value);
00392 }
00393
00394 const PhraseProperty *TargetPhrase::GetProperty(const std::string &key) const
00395 {
00396 std::map<std::string, boost::shared_ptr<PhraseProperty> >::const_iterator iter;
00397 iter = m_properties.find(key);
00398 if (iter != m_properties.end()) {
00399 const boost::shared_ptr<PhraseProperty> &pp = iter->second;
00400 return pp.get();
00401 }
00402 return NULL;
00403 }
00404
00405 void TargetPhrase::SetRuleSource(const Phrase &ruleSource) const
00406 {
00407 if (m_ruleSource == NULL) {
00408 m_ruleSource = new Phrase(ruleSource);
00409 }
00410 }
00411
00412 void swap(TargetPhrase &first, TargetPhrase &second)
00413 {
00414 first.SwapWords(second);
00415 std::swap(first.m_futureScore, second.m_futureScore);
00416 std::swap(first.m_estimatedScore, second.m_estimatedScore);
00417 swap(first.m_scoreBreakdown, second.m_scoreBreakdown);
00418 std::swap(first.m_alignTerm, second.m_alignTerm);
00419 std::swap(first.m_alignNonTerm, second.m_alignNonTerm);
00420 std::swap(first.m_lhsTarget, second.m_lhsTarget);
00421 std::swap(first.m_cached_scores, second.m_cached_scores);
00422 }
00423
// Macro invocation. Presumably expands to the definition of a string
// conversion member for TargetPhrase; confirm against the TO_STRING_BODY
// definition, which is not visible in this file.
TO_STRING_BODY(TargetPhrase);
00425
00426 std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
00427 {
00428 if (tp.m_lhsTarget) {
00429 os << *tp.m_lhsTarget<< " -> ";
00430 }
00431
00432 os << static_cast<const Phrase&>(tp) << ":" << flush;
00433 os << tp.GetAlignNonTerm() << flush;
00434 os << ": term=" << tp.GetAlignTerm() << flush;
00435 os << ": nonterm=" << tp.GetAlignNonTerm() << flush;
00436 os << ": c=" << tp.m_futureScore << flush;
00437 os << " " << tp.m_scoreBreakdown << flush;
00438
00439 const Phrase *sourcePhrase = tp.GetRuleSource();
00440 if (sourcePhrase) {
00441 os << " sourcePhrase=" << *sourcePhrase << flush;
00442 }
00443
00444 if (tp.m_properties.size()) {
00445 os << " properties: " << flush;
00446
00447 TargetPhrase::Properties::const_iterator iter;
00448 for (iter = tp.m_properties.begin(); iter != tp.m_properties.end(); ++iter) {
00449 const string &key = iter->first;
00450 const PhraseProperty *prop = iter->second.get();
00451 assert(prop);
00452
00453 os << key << "=" << *prop << " ";
00454 }
00455 }
00456
00457 return os;
00458 }
00459
00460
00461
00462 }
00463