00001 #pragma once
00002
00003 #include "moses/FF/UnknownWordPenaltyProducer.h"
00004 #include "moses/StaticData.h"
00005
00006 namespace Moses
00007 {
00008 namespace Syntax
00009 {
00010 namespace S2T
00011 {
00012
00013 template<typename RuleTrie>
00014 template<typename InputIterator>
00015 boost::shared_ptr<RuleTrie> OovHandler<RuleTrie>::SynthesizeRuleTrie(
00016 InputIterator first, InputIterator last)
00017 {
00018 const UnknownLHSList &lhsList = StaticData::Instance().GetUnknownLHS();
00019
00020 boost::shared_ptr<RuleTrie> trie(new RuleTrie(&m_ruleTableFF));
00021
00022 while (first != last) {
00023 const Word &oov = *first++;
00024 if (ShouldDrop(oov)) {
00025 continue;
00026 }
00027 boost::scoped_ptr<Phrase> srcPhrase(SynthesizeSourcePhrase(oov));
00028 for (UnknownLHSList::const_iterator p = lhsList.begin();
00029 p != lhsList.end(); ++p) {
00030 const std::string &targetLhsStr = p->first;
00031 float prob = p->second;
00032
00033 Word *tgtLHS = SynthesizeTargetLhs(targetLhsStr);
00034 TargetPhrase *tp = SynthesizeTargetPhrase(oov, *srcPhrase, *tgtLHS, prob);
00035 TargetPhraseCollection::shared_ptr tpc;
00036 tpc= GetOrCreateTargetPhraseCollection(*trie, *srcPhrase, *tp, NULL);
00037
00038 tpc->Add(tp);
00039 }
00040 }
00041
00042 return trie;
00043 }
00044
00045 template<typename RuleTrie>
00046 Phrase *OovHandler<RuleTrie>::SynthesizeSourcePhrase(const Word &sourceWord)
00047 {
00048 Phrase *phrase = new Phrase(1);
00049 phrase->AddWord() = sourceWord;
00050 phrase->GetWord(0).SetIsOOV(true);
00051 return phrase;
00052 }
00053
00054 template<typename RuleTrie>
00055 Word *
00056 OovHandler<RuleTrie>::SynthesizeTargetLhs(const std::string &lhsStr)
00057 {
00058 Word *targetLhs = new Word(true);
00059 targetLhs->CreateFromString(Output,
00060 StaticData::Instance().options()->output.factor_order,
00061 lhsStr, true);
00062 UTIL_THROW_IF2(targetLhs->GetFactor(0) == NULL, "Null factor for target LHS");
00063 return targetLhs;
00064 }
00065
00066 template<typename RuleTrie>
00067 TargetPhrase *OovHandler<RuleTrie>::SynthesizeTargetPhrase(
00068 const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob)
00069 {
00070 const StaticData &SD = StaticData::Instance();
00071
00072 const UnknownWordPenaltyProducer &unknownWordPenaltyProducer =
00073 UnknownWordPenaltyProducer::Instance();
00074
00075 TargetPhrase *targetPhrase = new TargetPhrase();
00076 Word &targetWord = targetPhrase->AddWord();
00077 targetWord.CreateUnknownWord(oov);
00078
00079
00080 float score = FloorScore(TransformScore(prob));
00081
00082 targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, score);
00083 targetPhrase->EvaluateInIsolation(srcPhrase);
00084 targetPhrase->SetTargetLHS(&targetLhs);
00085 targetPhrase->SetAlignmentInfo("0-0");
00086 if (!SD.options()->output.detailed_tree_transrep_filepath.empty() ||
00087 SD.GetTreeStructure() != NULL) {
00088 std::string value = "[ " + targetLhs[0]->GetString().as_string() + " " +
00089 oov[0]->GetString().as_string() + " ]";
00090 targetPhrase->SetProperty("Tree", value);
00091 }
00092
00093 return targetPhrase;
00094 }
00095
00096 template<typename RuleTrie>
00097 bool OovHandler<RuleTrie>::ShouldDrop(const Word &oov)
00098 {
00099 if (!StaticData::Instance().options()->unk.drop) {
00100 return false;
00101 }
00102 const Factor *f = oov[0];
00103 const StringPiece s = f->GetString();
00104 return s.find_first_of("0123456789") != std::string::npos;
00105 }
00106
00107 }
00108 }
00109 }