00001 #include <sstream>
00002 #include "SourceWordDeletionFeature.h"
00003 #include "moses/Phrase.h"
00004 #include "moses/TargetPhrase.h"
00005 #include "moses/Hypothesis.h"
00006 #include "moses/ChartHypothesis.h"
00007 #include "moses/ScoreComponentCollection.h"
00008 #include "moses/TranslationOption.h"
00009 #include "moses/Util.h"
00010
00011 #include "util/string_piece_hash.hh"
00012 #include "util/exception.hh"
00013
00014 namespace Moses
00015 {
00016
00017 using namespace std;
00018
00019 SourceWordDeletionFeature::SourceWordDeletionFeature(const std::string &line)
00020 :StatelessFeatureFunction(0, line),
00021 m_unrestricted(true)
00022 {
00023 VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
00024 ReadParameters();
00025 VERBOSE(1, " Done." << std::endl);
00026 }
00027
00028 void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::string& value)
00029 {
00030 if (key == "factor") {
00031 m_factorType = Scan<FactorType>(value);
00032 } else if (key == "path") {
00033 m_filename = value;
00034 } else {
00035 StatelessFeatureFunction::SetParameter(key, value);
00036 }
00037 }
00038
00039 void SourceWordDeletionFeature::Load(AllOptions::ptr const& opts)
00040 {
00041 m_options = opts;
00042 if (m_filename.empty())
00043 return;
00044
00045 FEATUREVERBOSE(1, "Loading source word deletion word list from " << m_filename << std::endl);
00046 ifstream inFile(m_filename.c_str());
00047 UTIL_THROW_IF2(!inFile, "Can't open file " << m_filename);
00048
00049 std::string line;
00050 while (getline(inFile, line)) {
00051 m_vocab.insert(line);
00052 }
00053
00054 inFile.close();
00055
00056 m_unrestricted = false;
00057 }
00058
00059 bool SourceWordDeletionFeature::IsUseable(const FactorMask &mask) const
00060 {
00061 bool ret = mask[m_factorType];
00062 return ret;
00063 }
00064
00065 void SourceWordDeletionFeature::EvaluateInIsolation(const Phrase &source
00066 , const TargetPhrase &targetPhrase
00067 , ScoreComponentCollection &scoreBreakdown
00068 , ScoreComponentCollection &estimatedScores) const
00069 {
00070 const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
00071 ComputeFeatures(source, targetPhrase, &scoreBreakdown, alignmentInfo);
00072 }
00073
00074 void SourceWordDeletionFeature::ComputeFeatures(const Phrase &source,
00075 const TargetPhrase& targetPhrase,
00076 ScoreComponentCollection* accumulator,
00077 const AlignmentInfo &alignmentInfo) const
00078 {
00079
00080 size_t targetLength = targetPhrase.GetSize();
00081 size_t sourceLength = source.GetSize();
00082 if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return;
00083
00084
00085 std::vector<bool> aligned(sourceLength, false);
00086 for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++)
00087 aligned[ alignmentPoint->first ] = true;
00088
00089
00090 for(size_t i=0; i<sourceLength; i++) {
00091 if (!aligned[i]) {
00092 const Word &w = source.GetWord(i);
00093 if (!w.IsNonTerminal()) {
00094 const StringPiece word = w.GetFactor(m_factorType)->GetString();
00095 if (word != "<s>" && word != "</s>") {
00096 if (!m_unrestricted && FindStringPiece(m_vocab, word ) == m_vocab.end()) {
00097 accumulator->PlusEquals(this, StringPiece("OTHER"),1);
00098 } else {
00099 accumulator->PlusEquals(this,word,1);
00100 }
00101 }
00102 }
00103 }
00104 }
00105 }
00106
00107 }