00001 #include <sstream>
00002 #include "EditOps.h"
00003 #include "moses/Phrase.h"
00004 #include "moses/TargetPhrase.h"
00005 #include "moses/Hypothesis.h"
00006 #include "moses/ChartHypothesis.h"
00007 #include "moses/ScoreComponentCollection.h"
00008 #include "moses/TranslationOption.h"
00009 #include "util/string_piece_hash.hh"
00010 #include "util/exception.hh"
00011
00012 #include <functional>
00013
00014 #include <boost/foreach.hpp>
00015 #include <boost/algorithm/string.hpp>
00016
00017 #include "Diffs.h"
00018
00019 namespace Moses
00020 {
00021
00022 using namespace std;
00023
00024 std::string ParseScores(const std::string &line, const std::string& defaultScores)
00025 {
00026 std::vector<std::string> toks = Tokenize(line);
00027 UTIL_THROW_IF2(toks.empty(), "Empty line");
00028
00029 for (size_t i = 1; i < toks.size(); ++i) {
00030 std::vector<std::string> args = TokenizeFirstOnly(toks[i], "=");
00031 UTIL_THROW_IF2(args.size() != 2,
00032 "Incorrect format for feature function arg: " << toks[i]);
00033
00034 if (args[0] == "scores") {
00035 return args[1];
00036 }
00037 }
00038 return defaultScores;
00039 }
00040
00041 EditOps::EditOps(const std::string &line)
00042 : StatelessFeatureFunction(ParseScores(line, "dis").size(), line)
00043 , m_factorType(0), m_chars(false), m_scores(ParseScores(line, "dis"))
00044 {
00045 std::cerr << "Initializing EditOps feature.." << std::endl;
00046 ReadParameters();
00047 }
00048
00049 void EditOps::SetParameter(const std::string& key, const std::string& value)
00050 {
00051 if (key == "factor") {
00052 m_factorType = Scan<FactorType>(value);
00053 } else if (key == "chars") {
00054 m_chars = Scan<bool>(value);
00055 } else if (key == "scores") {
00056 m_scores = value;
00057 } else {
00058 StatelessFeatureFunction::SetParameter(key, value);
00059 }
00060 }
00061
00062 void EditOps::Load()
00063 { }
00064
00065 void EditOps::EvaluateInIsolation(const Phrase &source
00066 , const TargetPhrase &target
00067 , ScoreComponentCollection &scoreBreakdown
00068 , ScoreComponentCollection &estimatedFutureScore) const
00069 {
00070 ComputeFeatures(source, target, &scoreBreakdown);
00071 }
00072
00073 void EditOps::ComputeFeatures(
00074 const Phrase &source,
00075 const TargetPhrase& target,
00076 ScoreComponentCollection* accumulator) const
00077 {
00078 std::vector<float> ops(GetNumScoreComponents(), 0);
00079
00080 if(m_chars) {
00081 std::vector<FactorType> factors;
00082 factors.push_back(m_factorType);
00083
00084 std::string sourceStr = source.GetStringRep(factors);
00085 std::string targetStr = target.GetStringRep(factors);
00086
00087 AddStats(sourceStr, targetStr, m_scores, ops);
00088 } else {
00089 std::vector<std::string> sourceTokens;
00090
00091 for(size_t i = 0; i < source.GetSize(); ++i) {
00092 if(!source.GetWord(i).IsNonTerminal())
00093 sourceTokens.push_back(source.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
00094
00095 }
00096
00097
00098 std::vector<std::string> targetTokens;
00099
00100 for(size_t i = 0; i < target.GetSize(); ++i) {
00101 if(!target.GetWord(i).IsNonTerminal())
00102 targetTokens.push_back(target.GetWord(i).GetFactor(m_factorType)->GetString().as_string());
00103
00104 }
00105
00106
00107 AddStats(sourceTokens, targetTokens, m_scores, ops);
00108 }
00109
00110 accumulator->PlusEquals(this, ops);
00111 }
00112
00113 bool EditOps::IsUseable(const FactorMask &mask) const
00114 {
00115 bool ret = mask[m_factorType];
00116 return ret;
00117 }
00118
00119 }