00001 #include "SoftMatchingFeature.h"
00002 #include "moses/AlignmentInfo.h"
00003 #include "moses/TargetPhrase.h"
00004 #include "moses/ChartHypothesis.h"
00005 #include "moses/StaticData.h"
00006 #include "moses/InputFileStream.h"
00007 #include "moses/FactorCollection.h"
00008 #include "moses/Util.h"
00009
00010 namespace Moses
00011 {
00012
00013 SoftMatchingFeature::SoftMatchingFeature(const std::string &line)
00014 : StatelessFeatureFunction(0, line)
00015 , m_softMatches(moses_MaxNumNonterminals)
00016 , m_scoreIdentical(true)
00017 {
00018 ReadParameters();
00019 }
00020
00021 void SoftMatchingFeature::SetParameter(const std::string& key, const std::string& value)
00022 {
00023 std::cerr << "setting: " << this->GetScoreProducerDescription() << " - " << key << "\n";
00024 if (key == "tuneable") {
00025 m_tuneable = Scan<bool>(value);
00026 } else if (key == "filterable") {
00027 } else if (key == "path") {
00028 const std::string filePath = value;
00029 Load(filePath);
00030 } else if (key == "score-identical") {
00031 m_scoreIdentical = Scan<bool>(value);
00032 } else {
00033 UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
00034 }
00035 }
00036
00037
00038 bool SoftMatchingFeature::Load(const std::string& filePath)
00039 {
00040
00041 StaticData &SD = StaticData::InstanceNonConst();
00042
00043 InputFileStream inStream(filePath);
00044 std::string line;
00045 while(getline(inStream, line)) {
00046 std::vector<std::string> tokens = Tokenize(line);
00047 UTIL_THROW_IF2(tokens.size() != 2, "Error: wrong format of SoftMatching file: must have two nonterminals per line");
00048
00049
00050 if (tokens[0] == tokens[1]) {
00051 continue;
00052 }
00053
00054 Word LHS, RHS;
00055 LHS.CreateFromString(Output, SD.options()->output.factor_order, tokens[0], true);
00056 RHS.CreateFromString(Output, SD.options()->output.factor_order, tokens[1], true);
00057
00058 m_softMatches[RHS[0]->GetId()].push_back(LHS);
00059 GetOrSetFeatureName(RHS, LHS);
00060 }
00061
00062 SD.SetSoftMatches(m_softMatches);
00063
00064 return true;
00065 }
00066
00067 void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
00068 ScoreComponentCollection* accumulator) const
00069 {
00070
00071 const TargetPhrase& target = hypo.GetCurrTargetPhrase();
00072
00073 const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = target.GetAlignNonTerm().GetNonTermIndexMap();
00074
00075
00076 for (size_t pos = 0; pos < target.GetSize(); ++pos) {
00077 const Word& word = target.GetWord(pos);
00078
00079
00080 if (word.IsNonTerminal()) {
00081 size_t nonTermInd = nonTermIndexMap[pos];
00082
00083 const ChartHypothesis* prevHypo = hypo.GetPrevHypo(nonTermInd);
00084 const Word& prevLHS = prevHypo->GetTargetLHS();
00085
00086 if ( (word != prevLHS) || m_scoreIdentical ) {
00087 const std::string &name = GetOrSetFeatureName(word, prevLHS);
00088 accumulator->PlusEquals(this,name,1);
00089 }
00090 }
00091 }
00092 }
00093
00094
00095 void SoftMatchingFeature::ResizeCache() const
00096 {
00097 FactorCollection& fc = FactorCollection::Instance();
00098 size_t numNonTerminals = fc.GetNumNonTerminals();
00099
00100 m_nameCache.resize(numNonTerminals);
00101 for (size_t i = 0; i < numNonTerminals; i++) {
00102 m_nameCache[i].resize(numNonTerminals);
00103 }
00104 }
00105
00106
00107 const std::string& SoftMatchingFeature::GetOrSetFeatureName(const Word& RHS, const Word& LHS) const
00108 {
00109 try {
00110 #ifdef WITH_THREADS //try read-only lock
00111 boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
00112 #endif
00113 const std::string &name = m_nameCache.at(RHS[0]->GetId()).at(LHS[0]->GetId());
00114 if (!name.empty()) {
00115 return name;
00116 }
00117 } catch (const std::out_of_range& oor) {
00118 #ifdef WITH_THREADS //need to resize cache; write lock
00119 boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
00120 #endif
00121 ResizeCache();
00122 }
00123 #ifdef WITH_THREADS //need to update cache; write lock
00124 boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
00125 #endif
00126 std::string &name = m_nameCache[RHS[0]->GetId()][LHS[0]->GetId()];
00127 const std::vector<FactorType> & oFactors
00128 = StaticData::Instance().options()->output.factor_order;
00129 std::string LHS_string = LHS.GetString(oFactors, false);
00130 std::string RHS_string = RHS.GetString(oFactors, false);
00131 name = LHS_string + "->" + RHS_string;
00132 return name;
00133 }
00134
00135 }
00136