00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #pragma once
00012
00013 #include <bitset>
00014 #include <string>
00015 #include <vector>
00016 #include "StatefulFeatureFunction.h"
00017 #include "FFState.h"
00018 #include "moses/Factor.h"
00019 #include "phrase-extract/PhraseOrientation.h"
00020 #include "moses/PP/OrientationPhraseProperty.h"
00021 #include <boost/unordered_set.hpp>
00022
00023
00024 namespace Moses
00025 {
00026
00027 class PhraseOrientationFeatureState : public FFState
00028 {
00029 public:
00030
00031 friend class PhraseOrientationFeature;
00032
00033 PhraseOrientationFeatureState(bool distinguishStates, bool useSparseWord, bool useSparseNT)
00034 : m_leftBoundaryNonTerminalL2RScores(3,0)
00035 , m_rightBoundaryNonTerminalR2LScores(3,0)
00036 , m_leftBoundaryNonTerminalL2RPossibleFutureOrientations(0x7)
00037 , m_rightBoundaryNonTerminalR2LPossibleFutureOrientations(0x7)
00038 , m_leftBoundaryRecursionGuard(false)
00039 , m_rightBoundaryRecursionGuard(false)
00040 , m_leftBoundaryIsSet(false)
00041 , m_rightBoundaryIsSet(false)
00042 , m_distinguishStates(distinguishStates)
00043 , m_useSparseWord(useSparseWord)
00044 , m_useSparseNT(useSparseNT)
00045 {}
00046
00047 void SetLeftBoundaryL2R(const std::vector<float> &scores,
00048 size_t heuristicScoreIndex,
00049 std::bitset<3> &possibleFutureOrientations,
00050 const Factor* leftBoundaryNonTerminalSymbol,
00051 const PhraseOrientationFeatureState* prevState) {
00052 for (size_t i=0; i<3; ++i) {
00053 m_leftBoundaryNonTerminalL2RScores[i] = scores[i];
00054 m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i] = possibleFutureOrientations[i];
00055 }
00056 m_leftBoundaryNonTerminalL2RHeuristicScoreIndex = heuristicScoreIndex;
00057 m_leftBoundaryNonTerminalSymbol = leftBoundaryNonTerminalSymbol;
00058 m_leftBoundaryPrevState = prevState;
00059 m_leftBoundaryIsSet = true;
00060 }
00061
00062 void SetRightBoundaryR2L(const std::vector<float> &scores,
00063 size_t heuristicScoreIndex,
00064 std::bitset<3> &possibleFutureOrientations,
00065 const Factor* rightBoundaryNonTerminalSymbol,
00066 const PhraseOrientationFeatureState* prevState) {
00067 for (size_t i=0; i<3; ++i) {
00068 m_rightBoundaryNonTerminalR2LScores[i] = scores[i];
00069 m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i] = possibleFutureOrientations[i];
00070 }
00071 m_rightBoundaryNonTerminalR2LHeuristicScoreIndex = heuristicScoreIndex;
00072 m_rightBoundaryNonTerminalSymbol = rightBoundaryNonTerminalSymbol;
00073 m_rightBoundaryPrevState = prevState;
00074 m_rightBoundaryIsSet = true;
00075 }
00076
00077 float GetLeftBoundaryL2RScoreMono() const {
00078 return m_leftBoundaryNonTerminalL2RScores[0];
00079 }
00080
00081 float GetLeftBoundaryL2RScoreSwap() const {
00082 return m_leftBoundaryNonTerminalL2RScores[1];
00083 }
00084
00085 float GetLeftBoundaryL2RScoreDiscontinuous() const {
00086 return m_leftBoundaryNonTerminalL2RScores[2];
00087 }
00088
00089
00090 float GetRightBoundaryR2LScoreMono() const {
00091 return m_rightBoundaryNonTerminalR2LScores[0];
00092 }
00093
00094 float GetRightBoundaryR2LScoreSwap() const {
00095 return m_rightBoundaryNonTerminalR2LScores[1];
00096 }
00097
00098 float GetRightBoundaryR2LScoreDiscontinuous() const {
00099 return m_rightBoundaryNonTerminalR2LScores[2];
00100 }
00101
00102 virtual size_t hash() const;
00103 virtual bool operator==(const FFState& other) const;
00104
00105 protected:
00106
00107 static int CompareLeftBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {
00108 if (!state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
00109 return 0;
00110 }
00111 if (state.m_leftBoundaryIsSet && !otherState.m_leftBoundaryIsSet) {
00112 return 1;
00113 }
00114 if (!state.m_leftBoundaryIsSet && otherState.m_leftBoundaryIsSet) {
00115 return -1;
00116 }
00117
00118 if (useSparseNT) {
00119 if ( otherState.m_leftBoundaryNonTerminalSymbol < state.m_leftBoundaryNonTerminalSymbol ) {
00120 return 1;
00121 }
00122 if ( state.m_leftBoundaryNonTerminalSymbol < otherState.m_leftBoundaryNonTerminalSymbol ) {
00123 return -1;
00124 }
00125 }
00126
00127 if ( otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
00128 return 1;
00129 }
00130 if ( state.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex < otherState.m_leftBoundaryNonTerminalL2RHeuristicScoreIndex ) {
00131 return -1;
00132 }
00133 if ( Smaller(otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) ) {
00134 return 1;
00135 }
00136 if ( Smaller(state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations, otherState.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations) ) {
00137 return -1;
00138 }
00139 for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
00140
00141
00142 if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
00143 if (state.m_leftBoundaryNonTerminalL2RScores[i] > otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
00144 return 1;
00145 }
00146 if (state.m_leftBoundaryNonTerminalL2RScores[i] < otherState.m_leftBoundaryNonTerminalL2RScores[i]) {
00147 return -1;
00148 }
00149 }
00150 }
00151
00152 if (state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard) {
00153 return 0;
00154 }
00155 if (state.m_leftBoundaryRecursionGuard && !otherState.m_leftBoundaryRecursionGuard) {
00156 return 1;
00157 }
00158 if (!state.m_leftBoundaryRecursionGuard && otherState.m_leftBoundaryRecursionGuard) {
00159 return -1;
00160 }
00161
00162 const PhraseOrientationFeatureState *prevState = state.m_leftBoundaryPrevState;
00163 const PhraseOrientationFeatureState *otherPrevState = otherState.m_leftBoundaryPrevState;
00164
00165 return CompareLeftBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
00166 };
00167
00168 static int CompareRightBoundaryRecursive(const PhraseOrientationFeatureState& state, const PhraseOrientationFeatureState& otherState, bool useSparseNT) {
00169 if (!state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
00170 return 0;
00171 }
00172 if (state.m_rightBoundaryIsSet && !otherState.m_rightBoundaryIsSet) {
00173 return 1;
00174 }
00175 if (!state.m_rightBoundaryIsSet && otherState.m_rightBoundaryIsSet) {
00176 return -1;
00177 }
00178
00179 if (useSparseNT) {
00180 if ( otherState.m_rightBoundaryNonTerminalSymbol < state.m_rightBoundaryNonTerminalSymbol ) {
00181 return 1;
00182 }
00183 if ( state.m_rightBoundaryNonTerminalSymbol < otherState.m_rightBoundaryNonTerminalSymbol ) {
00184 return -1;
00185 }
00186 }
00187
00188 if ( otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
00189 return 1;
00190 }
00191 if ( state.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex < otherState.m_rightBoundaryNonTerminalR2LHeuristicScoreIndex ) {
00192 return -1;
00193 }
00194 if ( Smaller(otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) ) {
00195 return 1;
00196 }
00197 if ( Smaller(state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations, otherState.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations) ) {
00198 return -1;
00199 }
00200 for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) {
00201
00202
00203 if ( state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) {
00204 if (state.m_rightBoundaryNonTerminalR2LScores[i] > otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
00205 return 1;
00206 }
00207 if (state.m_rightBoundaryNonTerminalR2LScores[i] < otherState.m_rightBoundaryNonTerminalR2LScores[i]) {
00208 return -1;
00209 }
00210 }
00211 }
00212
00213 if (state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard) {
00214 return 0;
00215 }
00216 if (state.m_rightBoundaryRecursionGuard && !otherState.m_rightBoundaryRecursionGuard) {
00217 return 1;
00218 }
00219 if (!state.m_rightBoundaryRecursionGuard && otherState.m_rightBoundaryRecursionGuard) {
00220 return -1;
00221 }
00222
00223 const PhraseOrientationFeatureState *prevState = state.m_rightBoundaryPrevState;
00224 const PhraseOrientationFeatureState *otherPrevState = otherState.m_rightBoundaryPrevState;
00225
00226 return CompareRightBoundaryRecursive(*prevState, *otherPrevState, useSparseNT);
00227 };
00228
00229
00230 static void HashCombineLeftBoundaryRecursive(size_t &hash, const PhraseOrientationFeatureState& state, bool useSparseNT) {
00231 if (useSparseNT) {
00232 boost::hash_combine(hash, state.m_leftBoundaryNonTerminalSymbol);
00233 }
00234
00235
00236
00237 for (size_t i=0; i<state.m_leftBoundaryNonTerminalL2RScores.size(); ++i) {
00238 if (state.m_leftBoundaryNonTerminalL2RPossibleFutureOrientations[i]) {
00239 boost::hash_combine(hash, state.m_leftBoundaryNonTerminalL2RScores[i]);
00240 } else {
00241 boost::hash_combine(hash, 0);
00242 }
00243 }
00244
00245 if (!state.m_leftBoundaryRecursionGuard) {
00246 const PhraseOrientationFeatureState *prevState = state.m_leftBoundaryPrevState;
00247 if (prevState->m_leftBoundaryIsSet) {
00248 HashCombineLeftBoundaryRecursive(hash, *prevState, useSparseNT);
00249 }
00250 }
00251 };
00252
00253 static void HashCombineRightBoundaryRecursive(size_t &hash, const PhraseOrientationFeatureState& state, bool useSparseNT) {
00254 if (useSparseNT) {
00255 boost::hash_combine(hash, state.m_rightBoundaryNonTerminalSymbol);
00256 }
00257
00258
00259
00260 for (size_t i=0; i<state.m_rightBoundaryNonTerminalR2LScores.size(); ++i) {
00261 if (state.m_rightBoundaryNonTerminalR2LPossibleFutureOrientations[i]) {
00262 boost::hash_combine(hash, state.m_rightBoundaryNonTerminalR2LScores[i]);
00263 } else {
00264 boost::hash_combine(hash, 0);
00265 }
00266 }
00267
00268 if (!state.m_rightBoundaryRecursionGuard) {
00269 const PhraseOrientationFeatureState *prevState = state.m_rightBoundaryPrevState;
00270 if (prevState->m_rightBoundaryIsSet) {
00271 HashCombineRightBoundaryRecursive(hash, *prevState, useSparseNT);
00272 }
00273 }
00274 };
00275
00276
00277 template<std::size_t N> static bool Smaller(const std::bitset<N>& x, const std::bitset<N>& y) {
00278 for (size_t i=0; i<N; ++i) {
00279 if (x[i] ^ y[i])
00280 return y[i];
00281 }
00282 return false;
00283 }
00284
00285 std::vector<float> m_leftBoundaryNonTerminalL2RScores;
00286 std::vector<float> m_rightBoundaryNonTerminalR2LScores;
00287
00288 size_t m_leftBoundaryNonTerminalL2RHeuristicScoreIndex;
00289 size_t m_rightBoundaryNonTerminalR2LHeuristicScoreIndex;
00290
00291 std::bitset<3> m_leftBoundaryNonTerminalL2RPossibleFutureOrientations;
00292 std::bitset<3> m_rightBoundaryNonTerminalR2LPossibleFutureOrientations;
00293
00294 bool m_leftBoundaryRecursionGuard;
00295 bool m_rightBoundaryRecursionGuard;
00296 bool m_leftBoundaryIsSet;
00297 bool m_rightBoundaryIsSet;
00298 const PhraseOrientationFeatureState* m_leftBoundaryPrevState;
00299 const PhraseOrientationFeatureState* m_rightBoundaryPrevState;
00300 const bool m_distinguishStates;
00301 const bool m_useSparseWord;
00302 const bool m_useSparseNT;
00303 const Factor* m_leftBoundaryNonTerminalSymbol;
00304 const Factor* m_rightBoundaryNonTerminalSymbol;
00305 };
00306
00307
00308
00309 class PhraseOrientationFeature : public StatefulFeatureFunction
00310 {
00311 public:
00312
00313 struct ReoClassData {
00314 public:
00315 std::vector<MosesTraining::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
00316 std::vector<MosesTraining::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
00317 bool firstNonTerminalIsBoundary;
00318 bool firstNonTerminalPreviousSourceSpanIsAligned;
00319 bool firstNonTerminalFollowingSourceSpanIsAligned;
00320 bool lastNonTerminalIsBoundary;
00321 bool lastNonTerminalPreviousSourceSpanIsAligned;
00322 bool lastNonTerminalFollowingSourceSpanIsAligned;
00323 };
00324
00325 PhraseOrientationFeature(const std::string &line);
00326
00327 ~PhraseOrientationFeature() {
00328 }
00329
00330 bool IsUseable(const FactorMask &mask) const {
00331 return true;
00332 }
00333
00334 virtual const FFState* EmptyHypothesisState(const InputType &input) const {
00335 return new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
00336 }
00337
00338 void SetParameter(const std::string& key, const std::string& value);
00339
00340 void Load(AllOptions::ptr const& opts);
00341
00342 void EvaluateInIsolation(const Phrase &source
00343 , const TargetPhrase &targetPhrase
00344 , ScoreComponentCollection &scoreBreakdown
00345 , ScoreComponentCollection &estimatedScores) const;
00346
00347 FFState* EvaluateWhenApplied(
00348 const Hypothesis& cur_hypo,
00349 const FFState* prev_state,
00350 ScoreComponentCollection* accumulator) const {
00351 UTIL_THROW2(GetScoreProducerDescription()
00352 << ": EvaluateWhenApplied(const Hypothesis&, ...) not implemented");
00353 return new PhraseOrientationFeatureState(m_distinguishStates,m_useSparseWord,m_useSparseNT);
00354 };
00355
00356 FFState* EvaluateWhenApplied(
00357 const ChartHypothesis& cur_hypo,
00358 int featureID,
00359 ScoreComponentCollection* accumulator) const;
00360
00361 protected:
00362
00363 void LoadWordList(const std::string& filename,
00364 boost::unordered_set<const Factor*>& list);
00365
00366 void LookaheadScore(const OrientationPhraseProperty *orientationPhraseProperty,
00367 ScoreComponentCollection &scoreBreakdown,
00368 const Factor* targetPhraseLHS,
00369 bool subtract=false) const;
00370
00371 size_t GetHeuristicScoreIndex(const std::vector<float>& scores,
00372 size_t weightsVectorOffset,
00373 const std::bitset<3> possibleFutureOrientations = 0x7) const;
00374
00375 void LeftBoundaryL2RScoreRecursive(int featureID,
00376 const PhraseOrientationFeatureState *state,
00377 const std::bitset<3> orientation,
00378 std::vector<float>& newScores,
00379 ScoreComponentCollection* scoreBreakdown) const;
00380
00381 void RightBoundaryR2LScoreRecursive(int featureID,
00382 const PhraseOrientationFeatureState *state,
00383 const std::bitset<3> orientation,
00384 std::vector<float>& newScores,
00385 ScoreComponentCollection* scoreBreakdown) const;
00386
00387 void SparseWordL2RScore(const ChartHypothesis* hypo,
00388 ScoreComponentCollection* scoreBreakdown,
00389 const std::string* o) const;
00390
00391 void SparseWordR2LScore(const ChartHypothesis* hypo,
00392 ScoreComponentCollection* scoreBreakdown,
00393 const std::string* o) const;
00394
00395 void SparseNonTerminalL2RScore(const Factor* nonTerminalSymbol,
00396 ScoreComponentCollection* scoreBreakdown,
00397 const std::string* o) const;
00398
00399 void SparseNonTerminalR2LScore(const Factor* nonTerminalSymbol,
00400 ScoreComponentCollection* scoreBreakdown,
00401 const std::string* o) const;
00402
00403 const std::string* ToString(const MosesTraining::PhraseOrientation::REO_CLASS o) const;
00404
00405 static const std::string MORIENT;
00406 static const std::string SORIENT;
00407 static const std::string DORIENT;
00408
00409 std::string m_glueLabelStr;
00410 const Factor* m_glueLabel;
00411 bool m_noScoreBoundary;
00412 bool m_monotoneScoreBoundary;
00413 bool m_distinguishStates;
00414 bool m_lookaheadScore;
00415 bool m_heuristicScoreUseWeights;
00416 bool m_useSparseWord;
00417 bool m_useSparseNT;
00418 size_t m_offsetR2LScores;
00419 mutable std::vector<float> m_weightsVector;
00420 std::string m_filenameTargetWordList;
00421 boost::unordered_set<const Factor*> m_targetWordList;
00422 bool m_useTargetWordList;
00423 std::string m_filenameSourceWordList;
00424 boost::unordered_set<const Factor*> m_sourceWordList;
00425 bool m_useSourceWordList;
00426
00427 };
00428
00429
00430 }
00431