00001 #ifndef BLUESCOREFEATURE_H
00002 #define BLUESCOREFEATURE_H
00003
00004 #include <utility>
00005 #include <string>
00006 #include <vector>
00007
00008 #include <boost/unordered_map.hpp>
00009
00010 #include "StatefulFeatureFunction.h"
00011
00012 #include "moses/FF/FFState.h"
00013 #include "moses/Phrase.h"
00014 #include "moses/ChartHypothesis.h"
00015
00016 namespace Moses
00017 {
00018
00019 class BleuScoreFeature;
00020
00021 class BleuScoreState : public FFState
00022 {
00023 public:
00024 friend class BleuScoreFeature;
00025 static size_t bleu_order;
00026
00027 BleuScoreState(bool is_syntax);
00028 size_t hash() const;
00029 virtual bool operator==(const FFState& other) const;
00030
00031 void print(std::ostream& out) const;
00032
00033 private:
00034 Phrase m_words;
00035 size_t m_source_length;
00036 size_t m_target_length;
00037 bool m_is_syntax;
00038
00039 float m_scaled_ref_length;
00040
00041 std::vector< size_t > m_ngram_counts;
00042 std::vector< size_t > m_ngram_matches;
00043
00044 void AddNgramCountAndMatches(std::vector< size_t >& counts, std::vector< size_t >& matches);
00045 };
00046
00047
00048 std::ostream& operator<<(std::ostream& out, const BleuScoreState& state);
00049
00050 typedef boost::unordered_map< Phrase, size_t > NGrams;
00051
00052 class RefValue : public std::pair<std::vector<size_t>,NGrams>
00053 {
00054 public:
00055 RefValue& operator=( const RefValue& rhs ) {
00056 first = rhs.first;
00057 second = rhs.second;
00058 return *this;
00059 }
00060 };
00061
00062
00063 class BleuScoreFeature : public StatefulFeatureFunction
00064 {
00065 public:
00066 static const std::vector<BleuScoreFeature*>& GetColl() {
00067 return s_staticColl;
00068 }
00069
00070 typedef boost::unordered_map<size_t, RefValue > RefCounts;
00071 typedef boost::unordered_map<size_t, NGrams> Matches;
00072
00073 BleuScoreFeature(const std::string &line);
00074
00075 void SetParameter(const std::string& key, const std::string& value);
00076
00077 std::vector<float> DefaultWeights() const;
00078
00079 void PrintHistory(std::ostream& out) const;
00080 void LoadReferences(const std::vector< std::vector< std::string > > &);
00081 void SetCurrSourceLength(size_t);
00082 void SetCurrNormSourceLength(size_t);
00083 void SetCurrShortestRefLength(size_t);
00084 void SetCurrAvgRefLength(size_t sent_id);
00085 void SetAvgInputLength (float l) {
00086 m_avg_input_length = l;
00087 }
00088 void SetCurrReferenceNgrams(size_t sent_id);
00089 size_t GetShortestRefIndex(size_t ref_id);
00090 size_t GetClosestRefLength(size_t ref_id, int hypoLength);
00091 void UpdateHistory(const std::vector< const Word* >&);
00092 void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
00093 void PrintRefLength(const std::vector<size_t>& ref_ids);
00094 void SetBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
00095 bool scaleByInverseLength, bool scaleByAvgInverseLength,
00096 float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu);
00097
00098 void GetNgramMatchCounts(Phrase&,
00099 const NGrams&,
00100 std::vector< size_t >&,
00101 std::vector< size_t >&,
00102 size_t skip = 0) const;
00103 void GetNgramMatchCounts_prefix(Phrase&,
00104 const NGrams&,
00105 std::vector< size_t >&,
00106 std::vector< size_t >&,
00107 size_t new_start_indices,
00108 size_t last_end_index) const;
00109 void GetNgramMatchCounts_overlap(Phrase& phrase,
00110 const NGrams& ref_ngram_counts,
00111 std::vector< size_t >& ret_counts,
00112 std::vector< size_t >& ret_matches,
00113 size_t overlap_index) const;
00114 void GetClippedNgramMatchesAndCounts(Phrase&,
00115 const NGrams&,
00116 std::vector< size_t >&,
00117 std::vector< size_t >&,
00118 size_t skip = 0) const;
00119
00120 FFState* EvaluateWhenApplied( const Hypothesis& cur_hypo,
00121 const FFState* prev_state,
00122 ScoreComponentCollection* accumulator) const;
00123 FFState* EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
00124 int featureID,
00125 ScoreComponentCollection* accumulator) const;
00126
00127 bool Enabled() const {
00128 return m_enabled;
00129 }
00130
00131 bool IsUseable(const FactorMask &mask) const;
00132
00133 float CalculateBleu(BleuScoreState*) const;
00134 float CalculateBleu(Phrase translation) const;
00135 const FFState* EmptyHypothesisState(const InputType&) const;
00136
00137 float GetSourceLengthHistory() {
00138 return m_source_length_history;
00139 }
00140 float GetTargetLengthHistory() {
00141 return m_target_length_history;
00142 }
00143 float GetAverageInputLength() {
00144 return m_avg_input_length;
00145 }
00146
00147 void Load(AllOptions::ptr const& opts);
00148
00149 private:
00150 static std::vector<BleuScoreFeature*> s_staticColl;
00151
00152 bool m_enabled;
00153 bool m_sentence_bleu;
00154 bool m_simple_history_bleu;
00155 bool m_is_syntax;
00156
00157 std::vector< float > m_count_history;
00158 std::vector< float > m_match_history;
00159 float m_source_length_history;
00160 float m_target_length_history;
00161 float m_ref_length_history;
00162
00163 size_t m_cur_source_length;
00164 size_t m_cur_norm_source_length;
00165 RefCounts m_refs;
00166 NGrams m_cur_ref_ngrams;
00167 float m_cur_ref_length;
00168
00169
00170 bool m_scale_by_input_length;
00171 bool m_scale_by_avg_input_length;
00172
00173
00174 bool m_scale_by_inverse_length;
00175 bool m_scale_by_avg_inverse_length;
00176
00177 float m_avg_input_length;
00178
00179 float m_scale_by_x;
00180
00181
00182 float m_historySmoothing;
00183
00184 enum SmoothingScheme { PLUS_ONE = 1, PLUS_POINT_ONE = 2, PAPINENI = 3 };
00185 SmoothingScheme m_smoothing_scheme;
00186 };
00187
00188 }
00189
00190 #endif //BLUESCOREFEATURE_H
00191