00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "SentenceLevelScorer.h"
00010
00011 #include <iostream>
00012 #include <boost/spirit/home/support/detail/lexer/runtime_error.hpp>
00013
00014 using namespace std;
00015
00016 namespace MosesTuning
00017 {
00018
00019 SentenceLevelScorer::SentenceLevelScorer(const string& name, const string& config)
00020 : Scorer(name, config),
00021 m_regularisationStrategy(REG_NONE),
00022 m_regularisationWindow(0)
00023 {
00024 Init();
00025 }
00026
00027 SentenceLevelScorer::~SentenceLevelScorer() {}
00028
00029 void SentenceLevelScorer::Init()
00030 {
00031
00032 static string KEY_TYPE = "regtype";
00033 static string KEY_WINDOW = "regwin";
00034 static string KEY_CASE = "case";
00035 static string TYPE_NONE = "none";
00036 static string TYPE_AVERAGE = "average";
00037 static string TYPE_MINIMUM = "min";
00038 static string TRUE = "true";
00039 static string FALSE = "false";
00040
00041 const string type = getConfig(KEY_TYPE, TYPE_NONE);
00042 if (type == TYPE_NONE) {
00043 m_regularisationStrategy = REG_NONE;
00044 } else if (type == TYPE_AVERAGE) {
00045 m_regularisationStrategy = REG_AVERAGE;
00046 } else if (type == TYPE_MINIMUM) {
00047 m_regularisationStrategy = REG_MINIMUM;
00048 } else {
00049 throw boost::lexer::runtime_error("Unknown scorer regularisation strategy: " + type);
00050 }
00051 cerr << "Using scorer regularisation strategy: " << type << endl;
00052
00053 const string window = getConfig(KEY_WINDOW, "0");
00054 m_regularisationWindow = atoi(window.c_str());
00055 cerr << "Using scorer regularisation window: " << m_regularisationWindow << endl;
00056
00057 const string preservecase = getConfig(KEY_CASE, TRUE);
00058 if (preservecase == TRUE) {
00059 m_enable_preserve_case = true;
00060 } else if (preservecase == FALSE) {
00061 m_enable_preserve_case = false;
00062 }
00063 cerr << "Using case preservation: " << m_enable_preserve_case << endl;
00064 }
00065
00066 void SentenceLevelScorer::score(const candidates_t& candidates, const diffs_t& diffs,
00067 statscores_t& scores)
00068 {
00069
00070 if (!m_score_data) {
00071 throw runtime_error("Score data not loaded");
00072 }
00073
00074 if (m_score_data->size() == 0) {
00075 throw runtime_error("Score data is empty");
00076 }
00077 if (candidates.size() == 0) {
00078 throw runtime_error("No candidates supplied");
00079 }
00080 const int numCounts = m_score_data->get(0,candidates[0]).size();
00081 vector<float> totals(numCounts);
00082 for (size_t i = 0; i < candidates.size(); ++i) {
00083
00084 ScoreStats stats = m_score_data->get(i,candidates[i]);
00085 if (stats.size() != totals.size()) {
00086 stringstream msg;
00087 msg << "Statistics for (" << "," << candidates[i] << ") have incorrect "
00088 << "number of fields. Found: " << stats.size() << " Expected: "
00089 << totals.size();
00090 throw runtime_error(msg.str());
00091 }
00092
00093 for (size_t k = 0; k < totals.size(); ++k) {
00094 totals[k] += stats.get(k);
00095
00096 }
00097
00098 }
00099
00100 for (size_t k = 0; k < totals.size(); ++k) {
00101
00102
00103 totals[k] /= candidates.size();
00104
00105 }
00106
00107 scores.push_back(calculateScore(totals));
00108
00109 candidates_t last_candidates(candidates);
00110
00111 for (size_t i = 0; i < diffs.size(); ++i) {
00112 for (size_t j = 0; j < diffs[i].size(); ++j) {
00113 const size_t sid = diffs[i][j].first;
00114 const size_t nid = diffs[i][j].second;
00115
00116
00117 const size_t last_nid = last_candidates[sid];
00118 for (size_t k = 0; k < totals.size(); ++k) {
00119 const float diff = m_score_data->get(sid,nid).get(k)
00120 - m_score_data->get(sid,last_nid).get(k);
00121
00122 totals[k] += diff/candidates.size();
00123
00124 }
00125 last_candidates[sid] = nid;
00126 }
00127 scores.push_back(calculateScore(totals));
00128 }
00129
00130
00131
00132 if (m_regularisationStrategy == REG_NONE || m_regularisationWindow <= 0) {
00133
00134 return;
00135 }
00136
00137
00138 statscores_t raw_scores(scores);
00139 for (size_t i = 0; i < scores.size(); ++i) {
00140 size_t start = 0;
00141 if (i >= m_regularisationWindow) {
00142 start = i - m_regularisationWindow;
00143 }
00144 const size_t end = min(scores.size(), i + m_regularisationWindow+1);
00145 if (m_regularisationStrategy == REG_AVERAGE) {
00146 scores[i] = score_average(raw_scores, start, end);
00147 } else {
00148 scores[i] = score_min(raw_scores, start, end);
00149 }
00150 }
00151 }
00152
00153 }