00001 #include "InterpolatedScorer.h"
00002 #include "ScorerFactory.h"
00003 #include "Util.h"
00004
00005 using namespace std;
00006
00007 namespace MosesTuning
00008 {
00009
00010
00011
00012
00013 InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
00014 : Scorer(name,config)
00015 {
00016
00017 string scorers = name;
00018 while (scorers.length() > 0) {
00019 string scorertype = "";
00020 getNextPound(scorers, scorertype,",");
00021 Scorer *scorer = ScorerFactory::getScorer(scorertype,config);
00022 m_scorers.push_back(scorer);
00023 }
00024 if (m_scorers.size() == 0) {
00025 throw runtime_error("There are no scorers");
00026 }
00027 cerr << "Number of scorers: " << m_scorers.size() << endl;
00028
00029
00030 string wtype = getConfig("weights","");
00031
00032
00033 if (wtype.length() == 0) {
00034 float weight = 1.0 / m_scorers.size() ;
00035
00036 for (size_t i = 0; i < m_scorers.size(); i ++) {
00037 m_scorer_weights.push_back(weight);
00038 }
00039 } else {
00040 float tot=0;
00041
00042 while (wtype.length() > 0) {
00043 string scoreweight = "";
00044 getNextPound(wtype,scoreweight,"+");
00045 float weight = atof(scoreweight.c_str());
00046 m_scorer_weights.push_back(weight);
00047 tot += weight;
00048
00049 }
00050
00051 if (tot != float(1)) {
00052 for (vector<float>::iterator it = m_scorer_weights.begin();
00053 it != m_scorer_weights.end(); ++it) {
00054 *it /= tot;
00055 }
00056 }
00057
00058 if (m_scorers.size() != m_scorer_weights.size()) {
00059 throw runtime_error("The number of weights does not equal the number of scorers!");
00060 }
00061 }
00062 cerr << "The weights for the interpolated scorers are: " << endl;
00063 for (vector<float>::iterator it = m_scorer_weights.begin(); it < m_scorer_weights.end(); it++) {
00064 cerr << *it << " " ;
00065 }
00066 cerr <<endl;
00067 }
00068
00069 bool InterpolatedScorer::useAlignment() const
00070 {
00071
00072 for (vector<Scorer*>::const_iterator itsc = m_scorers.begin(); itsc < m_scorers.end(); itsc++) {
00073 if ((*itsc)->useAlignment()) {
00074
00075 return true;
00076 }
00077 }
00078 return false;
00079 };
00080
00081 void InterpolatedScorer::setScoreData(ScoreData* data)
00082 {
00083 size_t last = 0;
00084 m_score_data = data;
00085 for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
00086 itsc != m_scorers.end(); ++itsc) {
00087 int numScoresScorer = (*itsc)->NumberOfScores();
00088 ScoreData* newData =new ScoreData(*itsc);
00089 for (size_t i = 0; i < data->size(); i++) {
00090 ScoreArray scoreArray = data->get(i);
00091 ScoreArray newScoreArray;
00092 size_t numNBest = scoreArray.size();
00093
00094 for (size_t j = 0; j < numNBest ; j++) {
00095 ScoreStats scoreStats = data->get(i, j);
00096
00097 ScoreStats newScoreStats;
00098 for (size_t k = last; k < size_t(numScoresScorer + last); k++) {
00099 ScoreStatsType score = scoreStats.get(k);
00100 newScoreStats.add(score);
00101 }
00102
00103 newScoreArray.add(newScoreStats);
00104 }
00105 newScoreArray.setIndex(i);
00106 newData->add(newScoreArray);
00107 }
00108
00109
00110
00111
00112 m_scorers_score_data.push_back(newData);
00113
00114 (*itsc)->setScoreData(newData);
00115 last += numScoresScorer;
00116 }
00117 }
00118
00119
00122 void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
00123 statscores_t& scores) const
00124 {
00125
00126 size_t scorerNum = 0;
00127 for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
00128 itsc != m_scorers.end(); ++itsc) {
00129
00130 statscores_t tscores;
00131 (*itsc)->score(candidates,diffs,tscores);
00132 size_t inc = 0;
00133 for (statscores_t::iterator itstatsc = tscores.begin();
00134 itstatsc != tscores.end(); ++itstatsc) {
00135
00136 float weight = m_scorer_weights[scorerNum];
00137 if (weight == 0) {
00138 stringstream msg;
00139 msg << "No weights for scorer" << scorerNum ;
00140 throw runtime_error(msg.str());
00141 }
00142 if (scorerNum == 0) {
00143 scores.push_back(weight * (*itstatsc));
00144 } else {
00145 scores[inc] += weight * (*itstatsc);
00146 }
00147
00148 inc++;
00149
00150 }
00151 scorerNum++;
00152 }
00153
00154 }
00155
00158 float InterpolatedScorer::calculateScore(const std::vector<ScoreStatsType>& totals) const
00159 {
00160 size_t scorerNum = 0;
00161 size_t last = 0;
00162 float score = 0;
00163 for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
00164 itsc != m_scorers.end(); ++itsc) {
00165 int numScoresScorer = (*itsc)->NumberOfScores();
00166 std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
00167 score += (*itsc)->calculateScore(totals_scorer) * m_scorer_weights[scorerNum];
00168 last += numScoresScorer;
00169 scorerNum++;
00170 }
00171 return score;
00172 }
00173
00174
00175 float InterpolatedScorer::getReferenceLength(const std::vector<ScoreStatsType>& totals) const
00176 {
00177 size_t scorerNum = 0;
00178 size_t last = 0;
00179 float refLen = 0;
00180 for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
00181 itsc != m_scorers.end(); ++itsc) {
00182 int numScoresScorer = (*itsc)->NumberOfScores();
00183 std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
00184 refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
00185 last += numScoresScorer;
00186 scorerNum++;
00187 }
00188 return refLen;
00189 }
00190
00191 void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
00192 {
00193 for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
00194 itsc != m_scorers.end(); ++itsc) {
00195 (*itsc)->setReferenceFiles(referenceFiles);
00196 }
00197 }
00198
00199 void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
00200 {
00201 stringstream buff;
00202 string align = text;
00203 string sentence = text;
00204 size_t alignmentData = text.find("|||");
00205
00206 if(alignmentData != string::npos) {
00207 getNextPound(align,sentence, "|||");
00208 }
00209
00210 int i = 0;
00211 for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin(); itsc != m_scorers.end(); ++itsc) {
00212 ScoreStats tempEntry;
00213 if ((*itsc)->useAlignment()) {
00214 (*itsc)->prepareStats(sid, text, tempEntry);
00215 } else {
00216 (*itsc)->prepareStats(sid, sentence, tempEntry);
00217 }
00218 if (i > 0) buff << " ";
00219 buff << tempEntry;
00220 i++;
00221 }
00222
00223 string str = buff.str();
00224 entry.set(str);
00225 }
00226
00227 void InterpolatedScorer::setFactors(const string& factors)
00228 {
00229 if (factors.empty()) return;
00230
00231 vector<string> fsplit;
00232 split(factors, ',', fsplit);
00233
00234 if (fsplit.size() != m_scorers.size())
00235 throw runtime_error("Number of factor specifications does not equal number of interpolated scorers.");
00236
00237 for (size_t i = 0; i < m_scorers.size(); ++i) {
00238 m_scorers[i]->setFactors(fsplit[i]);
00239 }
00240 }
00241
00242 void InterpolatedScorer::setFilter(const string& filterCommand)
00243 {
00244 if (filterCommand.empty()) return;
00245
00246 vector<string> csplit;
00247 split(filterCommand, ',', csplit);
00248
00249 if (csplit.size() != m_scorers.size())
00250 throw runtime_error("Number of command specifications does not equal number of interpolated scorers.");
00251
00252 for (size_t i = 0; i < m_scorers.size(); ++i) {
00253 m_scorers[i]->setFilter(csplit[i]);
00254 }
00255 }
00256
00257 }