00001
00002 #include <vector>
00003 #include <iostream>
00004 #include <cstdlib>
00005 #include <numeric>
00006 #include <cstdio>
00007 #include <sstream>
00008 #include <string>
00009 #include "zlib.h"
00010
00011 #include "reordering_classes.h"
00012
00013 using namespace std;
00014
00015 ModelScore::ModelScore()
00016 {
00017 for(int i=MONO; i<=NOMONO; ++i) {
00018 count_fe_prev.push_back(0);
00019 count_fe_next.push_back(0);
00020 count_f_prev.push_back(0);
00021 count_f_next.push_back(0);
00022 }
00023 }
00024
00025 ModelScore::~ModelScore() {}
00026
00027 ModelScore* ModelScore::createModelScore(const string& modeltype)
00028 {
00029 if (modeltype.compare("mslr") == 0) {
00030 return new ModelScoreMSLR();
00031 } else if (modeltype.compare("msd") == 0) {
00032 return new ModelScoreMSD();
00033 } else if (modeltype.compare("monotonicity") == 0 ) {
00034 return new ModelScoreMonotonicity();
00035 } else if (modeltype.compare("leftright") == 0) {
00036 return new ModelScoreLR();
00037 } else {
00038 cerr << "Illegal model type given for lexical reordering model scoring: "
00039 << modeltype
00040 << ". The allowed types are: mslr, msd, monotonicity, leftright"
00041 << endl;
00042 exit(1);
00043 }
00044 }
00045
00046 void ModelScore::reset_fe()
00047 {
00048 for(int i=MONO; i<=NOMONO; ++i) {
00049 count_fe_prev[i] = 0;
00050 count_fe_next[i] = 0;
00051 }
00052 }
00053
00054 void ModelScore::reset_f()
00055 {
00056 for(int i=MONO; i<=NOMONO; ++i) {
00057 count_f_prev[i] = 0;
00058 count_f_next[i] = 0;
00059 }
00060 }
00061
00062 void ModelScore::add_example
00063 (const StringPiece& previous, const StringPiece& next, float weight)
00064 {
00065 count_fe_prev[getType(previous)]+=weight;
00066 count_f_prev[getType(previous)]+=weight;
00067 count_fe_next[getType(next)]+=weight;
00068 count_f_next[getType(next)]+=weight;
00069 }
00070
00071 const vector<double>& ModelScore::get_scores_fe_prev() const
00072 {
00073 return count_fe_prev;
00074 }
00075
00076 const vector<double>& ModelScore::get_scores_fe_next() const
00077 {
00078 return count_fe_next;
00079 }
00080
00081 const vector<double>& ModelScore::get_scores_f_prev() const
00082 {
00083 return count_f_prev;
00084 }
00085
00086 const vector<double>& ModelScore::get_scores_f_next() const
00087 {
00088 return count_f_next;
00089 }
00090
00091
00092 ORIENTATION ModelScore::getType(const StringPiece& s)
00093 {
00094 if (s.compare("mono") == 0) {
00095 return MONO;
00096 } else if (s.compare("swap") == 0) {
00097 return SWAP;
00098 } else if (s.compare("dright") == 0) {
00099 return DRIGHT;
00100 } else if (s.compare("dleft") == 0) {
00101 return DLEFT;
00102 } else if (s.compare("other") == 0) {
00103 return OTHER;
00104 } else if (s.compare("nomono") == 0) {
00105 return NOMONO;
00106 } else {
00107 cerr << "Illegal reordering type used: " << s << endl;
00108 exit(1);
00109 }
00110 }
00111
00112
00113 ORIENTATION ModelScoreMSLR::getType(const StringPiece& s)
00114 {
00115 if (s.compare("mono") == 0) {
00116 return MONO;
00117 } else if (s.compare("swap") == 0) {
00118 return SWAP;
00119 } else if (s.compare("dright") == 0) {
00120 return DRIGHT;
00121 } else if (s.compare("dleft") == 0) {
00122 return DLEFT;
00123 } else if (s.compare("other") == 0 || s.compare("nomono") == 0) {
00124 cerr << "Illegal reordering type used: " << s << " for model type mslr. You have to re-run step 5 in order to train such a model." << endl;
00125 exit(1);
00126 } else {
00127 cerr << "Illegal reordering type used: " << s << endl;
00128 exit(1);
00129 }
00130 }
00131
00132
00133 ORIENTATION ModelScoreLR::getType(const StringPiece& s)
00134 {
00135 if (s.compare("mono") == 0 || s.compare("dright") == 0) {
00136 return DRIGHT;
00137 } else if (s.compare("swap") == 0 || s.compare("dleft") == 0) {
00138 return DLEFT;
00139 } else if (s.compare("other") == 0 || s.compare("nomono") == 0) {
00140 cerr << "Illegal reordering type used: " << s << " for model type LeftRight. You have to re-run step 5 in order to train such a model." << endl;
00141 exit(1);
00142 } else {
00143 cerr << "Illegal reordering type used: " << s << endl;
00144 exit(1);
00145 }
00146 }
00147
00148
00149 ORIENTATION ModelScoreMSD::getType(const StringPiece& s)
00150 {
00151 if (s.compare("mono") == 0) {
00152 return MONO;
00153 } else if (s.compare("swap") == 0) {
00154 return SWAP;
00155 } else if (s.compare("dleft") == 0 ||
00156 s.compare("dright") == 0 ||
00157 s.compare("other") == 0) {
00158 return OTHER;
00159 } else if (s.compare("nomono") == 0) {
00160 cerr << "Illegal reordering type used: " << s << " for model type msd. You have to re-run step 5 in order to train such a model." << endl;
00161 exit(1);
00162 } else {
00163 cerr << "Illegal reordering type used: " << s << endl;
00164 exit(1);
00165 }
00166 }
00167
00168 ORIENTATION ModelScoreMonotonicity::getType(const StringPiece& s)
00169 {
00170 if (s.compare("mono") == 0) {
00171 return MONO;
00172 } else if (s.compare("swap") == 0 ||
00173 s.compare("dleft") == 0 ||
00174 s.compare("dright") == 0 ||
00175 s.compare("other") == 0 ||
00176 s.compare("nomono") == 0 ) {
00177 return NOMONO;
00178 } else {
00179 cerr << "Illegal reordering type used: " << s << endl;
00180 exit(1);
00181 }
00182 }
00183
00184
00185
00186 void ScorerMSLR::score(const vector<double>& all_scores, vector<double>& scores) const
00187 {
00188 scores.push_back(all_scores[MONO]);
00189 scores.push_back(all_scores[SWAP]);
00190 scores.push_back(all_scores[DLEFT]);
00191 scores.push_back(all_scores[DRIGHT]);
00192 }
00193
00194 void ScorerMSD::score(const vector<double>& all_scores, vector<double>& scores) const
00195 {
00196 scores.push_back(all_scores[MONO]);
00197 scores.push_back(all_scores[SWAP]);
00198 scores.push_back(all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]);
00199 }
00200
00201 void ScorerMonotonicity::score(const vector<double>& all_scores, vector<double>& scores) const
00202 {
00203 scores.push_back(all_scores[MONO]);
00204 scores.push_back(all_scores[SWAP]+all_scores[DRIGHT]+all_scores[DLEFT]+all_scores[OTHER]+all_scores[NOMONO]);
00205 }
00206
00207
00208 void ScorerLR::score(const vector<double>& all_scores, vector<double>& scores) const
00209 {
00210 scores.push_back(all_scores[MONO]+all_scores[DRIGHT]);
00211 scores.push_back(all_scores[SWAP]+all_scores[DLEFT]);
00212 }
00213
00214
00215 void ScorerMSLR::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const
00216 {
00217 double total = accumulate(scores.begin(), scores.end(), 0);
00218 smoothing.push_back(weight*(scores[MONO]+0.1)/total);
00219 smoothing.push_back(weight*(scores[SWAP]+0.1)/total);
00220 smoothing.push_back(weight*(scores[DLEFT]+0.1)/total);
00221 smoothing.push_back(weight*(scores[DRIGHT]+0.1)/total);
00222 }
00223
00224 void ScorerMSLR::createConstSmoothing(double weight, vector<double>& smoothing) const
00225 {
00226 for (int i=1; i<=4; ++i) {
00227 smoothing.push_back(weight);
00228 }
00229 }
00230
00231
00232 void ScorerMSD::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const
00233 {
00234 double total = accumulate(scores.begin(), scores.end(), 0);
00235 smoothing.push_back(weight*(scores[MONO]+0.1)/total);
00236 smoothing.push_back(weight*(scores[SWAP]+0.1)/total);
00237 smoothing.push_back(weight*(scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+0.1)/total);
00238 }
00239
00240 void ScorerMSD::createConstSmoothing(double weight, vector<double>& smoothing) const
00241 {
00242 for (int i=1; i<=3; ++i) {
00243 smoothing.push_back(weight);
00244 }
00245 }
00246
00247 void ScorerMonotonicity::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const
00248 {
00249 double total = accumulate(scores.begin(), scores.end(), 0);
00250 smoothing.push_back(weight*(scores[MONO]+0.1)/total);
00251 smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT]+scores[DRIGHT]+scores[OTHER]+scores[NOMONO]+0.1)/total);
00252 }
00253
00254 void ScorerMonotonicity::createConstSmoothing(double weight, vector<double>& smoothing) const
00255 {
00256 for (double i=1; i<=2; ++i) {
00257 smoothing.push_back(weight);
00258 }
00259 }
00260
00261
00262 void ScorerLR::createSmoothing(const vector<double>& scores, double weight, vector<double>& smoothing) const
00263 {
00264 double total = accumulate(scores.begin(), scores.end(), 0);
00265 smoothing.push_back(weight*(scores[MONO]+scores[DRIGHT]+0.1)/total);
00266 smoothing.push_back(weight*(scores[SWAP]+scores[DLEFT])/total);
00267 }
00268
00269 void ScorerLR::createConstSmoothing(double weight, vector<double>& smoothing) const
00270 {
00271 for (int i=1; i<=2; ++i) {
00272 smoothing.push_back(weight);
00273 }
00274 }
00275
00276 void Model::score_fe(const string& f, const string& e)
00277 {
00278 if (!fe)
00279 return;
00280 outputFile << f << " ||| " << e << " |||";
00281
00282 if (previous) {
00283 vector<double> scores;
00284 scorer->score(modelscore->get_scores_fe_prev(), scores);
00285 double sum = 0;
00286 for(size_t i=0; i<scores.size(); ++i) {
00287 scores[i] += smoothing_prev[i];
00288 sum += scores[i];
00289 }
00290 for(size_t i=0; i<scores.size(); ++i) {
00291 outputFile << " " << (scores[i]/sum);
00292 }
00293 }
00294
00295 if (next) {
00296 vector<double> scores;
00297 scorer->score(modelscore->get_scores_fe_next(), scores);
00298 double sum = 0;
00299 for(size_t i=0; i<scores.size(); ++i) {
00300 scores[i] += smoothing_next[i];
00301 sum += scores[i];
00302 }
00303 for(size_t i=0; i<scores.size(); ++i) {
00304 outputFile << " " << (scores[i]/sum);
00305 }
00306 }
00307 outputFile << endl;
00308 }
00309
00310 void Model::score_f(const string& f)
00311 {
00312 if (fe)
00313 return;
00314 cout << f << " |||";
00315
00316 if (previous) {
00317 vector<double> scores;
00318 scorer->score(modelscore->get_scores_f_prev(), scores);
00319 double sum = 0;
00320 for(size_t i=0; i<scores.size(); ++i) {
00321 scores[i] += smoothing_prev[i];
00322 sum += scores[i];
00323 }
00324 for(size_t i=0; i<scores.size(); ++i) {
00325 outputFile << " " << (scores[i]/sum);
00326 }
00327 }
00328
00329 if (next) {
00330 vector<double> scores;
00331 scorer->score(modelscore->get_scores_f_next(), scores);
00332 double sum = 0;
00333 for(size_t i=0; i<scores.size(); ++i) {
00334 scores[i] += smoothing_next[i];
00335 sum += scores[i];
00336 }
00337 for(size_t i=0; i<scores.size(); ++i) {
00338 outputFile << " " << (scores[i]/sum);
00339 }
00340 }
00341 outputFile << endl;
00342 }
00343
00344 Model::Model(ModelScore* ms, Scorer* sc, const string& dir, const string& lang, const string& fn)
00345 : modelscore(ms), scorer(sc), filename(fn)
00346 {
00347 outputFile.Open( (filename+".gz").c_str() );
00348 fe = false;
00349 if (lang.compare("fe") == 0) {
00350 fe = true;
00351 } else if (lang.compare("f") != 0) {
00352 cerr << "You have given an illegal language to condition on: " << lang
00353 << "\nLegal types: fe (on both languages), f (only on source language)\n";
00354 exit(1);
00355 }
00356
00357 previous = true;
00358 next = true;
00359 if (dir.compare("backward") == 0) {
00360 next = false;
00361 } else if (dir.compare("forward") == 0) {
00362 previous = false;
00363 }
00364 }
00365
00366 Model::~Model()
00367 {
00368 outputFile.Close();
00369 delete modelscore;
00370 delete scorer;
00371 }
00372
00373 void Model::split_config(const string& config, string& dir, string& lang, string& orient)
00374 {
00375 istringstream is(config);
00376 string type;
00377 getline(is, type, '-');
00378 getline(is, orient, '-');
00379 getline(is, dir, '-');
00380 getline(is, lang, '-');
00381 }
00382
00383 Model* Model::createModel(ModelScore* modelscore, const string& config, const string& filepath)
00384 {
00385 string dir, lang, orient, filename;
00386 split_config(config,dir,lang,orient);
00387
00388 filename = filepath + config;
00389 if (orient.compare("mslr") == 0) {
00390 return new Model(modelscore, new ScorerMSLR(), dir, lang, filename);
00391 } else if (orient.compare("msd") == 0) {
00392 return new Model(modelscore, new ScorerMSD(), dir, lang, filename);
00393 } else if (orient.compare("monotonicity") == 0) {
00394 return new Model(modelscore, new ScorerMonotonicity(), dir, lang, filename);
00395 } else if (orient.compare("leftright") == 0) {
00396 return new Model(modelscore, new ScorerLR(), dir, lang, filename);
00397 } else {
00398 cerr << "Illegal orientation type of reordering model: " << orient
00399 << "\n allowed types: mslr, msd, monotonicity, leftright\n";
00400 exit(1);
00401 }
00402 }
00403
00404
00405
00406 void Model::createSmoothing(double w)
00407 {
00408 scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_prev);
00409 scorer->createSmoothing(modelscore->get_scores_fe_next(), w, smoothing_next);
00410 }
00411
00412 void Model::createConstSmoothing(double w)
00413 {
00414 scorer->createConstSmoothing(w, smoothing_prev);
00415 scorer->createConstSmoothing(w, smoothing_next);
00416 }