#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <numeric>
#include <sstream>
#include <string>
#include <vector>

#include <getopt.h>

#if defined __MINGW32__
#include <ctime>
#endif // defined

#include "Scorer.h"
#include "ScorerFactory.h"
#include "Timer.h"
#include "Util.h"
#include "Data.h"
#include "util/random.hh"
00020
00021 using namespace std;
00022 using namespace MosesTuning;
00023
00024 namespace
00025 {
00026
// Scorer for the file/metric combination currently being evaluated.
// main() creates and deletes one scorer per combination; the helpers
// below read it through this global.
Scorer* g_scorer = NULL;
// When several candidate files / several metrics are evaluated at once,
// each output line is prefixed with the file name / metric name.
bool g_has_more_files = false;
bool g_has_more_scorers = false;
// Significance level for the bootstrap confidence interval (95% CI).
const float g_alpha = 0.05;
00031
00032
// Collection of static helpers for scoring candidate translations with
// the globally selected scorer. Not instantiable (private ctor/dtor).
class EvaluatorUtil
{
public:
  // Score candFile with g_scorer and print the result; when bootstrap > 0,
  // also print a bootstrapped confidence interval. nbest_mode selects
  // n-best-list input instead of a plain candidate file.
  static void evaluate(const string& candFile, int bootstrap, bool nbest_mode);
  // Arithmetic mean of list.
  static float average(const vector<float>& list);
  // Decimal string representation of n.
  static string int2string(int n);
  // Per-sentence score statistics from an n-best file (1-best entries only).
  static vector<ScoreStats> loadNBest(const string& nBestFile);
  // Per-sentence score statistics from a plain one-sentence-per-line file.
  static vector<ScoreStats> loadCand(const string& candFile);

private:
  EvaluatorUtil() {}
  ~EvaluatorUtil() {}
};
00046
00047
00048 vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile)
00049 {
00050
00051 ifstream cand(candFile.c_str());
00052 if (!cand.good()) throw runtime_error("Error opening candidate file");
00053
00054 vector<ScoreStats> entries;
00055
00056
00057 ScoreStats scoreentry;
00058 string line;
00059 while (getline(cand, line)) {
00060 g_scorer->prepareStats(entries.size(), line, scoreentry);
00061 entries.push_back(scoreentry);
00062 }
00063 return entries;
00064 }
00065
00066
00067 vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile)
00068 {
00069 vector<ScoreStats> entries;
00070
00071 Data data(g_scorer);
00072 data.loadNBest(nBestFile, true);
00073 const ScoreDataHandle & score_data = data.getScoreData();
00074 for (size_t i = 0; i != score_data->size(); i++) {
00075 entries.push_back(score_data->get(i, 0));
00076 }
00077 return entries;
00078 }
00079
00080
00081 void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_input)
00082 {
00083
00084 vector<ScoreStats> entries;
00085
00086 if (nbest_input) {
00087 entries = loadNBest(candFile);
00088 } else {
00089 entries = loadCand(candFile);
00090 }
00091
00092 int n = entries.size();
00093 if (bootstrap) {
00094 vector<float> scores;
00095 for (int i = 0; i < bootstrap; ++i) {
00096 ScoreData scoredata(g_scorer);
00097 for (int j = 0; j < n; ++j) {
00098 const int randomIndex = util::rand_excl(n);
00099 scoredata.add(entries[randomIndex], j);
00100 }
00101 g_scorer->setScoreData(&scoredata);
00102 candidates_t candidates(n, 0);
00103 float score = g_scorer->score(candidates);
00104 scores.push_back(score);
00105 }
00106
00107 float avg = average(scores);
00108
00109 sort(scores.begin(), scores.end());
00110
00111 int lbIdx = scores.size() * (g_alpha / 2);
00112 int rbIdx = scores.size() * (1 - g_alpha / 2);
00113
00114 float lb = scores[lbIdx];
00115 float rb = scores[rbIdx];
00116
00117 if (g_has_more_files) cout << candFile << "\t";
00118 if (g_has_more_scorers) cout << g_scorer->getName() << "\t";
00119
00120 cout.setf(ios::fixed, ios::floatfield);
00121 cout.precision(4);
00122 cout << avg << "\t[" << lb << "," << rb << "]" << endl;
00123 } else {
00124 ScoreData scoredata(g_scorer);
00125 for (int sid = 0; sid < n; ++sid) {
00126 scoredata.add(entries[sid], sid);
00127 }
00128 g_scorer->setScoreData(&scoredata);
00129 candidates_t candidates(n, 0);
00130 float score = g_scorer->score(candidates);
00131
00132 if (g_has_more_files) cout << candFile << "\t";
00133 if (g_has_more_scorers) cout << g_scorer->getName() << "\t";
00134
00135 cout.setf(ios::fixed, ios::floatfield);
00136 cout.precision(4);
00137 cout << score << endl;
00138 }
00139 }
00140
00141 string EvaluatorUtil::int2string(int n)
00142 {
00143 stringstream ss;
00144 ss << n;
00145 return ss.str();
00146 }
00147
00148 float EvaluatorUtil::average(const vector<float>& list)
00149 {
00150 float sum = 0;
00151 for (vector<float>::const_iterator it = list.begin(); it != list.end(); ++it)
00152 sum += *it;
00153
00154 return sum / list.size();
00155 }
00156
00157 void usage()
00158 {
00159 cerr << "usage: evaluator [options] --reference ref1[,ref2[,ref3...]] --candidate cand1[,cand2[,cand3...]] " << endl;
00160 cerr << "[--sctype|-s] the scorer type (default BLEU)" << endl;
00161 cerr << "[--scconfig|-c] configuration string passed to scorer" << endl;
00162 cerr << "\tThis is of the form NAME1:VAL1,NAME2:VAL2 etc " << endl;
00163 cerr << "[--reference|-R] comma separated list of reference files" << endl;
00164 cerr << "[--candidate|-C] comma separated list of candidate files" << endl;
00165 cerr << "[--nbest|-n] comma separated list of nbest files (only 1-best is evaluated)" << endl;
00166 cerr << "[--factors|-f] list of factors passed to the scorer (e.g. 0|2)" << endl;
00167 cerr << "[--filter|-l] filter command which will be used to preprocess the sentences" << endl;
00168 cerr << "[--bootstrap|-b] number of booststraped samples (default 0 - no bootstraping)" << endl;
00169 cerr << "[--rseed|-r] the random seed for bootstraping (defaults to system clock)" << endl;
00170 cerr << "[--help|-h] print this message and exit" << endl;
00171 cerr << endl;
00172 cerr << "Evaluator is able to compute more metrics at once. To do this," << endl;
00173 cerr << "specify more --sctype arguments. You can also specify more --scconfig strings." << endl;
00174 cerr << endl;
00175 cerr << "The example below prints BLEU score, PER score and interpolated" << endl;
00176 cerr << "score of CDER and PER with the given weights." << endl;
00177 cerr << endl;
00178 cerr << "./evaluator \\" << endl;
00179 cerr << "\t--sctype BLEU --scconfig reflen:closest \\" << endl;
00180 cerr << "\t--sctype PER \\" << endl;
00181 cerr << "\t--sctype CDER,PER --scconfig weights:0.25+0.75 \\" << endl;
00182 cerr << "\t--candidate CANDIDATE \\" << endl;
00183 cerr << "\t--reference REFERENCE" << endl;
00184 cerr << endl;
00185 cerr << "If you specify only one scorer and one candidate file, only the final score" << endl;
00186 cerr << "will be printed to stdout. Otherwise each line will contain metric name" << endl;
00187 cerr << "and/or filename and the final score. Since most of the metrics prints some" << endl;
00188 cerr << "debuging info, consider redirecting stderr to /dev/null." << endl;
00189 exit(1);
00190 }
00191
// Long command-line options; each maps to the matching short-option
// character handled by the switch in ParseCommandOptions().
static struct option long_options[] = {
  {"sctype", required_argument, 0, 's'},
  {"scconfig", required_argument, 0, 'c'},
  {"reference", required_argument, 0, 'R'},
  {"candidate", required_argument, 0, 'C'},
  {"nbest", required_argument, 0, 'n'},
  {"bootstrap", required_argument, 0, 'b'},
  {"rseed", required_argument, 0, 'r'},
  {"factors", required_argument, 0, 'f'},
  {"filter", required_argument, 0, 'l'},
  {"help", no_argument, 0, 'h'},
  {0, 0, 0, 0}
};
00205
00206
// Parsed command-line configuration. The four scorer_* vectors are kept
// parallel: one entry per --sctype, in the order given on the command line.
struct ProgramOption {
  vector<string> scorer_types;    // one entry per --sctype
  vector<string> scorer_configs;  // per-scorer config string (may be empty)
  string reference;               // comma-separated reference files
  string candidate;               // comma-separated candidate files
  string nbest;                   // comma-separated n-best files
  vector<string> scorer_factors;  // per-scorer factor list
  vector<string> scorer_filter;   // per-scorer filter command
  int bootstrap;                  // number of bootstrap samples (0 = off)
  int seed;                       // RNG seed for bootstrapping
  bool has_seed;                  // true when --rseed was given

  ProgramOption()
    : reference(""),
      candidate(""),
      nbest(""),
      bootstrap(0),
      seed(0),
      has_seed(false) { }
};
00227
00228 void ParseCommandOptions(int argc, char** argv, ProgramOption* opt)
00229 {
00230 int c;
00231 int option_index;
00232 int last_scorer_index = -1;
00233 while ((c = getopt_long(argc, argv, "s:c:R:C:n:b:r:f:l:h", long_options, &option_index)) != -1) {
00234 switch(c) {
00235 case 's':
00236 opt->scorer_types.push_back(string(optarg));
00237 opt->scorer_configs.push_back(string(""));
00238 opt->scorer_factors.push_back(string(""));
00239 opt->scorer_filter.push_back(string(""));
00240 last_scorer_index++;
00241 break;
00242 case 'c':
00243 if (last_scorer_index == -1) throw runtime_error("You need to specify a scorer before its config string.");
00244 opt->scorer_configs[last_scorer_index] = string(optarg);
00245 break;
00246 case 'R':
00247 opt->reference = string(optarg);
00248 break;
00249 case 'C':
00250 opt->candidate = string(optarg);
00251 break;
00252 case 'n':
00253 opt->nbest = string(optarg);
00254 break;
00255 case 'b':
00256 opt->bootstrap = atoi(optarg);
00257 break;
00258 case 'r':
00259 opt->seed = strtol(optarg, NULL, 10);
00260 opt->has_seed = true;
00261 break;
00262 case 'f':
00263 if (last_scorer_index == -1) throw runtime_error("You need to specify a scorer before its list of factors.");
00264 opt->scorer_factors[last_scorer_index] = string(optarg);
00265 break;
00266 case 'l':
00267 if (last_scorer_index == -1) throw runtime_error("You need to specify a scorer before its filter.");
00268 opt->scorer_filter[last_scorer_index] = string(optarg);
00269 break;
00270 default:
00271 usage();
00272 }
00273 }
00274
00275
00276 if (opt->scorer_types.size() == 0) {
00277 opt->scorer_types.push_back(string("BLEU"));
00278 opt->scorer_configs.push_back(string(""));
00279 opt->scorer_factors.push_back(string(""));
00280 opt->scorer_filter.push_back(string(""));
00281 }
00282 }
00283
00284 void InitSeed(const ProgramOption *opt)
00285 {
00286 if (opt->has_seed) {
00287 cerr << "Seeding random numbers with " << opt->seed << endl;
00288 util::rand_init(opt->seed);
00289 } else {
00290 cerr << "Seeding random numbers with system clock " << endl;
00291 util::rand_init();
00292 }
00293 }
00294
00295 }
00296
00297 int main(int argc, char** argv)
00298 {
00299 ResetUserTime();
00300
00301 ProgramOption option;
00302 ParseCommandOptions(argc, argv, &option);
00303
00304 if (option.bootstrap) {
00305 InitSeed(&option);
00306 }
00307
00308 try {
00309 vector<string> refFiles;
00310 vector<string> candFiles;
00311
00312 if (option.reference.length() == 0) throw runtime_error("You have to specify at least one reference file.");
00313 split(option.reference, ',', refFiles);
00314
00315 if (option.candidate.length() == 0 && option.nbest.length() == 0) throw runtime_error("You have to specify at least one candidate (or n-best) file.");
00316 if (option.candidate.length() > 0 && option.nbest.length() > 0) throw runtime_error("You can either specify candidate files or n-best files, but not both.");
00317 bool nbest_input = option.nbest.length() > 0;
00318 if (nbest_input)
00319 split(option.nbest, ',', candFiles);
00320 else
00321 split(option.candidate, ',', candFiles);
00322
00323 if (candFiles.size() > 1) g_has_more_files = true;
00324 if (option.scorer_types.size() > 1) g_has_more_scorers = true;
00325
00326 for (vector<string>::const_iterator fileIt = candFiles.begin(); fileIt != candFiles.end(); ++fileIt) {
00327 for (size_t i = 0; i < option.scorer_types.size(); i++) {
00328 g_scorer = ScorerFactory::getScorer(option.scorer_types[i], option.scorer_configs[i]);
00329 g_scorer->setFactors(option.scorer_factors[i]);
00330 g_scorer->setFilter(option.scorer_filter[i]);
00331 g_scorer->setReferenceFiles(refFiles);
00332 EvaluatorUtil::evaluate(*fileIt, option.bootstrap, nbest_input);
00333 delete g_scorer;
00334 }
00335 }
00336 return EXIT_SUCCESS;
00337 } catch (const exception& e) {
00338 cerr << "Exception: " << e.what() << endl;
00339 return EXIT_FAILURE;
00340 }
00341 }