00001 #include "Optimizer.h"
00002
00003 #include <cmath>
00004 #include "util/exception.hh"
00005 #include <vector>
00006 #include <limits>
00007 #include <map>
00008 #include <cfloat>
00009 #include <iostream>
00010 #include <stdint.h>
00011
00012 #include "Point.h"
00013 #include "Util.h"
00014
00015 using namespace std;
00016
00017 static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
00018 static const float MAX_FLOAT = numeric_limits<float>::max();
00019
00020 namespace
00021 {
00022
00026 inline float intersect(float m1, float b1, float m2, float b2)
00027 {
00028 float isect = (b2 - b1) / (m1 - m2);
00029 if (!isfinite(isect)) {
00030 isect = MAX_FLOAT;
00031 }
00032 return isect;
00033 }
00034
00035 }
00036
00037 namespace MosesTuning
00038 {
00039
00040
00041 Optimizer::Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& pos, const vector<parameter_t>& start, unsigned int nrandom)
00042 : m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom), m_positive(pos)
00043 {
00044
00045 Point::m_pdim = Pd;
00046
00047 UTIL_THROW_IF(start.size() != Pd, util::Exception, "Error");
00048 Point::m_dim = i2O.size();
00049 Point::m_opt_indices = i2O;
00050 if (Point::m_pdim > Point::m_dim) {
00051 for (unsigned int i = 0; i < Point::m_pdim; i++) {
00052 unsigned int j = 0;
00053 while (j < Point::m_dim && i != i2O[j])
00054 j++;
00055
00056
00057
00058 if (j == Point::m_dim)
00059 Point::m_fixed_weights[i] = start[i];
00060 }
00061 }
00062 }
00063
00064 Optimizer::~Optimizer() {}
00065
00066 statscore_t Optimizer::GetStatScore(const Point& param) const
00067 {
00068 vector<unsigned> bests;
00069 Get1bests(param, bests);
00070 statscore_t score = GetStatScore(bests);
00071 return score;
00072 }
00073
00074 map<float,diff_t >::iterator AddThreshold(map<float,diff_t >& thresholdmap, float newt, const pair<unsigned,unsigned>& newdiff)
00075 {
00076 map<float,diff_t>::iterator it = thresholdmap.find(newt);
00077 if (it != thresholdmap.end()) {
00078
00079 if (it->second.back().first == newdiff.first)
00080
00081 it->second.back().second = newdiff.second;
00082 else
00083 it->second.push_back(newdiff);
00084 } else {
00085
00086 pair<map<float,diff_t>::iterator, bool> ins = thresholdmap.insert(threshold(newt, diff_t(1, newdiff)));
00087 UTIL_THROW_IF(!ins.second, util::Exception, "Error");
00088 it = ins.first;
00089 }
00090 return it;
00091 }
00092
00093 statscore_t Optimizer::LineOptimize(const Point& origin, const Point& direction, Point& bestpoint) const
00094 {
00095
00096 float min_int = 0.0001;
00097
00098
00099
00100 map<float,diff_t> thresholdmap;
00101 thresholdmap[MIN_FLOAT] = diff_t();
00102 vector<unsigned> first1best;
00103 for (unsigned int S = 0; S < size(); S++) {
00104 map<float,diff_t >::iterator previnserted = thresholdmap.begin();
00105
00106
00107
00108 multimap<float, unsigned> gradient;
00109 vector<float> f0;
00110 f0.resize(m_feature_data->get(S).size());
00111 for (unsigned j = 0; j < m_feature_data->get(S).size(); j++) {
00112
00113 gradient.insert(pair<float, unsigned>(direction * (m_feature_data->get(S,j)), j));
00114
00115 f0[j] = origin * m_feature_data->get(S, j);
00116 }
00117
00118
00119
00120
00121
00122 multimap<float,unsigned>::iterator gradientit = gradient.begin();
00123 multimap<float,unsigned>::iterator highest_f0 = gradient.begin();
00124
00125 float smallest = gradientit->first;
00126
00127
00128 gradientit++;
00129 while (gradientit != gradient.end() && gradientit->first == smallest) {
00130
00131
00132 if (f0[gradientit->second] > f0[highest_f0->second])
00133 highest_f0 = gradientit;
00134 gradientit++;
00135 }
00136
00137 gradientit = highest_f0;
00138 first1best.push_back(highest_f0->second);
00139
00140
00141
00142 while (gradientit != gradient.end()) {
00143 map<float,unsigned>::iterator leftmost = gradientit;
00144 float m = gradientit->first;
00145 float b = f0[gradientit->second];
00146 multimap<float,unsigned>::iterator gradientit2 = gradientit;
00147 gradientit2++;
00148 float leftmostx = MAX_FLOAT;
00149 for (; gradientit2 != gradient.end(); gradientit2++) {
00150
00151
00152
00153 float curintersect;
00154 if (m != gradientit2->first) {
00155 curintersect = intersect(m, b, gradientit2->first, f0[gradientit2->second]);
00156
00157 if (curintersect<=leftmostx) {
00158
00159
00160
00161 leftmostx = curintersect;
00162 leftmost = gradientit2;
00163 }
00164 }
00165 }
00166 if (leftmost == gradientit) {
00167
00168
00169
00170
00171 UTIL_THROW_IF(abs(leftmost->first-gradient.rbegin()->first) >= 0.0001,
00172 util::Exception, "Error");
00173
00174 break;
00175 }
00176
00177
00178 pair<unsigned,unsigned> newd(S, leftmost->second);
00179
00180 if (leftmostx-previnserted->first < min_int) {
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190 map<float,diff_t>::iterator tit = thresholdmap.find(leftmostx);
00191 if (tit == previnserted) {
00192
00193 UTIL_THROW_IF(previnserted->second.back().first != newd.first,
00194 util::Exception,
00195 "Error");
00196 previnserted->second.back()=newd;
00197
00198 } else {
00199
00200 if (tit == thresholdmap.end()) {
00201 thresholdmap[leftmostx]=previnserted->second;
00202 thresholdmap.erase(previnserted);
00203 previnserted = thresholdmap.find(leftmostx);
00204 previnserted->second.back()=newd;
00205
00206 } else {
00207
00208 tit->second.insert(tit->second.end(),previnserted->second.begin(),previnserted->second.end());
00209 UTIL_THROW_IF(tit->second.back().first != newd.first,
00210 util::Exception,
00211 "Error");
00212 tit->second.back()=newd;
00213 thresholdmap.erase(previnserted);
00214 previnserted = tit;
00215 }
00216 }
00217
00218 UTIL_THROW_IF(previnserted == thresholdmap.end(),
00219 util::Exception,
00220 "Error");
00221 } else {
00222 previnserted = AddThreshold(thresholdmap, leftmostx, newd);
00223 }
00224 gradientit = leftmost;
00225 }
00226 }
00227
00228
00229
00230
00231 map<float,diff_t >::iterator thrit;
00232 if (verboselevel() > 6) {
00233 cerr << "Thresholds:(" << thresholdmap.size() << ")" << endl;
00234 for (thrit = thresholdmap.begin(); thrit != thresholdmap.end(); thrit++) {
00235 cerr << "x: " << thrit->first << " diffs";
00236 for (size_t j = 0; j < thrit->second.size(); ++j) {
00237 cerr << " " <<thrit->second[j].first << "," << thrit->second[j].second;
00238 }
00239 cerr << endl;
00240 }
00241 }
00242
00243
00244 thrit = thresholdmap.begin();
00245 ++thrit;
00246 diffs_t diffs;
00247 for (; thrit != thresholdmap.end(); thrit++)
00248 diffs.push_back(thrit->second);
00249 vector<statscore_t> scores = GetIncStatScore(first1best, diffs);
00250
00251 thrit = thresholdmap.begin();
00252 statscore_t bestscore = MIN_FLOAT;
00253 float bestx = MIN_FLOAT;
00254
00255
00256 UTIL_THROW_IF(scores.size() != thresholdmap.size(),
00257 util::Exception,
00258 "Error");
00259 for (unsigned int sc = 0; sc != scores.size(); sc++) {
00260
00261
00262
00263 Point respoint = origin + direction * thrit->first;
00264 bool is_valid = true;
00265 for (unsigned int k=0; k < respoint.getdim(); k++) {
00266 if (m_positive[k] && respoint[k] <= 0.0)
00267 is_valid = false;
00268 }
00269
00270 if (is_valid && scores[sc] > bestscore) {
00271
00272
00273
00274 bestscore = scores[sc];
00275
00276
00277
00278
00279
00280
00281 float leftx = thrit->first;
00282 if (thrit == thresholdmap.begin()) {
00283 leftx = MIN_FLOAT;
00284 }
00285 ++thrit;
00286 float rightx = MAX_FLOAT;
00287 if (thrit != thresholdmap.end()) {
00288 rightx = thrit->first;
00289 }
00290 --thrit;
00291
00292 if (leftx == MIN_FLOAT) {
00293 bestx = rightx-1000;
00294 } else if (rightx == MAX_FLOAT) {
00295 bestx = leftx + 0.1;
00296 } else {
00297 bestx = 0.5 * (rightx + leftx);
00298 }
00299
00300 }
00301 ++thrit;
00302 }
00303
00304 if (abs(bestx) < 0.00015) {
00305
00306
00307 bestx = 0.0;
00308
00309
00310
00311 if (verboselevel() > 4)
00312 cerr << "best point on line at origin" << endl;
00313 }
00314 if (verboselevel() > 3) {
00315
00316 }
00317 bestpoint = direction * bestx + origin;
00318 bestpoint.SetScore(bestscore);
00319 return bestscore;
00320 }
00321
00322 void Optimizer::Get1bests(const Point& P, vector<unsigned>& bests) const
00323 {
00324 UTIL_THROW_IF(m_feature_data == NULL, util::Exception, "Error");
00325 bests.clear();
00326 bests.resize(size());
00327
00328 for (unsigned i = 0; i < size(); i++) {
00329 float bestfs = MIN_FLOAT;
00330 unsigned idx = 0;
00331 unsigned j;
00332 for (j = 0; j < m_feature_data->get(i).size(); j++) {
00333 float curfs = P * m_feature_data->get(i, j);
00334 if (curfs > bestfs) {
00335 bestfs = curfs;
00336 idx = j;
00337 }
00338 }
00339 bests[i]=idx;
00340 }
00341
00342 }
00343
00344 statscore_t Optimizer::Run(Point& P) const
00345 {
00346 if (!m_feature_data) {
00347 cerr << "error trying to optimize without Features loaded" << endl;
00348 exit(2);
00349 }
00350 if (!m_scorer) {
00351 cerr << "error trying to optimize without a Scorer loaded" << endl;
00352 exit(2);
00353 }
00354 if (m_scorer->getReferenceSize() != m_feature_data->size()) {
00355 cerr << "error length mismatch between feature file and score file" << endl;
00356 exit(2);
00357 }
00358
00359 P.SetScore(GetStatScore(P));
00360
00361 if (verboselevel () > 2) {
00362 cerr << "Starting point: " << P << " => " << P.GetScore() << endl;
00363 }
00364 statscore_t score = TrueRun(P);
00365
00366
00367 P.SetScore(score);
00368 if (verboselevel() > 2) {
00369 cerr << "Ending point: " << P << " => " << score << endl;
00370 }
00371 return score;
00372 }
00373
00374
00375 vector<statscore_t> Optimizer::GetIncStatScore(const vector<unsigned>& thefirst, const vector<vector <pair<unsigned,unsigned> > >& thediffs) const
00376 {
00377 UTIL_THROW_IF(m_scorer == NULL, util::Exception, "Error");
00378
00379 vector<statscore_t> theres;
00380
00381 m_scorer->score(thefirst, thediffs, theres);
00382 return theres;
00383 }
00384
00385
00386 statscore_t SimpleOptimizer::TrueRun(Point& P) const
00387 {
00388 statscore_t prevscore = 0;
00389 statscore_t bestscore = MIN_FLOAT;
00390 Point best;
00391
00392
00393
00394 if (P.GetScore() > bestscore) {
00395 bestscore = P.GetScore();
00396 best = P;
00397 }
00398
00399 int nrun = 0;
00400 do {
00401 ++nrun;
00402 if (verboselevel() > 2 && nrun > 1)
00403 cerr << "last diff=" << bestscore-prevscore << " nrun " << nrun << endl;
00404 prevscore = bestscore;
00405
00406 Point linebest;
00407
00408 for (unsigned int d = 0; d < Point::getdim() + m_num_random_directions; d++) {
00409 if (verboselevel() > 4) {
00410
00411 cerr << "starting point: " << P << " => " << prevscore << endl;
00412 }
00413 Point direction;
00414 if (d < Point::getdim()) {
00415 for (unsigned int i = 0; i < Point::getdim(); i++)
00416 direction[i]=0.0;
00417 direction[d]=1.0;
00418 } else {
00419 direction.Randomize();
00420 }
00421 statscore_t curscore = LineOptimize(P, direction, linebest);
00422 if (verboselevel() > 5) {
00423 cerr << "direction: " << d << " => " << curscore << endl;
00424 cerr << "\tending point: "<< linebest << " => " << curscore << endl;
00425 }
00426 if (curscore > bestscore) {
00427 bestscore = curscore;
00428 best = linebest;
00429 if (verboselevel() > 3) {
00430 cerr << "new best dir:" << d << " (" << nrun << ")" << endl;
00431 cerr << "new best Point " << best << " => " << curscore << endl;
00432 }
00433 }
00434 }
00435 P = best;
00436 if (verboselevel() > 3)
00437 cerr << nrun << "\t" << P << endl;
00438 } while (bestscore-prevscore > kEPS);
00439
00440 if (verboselevel() > 2) {
00441 cerr << "end Powell Algo, nrun=" << nrun << endl;
00442 cerr << "last diff=" << bestscore-prevscore << endl;
00443 cerr << "\t" << P << endl;
00444 }
00445 return bestscore;
00446 }
00447
00448 statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
00449 {
00450 statscore_t prevscore = P.GetScore();
00451
00452
00453 unsigned int nrun = 0;
00454 unsigned int nrun_no_change = 0;
00455 for (; nrun_no_change < m_num_random_directions; nrun++, nrun_no_change++) {
00456
00457 Point direction;
00458 direction.Randomize();
00459
00460
00461 statscore_t score = LineOptimize(P, direction, P);
00462 if (verboselevel() > 4) {
00463 cerr << "direction: " << direction << " => " << score;
00464 cerr << " (" << (score-prevscore) << ")" << endl;
00465 cerr << "\tending point: " << P << " => " << score << endl;
00466 }
00467
00468 if (score-prevscore > kEPS)
00469 nrun_no_change = 0;
00470 prevscore = score;
00471 }
00472
00473 if (verboselevel() > 2) {
00474 cerr << "end Powell Algo, nrun=" << nrun << endl;
00475 }
00476 return prevscore;
00477 }
00478
00479
00480 statscore_t RandomOptimizer::TrueRun(Point& P) const
00481 {
00482 P.Randomize();
00483 statscore_t score = GetStatScore(P);
00484 P.SetScore(score);
00485 return score;
00486 }
00487
00488 }