00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "util/exception.hh"
00020 #include "util/string_stream.hh"
00021
00022 #include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
00023
00024 using namespace std;
00025
00026 namespace Moses
00027
00028 {
00029
00030 PhraseDictionaryMultiModel::
00031 PhraseDictionaryMultiModel(const std::string &line)
00032 : PhraseDictionary(line, true)
00033 {
00034 ReadParameters();
00035
00036 if (m_mode == "interpolate") {
00037 size_t numWeights = m_numScoreComponents;
00038 UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
00039 m_pdStr.size()*numWeights != m_multimodelweights.size(),
00040 "Number of scores and weights are not equal");
00041 } else if (m_mode == "all" || m_mode == "all-restrict") {
00042 UTIL_THROW2("Implementation has moved: use PhraseDictionaryGroup with restrict=true/false");
00043 } else {
00044 util::StringStream msg;
00045 msg << "combination mode unknown: " << m_mode;
00046 throw runtime_error(msg.str());
00047 }
00048 }
00049
// Alternate constructor taking a 'type' selector; apparently intended for
// subclasses (it does not call ReadParameters(), so the caller is expected
// to do so). Only type == 1 triggers validation here.
PhraseDictionaryMultiModel::
PhraseDictionaryMultiModel(int type, const std::string &line)
  :PhraseDictionary(line, true)
{
  if (type == 1) {
    // NOTE(review): the literal 4 is presumably the fixed number of dense
    // phrase scores in the counts-based variant -- confirm against the
    // subclass that passes type == 1.
    UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
                   m_pdStr.size()*4 != m_multimodelweights.size(),
                   "Number of scores and weights are not equal");
  }
}
00061
00062 void
00063 PhraseDictionaryMultiModel::
00064 SetParameter(const std::string& key, const std::string& value)
00065 {
00066 if (key == "mode") {
00067 m_mode = value;
00068 } else if (key == "components") {
00069 m_pdStr = Tokenize(value, ",");
00070 m_numModels = m_pdStr.size();
00071 } else if (key == "lambda") {
00072 m_multimodelweights = Tokenize<float>(value, ",");
00073 } else {
00074 PhraseDictionary::SetParameter(key, value);
00075 }
00076 }
00077
// Nothing to release: the component tables in m_pd are borrowed pointers
// obtained via FindPhraseDictionary (owned by PhraseDictionary::GetColl()),
// and per-sentence caches are cleared in CleanUpAfterSentenceProcessing.
PhraseDictionaryMultiModel::
~PhraseDictionaryMultiModel()
{ }
00081
00082 void PhraseDictionaryMultiModel::Load(AllOptions::ptr const& opts)
00083 {
00084 m_options = opts;
00085 SetFeaturesToApply();
00086
00087 for(size_t i = 0; i < m_numModels; ++i) {
00088 const string &ptName = m_pdStr[i];
00089
00090 PhraseDictionary *pt = FindPhraseDictionary(ptName);
00091 UTIL_THROW_IF2(pt == NULL,
00092 "Could not find component phrase table " << ptName);
00093 m_pd.push_back(pt);
00094 }
00095 }
00096
00097 TargetPhraseCollection::shared_ptr
00098 PhraseDictionaryMultiModel::
00099 GetTargetPhraseCollectionLEGACY(const Phrase& src) const
00100 {
00101
00102 std::vector<std::vector<float> > multimodelweights;
00103 multimodelweights = getWeights(m_numScoreComponents, true);
00104 TargetPhraseCollection::shared_ptr ret;
00105
00106 std::map<std::string, multiModelStats*>* allStats;
00107 allStats = new(std::map<std::string,multiModelStats*>);
00108 CollectSufficientStatistics(src, allStats);
00109 ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights);
00110 RemoveAllInMap(*allStats);
00111 delete allStats;
00112
00113 ret->NthElement(m_tableLimit);
00114 const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
00115
00116 return ret;
00117 }
00118
00119 void
00120 PhraseDictionaryMultiModel::
00121 CollectSufficientStatistics
00122 (const Phrase& src, std::map<std::string, multiModelStats*>* allStats) const
00123 {
00124 for(size_t i = 0; i < m_numModels; ++i) {
00125 const PhraseDictionary &pd = *m_pd[i];
00126
00127 TargetPhraseCollection::shared_ptr ret_raw;
00128 ret_raw = pd.GetTargetPhraseCollectionLEGACY(src);
00129 if (ret_raw != NULL) {
00130
00131 TargetPhraseCollection::const_iterator iterTargetPhrase, iterLast;
00132 if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
00133 iterLast = ret_raw->begin() + m_tableLimit;
00134 } else {
00135 iterLast = ret_raw->end();
00136 }
00137
00138 for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) {
00139 const TargetPhrase * targetPhrase = *iterTargetPhrase;
00140 std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
00141
00142 std::string targetString = targetPhrase->GetStringRep(m_output);
00143 if (allStats->find(targetString) == allStats->end()) {
00144
00145 multiModelStats * statistics = new multiModelStats;
00146 statistics->targetPhrase = new TargetPhrase(*targetPhrase);
00147 statistics->p.resize(m_numScoreComponents);
00148 for(size_t j = 0; j < m_numScoreComponents; ++j) {
00149 statistics->p[j].resize(m_numModels);
00150 }
00151
00152
00153 statistics->targetPhrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
00154 vector<FeatureFunction*> pd_feature;
00155 pd_feature.push_back(m_pd[i]);
00156 const vector<FeatureFunction*> pd_feature_const(pd_feature);
00157 statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
00158
00159 statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
00160
00161 (*allStats)[targetString] = statistics;
00162
00163 }
00164 multiModelStats * statistics = (*allStats)[targetString];
00165
00166 for(size_t j = 0; j < m_numScoreComponents; ++j) {
00167 statistics->p[j][i] = UntransformScore(raw_scores[j]);
00168 }
00169
00170 (*allStats)[targetString] = statistics;
00171 }
00172 }
00173 }
00174 }
00175
00176 TargetPhraseCollection::shared_ptr
00177 PhraseDictionaryMultiModel::
00178 CreateTargetPhraseCollectionLinearInterpolation
00179 ( const Phrase& src,
00180 std::map<std::string,multiModelStats*>* allStats,
00181 std::vector<std::vector<float> > &multimodelweights) const
00182 {
00183 TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
00184 for ( std::map< std::string, multiModelStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
00185
00186 multiModelStats * statistics = iter->second;
00187
00188 Scores scoreVector(m_numScoreComponents);
00189
00190 for(size_t i = 0; i < m_numScoreComponents; ++i) {
00191 scoreVector[i] = TransformScore(std::inner_product(statistics->p[i].begin(), statistics->p[i].end(), multimodelweights[i].begin(), 0.0));
00192 }
00193
00194 statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
00195
00196
00197 vector<FeatureFunction*> pd_feature;
00198 pd_feature.push_back(const_cast<PhraseDictionaryMultiModel*>(this));
00199 const vector<FeatureFunction*> pd_feature_const(pd_feature);
00200 statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
00201
00202 ret->Add(new TargetPhrase(*statistics->targetPhrase));
00203 }
00204 return ret;
00205 }
00206
00207
00208 std::vector<std::vector<float> >
00209 PhraseDictionaryMultiModel::
00210 getWeights(size_t numWeights, bool normalize) const
00211 {
00212 const std::vector<float>* weights_ptr;
00213 std::vector<float> raw_weights;
00214
00215 weights_ptr = GetTemporaryMultiModelWeightsVector();
00216
00217
00218
00219 if (weights_ptr == NULL || weights_ptr->size() == 0) {
00220 weights_ptr = &m_multimodelweights;
00221 } else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
00222
00223 std::cerr << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ". Reverting to weights in config";
00224 weights_ptr = &m_multimodelweights;
00225 }
00226
00227
00228 if (weights_ptr == NULL || weights_ptr->size() == 0) {
00229 for (size_t i=0; i < m_numModels; i++) {
00230 raw_weights.push_back(1.0/m_numModels);
00231 }
00232 } else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
00233 util::StringStream strme;
00234 strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ".";
00235 UTIL_THROW(util::Exception, strme.str());
00236 } else {
00237 raw_weights = *weights_ptr;
00238 }
00239
00240 std::vector<std::vector<float> > multimodelweights (numWeights);
00241
00242 for (size_t i=0; i < numWeights; i++) {
00243 std::vector<float> weights_onefeature (m_numModels);
00244 if(raw_weights.size() == m_numModels) {
00245 weights_onefeature = raw_weights;
00246 } else {
00247 copy ( raw_weights.begin()+i*m_numModels, raw_weights.begin()+(i+1)*m_numModels, weights_onefeature.begin() );
00248 }
00249 if(normalize) {
00250 multimodelweights[i] = normalizeWeights(weights_onefeature);
00251 } else {
00252 multimodelweights[i] = weights_onefeature;
00253 }
00254 }
00255
00256 return multimodelweights;
00257 }
00258
00259 std::vector<float>
00260 PhraseDictionaryMultiModel::
00261 normalizeWeights(std::vector<float> &weights) const
00262 {
00263 std::vector<float> ret (m_numModels);
00264 float total = std::accumulate(weights.begin(),weights.end(),0.0);
00265 for (size_t i=0; i < weights.size(); i++) {
00266 ret[i] = weights[i]/total;
00267 }
00268 return ret;
00269 }
00270
00271
// This phrase table only supports phrase-based decoding; reaching this
// entry point from the chart decoder is a configuration error.
ChartRuleLookupManager *
PhraseDictionaryMultiModel::
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
                        std::size_t)
{
  UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
}
00279
00280
00281
// Remember a collection handed out by GetTargetPhraseCollectionLEGACY so it
// stays alive until CleanUpAfterSentenceProcessing clears the cache.
void
PhraseDictionaryMultiModel::
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
{
  GetPhraseCache().push_back(tpc);
}
00288
00289
00290 void
00291 PhraseDictionaryMultiModel::
00292 CleanUpAfterSentenceProcessing(const InputType &source)
00293 {
00294
00295
00296
00297
00298
00299
00300
00301 GetPhraseCache().clear();
00302
00303 CleanUpComponentModels(source);
00304
00305 std::vector<float> empty_vector;
00306 SetTemporaryMultiModelWeightsVector(empty_vector);
00307 }
00308
00309
00310 void
00311 PhraseDictionaryMultiModel::
00312 CleanUpComponentModels(const InputType &source)
00313 {
00314 for(size_t i = 0; i < m_numModels; ++i) {
00315 m_pd[i]->CleanUpAfterSentenceProcessing(source);
00316 }
00317 }
00318
// Returns this thread's temporary weight override (set via
// SetTemporaryMultiModelWeightsVector), or NULL if the thread has not set
// one. The returned pointer aliases the internal map/member and is only
// valid while the entry is not erased or reassigned.
const std::vector<float>*
PhraseDictionaryMultiModel::
GetTemporaryMultiModelWeightsVector() const
{
#ifdef WITH_THREADS
  // Shared (reader) lock: concurrent readers are fine, writers exclude.
  // NOTE(review): find() runs twice under the lock; a single find with a
  // stored iterator would suffice.
  boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
  if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
    return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
  } else {
    return NULL;
  }
#else
  // Single-threaded build: one shared vector (may be empty, never NULL).
  return &m_multimodelweights_tmp;
#endif
}
00334
// Install per-request weights for the calling thread (pass-by-value is the
// signature the header declares). They override the config weights in
// getWeights() until cleared by CleanUpAfterSentenceProcessing.
void
PhraseDictionaryMultiModel::
SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
{
#ifdef WITH_THREADS
  // Exclusive (writer) lock against concurrent readers in getWeights().
  boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
  m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
#else
  m_multimodelweights_tmp = weights;
#endif
}
00346
00347 #ifdef WITH_DLIB
00348 vector<float>
00349 PhraseDictionaryMultiModel::
00350 MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
00351 {
00352
00353 map<pair<string, string>, size_t> phrase_pair_map;
00354
00355 for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
00356 phrase_pair_map[*iter] += 1;
00357 }
00358
00359 vector<multiModelStatsOptimization*> optimizerStats;
00360
00361 for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
00362
00363 pair<string, string> phrase_pair = iter->first;
00364 string source_string = phrase_pair.first;
00365 string target_string = phrase_pair.second;
00366
00367 vector<float> fs(m_numModels);
00368 map<string,multiModelStats*>* allStats = new(map<string,multiModelStats*>);
00369
00370 Phrase sourcePhrase(0);
00371 sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
00372
00373 CollectSufficientStatistics(sourcePhrase, allStats);
00374
00375
00376 if (allStats->find(target_string) == allStats->end()) {
00377 RemoveAllInMap(*allStats);
00378 delete allStats;
00379 continue;
00380 }
00381
00382 multiModelStatsOptimization* targetStatistics = new multiModelStatsOptimization();
00383 targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
00384 targetStatistics->p = (*allStats)[target_string]->p;
00385 targetStatistics->f = iter->second;
00386 optimizerStats.push_back(targetStatistics);
00387
00388 RemoveAllInMap(*allStats);
00389 delete allStats;
00390 }
00391
00392 Sentence sentence;
00393 CleanUpAfterSentenceProcessing(sentence);
00394
00395 size_t numWeights = m_numScoreComponents;
00396
00397 vector<float> ret (m_numModels*numWeights);
00398 for (size_t iFeature=0; iFeature < numWeights; iFeature++) {
00399
00400 CrossEntropy * ObjectiveFunction = new CrossEntropy(optimizerStats, this, iFeature);
00401
00402 vector<float> weight_vector = Optimize(ObjectiveFunction, m_numModels);
00403
00404 if (m_mode == "interpolate") {
00405 weight_vector = normalizeWeights(weight_vector);
00406 }
00407
00408 cerr << "Weight vector for feature " << iFeature << ": ";
00409 for (size_t i=0; i < m_numModels; i++) {
00410 ret[(iFeature*m_numModels)+i] = weight_vector[i];
00411 cerr << weight_vector[i] << " ";
00412 }
00413 cerr << endl;
00414 delete ObjectiveFunction;
00415 }
00416
00417 RemoveAllInColl(optimizerStats);
00418 return ret;
00419
00420 }
00421
// Minimize the objective over one weight per model using dlib's BOBYQA
// (derivative-free, bound-constrained trust-region) optimizer.
// Returns the weight vector at the final iterate.
vector<float>
PhraseDictionaryMultiModel::
Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
{
  dlib::matrix<double,0,1> starting_point;
  starting_point.set_size(numModels);
  starting_point = 1.0;   // start from uniform (unnormalized) weights

  try {
    dlib::find_min_bobyqa(*ObjectiveFunction,
                          starting_point,
                          2*numModels+1,   // number of interpolation points
                          dlib::uniform_matrix<double>(numModels,1, 1e-09),  // lower bounds (strictly positive)
                          dlib::uniform_matrix<double>(numModels,1, 1e100),  // upper bounds (effectively unbounded)
                          1.0,     // initial trust region radius
                          1e-5,    // stopping trust region radius
                          10000    // max objective function evaluations
                         );
  } catch (dlib::bobyqa_failure& e) {
    // Non-fatal: report and fall through with whatever dlib left in
    // starting_point.
    cerr << e.what() << endl;
  }

  vector<float> weight_vector (numModels);

  for (int i=0; i < starting_point.nr(); i++) {
    weight_vector[i] = starting_point(i);
  }

  cerr << "Cross-entropy: " << (*ObjectiveFunction)(starting_point) << endl;
  return weight_vector;
}
00454
00455
// Frequency-weighted cross-entropy of the phrase pairs under the
// interpolated model, for the single score component m_iFeature.
// 'arg' holds one (unnormalized) weight per component model.
double CrossEntropy::operator() ( const dlib::matrix<double,0,1>& arg) const
{
  double total = 0.0;
  double n = 0.0;   // total frequency mass
  std::vector<float> weight_vector (m_model->m_numModels);

  // assumes arg.nr() == m_numModels (Optimize sizes it that way).
  for (int i=0; i < arg.nr(); i++) {
    weight_vector[i] = arg(i);
  }
  if (m_model->m_mode == "interpolate") {
    weight_vector = m_model->normalizeWeights(weight_vector);
  }

  for ( std::vector<multiModelStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
    multiModelStatsOptimization* statistics = *iter;
    size_t f = statistics->f;   // frequency of this phrase pair

    // Interpolated probability of the pair under the current weights.
    double score;
    score = std::inner_product(statistics->p[m_iFeature].begin(), statistics->p[m_iFeature].end(), weight_vector.begin(), 0.0);

    // Accumulate the floored log-probability, weighted by frequency.
    // NOTE(review): dividing by TransformScore(2) presumably converts the
    // log to base 2 (bits) -- confirm TransformScore is a natural log.
    total -= (FloorScore(TransformScore(score))/TransformScore(2))*f;
    n += f;
  }
  return total/n;   // average per-occurrence cross-entropy
}
00481
00482 #endif
00483
00484 PhraseDictionary *FindPhraseDictionary(const string &ptName)
00485 {
00486 const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
00487
00488 PhraseDictionary *pt = NULL;
00489 std::vector<PhraseDictionary*>::const_iterator iter;
00490 for (iter = pts.begin(); iter != pts.end(); ++iter) {
00491 PhraseDictionary *currPt = *iter;
00492 if (currPt->GetScoreProducerDescription() == ptName) {
00493 pt = currPt;
00494 break;
00495 }
00496 }
00497
00498 return pt;
00499 }
00500
00501 }