00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "util/exception.hh"
00020 #include "util/string_stream.hh"
00021
00022 #include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
00023
00024 using namespace std;
00025
00026 namespace Moses
00027
00028 {
00029
00030 PhraseDictionaryMultiModel::
00031 PhraseDictionaryMultiModel(const std::string &line)
00032 : PhraseDictionary(line, true)
00033 {
00034 ReadParameters();
00035
00036 if (m_mode == "interpolate") {
00037 size_t numWeights = m_numScoreComponents;
00038 UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
00039 m_pdStr.size()*numWeights != m_multimodelweights.size(),
00040 "Number of scores and weights are not equal");
00041 } else if (m_mode == "all" || m_mode == "all-restrict") {
00042 UTIL_THROW2("Implementation has moved: use PhraseDictionaryGroup with restrict=true/false");
00043 } else {
00044 util::StringStream msg;
00045 msg << "combination mode unknown: " << m_mode;
00046 throw runtime_error(msg.str());
00047 }
00048 }
00049
// Alternate constructor taking a 'type' selector; apparently intended for
// subclasses (it does not call ReadParameters(), so the caller is expected
// to do so). Only type == 1 triggers validation here.
PhraseDictionaryMultiModel::
PhraseDictionaryMultiModel(int type, const std::string &line)
  :PhraseDictionary(line, true)
{
  if (type == 1) {
    // NOTE(review): the literal 4 is presumably the fixed number of dense
    // phrase scores in the counts-based variant -- confirm against the
    // subclass that passes type == 1.
    UTIL_THROW_IF2(m_pdStr.size() != m_multimodelweights.size() &&
                   m_pdStr.size()*4 != m_multimodelweights.size(),
                   "Number of scores and weights are not equal");
  }
}
00061
00062 void
00063 PhraseDictionaryMultiModel::
00064 SetParameter(const std::string& key, const std::string& value)
00065 {
00066 if (key == "mode") {
00067 m_mode = value;
00068 } else if (key == "components") {
00069 m_pdStr = Tokenize(value, ",");
00070 m_numModels = m_pdStr.size();
00071 } else if (key == "lambda") {
00072 m_multimodelweights = Tokenize<float>(value, ",");
00073 } else {
00074 PhraseDictionary::SetParameter(key, value);
00075 }
00076 }
00077
// Nothing to release: the component tables in m_pd are borrowed pointers
// obtained via FindPhraseDictionary (owned by PhraseDictionary::GetColl()),
// and per-sentence caches are cleared in CleanUpAfterSentenceProcessing.
PhraseDictionaryMultiModel::
~PhraseDictionaryMultiModel()
{ }
00081
00082 void PhraseDictionaryMultiModel::Load(AllOptions::ptr const& opts)
00083 {
00084 m_options = opts;
00085 SetFeaturesToApply();
00086
00087 for(size_t i = 0; i < m_numModels; ++i) {
00088 const string &ptName = m_pdStr[i];
00089
00090 PhraseDictionary *pt = FindPhraseDictionary(ptName);
00091 UTIL_THROW_IF2(pt == NULL,
00092 "Could not find component phrase table " << ptName);
00093 m_pd.push_back(pt);
00094 }
00095 }
00096
00097 TargetPhraseCollection::shared_ptr
00098 PhraseDictionaryMultiModel::
00099 GetTargetPhraseCollectionLEGACY(const Phrase& src) const
00100 {
00101
00102 std::vector<std::vector<float> > multimodelweights;
00103 multimodelweights = getWeights(m_numScoreComponents, true);
00104 TargetPhraseCollection::shared_ptr ret;
00105
00106 std::map<std::string, multiModelStats*>* allStats;
00107 allStats = new(std::map<std::string,multiModelStats*>);
00108 CollectSufficientStatistics(src, allStats);
00109 ret = CreateTargetPhraseCollectionLinearInterpolation(src, allStats, multimodelweights);
00110 RemoveAllInMap(*allStats);
00111 delete allStats;
00112
00113 ret->NthElement(m_tableLimit);
00114 const_cast<PhraseDictionaryMultiModel*>(this)->CacheForCleanup(ret);
00115
00116 return ret;
00117 }
00118
00119 void
00120 PhraseDictionaryMultiModel::
00121 CollectSufficientStatistics
00122 (const Phrase& src, std::map<std::string, multiModelStats*>* allStats) const
00123 {
00124 for(size_t i = 0; i < m_numModels; ++i) {
00125 const PhraseDictionary &pd = *m_pd[i];
00126
00127 TargetPhraseCollection::shared_ptr ret_raw;
00128 ret_raw = pd.GetTargetPhraseCollectionLEGACY(src);
00129 if (ret_raw != NULL) {
00130
00131 TargetPhraseCollection::const_iterator iterTargetPhrase, iterLast;
00132 if (m_tableLimit != 0 && ret_raw->GetSize() > m_tableLimit) {
00133 iterLast = ret_raw->begin() + m_tableLimit;
00134 } else {
00135 iterLast = ret_raw->end();
00136 }
00137
00138 for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != iterLast; ++iterTargetPhrase) {
00139 const TargetPhrase * targetPhrase = *iterTargetPhrase;
00140 std::vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
00141
00142 std::string targetString = targetPhrase->GetStringRep(m_output);
00143 if (allStats->find(targetString) == allStats->end()) {
00144
00145 multiModelStats * statistics = new multiModelStats;
00146 statistics->targetPhrase = new TargetPhrase(*targetPhrase);
00147 statistics->p.resize(m_numScoreComponents);
00148 for(size_t j = 0; j < m_numScoreComponents; ++j) {
00149 statistics->p[j].resize(m_numModels);
00150 }
00151
00152
00153 statistics->targetPhrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
00154 vector<FeatureFunction*> pd_feature;
00155 pd_feature.push_back(m_pd[i]);
00156 const vector<FeatureFunction*> pd_feature_const(pd_feature);
00157 statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
00158
00159 statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
00160
00161 (*allStats)[targetString] = statistics;
00162
00163 }
00164 multiModelStats * statistics = (*allStats)[targetString];
00165
00166 for(size_t j = 0; j < m_numScoreComponents; ++j) {
00167 statistics->p[j][i] = UntransformScore(raw_scores[j]);
00168 }
00169
00170 (*allStats)[targetString] = statistics;
00171 }
00172 }
00173 }
00174 }
00175
00176 TargetPhraseCollection::shared_ptr
00177 PhraseDictionaryMultiModel::
00178 CreateTargetPhraseCollectionLinearInterpolation
00179 ( const Phrase& src,
00180 std::map<std::string,multiModelStats*>* allStats,
00181 std::vector<std::vector<float> > &multimodelweights) const
00182 {
00183 TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
00184 for ( std::map< std::string, multiModelStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
00185
00186 multiModelStats * statistics = iter->second;
00187
00188 Scores scoreVector(m_numScoreComponents);
00189
00190 for(size_t i = 0; i < m_numScoreComponents; ++i) {
00191 scoreVector[i] = TransformScore(std::inner_product(statistics->p[i].begin(), statistics->p[i].end(), multimodelweights[i].begin(), 0.0));
00192 }
00193
00194 statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
00195
00196
00197 vector<FeatureFunction*> pd_feature;
00198 pd_feature.push_back(const_cast<PhraseDictionaryMultiModel*>(this));
00199 const vector<FeatureFunction*> pd_feature_const(pd_feature);
00200 statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
00201
00202 ret->Add(new TargetPhrase(*statistics->targetPhrase));
00203 }
00204 return ret;
00205 }
00206
00207
00208 std::vector<std::vector<float> >
00209 PhraseDictionaryMultiModel::
00210 getWeights(size_t numWeights, bool normalize) const
00211 {
00212 const std::vector<float>* weights_ptr;
00213 std::vector<float> raw_weights;
00214
00215 weights_ptr = GetTemporaryMultiModelWeightsVector();
00216
00217
00218
00219 if (weights_ptr == NULL || weights_ptr->size() == 0) {
00220 weights_ptr = &m_multimodelweights;
00221 } else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
00222
00223 std::cerr << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ". Reverting to weights in config";
00224 weights_ptr = &m_multimodelweights;
00225 }
00226
00227
00228 if (weights_ptr == NULL || weights_ptr->size() == 0) {
00229 for (size_t i=0; i < m_numModels; i++) {
00230 raw_weights.push_back(1.0/m_numModels);
00231 }
00232 } else if(weights_ptr->size() != m_numModels && weights_ptr->size() != m_numModels * numWeights) {
00233 util::StringStream strme;
00234 strme << "Must have either one multimodel weight per model (" << m_numModels << "), or one per weighted feature and model (" << numWeights << "*" << m_numModels << "). You have " << weights_ptr->size() << ".";
00235 UTIL_THROW(util::Exception, strme.str());
00236 } else {
00237 raw_weights = *weights_ptr;
00238 }
00239
00240 std::vector<std::vector<float> > multimodelweights (numWeights);
00241
00242 for (size_t i=0; i < numWeights; i++) {
00243 std::vector<float> weights_onefeature (m_numModels);
00244 if(raw_weights.size() == m_numModels) {
00245 weights_onefeature = raw_weights;
00246 } else {
00247 copy ( raw_weights.begin()+i*m_numModels, raw_weights.begin()+(i+1)*m_numModels, weights_onefeature.begin() );
00248 }
00249 if(normalize) {
00250 multimodelweights[i] = normalizeWeights(weights_onefeature);
00251 } else {
00252 multimodelweights[i] = weights_onefeature;
00253 }
00254 }
00255
00256 return multimodelweights;
00257 }
00258
00259 std::vector<float>
00260 PhraseDictionaryMultiModel::
00261 normalizeWeights(std::vector<float> &weights) const
00262 {
00263 std::vector<float> ret (m_numModels);
00264 float total = std::accumulate(weights.begin(),weights.end(),0.0);
00265 for (size_t i=0; i < weights.size(); i++) {
00266 ret[i] = weights[i]/total;
00267 }
00268 return ret;
00269 }
00270
00271
// This phrase table only supports phrase-based decoding; reaching this
// entry point from the chart decoder is a configuration error.
ChartRuleLookupManager *
PhraseDictionaryMultiModel::
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
                        std::size_t)
{
  UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
}
00279
00280
00281
// Remember a collection handed out by GetTargetPhraseCollectionLEGACY so it
// stays alive until CleanUpAfterSentenceProcessing clears the cache.
void
PhraseDictionaryMultiModel::
CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
{
  GetPhraseCache().push_back(tpc);
}
00288
00289
00290 void
00291 PhraseDictionaryMultiModel::
00292 CleanUpAfterSentenceProcessing(const InputType &source)
00293 {
00294
00295
00296
00297
00298
00299
00300
00301 GetPhraseCache().clear();
00302
00303 CleanUpComponentModels(source);
00304
00305 std::vector<float> empty_vector;
00306 SetTemporaryMultiModelWeightsVector(empty_vector);
00307 }
00308
00309
00310 void
00311 PhraseDictionaryMultiModel::
00312 CleanUpComponentModels(const InputType &source)
00313 {
00314 for(size_t i = 0; i < m_numModels; ++i) {
00315 m_pd[i]->CleanUpAfterSentenceProcessing(source);
00316 }
00317 }
00318
// Returns this thread's temporary weight override (set via
// SetTemporaryMultiModelWeightsVector), or NULL if the thread has not set
// one. The returned pointer aliases the internal map/member and is only
// valid while the entry is not erased or reassigned.
const std::vector<float>*
PhraseDictionaryMultiModel::
GetTemporaryMultiModelWeightsVector() const
{
#ifdef WITH_THREADS
  // Shared (reader) lock: concurrent readers are fine, writers exclude.
  // NOTE(review): find() runs twice under the lock; a single find with a
  // stored iterator would suffice.
  boost::shared_lock<boost::shared_mutex> read_lock(m_lock_weights);
  if (m_multimodelweights_tmp.find(boost::this_thread::get_id()) != m_multimodelweights_tmp.end()) {
    return &m_multimodelweights_tmp.find(boost::this_thread::get_id())->second;
  } else {
    return NULL;
  }
#else
  // Single-threaded build: one shared vector (may be empty, never NULL).
  return &m_multimodelweights_tmp;
#endif
}
00334
// Install per-request weights for the calling thread (pass-by-value is the
// signature the header declares). They override the config weights in
// getWeights() until cleared by CleanUpAfterSentenceProcessing.
void
PhraseDictionaryMultiModel::
SetTemporaryMultiModelWeightsVector(std::vector<float> weights)
{
#ifdef WITH_THREADS
  // Exclusive (writer) lock against concurrent readers in getWeights().
  boost::unique_lock<boost::shared_mutex> lock(m_lock_weights);
  m_multimodelweights_tmp[boost::this_thread::get_id()] = weights;
#else
  m_multimodelweights_tmp = weights;
#endif
}
00346
00347 #ifdef WITH_DLIB
00348 vector<float>
00349 PhraseDictionaryMultiModel::
00350 MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
00351 {
00352
00353 map<pair<string, string>, size_t> phrase_pair_map;
00354
00355 for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
00356 phrase_pair_map[*iter] += 1;
00357 }
00358
00359 vector<multiModelStatsOptimization*> optimizerStats;
00360
00361 for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
00362
00363 pair<string, string> phrase_pair = iter->first;
00364 string source_string = phrase_pair.first;
00365 string target_string = phrase_pair.second;
00366
00367 vector<float> fs(m_numModels);
00368 map<string,multiModelStats*>* allStats = new(map<string,multiModelStats*>);
00369
00370 Phrase sourcePhrase(0);
00371 sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
00372
00373 CollectSufficientStatistics(sourcePhrase, allStats);
00374
00375
00376 if (allStats->find(target_string) == allStats->end()) {
00377 RemoveAllInMap(*allStats);
00378 delete allStats;
00379 continue;
00380 }
00381
00382 multiModelStatsOptimization* targetStatistics = new multiModelStatsOptimization();
00383 targetStatistics->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
00384 targetStatistics->p = (*allStats)[target_string]->p;
00385 targetStatistics->f = iter->second;
00386 optimizerStats.push_back(targetStatistics);
00387
00388 RemoveAllInMap(*allStats);
00389 delete allStats;
00390 }
00391
00392 Sentence sentence;
00393 CleanUpAfterSentenceProcessing(sentence);
00394
00395 size_t numWeights = m_numScoreComponents;
00396
00397 vector<float> ret (m_numModels*numWeights);
00398 for (size_t iFeature=0; iFeature < numWeights; iFeature++) {
00399
00400 CrossEntropy * ObjectiveFunction = new CrossEntropy(optimizerStats, this, iFeature);
00401
00402 vector<float> weight_vector = Optimize(ObjectiveFunction, m_numModels);
00403
00404 if (m_mode == "interpolate") {
00405 weight_vector = normalizeWeights(weight_vector);
00406 }
00407
00408 cerr << "Weight vector for feature " << iFeature << ": ";
00409 for (size_t i=0; i < m_numModels; i++) {
00410 ret[(iFeature*m_numModels)+i] = weight_vector[i];
00411 cerr << weight_vector[i] << " ";
00412 }
00413 cerr << endl;
00414 delete ObjectiveFunction;
00415 }
00416
00417 RemoveAllInColl(optimizerStats);
00418 return ret;
00419
00420 }
00421
// Minimize the objective over one weight per model using dlib's BOBYQA
// (derivative-free, bound-constrained trust-region) optimizer.
// Returns the weight vector at the final iterate.
vector<float>
PhraseDictionaryMultiModel::
Optimize(OptimizationObjective *ObjectiveFunction, size_t numModels)
{
  dlib::matrix<double,0,1> starting_point;
  starting_point.set_size(numModels);
  starting_point = 1.0;   // start from uniform (unnormalized) weights

  try {
    dlib::find_min_bobyqa(*ObjectiveFunction,
                          starting_point,
                          2*numModels+1,   // number of interpolation points
                          dlib::uniform_matrix<double>(numModels,1, 1e-09),  // lower bounds (strictly positive)
                          dlib::uniform_matrix<double>(numModels,1, 1e100),  // upper bounds (effectively unbounded)
                          1.0,     // initial trust region radius
                          1e-5,    // stopping trust region radius
                          10000    // max objective function evaluations
                         );
  } catch (dlib::bobyqa_failure& e) {
    // Non-fatal: report and fall through with whatever dlib left in
    // starting_point.
    cerr << e.what() << endl;
  }

  vector<float> weight_vector (numModels);

  for (int i=0; i < starting_point.nr(); i++) {
    weight_vector[i] = starting_point(i);
  }

  cerr << "Cross-entropy: " << (*ObjectiveFunction)(starting_point) << endl;
  return weight_vector;
}
00454
00455
// Frequency-weighted cross-entropy of the phrase pairs under the
// interpolated model, for the single score component m_iFeature.
// 'arg' holds one (unnormalized) weight per component model.
double CrossEntropy::operator() ( const dlib::matrix<double,0,1>& arg) const
{
  double total = 0.0;
  double n = 0.0;   // total frequency mass
  std::vector<float> weight_vector (m_model->m_numModels);

  // assumes arg.nr() == m_numModels (Optimize sizes it that way).
  for (int i=0; i < arg.nr(); i++) {
    weight_vector[i] = arg(i);
  }
  if (m_model->m_mode == "interpolate") {
    weight_vector = m_model->normalizeWeights(weight_vector);
  }

  for ( std::vector<multiModelStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
    multiModelStatsOptimization* statistics = *iter;
    size_t f = statistics->f;   // frequency of this phrase pair

    // Interpolated probability of the pair under the current weights.
    double score;
    score = std::inner_product(statistics->p[m_iFeature].begin(), statistics->p[m_iFeature].end(), weight_vector.begin(), 0.0);

    // Accumulate the floored log-probability, weighted by frequency.
    // NOTE(review): dividing by TransformScore(2) presumably converts the
    // log to base 2 (bits) -- confirm TransformScore is a natural log.
    total -= (FloorScore(TransformScore(score))/TransformScore(2))*f;
    n += f;
  }
  return total/n;   // average per-occurrence cross-entropy
}
00481
00482 #endif
00483
00484 PhraseDictionary *FindPhraseDictionary(const string &ptName)
00485 {
00486 const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
00487
00488 PhraseDictionary *pt = NULL;
00489 std::vector<PhraseDictionary*>::const_iterator iter;
00490 for (iter = pts.begin(); iter != pts.end(); ++iter) {
00491 PhraseDictionary *currPt = *iter;
00492 if (currPt->GetScoreProducerDescription() == ptName) {
00493 pt = currPt;
00494 break;
00495 }
00496 }
00497
00498 return pt;
00499 }
00500
00501 }