00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "util/exception.hh"
00020 #include "util/tokenize.hh"
00021 #include "util/string_stream.hh"
00022 #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
00023
00024 using namespace std;
00025
// Debugging helper: print the elements of a vector to stderr,
// space-separated, terminated by a newline.
template<typename T>
void OutputVec(const vector<T> &vec)
{
  // Flush once at the end (via endl) instead of after every element;
  // the per-element flush in the original was pure overhead.
  for (size_t i = 0; i < vec.size(); ++i) {
    cerr << vec[i] << " ";
  }
  cerr << endl;
}
00034
00035 namespace Moses
00036 {
00037
00038 PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(const std::string &line)
00039 :PhraseDictionaryMultiModel(1, line)
00040 {
00041 m_mode = "instance_weighting";
00042 m_combineFunction = InstanceWeighting;
00043 cerr << "m_args=" << m_args.size() << endl;
00044 ReadParameters();
00045
00046 UTIL_THROW_IF2(m_targetTable.size() != m_pdStr.size(),
00047 "List of phrase tables and target tables must be equal");
00048
00049 }
00050
00051 void PhraseDictionaryMultiModelCounts::SetParameter(const std::string& key, const std::string& value)
00052 {
00053 if (key == "mode") {
00054 m_mode = value;
00055 if (m_mode == "instance_weighting")
00056 m_combineFunction = InstanceWeighting;
00057 else if (m_mode == "interpolate")
00058 m_combineFunction = LinearInterpolationFromCounts;
00059 else {
00060 util::StringStream msg;
00061 msg << "combination mode unknown: " << m_mode;
00062 throw runtime_error(msg.str());
00063 }
00064 } else if (key == "lex-e2f") {
00065 m_lexE2FStr = Tokenize(value, ",");
00066 UTIL_THROW_IF2(m_lexE2FStr.size() != m_pdStr.size(),
00067 "Number of scores for lexical probability p(f|e) incorrectly specified");
00068 } else if (key == "lex-f2e") {
00069 m_lexF2EStr = Tokenize(value, ",");
00070 UTIL_THROW_IF2(m_lexF2EStr.size() != m_pdStr.size(),
00071 "Number of scores for lexical probability p(e|f) incorrectly specified");
00072 } else if (key == "target-table") {
00073 m_targetTable = Tokenize(value, ",");
00074 } else {
00075 PhraseDictionaryMultiModel::SetParameter(key, value);
00076 }
00077 }
00078
// Free the lexical count tables, which this object allocated in Load().
// The component phrase tables referenced by m_pd / m_inverse_pd are not
// owned here and are not freed.
PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
{
  RemoveAllInColl(m_lexTable_e2f);
  RemoveAllInColl(m_lexTable_f2e);
}
00084
00085
00086 void PhraseDictionaryMultiModelCounts::Load(AllOptions::ptr const& opts)
00087 {
00088 m_options = opts;
00089 SetFeaturesToApply();
00090 for(size_t i = 0; i < m_numModels; ++i) {
00091
00092
00093 const string &ptName = m_pdStr[i];
00094
00095 PhraseDictionary *pt;
00096 pt = FindPhraseDictionary(ptName);
00097 UTIL_THROW_IF2(pt == NULL,
00098 "Could not find component phrase table " << ptName);
00099 m_pd.push_back(pt);
00100
00101
00102 const string &target_table = m_targetTable[i];
00103 pt = FindPhraseDictionary(target_table);
00104 UTIL_THROW_IF2(pt == NULL,
00105 "Could not find component phrase table " << target_table);
00106 m_inverse_pd.push_back(pt);
00107
00108
00109 string lex_e2f = m_lexE2FStr[i];
00110 string lex_f2e = m_lexF2EStr[i];
00111 lexicalTable* e2f = new lexicalTable;
00112 LoadLexicalTable(lex_e2f, e2f);
00113 lexicalTable* f2e = new lexicalTable;
00114 LoadLexicalTable(lex_f2e, f2e);
00115
00116 m_lexTable_e2f.push_back(e2f);
00117 m_lexTable_f2e.push_back(f2e);
00118
00119 }
00120
00121 }
00122
00123
00124 TargetPhraseCollection::shared_ptr PhraseDictionaryMultiModelCounts::GetTargetPhraseCollectionLEGACY(const Phrase& src) const
00125 {
00126 vector<vector<float> > multimodelweights;
00127 bool normalize;
00128 normalize = (m_mode == "interpolate") ? true : false;
00129 multimodelweights = getWeights(4,normalize);
00130
00131
00132 vector<float> fs(m_numModels);
00133
00134 map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);
00135
00136 CollectSufficientStats(src, fs, allStats);
00137
00138 TargetPhraseCollection::shared_ptr ret
00139 = CreateTargetPhraseCollectionCounts(src, fs, allStats, multimodelweights);
00140
00141 ret->NthElement(m_tableLimit);
00142 const_cast<PhraseDictionaryMultiModelCounts*>(this)->CacheForCleanup(ret);
00143 return ret;
00144 }
00145
00146
00147 void
00148 PhraseDictionaryMultiModelCounts::
00149 CollectSufficientStats(const Phrase& src, vector<float> &fs,
00150 map<string,multiModelCountsStats*>* allStats) const
00151
00152 {
00153 for(size_t i = 0; i < m_numModels; ++i) {
00154 const PhraseDictionary &pd = *m_pd[i];
00155
00156 TargetPhraseCollection::shared_ptr ret_raw
00157 = pd.GetTargetPhraseCollectionLEGACY(src);
00158 if (ret_raw != NULL) {
00159
00160 TargetPhraseCollection::const_iterator iterTargetPhrase;
00161 for (iterTargetPhrase = ret_raw->begin(); iterTargetPhrase != ret_raw->end(); ++iterTargetPhrase) {
00162
00163 const TargetPhrase * targetPhrase = *iterTargetPhrase;
00164 vector<float> raw_scores = targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd);
00165
00166 string targetString = targetPhrase->GetStringRep(m_output);
00167 if (allStats->find(targetString) == allStats->end()) {
00168
00169 multiModelCountsStats * statistics = new multiModelCountsStats;
00170 statistics->targetPhrase = new TargetPhrase(*targetPhrase);
00171
00172
00173 statistics->targetPhrase->GetScoreBreakdown().InvertDenseFeatures(&pd);
00174 vector<FeatureFunction*> pd_feature;
00175 pd_feature.push_back(m_pd[i]);
00176 const vector<FeatureFunction*> pd_feature_const(pd_feature);
00177 statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
00178
00179 statistics->targetPhrase->GetScoreBreakdown().ZeroDenseFeatures(&pd);
00180
00181 statistics->fst.resize(m_numModels);
00182 statistics->ft.resize(m_numModels);
00183
00184 (*allStats)[targetString] = statistics;
00185
00186 }
00187 multiModelCountsStats * statistics = (*allStats)[targetString];
00188
00189 statistics->fst[i] = UntransformScore(raw_scores[0]);
00190 statistics->ft[i] = UntransformScore(raw_scores[1]);
00191 fs[i] = UntransformScore(raw_scores[2]);
00192 (*allStats)[targetString] = statistics;
00193 }
00194 }
00195 }
00196
00197
00198 for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {
00199 multiModelCountsStats * statistics = iter->second;
00200
00201 for (size_t i = 0; i < m_numModels; ++i) {
00202 if (!statistics->ft[i]) {
00203 statistics->ft[i] = GetTargetCount(static_cast<const Phrase&>(*statistics->targetPhrase), i);
00204 }
00205 }
00206 }
00207 }
00208
// Turn the collected count statistics into a TargetPhraseCollection,
// computing four combined scores per target phrase.  Takes ownership
// of allStats and deletes it before returning.
TargetPhraseCollection::shared_ptr
PhraseDictionaryMultiModelCounts::
CreateTargetPhraseCollectionCounts(const Phrase &src, vector<float> &fs, map<string,multiModelCountsStats*>* allStats, vector<vector<float> > &multimodelweights) const
{
  TargetPhraseCollection::shared_ptr ret(new TargetPhraseCollection);
  for ( map< string, multiModelCountsStats*>::const_iterator iter = allStats->begin(); iter != allStats->end(); ++iter ) {

    multiModelCountsStats * statistics = iter->second;

    // lexical weights cannot be computed without word alignments
    if (statistics->targetPhrase->GetAlignTerm().GetSize() == 0) {
      UTIL_THROW(util::Exception, " alignment information empty\ncount-tables need to include alignment information for computation of lexical weights.\nUse --phrase-word-alignment during training; for on-disk tables, also set -alignment-info when creating on-disk tables.");
    }

    try {
      pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(src, static_cast<const Phrase&>(*statistics->targetPhrase), statistics->targetPhrase->GetAlignTerm());
      vector< set<size_t> > alignedToT = alignment.first;
      vector< set<size_t> > alignedToS = alignment.second;
      // weighted lexical translation scores, e2f and f2e
      double lexst = ComputeWeightedLexicalTranslation(static_cast<const Phrase&>(*statistics->targetPhrase), src, alignedToS, m_lexTable_e2f, multimodelweights[1], false );
      double lexts = ComputeWeightedLexicalTranslation(src, static_cast<const Phrase&>(*statistics->targetPhrase), alignedToT, m_lexTable_f2e, multimodelweights[3], true );

      // [0]: phrase score from joint/target counts, [2]: from
      // joint/source counts; [1]/[3]: the lexical scores above
      Scores scoreVector(4);
      scoreVector[0] = FloorScore(TransformScore(m_combineFunction(statistics->fst, statistics->ft, multimodelweights[0])));
      scoreVector[1] = FloorScore(TransformScore(lexst));
      scoreVector[2] = FloorScore(TransformScore(m_combineFunction(statistics->fst, fs, multimodelweights[2])));
      scoreVector[3] = FloorScore(TransformScore(lexts));

      statistics->targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);

      // re-evaluate with this feature function after assigning its scores
      // (mirrors the evaluation done in CollectSufficientStats)
      vector<FeatureFunction*> pd_feature;
      pd_feature.push_back(const_cast<PhraseDictionaryMultiModelCounts*>(this));
      const vector<FeatureFunction*> pd_feature_const(pd_feature);
      statistics->targetPhrase->EvaluateInIsolation(src, pd_feature_const);
    } catch (AlignmentException& e) {
      // inconsistent alignment: drop this phrase pair entirely
      continue;
    }

    ret->Add(new TargetPhrase(*statistics->targetPhrase));
  }

  RemoveAllInMap(*allStats);
  delete allStats;
  return ret;
}
00253
00254
00255 float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, size_t modelIndex) const
00256 {
00257
00258 const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
00259 TargetPhraseCollection::shared_ptr ret_raw = pd.GetTargetPhraseCollectionLEGACY(target);
00260
00261
00262 if (ret_raw && ret_raw->GetSize() > 0) {
00263 const TargetPhrase * targetPhrase = *(ret_raw->begin());
00264 return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd)[0]);
00265 }
00266
00267
00268 else return 0;
00269 }
00270
00271
00272 pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> PhraseDictionaryMultiModelCounts::GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const
00273 {
00274
00275 size_t tsize = phraseT.GetSize();
00276 size_t ssize = phraseS.GetSize();
00277 AlignVector alignedToT (tsize);
00278 AlignVector alignedToS (ssize);
00279 AlignmentInfo::const_iterator iter;
00280
00281 for (iter = alignment.begin(); iter != alignment.end(); ++iter) {
00282 const pair<size_t,size_t> &alignPair = *iter;
00283 size_t s = alignPair.first;
00284 size_t t = alignPair.second;
00285 if (s >= ssize || t >= tsize) {
00286 cerr << "Error: inconsistent alignment for phrase pair: " << phraseS << " - " << phraseT << endl;
00287 cerr << "phrase pair will be discarded" << endl;
00288 throw AlignmentException();
00289 }
00290 alignedToT[t].insert( s );
00291 alignedToS[s].insert( t );
00292 }
00293 return make_pair(alignedToT,alignedToS);
00294 }
00295
00296
00297 double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, vector<float> &multimodelweights, bool is_input) const
00298 {
00299
00300
00301 double lexScore = 1.0;
00302 Word null;
00303 if (is_input) {
00304 null.CreateFromString(Input, m_input, "NULL", false);
00305 } else {
00306 null.CreateFromString(Output, m_output, "NULL", false);
00307 }
00308
00309
00310 for(size_t ti=0; ti<alignment.size(); ti++) {
00311 const set< size_t > & srcIndices = alignment[ ti ];
00312 Word t_word = phraseT.GetWord(ti);
00313
00314 if (srcIndices.empty()) {
00315
00316 lexScore *= GetLexicalProbability( null, t_word, tables, multimodelweights );
00317 } else {
00318
00319 double thisWordScore = 0;
00320 for (set< size_t >::const_iterator si(srcIndices.begin()); si != srcIndices.end(); ++si) {
00321 Word s_word = phraseS.GetWord(*si);
00322 thisWordScore += GetLexicalProbability( s_word, t_word, tables, multimodelweights );
00323 }
00324 lexScore *= thisWordScore / srcIndices.size();
00325 }
00326 }
00327 return lexScore;
00328 }
00329
00330
// Precompute, for every target position of phraseT, the per-model
// (joint count, marginal count) vector pairs of all source words it is
// aligned to (or of NULL if unaligned).  The cache is later re-scored
// under many candidate weight vectors by
// ComputeWeightedLexicalTranslationFromCache() without touching the
// lexical tables again.
lexicalCache PhraseDictionaryMultiModelCounts::CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const vector<lexicalTable*> &tables, bool is_input )
{
  // the NULL word stands in for unaligned target words
  Word null;
  if (is_input) {
    null.CreateFromString(Input, m_input, "NULL", false);
  } else {
    null.CreateFromString(Output, m_output, "NULL", false);
  }

  lexicalCache ret;

  // one entry per target position, in order
  for(size_t ti=0; ti<alignment.size(); ti++) {
    const set< size_t > & srcIndices = alignment[ ti ];
    Word t_word = phraseT.GetWord(ti);

    vector<lexicalPair> ti_vector;
    if (srcIndices.empty()) {
      // unaligned: cache counts against the NULL word
      vector<float> joint_count (m_numModels);
      vector<float> marginals (m_numModels);

      FillLexicalCountsJoint(null, t_word, joint_count, tables);
      FillLexicalCountsMarginal(null, marginals, tables);

      ti_vector.push_back(make_pair(joint_count, marginals));

    } else {
      // one (joint, marginal) pair per aligned source word
      for (set< size_t >::const_iterator si(srcIndices.begin()); si != srcIndices.end(); ++si) {
        Word s_word = phraseS.GetWord(*si);
        vector<float> joint_count (m_numModels);
        vector<float> marginals (m_numModels);

        FillLexicalCountsJoint(s_word, t_word, joint_count, tables);
        FillLexicalCountsMarginal(s_word, marginals, tables);

        ti_vector.push_back(make_pair(joint_count, marginals));
      }
    }
    ret.push_back(ti_vector);
  }
  return ret;
}
00376
00377
00378 double PhraseDictionaryMultiModelCounts::ComputeWeightedLexicalTranslationFromCache( lexicalCache &cache, vector<float> &weights ) const
00379 {
00380
00381
00382 double lexScore = 1.0;
00383
00384 for (lexicalCache::const_iterator iter = cache.begin(); iter != cache.end(); ++iter) {
00385 vector<lexicalPair> t_vector = *iter;
00386 double thisWordScore = 0;
00387 for ( vector<lexicalPair>::const_iterator iter2 = t_vector.begin(); iter2 != t_vector.end(); ++iter2) {
00388 vector<float> joint_count = iter2->first;
00389 vector<float> marginal = iter2->second;
00390 thisWordScore += m_combineFunction(joint_count, marginal, weights);
00391 }
00392 lexScore *= thisWordScore / t_vector.size();
00393 }
00394 return lexScore;
00395 }
00396
00397
00398 double PhraseDictionaryMultiModelCounts::GetLexicalProbability( Word &wordS, Word &wordT, const vector<lexicalTable*> &tables, vector<float> &multimodelweights ) const
00399 {
00400 vector<float> joint_count (m_numModels);
00401 vector<float> marginals (m_numModels);
00402
00403 FillLexicalCountsJoint(wordS, wordT, joint_count, tables);
00404 FillLexicalCountsMarginal(wordS, marginals, tables);
00405
00406 double lexProb = m_combineFunction(joint_count, marginals, multimodelweights);
00407
00408 return lexProb;
00409 }
00410
00411
00412 void PhraseDictionaryMultiModelCounts::FillLexicalCountsJoint(Word &wordS, Word &wordT, vector<float> &count, const vector<lexicalTable*> &tables) const
00413 {
00414 for (size_t i=0; i < m_numModels; i++) {
00415 lexicalMapJoint::iterator joint_s = tables[i]->joint.find( wordS );
00416 if (joint_s == tables[i]->joint.end()) count[i] = 0.0;
00417 else {
00418 lexicalMap::iterator joint_t = joint_s->second.find( wordT );
00419 if (joint_t == joint_s->second.end()) count[i] = 0.0;
00420 else count[i] = joint_t->second;
00421 }
00422 }
00423 }
00424
00425 void PhraseDictionaryMultiModelCounts::FillLexicalCountsMarginal(Word &wordS, vector<float> &count, const vector<lexicalTable*> &tables) const
00426 {
00427 for (size_t i=0; i < m_numModels; i++) {
00428 lexicalMap::iterator marginal_s = tables[i]->marginal.find( wordS );
00429 if (marginal_s == tables[i]->marginal.end()) count[i] = 0.0;
00430 else count[i] = marginal_s->second;
00431 }
00432 }
00433
00434
00435 void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexicalTable* ltable)
00436 {
00437
00438 cerr << "Loading lexical translation table from " << fileName;
00439 ifstream inFile;
00440 inFile.open(fileName.c_str());
00441 if (inFile.fail()) {
00442 cerr << " - ERROR: could not open file\n";
00443 exit(1);
00444 }
00445 istream *inFileP = &inFile;
00446
00447 int i=0;
00448 string line;
00449
00450 while(getline(*inFileP, line)) {
00451 i++;
00452 if (i%100000 == 0) cerr << "." << flush;
00453
00454 const vector<string> token = util::tokenize( line );
00455 if (token.size() != 4) {
00456 cerr << "line " << i << " in " << fileName
00457 << " has wrong number of tokens, skipping:\n"
00458 << token.size() << " " << token[0] << " " << line << endl;
00459 continue;
00460 }
00461
00462 double joint = atof( token[2].c_str() );
00463 double marginal = atof( token[3].c_str() );
00464 Word wordT, wordS;
00465 wordT.CreateFromString(Output, m_output, token[0], false);
00466 wordS.CreateFromString(Input, m_input, token[1], false);
00467 ltable->joint[ wordS ][ wordT ] = joint;
00468 ltable->marginal[ wordS ] = marginal;
00469 }
00470 cerr << endl;
00471
00472 }
00473
00474
00475 #ifdef WITH_DLIB
00476 vector<float> PhraseDictionaryMultiModelCounts::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
00477 {
00478
00479 map<pair<string, string>, size_t> phrase_pair_map;
00480
00481 for ( vector<pair<string, string> >::const_iterator iter = phrase_pair_vector.begin(); iter != phrase_pair_vector.end(); ++iter ) {
00482 phrase_pair_map[*iter] += 1;
00483 }
00484
00485 vector<multiModelCountsStatsOptimization*> optimizerStats;
00486
00487 for ( map<pair<string, string>, size_t>::iterator iter = phrase_pair_map.begin(); iter != phrase_pair_map.end(); ++iter ) {
00488
00489 pair<string, string> phrase_pair = iter->first;
00490 string source_string = phrase_pair.first;
00491 string target_string = phrase_pair.second;
00492
00493 vector<float> fs(m_numModels);
00494 map<string,multiModelCountsStats*>* allStats = new(map<string,multiModelCountsStats*>);
00495
00496 Phrase sourcePhrase(0);
00497 sourcePhrase.CreateFromString(Input, m_input, source_string, NULL);
00498
00499 CollectSufficientStats(sourcePhrase, fs, allStats);
00500
00501
00502 if (allStats->find(target_string) == allStats->end()) {
00503 RemoveAllInMap(*allStats);
00504 delete allStats;
00505 continue;
00506 }
00507
00508 multiModelCountsStatsOptimization * targetStats = new multiModelCountsStatsOptimization();
00509 targetStats->targetPhrase = new TargetPhrase(*(*allStats)[target_string]->targetPhrase);
00510 targetStats->fs = fs;
00511 targetStats->fst = (*allStats)[target_string]->fst;
00512 targetStats->ft = (*allStats)[target_string]->ft;
00513 targetStats->f = iter->second;
00514
00515 try {
00516 pair<vector< set<size_t> >, vector< set<size_t> > > alignment = GetAlignmentsForLexWeights(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), targetStats->targetPhrase->GetAlignTerm());
00517 targetStats->lexCachee2f = CacheLexicalStats(static_cast<const Phrase&>(*targetStats->targetPhrase), sourcePhrase, alignment.second, m_lexTable_e2f, false );
00518 targetStats->lexCachef2e = CacheLexicalStats(sourcePhrase, static_cast<const Phrase&>(*targetStats->targetPhrase), alignment.first, m_lexTable_f2e, true );
00519
00520 optimizerStats.push_back(targetStats);
00521 } catch (AlignmentException& e) {}
00522
00523 RemoveAllInMap(*allStats);
00524 delete allStats;
00525 }
00526
00527 Sentence sentence;
00528 CleanUpAfterSentenceProcessing(sentence);
00529
00530 vector<float> ret (m_numModels*4);
00531 for (size_t iFeature=0; iFeature < 4; iFeature++) {
00532
00533 CrossEntropyCounts * ObjectiveFunction = new CrossEntropyCounts(optimizerStats, this, iFeature);
00534
00535 vector<float> weight_vector = Optimize(ObjectiveFunction, m_numModels);
00536
00537 if (m_mode == "interpolate") {
00538 weight_vector = normalizeWeights(weight_vector);
00539 } else if (m_mode == "instance_weighting") {
00540 float first_value = weight_vector[0];
00541 for (size_t i=0; i < m_numModels; i++) {
00542 weight_vector[i] = weight_vector[i]/first_value;
00543 }
00544 }
00545 cerr << "Weight vector for feature " << iFeature << ": ";
00546 for (size_t i=0; i < m_numModels; i++) {
00547 ret[(iFeature*m_numModels)+i] = weight_vector[i];
00548 cerr << weight_vector[i] << " ";
00549 }
00550 cerr << endl;
00551 delete ObjectiveFunction;
00552 }
00553
00554 RemoveAllInColl(optimizerStats);
00555 return ret;
00556
00557 }
00558
// dlib objective function: frequency-weighted cross-entropy of feature
// m_iFeature over the cached optimizer statistics, as a function of
// the candidate model weight vector `arg`.
double CrossEntropyCounts::operator() ( const dlib::matrix<double,0,1>& arg) const
{
  double total = 0.0;
  double n = 0.0;
  std::vector<float> weight_vector (m_model->m_numModels);

  // copy the dlib vector into the weight format used by the model
  for (int i=0; i < arg.nr(); i++) {
    weight_vector[i] = arg(i);
  }
  if (m_model->m_mode == "interpolate") {
    weight_vector = m_model->normalizeWeights(weight_vector);
  }

  for ( std::vector<multiModelCountsStatsOptimization*>::const_iterator iter = m_optimizerStats.begin(); iter != m_optimizerStats.end(); ++iter ) {
    multiModelCountsStatsOptimization* statistics = *iter;
    size_t f = statistics->f; // occurrence count of this phrase pair

    // features 0/2: count-based phrase probabilities (joint/target and
    // joint/source); features 1/3: cached lexical weights (e2f, f2e)
    double score;
    if (m_iFeature == 0) {
      score = m_model->m_combineFunction(statistics->fst, statistics->ft, weight_vector);
    } else if (m_iFeature == 1) {
      score = m_model->ComputeWeightedLexicalTranslationFromCache(statistics->lexCachee2f, weight_vector);
    } else if (m_iFeature == 2) {
      score = m_model->m_combineFunction(statistics->fst, statistics->fs, weight_vector);
    } else if (m_iFeature == 3) {
      score = m_model->ComputeWeightedLexicalTranslationFromCache(statistics->lexCachef2e, weight_vector);
    } else {
      score = 0;
      UTIL_THROW(util::Exception, "Trying to optimize feature that I don't know. Aborting");
    }
    // dividing by TransformScore(2) presumably rescales the transformed
    // score to base-2 (cross-entropy in bits) -- TODO confirm
    total -= (FloorScore(TransformScore(score))/TransformScore(2))*f;
    n += f;
  }
  // average per phrase-pair occurrence
  return total/n;
}
00594
00595 #endif
00596
00597
// Combine per-model counts by instance weighting: weight and sum the
// joint counts and the marginals across models, then take their ratio.
double InstanceWeighting(vector<float> &joint_counts, vector<float> &marginals, vector<float> &multimodelweights)
{
  const double weighted_joint = inner_product(joint_counts.begin(), joint_counts.end(), multimodelweights.begin(), 0.0);
  const double weighted_marginal = inner_product(marginals.begin(), marginals.end(), multimodelweights.begin(), 0.0);

  // guard against division by zero when the weighted marginal vanishes
  return (weighted_marginal == 0) ? 0 : weighted_joint / weighted_marginal;
}
00610
00611
00612
00613
// Combine per-model counts by linear interpolation: each model
// contributes its probability joint/marginal (or 0 where the marginal
// is zero, i.e. the event is unseen), weighted by the model weight.
double LinearInterpolationFromCounts(vector<float> &joint_counts, vector<float> &marginals, vector<float> &multimodelweights)
{
  double weighted_sum = 0.0;

  for (size_t i = 0; i < marginals.size(); i++) {
    // per-model probability, kept in float exactly as the original
    // temporary vector did
    float p = 0;
    if (marginals[i] != 0) {
      p = joint_counts[i] / marginals[i];
    }
    weighted_sum = weighted_sum + p * multimodelweights[i];
  }

  return weighted_sum;
}
00629
00630 }