Moses: /disk4/html/www/moses/doxygen/mosesdecoder/phrase-extract/ExtractionPhrasePair.cpp Source File

00001 /***********************************************************************
00002   Moses - factored phrase-based language decoder
00003   Copyright (C) 2009 University of Edinburgh
00004 
00005   This library is free software; you can redistribute it and/or
00006   modify it under the terms of the GNU Lesser General Public
00007   License as published by the Free Software Foundation; either
00008   version 2.1 of the License, or (at your option) any later version.
00009 
00010   This library is distributed in the hope that it will be useful,
00011   but WITHOUT ANY WARRANTY; without even the implied warranty of
00012   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013   Lesser General Public License for more details.
00014 
00015   You should have received a copy of the GNU Lesser General Public
00016   License along with this library; if not, write to the Free Software
00017   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
00018  ***********************************************************************/
00019 
00020 #include <sstream>
00021 #include "ExtractionPhrasePair.h"
00022 #include "tables-core.h"
00023 #include "score.h"
00024 #include "moses/Util.h"
00025 
00026 #include <cstdlib>
00027 
00028 using namespace std;
00029 
00030 
00031 namespace MosesTraining
00032 {
00033 
00034 
00035 extern Vocabulary vcbT;
00036 extern Vocabulary vcbS;
00037 
00038 extern bool hierarchicalFlag;
00039 
00040 
00041 ExtractionPhrasePair::ExtractionPhrasePair( const PHRASE *phraseSource,
00042     const PHRASE *phraseTarget,
00043     ALIGNMENT *targetToSourceAlignment,
00044     float count, float pcfgSum ) :
00045   m_phraseSource(phraseSource),
00046   m_phraseTarget(phraseTarget),
00047   m_count(count),
00048   m_pcfgSum(pcfgSum)
00049 {
00050   assert(!phraseSource->empty());
00051 
00052   m_count = count;
00053   m_pcfgSum = pcfgSum;
00054 
00055   std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
00056     m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
00057 
00058   m_lastTargetToSourceAlignment = insertedAlignment.first;
00059   m_lastCount = m_count;
00060   m_lastPcfgSum = m_pcfgSum;
00061 
00062   m_isValid = true;
00063 }
00064 
00065 
00066 ExtractionPhrasePair::~ExtractionPhrasePair( )
00067 {
00068   Clear();
00069 }
00070 
00071 
00072 // return value: true if the given alignment was seen for the first time and thus will be stored,
00073 //               false if it was present already (the pointer may thus be deleted(
00074 bool ExtractionPhrasePair::Add( ALIGNMENT *targetToSourceAlignment,
00075                                 float count, float pcfgSum )
00076 {
00077   m_count += count;
00078   m_pcfgSum += pcfgSum;
00079 
00080   m_lastCount = count;
00081   m_lastPcfgSum = pcfgSum;
00082 
00083   std::map<ALIGNMENT*,float>::iterator iter = m_lastTargetToSourceAlignment;
00084   if ( *(iter->first) == *targetToSourceAlignment ) {
00085     iter->second += count;
00086     return false;
00087   } else {
00088     std::pair< std::map<ALIGNMENT*,float>::iterator, bool > insertedAlignment =
00089       m_targetToSourceAlignments.insert( std::pair<ALIGNMENT*,float>(targetToSourceAlignment,count) );
00090     if ( !insertedAlignment.second ) {
00091       // the alignment already exists: increment count
00092       insertedAlignment.first->second += count;
00093       return false;
00094     }
00095     m_lastTargetToSourceAlignment = insertedAlignment.first;
00096   }
00097 
00098   return true;
00099 }
00100 
00101 
00102 void ExtractionPhrasePair::IncrementPrevious( float count, float pcfgSum )
00103 {
00104   m_count += count;
00105   m_pcfgSum += pcfgSum;
00106   m_lastTargetToSourceAlignment->second += count;
00107   // properties
00108   for ( std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter=m_properties.begin();
00109         iter !=m_properties.end(); ++iter ) {
00110     LAST_PROPERTY_VALUE *lastPropertyValue = (iter->second).second;
00111     (*lastPropertyValue)->second += count;
00112   }
00113 
00114   m_lastCount = count;
00115   m_lastPcfgSum = pcfgSum;
00116 }
00117 
00118 
00119 // Check for lexical match
00120 // and in case of SCFG rules for equal non-terminal alignment.
00121 bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
00122                                     const PHRASE *otherPhraseTarget,
00123                                     ALIGNMENT *otherTargetToSourceAlignment ) const
00124 {
00125   if (*otherPhraseTarget != *m_phraseTarget) {
00126     return false;
00127   }
00128   if (*otherPhraseSource != *m_phraseSource) {
00129     return false;
00130   }
00131 
00132   return MatchesAlignment( otherTargetToSourceAlignment );
00133 }
00134 
00135 // Check for lexical match
00136 // and in case of SCFG rules for equal non-terminal alignment.
00137 // Set boolean indicators.
00138 // (Note that we check in the order: target - source - alignment
00139 //  and do not touch the subsequent boolean indicators once a previous one has been set to false.)
00140 bool ExtractionPhrasePair::Matches( const PHRASE *otherPhraseSource,
00141                                     const PHRASE *otherPhraseTarget,
00142                                     ALIGNMENT *otherTargetToSourceAlignment,
00143                                     bool &sourceMatch,
00144                                     bool &targetMatch,
00145                                     bool &alignmentMatch ) const
00146 {
00147   if (*otherPhraseSource != *m_phraseSource) {
00148     sourceMatch = false;
00149     return false;
00150   } else {
00151     sourceMatch = true;
00152   }
00153   if (*otherPhraseTarget != *m_phraseTarget) {
00154     targetMatch = false;
00155     return false;
00156   } else {
00157     targetMatch = true;
00158   }
00159   if ( !MatchesAlignment(otherTargetToSourceAlignment) ) {
00160     alignmentMatch = false;
00161     return false;
00162   } else {
00163     alignmentMatch = true;
00164   }
00165   return true;
00166 }
00167 
00168 // Check for equal non-terminal alignment in case of SCFG rules.
00169 // Precondition: otherTargetToSourceAlignment has the same size as m_targetToSourceAlignments.begin()->first
00170 bool ExtractionPhrasePair::MatchesAlignment( ALIGNMENT *otherTargetToSourceAlignment ) const
00171 {
00172   if (!hierarchicalFlag) return true;
00173 
00174   // all or none of the phrasePair's word alignment matrices match, so just pick one
00175   const ALIGNMENT *thisTargetToSourceAlignment = m_targetToSourceAlignments.begin()->first;
00176 
00177   assert(m_phraseTarget->size() == thisTargetToSourceAlignment->size() + 1);
00178   assert(thisTargetToSourceAlignment->size() == otherTargetToSourceAlignment->size());
00179 
00180   // loop over all symbols but the left hand side of the rule
00181   for (size_t i=0; i<thisTargetToSourceAlignment->size()-1; ++i) {
00182     if (isNonTerminal( vcbT.getWord( m_phraseTarget->at(i) ) )) {
00183       size_t thisAlign  = *(thisTargetToSourceAlignment->at(i).begin());
00184       size_t otherAlign = *(otherTargetToSourceAlignment->at(i).begin());
00185 
00186       if (thisTargetToSourceAlignment->at(i).size() != 1 ||
00187           otherTargetToSourceAlignment->at(i).size() != 1 ||
00188           thisAlign != otherAlign) {
00189         return false;
00190       }
00191     }
00192   }
00193 
00194   return true;
00195 }
00196 
00197 void ExtractionPhrasePair::Clear()
00198 {
00199   delete m_phraseSource;
00200   delete m_phraseTarget;
00201 
00202   m_count = 0.0f;
00203   m_pcfgSum = 0.0f;
00204 
00205   for ( std::map<ALIGNMENT*,float>::iterator iter=m_targetToSourceAlignments.begin();
00206         iter!=m_targetToSourceAlignments.end(); ++iter) {
00207     delete iter->first;
00208   }
00209   m_targetToSourceAlignments.clear();
00210 
00211   for ( std::map<std::string, std::pair< PROPERTY_VALUES*, LAST_PROPERTY_VALUE* > >::iterator iter=m_properties.begin();
00212         iter!=m_properties.end(); ++iter) {
00213     delete (iter->second).second;
00214     delete (iter->second).first;
00215   }
00216   m_properties.clear();
00217 
00218   m_lastCount = 0.0f;
00219   m_lastPcfgSum = 0.0f;
00220   m_lastTargetToSourceAlignment = m_targetToSourceAlignments.begin();
00221 
00222   m_isValid = false;
00223 }
00224 
00225 
00226 void ExtractionPhrasePair::AddProperties( const std::string &propertiesString, float count )
00227 {
00228   if (propertiesString.empty()) {
00229     return;
00230   }
00231 
00232   vector<std::string> toks;
00233   Moses::TokenizeMultiCharSeparator(toks, propertiesString, "{{");
00234   for (size_t i = 1; i < toks.size(); ++i) {
00235     std::string &tok = toks[i];
00236     if (tok.empty()) {
00237       continue;
00238     }
00239     size_t endPos = tok.rfind("}");
00240     tok = tok.substr(0, endPos - 1);
00241 
00242     vector<std::string> keyValue = Moses::TokenizeFirstOnly(tok, " ");
00243     if (keyValue.size() == 2) {
00244       AddProperty(keyValue[0], keyValue[1], count);
00245     }
00246   }
00247 }
00248 
00249 
00250 const ALIGNMENT *ExtractionPhrasePair::FindBestAlignmentTargetToSource() const
00251 {
00252   float bestAlignmentCount = -1;
00253 
00254   std::map<ALIGNMENT*,float>::const_iterator bestAlignment = m_targetToSourceAlignments.end();
00255 
00256   for (std::map<ALIGNMENT*,float>::const_iterator iter=m_targetToSourceAlignments.begin();
00257        iter!=m_targetToSourceAlignments.end(); ++iter) {
00258     if ( (iter->second > bestAlignmentCount) ||
00259          ( (iter->second == bestAlignmentCount) &&
00260            (*(iter->first) > *(bestAlignment->first)) ) ) {
00261       bestAlignmentCount = iter->second;
00262       bestAlignment = iter;
00263     }
00264   }
00265 
00266   if ( bestAlignment == m_targetToSourceAlignments.end()) {
00267     return NULL;
00268   }
00269 
00270   return bestAlignment->first;
00271 }
00272 
00273 
00274 const std::string *ExtractionPhrasePair::FindBestPropertyValue(const std::string &key) const
00275 {
00276   float bestPropertyCount = -1;
00277 
00278   const PROPERTY_VALUES *allPropertyValues = GetProperty( key );
00279   if ( allPropertyValues == NULL ) {
00280     return NULL;
00281   }
00282 
00283   PROPERTY_VALUES::const_iterator bestPropertyValue = allPropertyValues->end();
00284 
00285   for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
00286        iter!=allPropertyValues->end(); ++iter) {
00287     if ( (iter->second > bestPropertyCount) ||
00288          ( (iter->second == bestPropertyCount) &&
00289            (iter->first > bestPropertyValue->first) ) ) {
00290       bestPropertyCount = iter->second;
00291       bestPropertyValue = iter;
00292     }
00293   }
00294 
00295   if ( bestPropertyValue == allPropertyValues->end()) {
00296     return NULL;
00297   }
00298 
00299   return &(bestPropertyValue->first);
00300 }
00301 
00302 
00303 std::string ExtractionPhrasePair::CollectAllPropertyValues(const std::string &key) const
00304 {
00305   const PROPERTY_VALUES *allPropertyValues = GetProperty( key );
00306 
00307   if ( allPropertyValues == NULL ) {
00308     return "";
00309   }
00310 
00311   std::ostringstream oss;
00312   for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
00313        iter!=allPropertyValues->end(); ++iter) {
00314     if (!(iter->first).empty()) {
00315       if (iter!=allPropertyValues->begin()) {
00316         oss << " ";
00317       }
00318       oss << iter->first;
00319       oss << " ";
00320       oss << iter->second;
00321     }
00322   }
00323 
00324   std::string allPropertyValuesString(oss.str());
00325   return allPropertyValuesString;
00326 }
00327 
00328 
00329 std::string ExtractionPhrasePair::CollectAllLabelsSeparateLHSAndRHS(const std::string& propertyKey,
00330     std::set<std::string>& labelSet,
00331     boost::unordered_map<std::string,float>& countsLabelsLHS,
00332     boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >& jointCountsRulesTargetLHSAndLabelsLHS,
00333     Vocabulary &vcbT) const
00334 {
00335   const PROPERTY_VALUES *allPropertyValues = GetProperty( propertyKey );
00336 
00337   if ( allPropertyValues == NULL ) {
00338     return "";
00339   }
00340 
00341   std::string lhs="", rhs="", currentRhs="";
00342   float currentRhsCount = 0.0;
00343   std::list< std::pair<std::string,float> > lhsGivenCurrentRhsCounts;
00344 
00345   std::ostringstream oss;
00346   for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
00347        iter!=allPropertyValues->end(); ++iter) {
00348 
00349     size_t space = (iter->first).find_last_of(' ');
00350     if ( space == string::npos ) {
00351       lhs = iter->first;
00352       rhs.clear();
00353     } else {
00354       lhs = (iter->first).substr(space+1);
00355       rhs = (iter->first).substr(0,space);
00356     }
00357 
00358     labelSet.insert(lhs);
00359 
00360     if ( rhs.compare(currentRhs) ) {
00361 
00362       if ( iter!=allPropertyValues->begin() ) {
00363         if ( !currentRhs.empty() ) {
00364           istringstream tokenizer(currentRhs);
00365           std::string rhsLabel;
00366           while ( tokenizer.peek() != EOF ) {
00367             tokenizer >> rhsLabel;
00368             labelSet.insert(rhsLabel);
00369           }
00370           oss << " " << currentRhs << " " << currentRhsCount;
00371         }
00372         if ( lhsGivenCurrentRhsCounts.size() > 0 ) {
00373           if ( !currentRhs.empty() ) {
00374             oss << " " << lhsGivenCurrentRhsCounts.size();
00375           }
00376           for ( std::list< std::pair<std::string,float> >::const_iterator iter2=lhsGivenCurrentRhsCounts.begin();
00377                 iter2!=lhsGivenCurrentRhsCounts.end(); ++iter2 ) {
00378             oss << " " << iter2->first << " " << iter2->second;
00379 
00380             // update countsLabelsLHS and jointCountsRulesTargetLHSAndLabelsLHS
00381             std::string ruleTargetLhs = vcbT.getWord(m_phraseTarget->back());
00382             ruleTargetLhs.erase(ruleTargetLhs.begin());  // strip square brackets
00383             ruleTargetLhs.erase(ruleTargetLhs.size()-1);
00384 
00385             std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedCountsLabelsLHS =
00386               countsLabelsLHS.insert(std::pair<std::string,float>(iter2->first,iter2->second));
00387             if (!insertedCountsLabelsLHS.second) {
00388               (insertedCountsLabelsLHS.first)->second += iter2->second;
00389             }
00390 
00391             boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >::iterator jointCountsRulesTargetLHSAndLabelsLHSIter =
00392               jointCountsRulesTargetLHSAndLabelsLHS.find(ruleTargetLhs);
00393             if ( jointCountsRulesTargetLHSAndLabelsLHSIter == jointCountsRulesTargetLHSAndLabelsLHS.end() ) {
00394               boost::unordered_map<std::string,float>* jointCounts = new boost::unordered_map<std::string,float>;
00395               jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
00396               jointCountsRulesTargetLHSAndLabelsLHS.insert(std::pair<std::string,boost::unordered_map<std::string,float>* >(ruleTargetLhs,jointCounts));
00397             } else {
00398               boost::unordered_map<std::string,float>* jointCounts = jointCountsRulesTargetLHSAndLabelsLHSIter->second;
00399               std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedJointCounts =
00400                 jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
00401               if (!insertedJointCounts.second) {
00402                 (insertedJointCounts.first)->second += iter2->second;
00403               }
00404             }
00405 
00406           }
00407         }
00408 
00409         lhsGivenCurrentRhsCounts.clear();
00410       }
00411 
00412       currentRhsCount = 0.0;
00413       currentRhs = rhs;
00414     }
00415 
00416     currentRhsCount += iter->second;
00417     lhsGivenCurrentRhsCounts.push_back( std::pair<std::string,float>(lhs,iter->second) );
00418   }
00419 
00420   if ( !currentRhs.empty() ) {
00421     istringstream tokenizer(currentRhs);
00422     std::string rhsLabel;
00423     while ( tokenizer.peek() != EOF ) {
00424       tokenizer >> rhsLabel;
00425       labelSet.insert(rhsLabel);
00426     }
00427     oss << " " << currentRhs << " " << currentRhsCount;
00428   }
00429   if ( lhsGivenCurrentRhsCounts.size() > 0 ) {
00430     if ( !currentRhs.empty() ) {
00431       oss << " " << lhsGivenCurrentRhsCounts.size();
00432     }
00433     for ( std::list< std::pair<std::string,float> >::const_iterator iter2=lhsGivenCurrentRhsCounts.begin();
00434           iter2!=lhsGivenCurrentRhsCounts.end(); ++iter2 ) {
00435       oss << " " << iter2->first << " " << iter2->second;
00436 
00437       // update countsLabelsLHS and jointCountsRulesTargetLHSAndLabelsLHS
00438       std::string ruleTargetLhs = vcbT.getWord(m_phraseTarget->back());
00439       ruleTargetLhs.erase(ruleTargetLhs.begin());  // strip square brackets
00440       ruleTargetLhs.erase(ruleTargetLhs.size()-1);
00441 
00442       std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedCountsLabelsLHS =
00443         countsLabelsLHS.insert(std::pair<std::string,float>(iter2->first,iter2->second));
00444       if (!insertedCountsLabelsLHS.second) {
00445         (insertedCountsLabelsLHS.first)->second += iter2->second;
00446       }
00447 
00448       boost::unordered_map<std::string, boost::unordered_map<std::string,float>* >::iterator jointCountsRulesTargetLHSAndLabelsLHSIter =
00449         jointCountsRulesTargetLHSAndLabelsLHS.find(ruleTargetLhs);
00450       if ( jointCountsRulesTargetLHSAndLabelsLHSIter == jointCountsRulesTargetLHSAndLabelsLHS.end() ) {
00451         boost::unordered_map<std::string,float>* jointCounts = new boost::unordered_map<std::string,float>;
00452         jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
00453         jointCountsRulesTargetLHSAndLabelsLHS.insert(std::pair<std::string,boost::unordered_map<std::string,float>* >(ruleTargetLhs,jointCounts));
00454       } else {
00455         boost::unordered_map<std::string,float>* jointCounts = jointCountsRulesTargetLHSAndLabelsLHSIter->second;
00456         std::pair< boost::unordered_map<std::string,float>::iterator, bool > insertedJointCounts =
00457           jointCounts->insert(std::pair<std::string,float>(iter2->first,iter2->second));
00458         if (!insertedJointCounts.second) {
00459           (insertedJointCounts.first)->second += iter2->second;
00460         }
00461       }
00462 
00463     }
00464   }
00465 
00466   std::string allPropertyValuesString(oss.str());
00467   return allPropertyValuesString;
00468 }
00469 
00470 
00471 void ExtractionPhrasePair::CollectAllPhraseOrientations(const std::string &key,
00472     const std::vector<float> &orientationClassPriorsL2R,
00473     const std::vector<float> &orientationClassPriorsR2L,
00474     double smoothingFactor,
00475     std::ostream &out) const
00476 {
00477   assert(orientationClassPriorsL2R.size()==4 && orientationClassPriorsR2L.size()==4); // mono swap dleft dright
00478 
00479   const PROPERTY_VALUES *allPropertyValues = GetProperty( key );
00480 
00481   if ( allPropertyValues == NULL ) {
00482     return;
00483   }
00484 
00485   // bidirectional MSLR phrase orientation with 2x4 orientation classes:
00486   // mono swap dright dleft
00487   std::vector<float> orientationClassCountSumL2R(4,0);
00488   std::vector<float> orientationClassCountSumR2L(4,0);
00489 
00490   for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
00491        iter!=allPropertyValues->end(); ++iter) {
00492     std::string l2rOrientationClass, r2lOrientationClass;
00493     try {
00494       istringstream tokenizer(iter->first);
00495       tokenizer >> l2rOrientationClass;
00496       tokenizer >> r2lOrientationClass;
00497       if ( tokenizer.peek() != EOF ) {
00498         UTIL_THROW(util::Exception, "ExtractionPhrasePair"
00499                    << ": Collecting phrase orientations failed. "
00500                    << "Too many tokens?");
00501       }
00502     } catch (const std::exception &e) {
00503       UTIL_THROW(util::Exception, "ExtractionPhrasePair"
00504                  << ": Collecting phrase orientations failed. "
00505                  << "Flawed property value in extract file?");
00506     }
00507 
00508     int l2rOrientationClassId = -1;
00509     if (!l2rOrientationClass.compare("mono")) {
00510       l2rOrientationClassId = 0;
00511     }
00512     if (!l2rOrientationClass.compare("swap")) {
00513       l2rOrientationClassId = 1;
00514     }
00515     if (!l2rOrientationClass.compare("dleft")) {
00516       l2rOrientationClassId = 2;
00517     }
00518     if (!l2rOrientationClass.compare("dright")) {
00519       l2rOrientationClassId = 3;
00520     }
00521     if (l2rOrientationClassId == -1) {
00522       UTIL_THROW(util::Exception, "ExtractionPhrasePair"
00523                  << ": Collecting phrase orientations failed. "
00524                  << "Unknown orientation class \"" << l2rOrientationClass << "\"." );
00525     }
00526     int r2lOrientationClassId = -1;
00527     if (!r2lOrientationClass.compare("mono")) {
00528       r2lOrientationClassId = 0;
00529     }
00530     if (!r2lOrientationClass.compare("swap")) {
00531       r2lOrientationClassId = 1;
00532     }
00533     if (!r2lOrientationClass.compare("dleft")) {
00534       r2lOrientationClassId = 2;
00535     }
00536     if (!r2lOrientationClass.compare("dright")) {
00537       r2lOrientationClassId = 3;
00538     }
00539     if (r2lOrientationClassId == -1) {
00540       UTIL_THROW(util::Exception, "ExtractionPhrasePair"
00541                  << ": Collecting phrase orientations failed. "
00542                  << "Unknown orientation class \"" << r2lOrientationClass << "\"." );
00543     }
00544 
00545     orientationClassCountSumL2R[l2rOrientationClassId] += iter->second;
00546     orientationClassCountSumR2L[r2lOrientationClassId] += iter->second;
00547   }
00548 
00549   for (size_t i=0; i<4; ++i) {
00550     if (i>0) {
00551       out << " ";
00552     }
00553     out << (float)( (smoothingFactor*orientationClassPriorsL2R[i] + orientationClassCountSumL2R[i]) / (smoothingFactor + m_count) );
00554   }
00555   for (size_t i=0; i<4; ++i) {
00556     out << " " << (float)( (smoothingFactor*orientationClassPriorsR2L[i] + orientationClassCountSumR2L[i]) / (smoothingFactor + m_count) );
00557   }
00558 }
00559 
00560 
00561 void ExtractionPhrasePair::UpdateVocabularyFromValueTokens(const std::string& propertyKey,
00562     std::set<std::string>& vocabulary) const
00563 {
00564   const PROPERTY_VALUES *allPropertyValues = GetProperty( propertyKey );
00565 
00566   if ( allPropertyValues == NULL ) {
00567     return;
00568   }
00569 
00570   for (PROPERTY_VALUES::const_iterator iter=allPropertyValues->begin();
00571        iter!=allPropertyValues->end(); ++iter) {
00572 
00573     std::vector<std::string> tokens = Moses::Tokenize(iter->first);
00574     for (std::vector<std::string>::const_iterator tokenIt=tokens.begin();
00575          tokenIt!=tokens.end(); ++tokenIt) {
00576       vocabulary.insert(*tokenIt);
00577     }
00578   }
00579 }
00580 
00581 
00582 
00583 }
00584