00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <queue>
00023 #include "moses/TranslationModel/PhraseDictionary.h"
00024 #include "moses/StaticData.h"
00025 #include "moses/InputType.h"
00026 #include "moses/TranslationOption.h"
00027 #include "moses/DecodeStep.h"
00028 #include "moses/DecodeGraph.h"
00029 #include "moses/InputPath.h"
00030 #include "util/exception.hh"
00031
00032 using namespace std;
00033
00034 namespace Moses
00035 {
00036 std::vector<PhraseDictionary*> PhraseDictionary::s_staticColl;
00037
00038 PhraseDictionary::PhraseDictionary(const std::string &line, bool registerNow)
00039 : DecodeFeature(line, registerNow)
00040 , m_tableLimit(20)
00041 , m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
00042 {
00043 m_id = s_staticColl.size();
00044 s_staticColl.push_back(this);
00045 }
00046
00047 bool
00048 PhraseDictionary::
00049 ProvidesPrefixCheck() const
00050 {
00051 return false;
00052 }
00053
00054 TargetPhraseCollection::shared_ptr
00055 PhraseDictionary::
00056 GetTargetPhraseCollectionLEGACY(const Phrase& src) const
00057 {
00058 TargetPhraseCollection::shared_ptr ret;
00059 typedef std::pair<TargetPhraseCollection::shared_ptr , clock_t> entry;
00060 if (m_maxCacheSize) {
00061 CacheColl &cache = GetCache();
00062
00063 size_t hash = hash_value(src);
00064
00065 CacheColl::iterator iter;
00066 iter = cache.find(hash);
00067
00068 if (iter == cache.end()) {
00069
00070 ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
00071 if (ret) {
00072 ret.reset(new TargetPhraseCollection(*ret));
00073 }
00074 cache[hash] = entry(ret, clock());
00075 } else {
00076 iter->second.second = clock();
00077 ret = iter->second.first;
00078 }
00079 } else {
00080
00081 ret = GetTargetPhraseCollectionNonCacheLEGACY(src);
00082 }
00083
00084 return ret;
00085 }
00086
00087 TargetPhraseCollection::shared_ptr
00088 PhraseDictionary::
00089 GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const
00090 {
00091 UTIL_THROW(util::Exception, "Legacy method not implemented");
00092 }
00093
00094
00095 TargetPhraseCollectionWithSourcePhrase::shared_ptr
00096 PhraseDictionary::
00097 GetTargetPhraseCollectionLEGACY(InputType const& src,Range const& range) const
00098 {
00099 UTIL_THROW(util::Exception, "Legacy method not implemented");
00100 }
00101
00102 void
00103 PhraseDictionary::
00104 SetParameter(const std::string& key, const std::string& value)
00105 {
00106 if (key == "cache-size") {
00107 m_maxCacheSize = Scan<size_t>(value);
00108 } else if (key == "path") {
00109 m_filePath = value;
00110 } else if (key == "table-limit") {
00111 m_tableLimit = Scan<size_t>(value);
00112 } else {
00113 DecodeFeature::SetParameter(key, value);
00114 }
00115 }
00116
00117 void
00118 PhraseDictionary::
00119 SetFeaturesToApply()
00120 {
00121
00122 const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
00123 for (size_t i = 0; i < allFeatures.size(); ++i) {
00124 FeatureFunction *feature = allFeatures[i];
00125 if (feature->IsUseable(m_outputFactors)) {
00126 m_featuresToApply.push_back(feature);
00127 }
00128 }
00129 }
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141 bool
00142 PhraseDictionary::
00143 PrefixExists(ttasksptr const& ttask, Phrase const& phrase) const
00144 {
00145 return true;
00146 }
00147
00148 void
00149 PhraseDictionary::
00150 GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
00151 {
00152 InputPathList::const_iterator iter;
00153 for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
00154 InputPath &inputPath = **iter;
00155
00156
00157 if (!SatisfyBackoff(inputPath)) {
00158 continue;
00159 }
00160
00161 const Phrase &phrase = inputPath.GetPhrase();
00162 TargetPhraseCollection::shared_ptr targetPhrases = this->GetTargetPhraseCollectionLEGACY(phrase);
00163 inputPath.SetTargetPhrases(*this, targetPhrases, NULL);
00164 }
00165 }
00166
00167
00168 void PhraseDictionary::ReduceCache() const
00169 {
00170 Timer reduceCacheTime;
00171 reduceCacheTime.start();
00172 CacheColl &cache = GetCache();
00173 if (cache.size() <= m_maxCacheSize) return;
00174
00175
00176 priority_queue< clock_t > lastUsedTimes;
00177 CacheColl::iterator iter;
00178 iter = cache.begin();
00179 while( iter != cache.end() ) {
00180 lastUsedTimes.push( iter->second.second );
00181 iter++;
00182 }
00183 for( size_t i=0; i < lastUsedTimes.size()-m_maxCacheSize/2; i++ )
00184 lastUsedTimes.pop();
00185 clock_t cutoffLastUsedTime = lastUsedTimes.top();
00186
00187
00188 iter = cache.begin();
00189 while( iter != cache.end() ) {
00190 if (iter->second.second < cutoffLastUsedTime) {
00191 CacheColl::iterator iterRemove = iter++;
00192
00193 cache.erase(iterRemove);
00194 } else iter++;
00195 }
00196 VERBOSE(2,"Reduced persistent translation option cache in "
00197 << reduceCacheTime << " seconds." << std::endl);
00198 }
00199
00200 CacheColl &
00201 PhraseDictionary::
00202 GetCache() const
00203 {
00204 CacheColl *cache;
00205 cache = m_cache.get();
00206 if (cache == NULL) {
00207 cache = new CacheColl;
00208 m_cache.reset(cache);
00209 }
00210 assert(cache);
00211 return *cache;
00212 }
00213
00214 bool PhraseDictionary::SatisfyBackoff(const InputPath &inputPath) const
00215 {
00216 const Phrase &sourcePhrase = inputPath.GetPhrase();
00217
00218 assert(m_container);
00219 const DecodeGraph &decodeGraph = GetDecodeGraph();
00220 size_t backoff = decodeGraph.GetBackoff();
00221
00222 if (backoff == 0) {
00223
00224 return true;
00225 }
00226
00227 if (sourcePhrase.GetSize() > backoff) {
00228
00229 return false;
00230 }
00231
00232
00233 InputPath::TargetPhrases::const_iterator iter;
00234 for (iter = inputPath.GetTargetPhrases().begin(); iter != inputPath.GetTargetPhrases().end(); ++iter) {
00235 const std::pair<TargetPhraseCollection::shared_ptr , const void*> &temp = iter->second;
00236 TargetPhraseCollection::shared_ptr tpCollPrev = temp.first;
00237
00238 if (tpCollPrev && tpCollPrev->GetSize()) {
00239
00240 return false;
00241 }
00242 }
00243
00244 return true;
00245 }
00246
00247 }
00248