00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_TargetPhraseCollectionCache_h
00023 #define moses_TargetPhraseCollectionCache_h
00024
#include <algorithm>
#include <ctime>
#include <map>
#include <set>
#include <utility>
#include <vector>

#include <boost/thread/tss.hpp>
#include <boost/shared_ptr.hpp>

#include "moses/Phrase.h"
#include "moses/TargetPhraseCollection.h"
00034
00035 namespace Moses
00036 {
00037
00038
00039 typedef std::vector<TargetPhrase> TargetPhraseVector;
00040 typedef boost::shared_ptr<TargetPhraseVector> TargetPhraseVectorPtr;
00041
00043 class TargetPhraseCollectionCache
00044 {
00045 private:
00046 size_t m_max;
00047 float m_tolerance;
00048
00049 struct LastUsed {
00050 clock_t m_clock;
00051 TargetPhraseVectorPtr m_tpv;
00052 size_t m_bitsLeft;
00053
00054 LastUsed() : m_clock(0), m_bitsLeft(0) {}
00055
00056 LastUsed(clock_t clock, TargetPhraseVectorPtr tpv, size_t bitsLeft = 0)
00057 : m_clock(clock), m_tpv(tpv), m_bitsLeft(bitsLeft) {}
00058 };
00059
00060 typedef std::map<Phrase, LastUsed> CacheMap;
00061 static boost::thread_specific_ptr<CacheMap> m_phraseCache;
00062
00063 public:
00064
00065 typedef CacheMap::iterator iterator;
00066 typedef CacheMap::const_iterator const_iterator;
00067
00068 TargetPhraseCollectionCache(size_t max = 5000, float tolerance = 0.2)
00069 : m_max(max), m_tolerance(tolerance) {
00070 }
00071
00072 iterator Begin() {
00073 if(!m_phraseCache.get())
00074 m_phraseCache.reset(new CacheMap());
00075 return m_phraseCache->begin();
00076 }
00077
00078 const_iterator Begin() const {
00079 if(!m_phraseCache.get())
00080 m_phraseCache.reset(new CacheMap());
00081 return m_phraseCache->begin();
00082 }
00083
00084 iterator End() {
00085 if(!m_phraseCache.get())
00086 m_phraseCache.reset(new CacheMap());
00087 return m_phraseCache->end();
00088 }
00089
00090 const_iterator End() const {
00091 if(!m_phraseCache.get())
00092 m_phraseCache.reset(new CacheMap());
00093 return m_phraseCache->end();
00094 }
00095
00097 void Cache(const Phrase &sourcePhrase, TargetPhraseVectorPtr tpv,
00098 size_t bitsLeft = 0, size_t maxRank = 0) {
00099 if(!m_phraseCache.get())
00100 m_phraseCache.reset(new CacheMap());
00101
00102 iterator it = m_phraseCache->find(sourcePhrase);
00103 if(it != m_phraseCache->end())
00104
00105 it->second.m_clock = clock();
00106 else {
00107
00108 if(maxRank && tpv->size() > maxRank) {
00109 TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector());
00110 tpv_temp->resize(maxRank);
00111 std::copy(tpv->begin(), tpv->begin() + maxRank, tpv_temp->begin());
00112 (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
00113 } else
00114 (*m_phraseCache)[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
00115 }
00116 }
00117
00118 std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase &sourcePhrase) {
00119 if(!m_phraseCache.get())
00120 m_phraseCache.reset(new CacheMap());
00121 iterator it = m_phraseCache->find(sourcePhrase);
00122 if(it != m_phraseCache->end()) {
00123 LastUsed &lu = it->second;
00124 lu.m_clock = clock();
00125 return std::make_pair(lu.m_tpv, lu.m_bitsLeft);
00126 } else
00127 return std::make_pair(TargetPhraseVectorPtr(), 0);
00128 }
00129
00130
00131 void Prune() {
00132 if(!m_phraseCache.get())
00133 m_phraseCache.reset(new CacheMap());
00134 if(m_phraseCache->size() > m_max * (1 + m_tolerance)) {
00135 typedef std::set<std::pair<clock_t, Phrase> > Cands;
00136 Cands cands;
00137 for(CacheMap::iterator it = m_phraseCache->begin();
00138 it != m_phraseCache->end(); it++) {
00139 LastUsed &lu = it->second;
00140 cands.insert(std::make_pair(lu.m_clock, it->first));
00141 }
00142
00143 for(Cands::iterator it = cands.begin(); it != cands.end(); it++) {
00144 const Phrase& p = it->second;
00145 m_phraseCache->erase(p);
00146
00147 if(m_phraseCache->size() < (m_max * (1 - m_tolerance)))
00148 break;
00149 }
00150 }
00151 }
00152
00153 void CleanUp() {
00154 if(!m_phraseCache.get())
00155 m_phraseCache.reset(new CacheMap());
00156 m_phraseCache->clear();
00157 }
00158
00159 };
00160
00161 }
00162
00163 #endif