00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef moses_PhraseDictionaryMultiModel_h
00021 #define moses_PhraseDictionaryMultiModel_h
00022
00023 #include "moses/TranslationModel/PhraseDictionary.h"
00024
00025
00026 #include <boost/unordered_map.hpp>
00027 #include <boost/thread/shared_mutex.hpp>
00028 #include "moses/StaticData.h"
00029 #include "moses/TargetPhrase.h"
00030 #include "moses/Util.h"
00031
00032 #ifdef WITH_DLIB
00033 #include <dlib/optimization.h>
00034 #endif
00035
00036 namespace Moses
00037 {
00038
00039 struct multiModelStats {
00040 TargetPhrase *targetPhrase;
00041 std::vector<std::vector<float> > p;
00042 ~multiModelStats() {
00043 delete targetPhrase;
00044 };
00045 };
00046
00047 struct multiModelStatsOptimization: multiModelStats {
00048 size_t f;
00049 };
00050
00051 class OptimizationObjective;
00052
00053 struct multiModelPhrase {
00054 TargetPhrase *targetPhrase;
00055 std::vector<float> p;
00056 ~multiModelPhrase() {
00057 delete targetPhrase;
00058 };
00059 };
00060
00063 class PhraseDictionaryMultiModel: public PhraseDictionary
00064 {
00065 #ifdef WITH_DLIB
00066 friend class CrossEntropy;
00067 #endif
00068
00069 public:
00070 PhraseDictionaryMultiModel(const std::string &line);
00071 PhraseDictionaryMultiModel(int type, const std::string &line);
00072 ~PhraseDictionaryMultiModel();
00073 void Load(AllOptions::ptr const& opts);
00074
00075 virtual void
00076 CollectSufficientStatistics
00077 (const Phrase& src, std::map<std::string,multiModelStats*>* allStats)
00078 const;
00079
00080 virtual TargetPhraseCollection::shared_ptr
00081 CreateTargetPhraseCollectionLinearInterpolation
00082 (const Phrase& src, std::map<std::string,multiModelStats*>* allStats,
00083 std::vector<std::vector<float> > &multimodelweights) const;
00084
00085 std::vector<std::vector<float> >
00086 getWeights(size_t numWeights, bool normalize) const;
00087
00088 std::vector<float>
00089 normalizeWeights(std::vector<float> &weights) const;
00090
00091 void
00092 CacheForCleanup(TargetPhraseCollection::shared_ptr tpc);
00093
00094 void
00095 CleanUpAfterSentenceProcessing(const InputType &source);
00096
00097 virtual void
00098 CleanUpComponentModels(const InputType &source);
00099
00100 #ifdef WITH_DLIB
00101 virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
00102 std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels);
00103 #endif
00104
00105
00106 virtual TargetPhraseCollection::shared_ptr
00107 GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
00108
00109 virtual void
00110 InitializeForInput(ttasksptr const& ttask) {
00111
00112
00113 }
00114
00115 ChartRuleLookupManager*
00116 CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&,
00117 std::size_t);
00118
00119 void
00120 SetParameter(const std::string& key, const std::string& value);
00121
00122 const std::vector<float>*
00123 GetTemporaryMultiModelWeightsVector() const;
00124
00125 void
00126 SetTemporaryMultiModelWeightsVector(std::vector<float> weights);
00127
00128 protected:
00129 std::string m_mode;
00130 std::vector<std::string> m_pdStr;
00131 std::vector<PhraseDictionary*> m_pd;
00132 size_t m_numModels;
00133 std::vector<float> m_multimodelweights;
00134
00135 typedef std::vector<TargetPhraseCollection::shared_ptr> PhraseCache;
00136 #ifdef WITH_THREADS
00137 boost::shared_mutex m_lock_cache;
00138 typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
00139 #else
00140 typedef PhraseCache SentenceCache;
00141 #endif
00142 SentenceCache m_sentenceCache;
00143
00144 PhraseCache& GetPhraseCache() {
00145 #ifdef WITH_THREADS
00146 {
00147
00148 boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
00149 SentenceCache::iterator i = m_sentenceCache.find(boost::this_thread::get_id());
00150 if (i != m_sentenceCache.end()) return i->second;
00151 }
00152 boost::unique_lock<boost::shared_mutex> lock(m_lock_cache);
00153 return m_sentenceCache[boost::this_thread::get_id()];
00154 #else
00155 return m_sentenceCache;
00156 #endif
00157 }
00158
00159 #ifdef WITH_THREADS
00160
00161 mutable boost::shared_mutex m_lock_weights;
00162 std::map<boost::thread::id, std::vector<float> > m_multimodelweights_tmp;
00163 #else
00164 std::vector<float> m_multimodelweights_tmp;
00165 #endif
00166 };
00167
00168 #ifdef WITH_DLIB
00169 class OptimizationObjective
00170 {
00171 public:
00172
00173 virtual double operator() ( const dlib::matrix<double,0,1>& arg) const = 0;
00174 };
00175
00176 class CrossEntropy: public OptimizationObjective
00177 {
00178 public:
00179
00180 CrossEntropy (
00181 std::vector<multiModelStatsOptimization*> &optimizerStats,
00182 PhraseDictionaryMultiModel * model,
00183 size_t iFeature
00184 ) {
00185 m_optimizerStats = optimizerStats;
00186 m_model = model;
00187 m_iFeature = iFeature;
00188 }
00189
00190 double operator() ( const dlib::matrix<double,0,1>& arg) const;
00191
00192 protected:
00193 std::vector<multiModelStatsOptimization*> m_optimizerStats;
00194 PhraseDictionaryMultiModel * m_model;
00195 size_t m_iFeature;
00196 };
00197 #endif
00198
00199 PhraseDictionary *FindPhraseDictionary(const std::string &ptName);
00200
00201 }
00202
00203 #endif