00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef moses_PhraseDictionaryMultiModelCounts_h
00021 #define moses_PhraseDictionaryMultiModelCounts_h
00022
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"

#include <exception>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include <boost/unordered_map.hpp>

#include "moses/StaticData.h"
#include "moses/TargetPhrase.h"
#include "moses/Util.h"
00031
00032 namespace Moses
00033 {
00034
00035 typedef boost::unordered_map<Word, double > lexicalMap;
00036 typedef boost::unordered_map<Word, lexicalMap > lexicalMapJoint;
00037 typedef std::pair<std::vector<float>, std::vector<float> > lexicalPair;
00038 typedef std::vector<std::vector<lexicalPair> > lexicalCache;
00039
00040 struct multiModelCountsStats : multiModelStats {
00041 std::vector<float> fst, ft;
00042 };
00043
00044 struct multiModelCountsStatsOptimization: multiModelCountsStats {
00045 std::vector<float> fs;
00046 lexicalCache lexCachee2f, lexCachef2e;
00047 size_t f;
00048 };
00049
00050 struct lexicalTable {
00051 lexicalMapJoint joint;
00052 lexicalMap marginal;
00053 };
00054
00055 double InstanceWeighting(std::vector<float> &joint_counts, std::vector<float> &marginals, std::vector<float> &multimodelweights);
00056 double LinearInterpolationFromCounts(std::vector<float> &joint_counts, std::vector<float> &marginals, std::vector<float> &multimodelweights);
00057
00058
00059
00060 class AlignmentException : public std::runtime_error
00061 {
00062 public:
00063 AlignmentException() : std::runtime_error("AlignmentException") { }
00064 };
00065
00066
00069 class PhraseDictionaryMultiModelCounts: public PhraseDictionaryMultiModel
00070 {
00071
00072 #ifdef WITH_DLIB
00073 friend class CrossEntropyCounts;
00074 #endif
00075
00076 typedef std::vector< std::set<size_t> > AlignVector;
00077
00078
00079 public:
00080 PhraseDictionaryMultiModelCounts(const std::string &line);
00081 ~PhraseDictionaryMultiModelCounts();
00082 void Load(AllOptions::ptr const& opts);
00083 TargetPhraseCollection::shared_ptr CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
00084 void CollectSufficientStats(const Phrase &src, std::vector<float> &fs, std::map<std::string,multiModelCountsStats*>* allStats) const;
00085 float GetTargetCount(const Phrase& target, size_t modelIndex) const;
00086 double GetLexicalProbability( Word &inner, Word &outer, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights ) const;
00087 double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, std::vector<float> &multimodelweights, bool is_input ) const;
00088 double ComputeWeightedLexicalTranslationFromCache( std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > &cache, std::vector<float> &weights ) const;
00089 std::pair<PhraseDictionaryMultiModelCounts::AlignVector,PhraseDictionaryMultiModelCounts::AlignVector> GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const;
00090 std::vector<std::vector<std::pair<std::vector<float>, std::vector<float> > > > CacheLexicalStats( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector<lexicalTable*> &tables, bool is_input );
00091 void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
00092 void FillLexicalCountsMarginal(Word &wordS, std::vector<float> &count, const std::vector<lexicalTable*> &tables) const;
00093 void LoadLexicalTable( std::string &fileName, lexicalTable* ltable);
00094 TargetPhraseCollection::shared_ptr GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
00095 #ifdef WITH_DLIB
00096 std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
00097 #endif
00098
00099 virtual void InitializeForInput(ttasksptr const& ttask) {
00100
00101 }
00102
00103 void SetParameter(const std::string& key, const std::string& value);
00104
00105 private:
00106 std::vector<PhraseDictionary*> m_inverse_pd;
00107 std::vector<lexicalTable*> m_lexTable_e2f, m_lexTable_f2e;
00108 double (*m_combineFunction) (std::vector<float> &joint_counts, std::vector<float> &marginals, std::vector<float> &multimodelweights);
00109
00110 std::vector<std::string> m_lexE2FStr, m_lexF2EStr, m_targetTable;
00111
00112 };
00113
00114 #ifdef WITH_DLIB
00115 class CrossEntropyCounts: public OptimizationObjective
00116 {
00117 public:
00118
00119 CrossEntropyCounts (
00120 std::vector<multiModelCountsStatsOptimization*> &optimizerStats,
00121 PhraseDictionaryMultiModelCounts * model,
00122 size_t iFeature
00123 ) {
00124 m_optimizerStats = optimizerStats;
00125 m_model = model;
00126 m_iFeature = iFeature;
00127 }
00128
00129 double operator() ( const dlib::matrix<double,0,1>& arg) const;
00130
00131 private:
00132 std::vector<multiModelCountsStatsOptimization*> m_optimizerStats;
00133 PhraseDictionaryMultiModelCounts * m_model;
00134 size_t m_iFeature;
00135 };
00136 #endif
00137
00138 }
00139
00140 #endif