00001
00002
00003
00004
00005 #ifndef moses_PDTAimp_h
00006 #define moses_PDTAimp_h
00007
00008 #include "StaticData.h"
00009 #include "moses/TranslationModel/PhraseDictionaryTree.h"
00010 #include "UniqueObject.h"
00011 #include "InputFileStream.h"
00012 #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
00013 #include "Util.h"
00014 #include "util/tokenize_piece.hh"
00015 #include "util/exception.hh"
00016 #include "moses/FF/InputFeature.h"
00017 #include "util/exception.hh"
00018
00019 namespace Moses
00020 {
00021
/** Log-domain addition: returns log(exp(x) + exp(y)).
 *
 * The larger argument is factored out, so the exponent passed to exp() is
 * never positive and cannot overflow, however far apart x and y are.
 *
 * \param x first log-domain value (may be -infinity, i.e. log of zero)
 * \param y second log-domain value (may be -infinity, i.e. log of zero)
 * \return log(exp(x) + exp(y))
 */
inline double addLogScale(double x,double y)
{
  const double hi = (x > y) ? x : y;
  const double lo = (x > y) ? y : x;
  // Equal arguments (including two infinities of the same sign): answer is
  // hi + log(2). Handling it here avoids evaluating inf - inf, which is NaN.
  if(lo == hi) return hi + log(2.0);
  // lo - hi < 0, so exp() yields a value in [0,1); a -infinity operand
  // contributes exactly zero.
  return hi + log(1.0 + exp(lo - hi));
}
00027
/** Named wrapper around exp().
 *
 * \param x exponent
 * \return e raised to the power x
 */
inline double Exp(double x)
{
  const double result = exp(x);
  return result;
}
00032
00035 class PDTAimp
00036 {
00037
00038 friend class PhraseDictionaryTreeAdaptor;
00039
00040 protected:
00041 PDTAimp(PhraseDictionaryTreeAdaptor *p);
00042
00043 public:
00044 std::vector<FactorType> m_input,m_output;
00045 PhraseDictionaryTree *m_dict;
00046 const InputFeature *m_inputFeature;
00047 typedef std::vector<TargetPhraseCollectionWithSourcePhrase::shared_ptr> vTPC;
00048 mutable vTPC m_tgtColls;
00049
00050 typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase::shared_ptr> MapSrc2Tgt;
00051 mutable MapSrc2Tgt m_cache;
00052 PhraseDictionaryTreeAdaptor *m_obj;
00053 int useCache;
00054
00055 std::vector<vTPC> m_rangeCache;
00056 unsigned m_numInputScores;
00057
00058 UniqueObjectManager<Phrase> uniqSrcPhr;
00059
00060 size_t totalE,distinctE;
00061 std::vector<size_t> path1Best,pathExplored;
00062 std::vector<double> pathCN;
00063
00064 ~PDTAimp();
00065
00066 void Factors2String(Word const& w,std::string& s) const {
00067 s=w.GetString(m_input,false);
00068 }
00069
00070 void CleanUp();
00071
00072 TargetPhraseCollectionWithSourcePhrase::shared_ptr
00073 GetTargetPhraseCollection(Phrase const &src) const;
00074
00075 void Create(const std::vector<FactorType> &input
00076 , const std::vector<FactorType> &output
00077 , const std::string &filePath
00078 , const std::vector<float> &weight);
00079
00080
00081 typedef PhraseDictionaryTree::PrefixPtr PPtr;
00082 typedef unsigned short Position;
00083 typedef std::pair<Position,Position> Range;
00084 struct State {
00085 PPtr ptr;
00086 Range range;
00087 std::vector<float> scores;
00088 Phrase src;
00089
00090 State() : range(0,0),scores(0),src(ARRAY_SIZE_INCR) {}
00091 State(Position b,Position e,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
00092 : ptr(v),range(b,e),scores(sv),src(ARRAY_SIZE_INCR) {}
00093 State(Range const& r,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
00094 : ptr(v),range(r),scores(sv),src(ARRAY_SIZE_INCR) {}
00095
00096 Position begin() const {
00097 return range.first;
00098 }
00099 Position end() const {
00100 return range.second;
00101 }
00102 std::vector<float> GetScores() const {
00103 return scores;
00104 }
00105
00106 friend std::ostream& operator<<(std::ostream& out,State const& s) {
00107 out<<" R=("<<s.begin()<<","<<s.end()<<"),";
00108 for(std::vector<float>::const_iterator scoreIterator = s.GetScores().begin(); scoreIterator<s.GetScores().end(); scoreIterator++) {
00109 out<<", "<<*scoreIterator;
00110 }
00111 out<<")";
00112 return out;
00113 }
00114
00115 };
00116
00117 void CreateTargetPhrase(TargetPhrase& targetPhrase,
00118 StringTgtCand::Tokens const& factorStrings,
00119 std::string const& factorDelimiter,
00120 Scores const& transVector,
00121 Scores const& inputVector,
00122 const std::string *alignmentString,
00123 Phrase const* srcPtr=0) const;
00124
00125 TargetPhraseCollectionWithSourcePhrase::shared_ptr PruneTargetCandidates
00126 (const std::vector<TargetPhrase> & tCands,
00127 std::vector<std::pair<float,size_t> >& costs,
00128 const std::vector<Phrase> &sourcePhrases) const;
00129
00130
00131
00132 struct TScores {
00133 float total;
00134 Scores transScore, inputScores;
00135 Phrase const* src;
00136
00137 TScores() : total(0.0),src(0) {}
00138 };
00139
00140 void CacheSource(ConfusionNet const& src);
00141
00142 size_t GetNumInputScores() const {
00143 return m_numInputScores;
00144 }
00145 };
00146
00147 }
00148 #endif