00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_PhraseDecoder_h
00023 #define moses_PhraseDecoder_h
00024
00025 #include <sstream>
00026 #include <vector>
00027 #include <boost/unordered_map.hpp>
00028 #include <boost/unordered_set.hpp>
00029 #include <string>
00030 #include <iterator>
00031 #include <algorithm>
00032 #include <sys/stat.h>
00033
00034 #include "moses/TypeDef.h"
00035 #include "moses/FactorCollection.h"
00036 #include "moses/Word.h"
00037 #include "moses/Util.h"
00038 #include "moses/InputFileStream.h"
00039 #include "moses/StaticData.h"
00040 #include "moses/Range.h"
00041
00042 #include "PhraseDictionaryCompact.h"
00043 #include "StringVector.h"
00044 #include "CanonicalHuffman.h"
00045 #include "TargetPhraseCollectionCache.h"
00046
// Everything below is declared inside the Moses namespace.
00047 namespace Moses
00048 {
00049
// Forward declaration: PhraseDecoder stores a reference to the dictionary and
// befriends it, so only the name is needed at this point.
00050 class PhraseDictionaryCompact;
00051
// PhraseDecoder
//
// Declaration-only view of the decoder used together with
// PhraseDictionaryCompact. Judging by the public interface, it loads its
// state from a C FILE stream (Load) and turns a bit stream into a
// TargetPhraseVectorPtr for a given source Phrase (DecodeCollection /
// CreateTargetPhraseCollection). None of the member function bodies are
// visible in this header, so the notes below describe signatures and types
// only; behavioural remarks are marked as assumptions.
//
// NOTE(review): the class declares a destructor and holds raw pointers
// (m_symbolTree, m_scoreTrees, m_alignTree) but declares no copy constructor.
// If the destructor frees those pointers (not visible here), an accidental
// copy would double-free; confirm and consider disabling copying.
00052 class PhraseDecoder
00053 {
// All state and helper functions are protected; PhraseDictionaryCompact gets
// access through the friend declaration below.
00054 protected:
00055
00056 friend class PhraseDictionaryCompact;
00057
// Pair of two unsigned chars; presumably (source position, target position)
// of one alignment link -- TODO confirm against the implementation.
00058 typedef std::pair<unsigned char, unsigned char> AlignPoint;
// Pair of two unsigned values; presumably (source symbol id, target symbol id)
// as stored in m_lexicalTable -- TODO confirm.
00059 typedef std::pair<unsigned, unsigned> SrcTrg;
00060
// Declares the Coding enumeration and, in the same statement, the member
// m_coding that holds the selected value. REnc / PREnc presumably stand for
// rank encoding and phrasal rank encoding -- TODO confirm.
00061 enum Coding { None, REnc, PREnc } m_coding;
00062
// Scalar configuration values. m_numScoreComponent has the same type as the
// constructor's numScoreComponent argument; how the others are populated is
// not visible here (presumably by Load()).
00063 size_t m_numScoreComponent;
00064 bool m_containsAlignmentInfo;
00065 size_t m_maxRank;
00066 size_t m_maxPhraseLength;
00067
// Symbol tables: a string -> unsigned map for source symbols plus two
// StringVector containers for source and target symbols.
00068 boost::unordered_map<std::string, unsigned> m_sourceSymbolsMap;
00069 StringVector<unsigned char, unsigned, std::allocator> m_sourceSymbols;
00070 StringVector<unsigned char, unsigned, std::allocator> m_targetSymbols;
00071
// Lexical table stored as a flat vector of SrcTrg pairs with a separate
// vector of size_t indices; presumably the index gives per-source offsets
// into the table -- TODO confirm.
00072 std::vector<size_t> m_lexicalTableIndex;
00073 std::vector<SrcTrg> m_lexicalTable;
00074
// Raw pointer to the Huffman tree over unsigned symbols. Ownership is not
// expressed in the type; see the review note on the class.
00075 CanonicalHuffman<unsigned>* m_symbolTree;
00076
// Raw pointers to Huffman trees over float scores, and a flag that presumably
// selects between one shared tree and one tree per score -- TODO confirm.
00077 bool m_multipleScoreTrees;
00078 std::vector<CanonicalHuffman<float>*> m_scoreTrees;
00079
// Raw pointer to the Huffman tree over AlignPoint values.
00080 CanonicalHuffman<AlignPoint>* m_alignTree;
00081
// Cache object held by value; PruneCache() below presumably operates on it.
00082 TargetPhraseCollectionCache m_decodingCache;
00083
// Reference to the dictionary passed to the constructor. Being a reference
// member, it makes the implicitly generated copy assignment unavailable and
// requires the referenced object to outlive this decoder.
00084 PhraseDictionaryCompact& m_phraseDictionary;
00085
00086
00087
// Non-owning-looking pointers to the factor type lists passed to the
// constructor (same types as its input/output parameters).
00088 const std::vector<FactorType>* m_input;
00089 const std::vector<FactorType>* m_output;
00090
// String separator; its value and use are defined in the implementation.
00091 std::string m_separator;
00092
00093
00094
// Symbol lookup helpers. Note that GetSourceSymbolId takes its string by
// non-const reference, while GetTargetSymbol is a const member returning a
// string by value.
00095 unsigned GetSourceSymbolId(std::string& s);
00096 std::string GetTargetSymbol(unsigned id) const;
00097
// Classify an encoded symbol for the REnc / PREnc codings; the meaning of the
// returned size_t is defined in the implementation.
00098 size_t GetREncType(unsigned encodedSymbol);
00099 size_t GetPREncType(unsigned encodedSymbol);
00100
// Takes a source index and a rank and returns an unsigned value; presumably a
// lookup in the lexical table -- TODO confirm.
00101 unsigned GetTranslation(unsigned srcIdx, size_t rank);
00102
00103 size_t GetMaxSourcePhraseLength();
00104
// Field extractors for REnc-encoded symbols; all return unsigned.
00105 unsigned DecodeREncSymbol1(unsigned encodedSymbol);
00106 unsigned DecodeREncSymbol2Rank(unsigned encodedSymbol);
00107 unsigned DecodeREncSymbol2Position(unsigned encodedSymbol);
00108 unsigned DecodeREncSymbol3(unsigned encodedSymbol);
00109
// Field extractors for PREnc-encoded symbols. The Left/Right variants return
// a signed int, unlike the other extractors.
00110 unsigned DecodePREncSymbol1(unsigned encodedSymbol);
00111 int DecodePREncSymbol2Left(unsigned encodedSymbol);
00112 int DecodePREncSymbol2Right(unsigned encodedSymbol);
00113 unsigned DecodePREncSymbol2Rank(unsigned encodedSymbol);
00114
// Builds a key string from the given string, which is passed by non-const
// reference; whether the argument is modified is not visible here.
00115 std::string MakeSourceKey(std::string &);
00116
00117 public:
00118
// Constructor.
//   phraseDictionary   dictionary this decoder is bound to (matches the type
//                      of the reference member m_phraseDictionary)
//   input, output      pointers to factor type lists (match m_input/m_output)
//   numScoreComponent  number of score components
00119 PhraseDecoder(
00120 PhraseDictionaryCompact &phraseDictionary,
00121 const std::vector<FactorType>* input,
00122 const std::vector<FactorType>* output,
00123 size_t numScoreComponent
00124 );
00125
// Non-virtual destructor; what it releases is defined in the implementation.
00126 ~PhraseDecoder();
00127
// Loads decoder state from an already opened C stream. The returned size_t is
// presumably the number of bytes consumed -- TODO confirm.
00128 size_t Load(std::FILE* in);
00129
// Produces the target phrases for sourcePhrase. topLevel defaults to false
// and eval defaults to true.
00130 TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase,
00131 bool topLevel = false, bool eval = true);
00132
// Decodes encodedBitStream for sourcePhrase. Takes a TargetPhraseVectorPtr
// and returns one; presumably the passed-in vector is filled and returned --
// TODO confirm. Unlike CreateTargetPhraseCollection, topLevel and eval have
// no defaults here.
00133 TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv,
00134 BitWrapper<> &encodedBitStream,
00135 const Phrase &sourcePhrase,
00136 bool topLevel,
00137 bool eval);
00138
// Presumably trims m_decodingCache -- TODO confirm in the implementation.
00139 void PruneCache();
00140 };
00141
00142 }
00143
00144 #endif