00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_LexicalReorderingTableCreator_h
00023 #define moses_LexicalReorderingTableCreator_h
00024
00025 #include "PhraseTableCreator.h"
00026
00027 namespace Moses
00028 {
00029
// Declares the state and the private processing steps of the lexical
// reordering table creator. Only the constructor and destructor are public;
// EncodingTaskReordering and CompressionTaskReordering are friends and can
// therefore call the private members declared here.
//
// The member functions are only declared in this header; remarks about what
// they do are inferred from names and signatures and are marked as such.
// Types such as BlockHashIndex, Counter, CanonicalHuffman, StringVector,
// MmapAllocator and PackedItem are not declared in this header and are
// expected to arrive through PhraseTableCreator.h, its only include.
//
// NOTE(review): the class declares a destructor and holds raw pointers
// (m_outFile, m_scoreCounters, m_scoreTrees, m_encodedScores,
// m_compressedScores) but declares no copy constructor or copy assignment.
// If the destructor releases any of them, copying an instance would be
// unsafe -- confirm the class is never copied.
00030 class LexicalReorderingTableCreator
00031 {
00032 private:
// Path strings; the names mirror the constructor parameters inPath, outPath
// and tempfilePath.
00033 std::string m_inPath;
00034 std::string m_outPath;
00035 std::string m_tempfilePath;
00036
// C stdio handle; presumably the file opened at m_outPath -- confirm in the
// implementation.
00037 std::FILE* m_outFile;
00038
// Bit-width settings; the names mirror the constructor parameters orderBits
// and fingerPrintBits.
00039 size_t m_orderBits;
00040 size_t m_fingerPrintBits;
00041
// Presumably the number of score values per table entry -- TODO confirm.
00042 size_t m_numScoreComponent;
00043
// NOTE(review): m_quantize is a bool, while the constructor parameter
// `quantize` is a size_t; confirm the narrowing to bool is intended.
00044 bool m_multipleScoreTrees;
00045 bool m_quantize;
00046
// Presumably the field separator used when parsing input lines -- confirm.
00047 std::string m_separator;
00048
00049 BlockHashIndex m_hash;
00050
// Shorthand names for the float instantiations of Counter and
// CanonicalHuffman used by the two vectors below.
00051 typedef Counter<float> ScoreCounter;
00052 typedef CanonicalHuffman<float> ScoreTree;
00053
// Vectors of raw pointers; ownership and element count are not visible here.
// Presumably one entry per score component when m_multipleScoreTrees is set
// -- TODO confirm.
00054 std::vector<ScoreCounter*> m_scoreCounters;
00055 std::vector<ScoreTree*> m_scoreTrees;
00056
// Raw pointers to byte-string containers; allocation and release happen
// outside this header.
00057 StringVector<unsigned char, unsigned long, MmapAllocator>* m_encodedScores;
00058 StringVector<unsigned char, unsigned long, MmapAllocator>* m_compressedScores;
00059
// Priority queue of PackedItem plus "last flushed" bookkeeping. Presumably
// used by the Add*/Flush* members below to emit items in order -- confirm.
00060 std::priority_queue<PackedItem> m_queue;
00061 long m_lastFlushedLine;
00062 long m_lastFlushedSourceNum;
00063 std::string m_lastFlushedSourcePhrase;
00064 std::vector<std::string> m_lastRange;
00065
// Exists only when WITH_THREADS is defined, matching the optional `threads`
// constructor parameter.
00066 #ifdef WITH_THREADS
00067 size_t m_threads;
00068 #endif
00069
00070 void PrintInfo();
00071
// Parameterless steps; by their names they encode scores, build Huffman
// codes, compress scores and write the result. The order in which they are
// invoked is not visible in this header.
00072 void EncodeScores();
00073 void CalcHuffmanCodes();
00074 void CompressScores();
00075 void Save();
00076
// Returns a std::string built from two strings taken by non-const reference
// (parameters are unnamed; presumably source and target phrase, per the
// function name).
00077 std::string MakeSourceTargetKey(std::string&, std::string&);
00078
// Encoding helpers: EncodeLine maps a token vector to a std::string; the
// other two take a PackedItem and an optional `force` flag (default false).
00079 std::string EncodeLine(std::vector<std::string>& tokens);
00080 void AddEncodedLine(PackedItem& pi);
00081 void FlushEncodedQueue(bool force = false);
00082
// Compression helpers with signatures parallel to the encoding helpers above.
00083 std::string CompressEncodedScores(std::string &encodedScores);
00084 void AddCompressedScores(PackedItem& pi);
00085 void FlushCompressedQueue(bool force = false);
00086
00087 public:
// Constructor. The three path strings are taken by value. Defaults:
// orderBits = 10, fingerPrintBits = 16, multipleScoreTrees = true,
// quantize = 0. The trailing `threads` parameter (default 2) is part of the
// signature only when WITH_THREADS is defined. Whether construction itself
// performs the table creation cannot be determined from this header.
00088 LexicalReorderingTableCreator(std::string inPath,
00089 std::string outPath,
00090 std::string tempfilePath,
00091 size_t orderBits = 10,
00092 size_t fingerPrintBits = 16,
00093 bool multipleScoreTrees = true,
00094 size_t quantize = 0
00095 #ifdef WITH_THREADS
00096 , size_t threads = 2
00097 #endif
00098 );
00099
00100 ~LexicalReorderingTableCreator();
00101
// The task classes are granted access to the private members above.
00102 friend class EncodingTaskReordering;
00103 friend class CompressionTaskReordering;
00104 };
00105
// Callable task object: it is constructed from an input stream and a
// LexicalReorderingTableCreator, keeps references to both, and exposes
// operator()() so an instance can be invoked with no arguments.
// What the call does is defined outside this header; presumably it reads
// lines from m_inFile and passes encoded results to m_creator -- confirm.
00106 class EncodingTaskReordering
00107 {
00108 private:
// Mutexes exist only in builds with WITH_THREADS defined. Being static, they
// are shared by every instance of this class.
00109 #ifdef WITH_THREADS
00110 static boost::mutex m_mutex;
00111 static boost::mutex m_fileMutex;
00112 #endif
// Static state shared by all instances; definitions and initial values live
// outside this header.
// NOTE(review): presumably guarded by the mutexes above in threaded builds
// -- confirm.
00113 static size_t m_lineNum;
00114 static size_t m_sourcePhraseNum;
00115 static std::string m_lastSourcePhrase;
00116
// Reference members: the referenced stream and creator must outlive this
// task object.
00117 InputFileStream& m_inFile;
00118 LexicalReorderingTableCreator& m_creator;
00119
00120 public:
// Takes both collaborators by non-const reference.
00121 EncodingTaskReordering(InputFileStream& inFile, LexicalReorderingTableCreator& creator);
// Runs the task; takes no arguments and returns nothing.
00122 void operator()();
00123 };
00124
// Callable task object: it is constructed from a container of encoded scores
// and a LexicalReorderingTableCreator, keeps references to both, and exposes
// operator()() so an instance can be invoked with no arguments.
// What the call does is defined outside this header; presumably it
// compresses entries of m_encodedScores through m_creator -- confirm.
00125 class CompressionTaskReordering
00126 {
00127 private:
// The mutex exists only in builds with WITH_THREADS defined. Being static,
// it is shared by every instance of this class.
00128 #ifdef WITH_THREADS
00129 static boost::mutex m_mutex;
00130 #endif
// Static counter shared by all instances; its definition and initial value
// live outside this header.
00131 static size_t m_scoresNum;
// Reference members: the referenced container and creator must outlive this
// task object.
00132 StringVector<unsigned char, unsigned long, MmapAllocator> &m_encodedScores;
00133 LexicalReorderingTableCreator &m_creator;
00134
00135 public:
// NOTE(review): the first parameter is named m_encodedScores, the same as the
// data member. That is harmless in a declaration but easy to misread; check
// the definition's parameter name and initializer list.
00136 CompressionTaskReordering(StringVector<unsigned char, unsigned long, MmapAllocator>&
00137 m_encodedScores, LexicalReorderingTableCreator& creator);
// Runs the task; takes no arguments and returns nothing.
00138 void operator()();
00139 };
00140
00141 }
00142
00143 #endif