00001 00002 #pragma once 00003 #include <boost/iostreams/device/mapped_file.hpp> 00004 #include <boost/bimap.hpp> 00005 #include <boost/unordered_map.hpp> 00006 #include "PhraseDictionary.h" 00007 #include "util/mmap.hh" 00008 00009 namespace probingpt 00010 { 00011 class QueryEngine; 00012 class target_text; 00013 } 00014 00015 namespace Moses 00016 { 00017 class ChartParser; 00018 class ChartCellCollectionBase; 00019 class ChartRuleLookupManager; 00020 00021 class ProbingPT : public PhraseDictionary 00022 { 00023 friend std::ostream& operator<<(std::ostream&, const ProbingPT&); 00024 00025 public: 00026 ProbingPT(const std::string &line); 00027 ~ProbingPT(); 00028 00029 void Load(AllOptions::ptr const& opts); 00030 00031 void InitializeForInput(ttasksptr const& ttask); 00032 00033 void SetParameter(const std::string& key, const std::string& value); 00034 00035 // for phrase-based model 00036 void GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const; 00037 00038 // for syntax/hiero model (CKY+ decoding) 00039 virtual ChartRuleLookupManager *CreateRuleLookupManager( 00040 const ChartParser &, 00041 const ChartCellCollectionBase &, 00042 std::size_t); 00043 00044 TO_STRING(); 00045 00046 00047 protected: 00048 probingpt::QueryEngine *m_engine; 00049 uint64_t m_unkId; 00050 00051 std::vector<uint64_t> m_sourceVocab; // factor id -> pt id 00052 std::vector<const Factor*> m_targetVocab; // pt id -> factor* 00053 std::vector<const AlignmentInfo*> m_aligns; 00054 util::LoadMethod load_method; 00055 00056 boost::iostreams::mapped_file_source file; 00057 const char *data; 00058 00059 // caching 00060 typedef boost::unordered_map<uint64_t, TargetPhraseCollection*> CachePb; 00061 CachePb m_cachePb; 00062 00063 void CreateAlignmentMap(const std::string path); 00064 00065 TargetPhraseCollection::shared_ptr CreateTargetPhrase(const Phrase &sourcePhrase) const; 00066 00067 std::pair<bool, uint64_t> GetKey(const Phrase &sourcePhrase) const; 00068 void GetSourceProbingIds(const Phrase &sourcePhrase, bool &ok, 00069 uint64_t probingSource[]) const; 00070 uint64_t GetSourceProbingId(const Word &word) const; 00071 uint64_t GetSourceProbingId(const Factor *factor) const; 00072 00073 TargetPhraseCollection *CreateTargetPhrases( 00074 const Phrase &sourcePhrase, uint64_t key) const; 00075 TargetPhrase *CreateTargetPhrase( 00076 const char *&offset) const; 00077 00078 inline const Factor *GetTargetFactor(uint32_t probingId) const { 00079 if (probingId >= m_targetVocab.size()) { 00080 return NULL; 00081 } 00082 return m_targetVocab[probingId]; 00083 } 00084 00085 }; 00086 00087 } // namespace Moses