00001 #include "moses/LM/oxlm/OxLMMapper.h" 00002 00003 #include "moses/FactorCollection.h" 00004 00005 using namespace std; 00006 00007 namespace Moses 00008 { 00009 00010 OxLMMapper::OxLMMapper( 00011 const boost::shared_ptr<oxlm::Vocabulary>& vocab, 00012 bool pos_back_off, 00013 const FactorType& pos_factor_type) 00014 : posBackOff(pos_back_off), posFactorType(pos_factor_type) 00015 { 00016 for (int i = 0; i < vocab->size(); ++i) { 00017 const string &str = vocab->convert(i); 00018 FactorCollection &fc = FactorCollection::Instance(); 00019 const Moses::Factor *factor = fc.AddFactor(str, false); 00020 moses2Oxlm[factor] = i; 00021 } 00022 00023 kUNKNOWN = vocab->convert("<unk>"); 00024 } 00025 00026 int OxLMMapper::convert(const Word& word) const 00027 { 00028 const Moses::Factor* word_factor = word.GetFactor(0); 00029 Coll::const_iterator iter = moses2Oxlm.find(word_factor); 00030 if (posBackOff && iter == moses2Oxlm.end()) { 00031 const Moses::Factor* pos_factor = word.GetFactor(posFactorType); 00032 iter = moses2Oxlm.find(pos_factor); 00033 } 00034 00035 return iter == moses2Oxlm.end() ? kUNKNOWN : iter->second; 00036 } 00037 00038 void OxLMMapper::convert( 00039 const vector<const Word*>& contextFactor, 00040 vector<int> &ids, int &word) const 00041 { 00042 ids.clear(); 00043 for (size_t i = 0; i < contextFactor.size() - 1; ++i) { 00044 ids.push_back(convert(*contextFactor[i])); 00045 } 00046 std::reverse(ids.begin(), ids.end()); 00047 00048 word = convert(*contextFactor.back()); 00049 } 00050 00051 } // namespace Moses