00001 00002 #include "moses/StaticData.h" 00003 #include "moses/FactorCollection.h" 00004 #include <boost/functional/hash.hpp> 00005 #include "NeuralLMWrapper.h" 00006 #include "neuralLM.h" 00007 00008 using namespace std; 00009 00010 namespace Moses 00011 { 00012 NeuralLMWrapper::NeuralLMWrapper(const std::string &line) 00013 :LanguageModelSingleFactor(line) 00014 { 00015 ReadParameters(); 00016 } 00017 00018 00019 NeuralLMWrapper::~NeuralLMWrapper() 00020 { 00021 delete m_neuralLM_shared; 00022 } 00023 00024 00025 void NeuralLMWrapper::Load(AllOptions::ptr const& opts) 00026 { 00027 00028 // Set parameters required by ancestor classes 00029 FactorCollection &factorCollection = FactorCollection::Instance(); 00030 m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_); 00031 m_sentenceStartWord[m_factorType] = m_sentenceStart; 00032 m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_); 00033 m_sentenceEndWord[m_factorType] = m_sentenceEnd; 00034 00035 m_neuralLM_shared = new nplm::neuralLM(); 00036 m_neuralLM_shared->read(m_filePath); 00037 m_neuralLM_shared->premultiply(); 00038 //TODO: config option? 00039 m_neuralLM_shared->set_cache(1000000); 00040 00041 m_unk = m_neuralLM_shared->lookup_word("<unk>"); 00042 00043 UTIL_THROW_IF2(m_nGramOrder != m_neuralLM_shared->get_order(), 00044 "Wrong order of neuralLM: LM has " << m_neuralLM_shared->get_order() << ", but Moses expects " << m_nGramOrder); 00045 00046 } 00047 00048 00049 LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, State* finalState) const 00050 { 00051 00052 if (!m_neuralLM.get()) { 00053 m_neuralLM.reset(new nplm::neuralLM(*m_neuralLM_shared)); 00054 //TODO: config option? 00055 m_neuralLM->set_cache(1000000); 00056 } 00057 00058 vector<int> words(contextFactor.size()); 00059 const size_t n = contextFactor.size(); 00060 for (size_t i=0; i<n; i++) { 00061 const Word* word = contextFactor[i]; 00062 const Factor* factor = word->GetFactor(m_factorType); 00063 const std::string string = factor->GetString().as_string(); 00064 int neuralLM_wordID = m_neuralLM->lookup_word(string); 00065 words[i] = neuralLM_wordID; 00066 } 00067 // Generate hashCode for only the last n-1 words, that represents the next LM 00068 // state 00069 size_t hashCode = 0; 00070 for (size_t i=1; i<n; ++i) { 00071 boost::hash_combine(hashCode, words[i]); 00072 } 00073 00074 double value = m_neuralLM->lookup_ngram(words); 00075 00076 // Create a new struct to hold the result 00077 LMResult ret; 00078 ret.score = FloorScore(value); 00079 ret.unknown = (words.back() == m_unk); 00080 00081 (*finalState) = (State*) hashCode; 00082 00083 return ret; 00084 } 00085 00086 } 00087 00088