00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <limits>
00023 #include <iostream>
00024 #include <fstream>
00025
00026 #include "SRI.h"
00027 #include "moses/TypeDef.h"
00028 #include "moses/Util.h"
00029 #include "moses/FactorCollection.h"
00030 #include "moses/Phrase.h"
00031 #include "moses/StaticData.h"
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #ifdef __APPLE__
00045 #define HAVE_ZOPEN
00046 #endif
00047
00048 #include "Vocab.h"
00049 #include "Ngram.h"
00050
00051 using namespace std;
00052
00053 namespace Moses
00054 {
00055 LanguageModelSRI::LanguageModelSRI(const std::string &line)
00056 :LanguageModelSingleFactor(line)
00057 ,m_srilmVocab(0)
00058 ,m_srilmModel(0)
00059 {
00060 ReadParameters();
00061 }
00062
00063 LanguageModelSRI::~LanguageModelSRI()
00064 {
00065 delete m_srilmModel;
00066 delete m_srilmVocab;
00067 }
00068
00069 void LanguageModelSRI::Load(AllOptions::ptr const& opts)
00070 {
00071 m_srilmVocab = new ::Vocab();
00072 m_srilmModel = new Ngram(*m_srilmVocab, m_nGramOrder);
00073
00074 m_srilmModel->skipOOVs() = false;
00075
00076 File file( m_filePath.c_str(), "r" );
00077 m_srilmModel->read(file);
00078
00079
00080 CreateFactors();
00081 m_unknownId = m_srilmVocab->unkIndex();
00082 }
00083
00084 void LanguageModelSRI::CreateFactors()
00085 {
00086
00087 FactorCollection &factorCollection = FactorCollection::Instance();
00088
00089 std::map<size_t, VocabIndex> lmIdMap;
00090 size_t maxFactorId = 0;
00091
00092 VocabString str;
00093 VocabIter iter(*m_srilmVocab);
00094 while ( (str = iter.next()) != NULL) {
00095 VocabIndex lmId = GetLmID(str);
00096 size_t factorId = factorCollection.AddFactor(Output, m_factorType, str)->GetId();
00097 lmIdMap[factorId] = lmId;
00098 maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
00099 }
00100
00101 size_t factorId;
00102
00103 m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, BOS_);
00104 factorId = m_sentenceStart->GetId();
00105 lmIdMap[factorId] = GetLmID(BOS_);
00106 maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
00107 m_sentenceStartWord[m_factorType] = m_sentenceStart;
00108
00109 m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, EOS_);
00110 factorId = m_sentenceEnd->GetId();
00111 lmIdMap[factorId] = GetLmID(EOS_);
00112 maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
00113 m_sentenceEndWord[m_factorType] = m_sentenceEnd;
00114
00115
00116 m_lmIdLookup.resize(maxFactorId+1);
00117
00118 fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId);
00119
00120 map<size_t, VocabIndex>::iterator iterMap;
00121 for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap) {
00122 m_lmIdLookup[iterMap->first] = iterMap->second;
00123 }
00124 }
00125
00126 VocabIndex LanguageModelSRI::GetLmID( const std::string &str ) const
00127 {
00128 return m_srilmVocab->getIndex( str.c_str(), m_unknownId );
00129 }
00130 VocabIndex LanguageModelSRI::GetLmID( const Factor *factor ) const
00131 {
00132 size_t factorId = factor->GetId();
00133 return ( factorId >= m_lmIdLookup.size()) ? m_unknownId : m_lmIdLookup[factorId];
00134 }
00135
00136 LMResult LanguageModelSRI::GetValue(VocabIndex wordId, VocabIndex *context) const
00137 {
00138 LMResult ret;
00139 ret.score = FloorScore(TransformLMScore(m_srilmModel->wordProb( wordId, context)));
00140 ret.unknown = (wordId == m_unknownId);
00141 return ret;
00142 }
00143
00144 LMResult LanguageModelSRI::GetValue(const vector<const Word*> &contextFactor, State* finalState) const
00145 {
00146 LMResult ret;
00147 FactorType factorType = GetFactorType();
00148 size_t count = contextFactor.size();
00149 if (count <= 0) {
00150 if(finalState)
00151 *finalState = NULL;
00152 ret.score = 0.0;
00153 ret.unknown = false;
00154 return ret;
00155 }
00156
00157
00158 VocabIndex ngram[count + 1];
00159 for (size_t i = 0 ; i < count - 1 ; i++) {
00160 ngram[i+1] = GetLmID((*contextFactor[count-2-i])[factorType]);
00161 }
00162 ngram[count] = Vocab_None;
00163
00164 UTIL_THROW_IF2((*contextFactor[count-1])[factorType] == NULL,
00165 "No factor " << factorType << " at position " << (count-1));
00166
00167 VocabIndex lmId = GetLmID((*contextFactor[count-1])[factorType]);
00168 ret = GetValue(lmId, ngram+1);
00169
00170 if (finalState) {
00171 ngram[0] = lmId;
00172 unsigned int dummy;
00173 *finalState = m_srilmModel->contextID(ngram, dummy);
00174 }
00175 return ret;
00176 }
00177
00178 }
00179
00180
00181