00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <fstream>
00023 #include <string>
00024 #include <iterator>
00025 #include <queue>
00026 #include <algorithm>
00027 #include <sys/stat.h>
00028 #include <boost/algorithm/string/predicate.hpp>
00029 #include <boost/thread/tss.hpp>
00030
00031 #include "PhraseDictionaryCompact.h"
00032 #include "moses/FactorCollection.h"
00033 #include "moses/Word.h"
00034 #include "moses/Util.h"
00035 #include "moses/InputFileStream.h"
00036 #include "moses/StaticData.h"
00037 #include "moses/Range.h"
00038 #include "moses/ThreadPool.h"
00039 #include "util/exception.hh"
00040
00041 using namespace std;
00042 using namespace boost::algorithm;
00043
00044 namespace Moses
00045 {
00046
00047 PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache;
00048
00049 PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line)
00050 :PhraseDictionary(line, true)
00051 ,m_inMemory(s_inMemoryByDefault)
00052 ,m_useAlignmentInfo(true)
00053 ,m_hash(10, 16)
00054 ,m_phraseDecoder(0)
00055 {
00056 ReadParameters();
00057 }
00058
00059 void PhraseDictionaryCompact::Load(AllOptions::ptr const& opts)
00060 {
00061 m_options = opts;
00062 const StaticData &staticData = StaticData::Instance();
00063
00064 SetFeaturesToApply();
00065
00066 std::string tFilePath = m_filePath;
00067
00068 std::string suffix = ".minphr";
00069 if (!ends_with(tFilePath, suffix)) tFilePath += suffix;
00070 if (!FileExists(tFilePath))
00071 throw runtime_error("Error: File " + tFilePath + " does not exist.");
00072
00073 m_phraseDecoder
00074 = new PhraseDecoder(*this, &m_input, &m_output, m_numScoreComponents);
00075
00076 std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
00077
00078 size_t indexSize;
00079
00080
00081 indexSize = m_hash.Load(pFile);
00082
00083
00084
00085
00086 size_t coderSize = m_phraseDecoder->Load(pFile);
00087
00088 size_t phraseSize;
00089 if(m_inMemory)
00090
00091 phraseSize = m_targetPhrasesMemory.load(pFile, false);
00092 else
00093
00094 phraseSize = m_targetPhrasesMapped.load(pFile, true);
00095
00096 UTIL_THROW_IF2(indexSize == 0 || coderSize == 0 || phraseSize == 0,
00097 "Not successfully loaded");
00098 }
00099
00100 TargetPhraseCollection::shared_ptr
00101 PhraseDictionaryCompact::
00102 GetTargetPhraseCollectionNonCacheLEGACY(const Phrase &sourcePhrase) const
00103 {
00104
00105
00106 TargetPhraseCollection::shared_ptr ret;
00107
00108
00109 if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
00110 return ret;
00111
00112
00113 TargetPhraseVectorPtr decodedPhraseColl
00114 = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, true);
00115
00116 if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
00117 TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
00118 TargetPhraseCollection::shared_ptr phraseColl(new TargetPhraseCollection);
00119
00120
00121 TargetPhraseVector::iterator nth =
00122 (m_tableLimit == 0 || tpv->size() < m_tableLimit) ?
00123 tpv->end() : tpv->begin() + m_tableLimit;
00124 NTH_ELEMENT4(tpv->begin(), nth, tpv->end(), CompareTargetPhrase());
00125 for(TargetPhraseVector::iterator it = tpv->begin(); it != nth; it++) {
00126 TargetPhrase *tp = new TargetPhrase(*it);
00127 phraseColl->Add(tp);
00128 }
00129
00130
00131 const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);
00132
00133 return phraseColl;
00134 } else
00135 return ret;
00136 }
00137
00138 TargetPhraseVectorPtr
00139 PhraseDictionaryCompact::
00140 GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const
00141 {
00142
00143
00144
00145 if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
00146 return TargetPhraseVectorPtr();
00147
00148
00149 return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true, false);
00150 }
00151
00152 PhraseDictionaryCompact::
00153 ~PhraseDictionaryCompact()
00154 {
00155 if(m_phraseDecoder)
00156 delete m_phraseDecoder;
00157 }
00158
00159 void
00160 PhraseDictionaryCompact::
00161 CacheForCleanup(TargetPhraseCollection::shared_ptr tpc)
00162 {
00163 if(!m_sentenceCache.get())
00164 m_sentenceCache.reset(new PhraseCache());
00165 m_sentenceCache->push_back(tpc);
00166 }
00167
00168 void
00169 PhraseDictionaryCompact::
00170 AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
00171 { }
00172
00173 void
00174 PhraseDictionaryCompact::
00175 CleanUpAfterSentenceProcessing(const InputType &source)
00176 {
00177 if(!m_sentenceCache.get())
00178 m_sentenceCache.reset(new PhraseCache());
00179
00180 m_phraseDecoder->PruneCache();
00181 m_sentenceCache->clear();
00182
00183 ReduceCache();
00184 }
00185
00186 bool PhraseDictionaryCompact::s_inMemoryByDefault = false;
00187 void
00188 PhraseDictionaryCompact::
00189 SetStaticDefaultParameters(Parameter const& param)
00190 {
00191 param.SetParameter(s_inMemoryByDefault, "minphr-memory", false);
00192 }
00193 }
00194