00001 // $Id$ 00002 00003 /*********************************************************************** 00004 Moses - factored phrase-based language decoder 00005 Copyright (C) 2006 University of Edinburgh 00006 00007 This library is free software; you can redistribute it and/or 00008 modify it under the terms of the GNU Lesser General Public 00009 License as published by the Free Software Foundation; either 00010 version 2.1 of the License, or (at your option) any later version. 00011 00012 This library is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 Lesser General Public License for more details. 00016 00017 You should have received a copy of the GNU Lesser General Public 00018 License along with this library; if not, write to the Free Software 00019 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 ***********************************************************************/ 00021 00022 #ifndef moses_LanguageModelIRST_h 00023 #define moses_LanguageModelIRST_h 00024 00025 #include <string> 00026 #include <vector> 00027 00028 #include "moses/Factor.h" 00029 #include "moses/LM/SingleFactor.h" 00030 #include "moses/Hypothesis.h" 00031 #include "moses/TypeDef.h" 00032 00033 #include "moses/Util.h" 00034 00035 //this is required because: 00036 //- IRSTLM package uses the namespace irstlm 00037 //- the compilation of "IRST.cpp" requires "using namespace irstlm", which is defined in any file of the IRSTLM package 00038 // but conflicts with these foward declaration of class lmContainer 00039 //- for files in moses/LM the IRSTLM include directory is set 00040 // but not for the rest of files 00041 #ifdef LM_IRST 00042 class lmContainer; // irst lm container for any lm type 00043 class ngram; 00044 class dictionary; 00045 #endif 00046 00047 00048 namespace Moses 00049 { 00050 00051 //class LanguageModel; 00052 class FFState; 00053 class Phrase; 00054 00058 class LanguageModelIRST : public LanguageModelSingleFactor 00059 { 00060 protected: 00061 mutable std::vector<int> m_lmIdLookup; 00062 lmContainer* m_lmtb; 00063 00064 int m_unknownId; //code of OOV 00065 int m_empty; //code of an empty position 00066 int m_lmtb_sentenceStart; //lmtb symbols to initialize ngram with 00067 int m_lmtb_sentenceEnd; //lmt symbol to initialize ngram with 00068 int m_lmtb_dub; //dictionary upperboud 00069 int m_lmtb_size; //max ngram stored in the table 00070 00071 dictionary* d; 00072 00073 std::string m_mapFilePath; 00074 00075 void CreateFactors(FactorCollection &factorCollection); 00076 00077 int GetLmID( const Word &word ) const; 00078 int GetLmID( const std::string &str ) const; 00079 int GetLmID( const Factor *factor ) const; 00080 00081 00082 public: 00083 LanguageModelIRST(const std::string &line); 00084 00085 ~LanguageModelIRST(); 00086 00087 void SetParameter(const std::string& key, const std::string& value); 00088 00089 bool IsUseable(const FactorMask &mask) const; 00090 00091 void Load(AllOptions::ptr const& opts); 00092 const FFState *EmptyHypothesisState(const InputType &/*input*/) const; 00093 00094 virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const; 00095 00096 00097 virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const; 00098 00099 virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; 00100 /* 00101 virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; 00102 00103 virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const; 00104 */ 00105 00106 void InitializeForInput(ttasksptr const& ttask); 00107 void CleanUpAfterSentenceProcessing(const InputType& source); 00108 00109 void set_dictionary_upperbound(int dub) { 00110 m_lmtb_size=dub ; 00111 }; 00112 }; 00113 00114 } 00115 00116 #endif