00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef MF_LMCONTAINER_H
00024 #define MF_LMCONTAINER_H
00025
// Integer identifiers for the kinds of language model.
// NOTE(review): presumably these are the values stored in lmContainer::lmtype
// (see setLanguageModelType()/getLanguageModelType()) and accepted by
// CreateLanguageModel(int type, ...) -- confirm against the implementation file.
// NOTE(review): identifiers that begin with an underscore followed by an
// uppercase letter are reserved for the C++ implementation; the names are kept
// unchanged here because other files may reference them.
00026 #define _IRSTLM_LMUNKNOWN 0
00027 #define _IRSTLM_LMTABLE 1
00028 #define _IRSTLM_LMMACRO 2
00029 #define _IRSTLM_LMCLASS 3
00030 #define _IRSTLM_LMINTERPOLATION 4
00031
00032
00033 #include <stdio.h>
00034 #include <cstdlib>
00035 #include <stdlib.h>
00036 #include "util.h"
00037 #include "n_gram.h"
00038 #include "dictionary.h"
00039
// Unscoped enumeration with four tags: BINARY (0), TEXT (1), YRANIB (2), NONE (3).
// Because the enum is unscoped, the enumerator names are visible directly in the
// enclosing namespace.
// NOTE(review): judging by the type name these tag output-file formats, and
// YRANIB ("BINARY" reversed) presumably denotes an inverted binary layout --
// confirm with the code that consumes this type; nothing in this header uses it.
00040 typedef enum {BINARY,TEXT,YRANIB,NONE} OUTFILE_TYPE;
00041
00042 class lmContainer
00043 {
00044 static const bool debug=true;
00045
00046 protected:
00047 int lmtype;
00048 int maxlev;
00049 int requiredMaxlev;
00050
00051 public:
00052
00053 lmContainer();
00054 virtual ~lmContainer() {};
00055
00056 virtual void load(const std::string filename, int mmap=0) {
00057 UNUSED(filename);
00058 UNUSED(mmap);
00059 };
00060
00061 virtual void savetxt(const char *filename) {
00062 UNUSED(filename);
00063 };
00064 virtual void savebin(const char *filename) {
00065 UNUSED(filename);
00066 };
00067
00068 virtual double getlogOOVpenalty() const {
00069 return 0.0;
00070 };
00071 virtual double setlogOOVpenalty(int dub) {
00072 UNUSED(dub);
00073 return 0.0;
00074 };
00075 virtual double setlogOOVpenalty(double oovp) {
00076 UNUSED(oovp);
00077 return 0.0;
00078 };
00079
00080 inline virtual dictionary* getDict() const {
00081 return NULL;
00082 };
00083 inline virtual void maxlevel(int lev) {
00084 maxlev = lev;
00085 };
00086 inline virtual int maxlevel() const {
00087 return maxlev;
00088 };
00089 inline virtual void stat(int lev=0) {
00090 UNUSED(lev);
00091 };
00092
00093 inline virtual void setMaxLoadedLevel(int lev) {
00094 requiredMaxlev=lev;
00095 };
00096 inline virtual int getMaxLoadedLevel() {
00097 return requiredMaxlev;
00098 };
00099
00100 virtual bool is_inverted(const bool flag) {
00101 UNUSED(flag);
00102 return false;
00103 };
00104 virtual bool is_inverted() {
00105 return false;
00106 };
00107 virtual double clprob(ngram ng, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
00108 UNUSED(ng);
00109 UNUSED(bow);
00110 UNUSED(bol);
00111 UNUSED(maxsuffptr);
00112 UNUSED(statesize);
00113 UNUSED(extendible);
00114 return 0.0;
00115 };
00116 virtual double clprob(int* ng, int ngsize, double* bow=NULL, int* bol=NULL, char** maxsuffptr=NULL, unsigned int* statesize=NULL,bool* extendible=NULL) {
00117 UNUSED(ng);
00118 UNUSED(ngsize);
00119 UNUSED(bow);
00120 UNUSED(bol);
00121 UNUSED(maxsuffptr);
00122 UNUSED(statesize);
00123 UNUSED(extendible);
00124 return 0.0;
00125 };
00126
00127 virtual void used_caches() {};
00128 virtual void init_caches(int uptolev) {
00129 UNUSED(uptolev);
00130 };
00131 virtual void check_caches_levels() {};
00132 virtual void reset_caches() {};
00133
00134 virtual void reset_mmap() {};
00135
00136 inline void setLanguageModelType(int type) {
00137 lmtype=type;
00138 };
00139 inline int getLanguageModelType() {
00140 return lmtype;
00141 };
00142 int getLanguageModelType(std::string filename);
00143
00144 inline virtual void dictionary_incflag(const bool flag) {
00145 UNUSED(flag);
00146 };
00147
00148 virtual bool filter(const string sfilter, lmContainer*& sublmt, const string skeepunigrams);
00149
00150 lmContainer* CreateLanguageModel(const std::string infile, float nlf=0.0, float dlf=0.0);
00151 lmContainer* CreateLanguageModel(int type, float nlf=0.0, float dlf=0.0);
00152
00153 inline virtual bool is_OOV(int code) {
00154 UNUSED(code);
00155 return false;
00156 };
00157
00158
00159 inline bool is_lmt_cache_enabled(){
00160 #ifdef LMT_CACHE_ENABLE
00161 return true;
00162 #endif
00163 return false;
00164 }
00165
00166 inline bool is_ps_cache_enabled(){
00167 #ifdef PS_CACHE_ENABLE
00168 return true;
00169 #endif
00170 return false;
00171 }
00172
00173 inline bool is_cache_enabled(){
00174 return is_lmt_cache_enabled() && is_ps_cache_enabled();
00175 }
00176 };
00177
00178
00179 #endif
00180