00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef MF_LMMACRO_H
00025 #define MF_LMMACRO_H
00026
00027 #ifndef WIN32
00028 #include <sys/types.h>
00029 #include <sys/mman.h>
00030 #endif
00031
00032 #include "util.h"
00033 #include "ngramcache.h"
00034 #include "dictionary.h"
00035 #include "n_gram.h"
00036 #include "lmtable.h"
00037
00038 #define MAX_TOKEN_N_MAP 4
00039
00040 class lmmacro: public lmtable
00041 {
00042
00043 dictionary *dict;
00044 int maxlev;
00045 int selectedField;
00046
00047 bool collapseFlag;
00048 bool mapFlag;
00049
00050 int microMacroMapN;
00051 int *microMacroMap;
00052 bool *collapsableMap;
00053 bool *collapsatorMap;
00054
00055 #ifdef DLEXICALLM
00056 int selectedFieldForLexicon;
00057 int *lexicaltoken2classMap;
00058 int lexicaltoken2classMapN;
00059 #endif
00060
00061
00062 void loadmap(const std::string mapfilename);
00063 void unloadmap();
00064
00065 bool transform(ngram &in, ngram &out);
00066 void field_selection(ngram &in, ngram &out);
00067 bool collapse(ngram &in, ngram &out);
00068 void mapping(ngram &in, ngram &out);
00069
00070 public:
00071
00072 lmmacro(float nlf=0.0, float dlfi=0.0);
00073 ~lmmacro();
00074
00075 void load(const std::string filename,int mmap=0);
00076
00077 double lprob(ngram ng);
00078 double clprob(ngram ng,double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
00079 double clprob(int* ng, int ngsize, double* bow=NULL,int* bol=NULL,char** maxsuffptr=NULL,unsigned int* statesize=NULL,bool* extendible=NULL);
00080
00081 const char *maxsuffptr(ngram ong, unsigned int* size=NULL);
00082 const char *cmaxsuffptr(ngram ong, unsigned int* size=NULL);
00083
00084 void map(ngram *in, ngram *out);
00085 void One2OneMapping(ngram *in, ngram *out);
00086 void Micro2MacroMapping(ngram *in, ngram *out);
00087 #ifdef DLEXICALLM
00088 void Micro2MacroMapping(ngram *in, ngram *out, char **lemma);
00089 void loadLexicalClasses(const char *fn);
00090 void cutLex(ngram *in, ngram *out);
00091 #endif
00092
00093
00094 inline dictionary* getDict() const {
00095 return dict;
00096 }
00097 inline int maxlevel() const {
00098 return maxlev;
00099 };
00100
00101 inline virtual void dictionary_incflag(const bool flag) {
00102 dict->incflag(flag);
00103 };
00104
00105 inline virtual bool filter(const string sfilter, lmContainer* sublmt, const string skeepunigrams) {
00106 UNUSED(sfilter);
00107 UNUSED(sublmt);
00108 UNUSED(skeepunigrams);
00109 return false;
00110 }
00111 };
00112
00113
00114
00115 #endif
00116