00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef MF_MDIADAPTLM_H
00024 #define MF_MDIADAPTLM_H
00025
00026 #include "ngramcache.h"
00027 #include "normcache.h"
00028 #include "interplm.h"
00029
00030 class mdiadaptlm:public interplm
00031 {
00032
00033 int adaptlev;
00034 interplm* forelm;
00035 double zeta0;
00036 double oovscaling;
00037 bool m_save_per_level;
00038
00039 protected:
00040 normcache *cache;
00041
00042
00043 NGRAMCACHE_t** probcache;
00044 NGRAMCACHE_t** backoffcache;
00045 int max_caching_level;
00046
00047 int saveARPA_per_word(char *filename,int backoff=0,char* subdictfile=NULL);
00048 int saveARPA_per_level(char *filename,int backoff=0,char* subdictfile=NULL);
00049 int saveBIN_per_word(char *filename,int backoff=0,char* subdictfile=NULL,int mmap=0);
00050 int saveBIN_per_level(char *filename,int backoff=0,char* subdictfile=NULL,int mmap=0);
00051 public:
00052
00053 mdiadaptlm(char* ngtfile,int depth=0,TABLETYPE tt=FULL);
00054
00055 inline normcache* get_zetacache() {
00056 return cache;
00057 }
00058 inline NGRAMCACHE_t* get_probcache(int level);
00059 inline NGRAMCACHE_t* get_backoffcache(int level);
00060
00061 void create_caches(int mcl);
00062 void init_caches();
00063 void init_caches(int level);
00064 void delete_caches();
00065 void delete_caches(int level);
00066
00067 void check_cache_levels();
00068 void check_cache_levels(int level);
00069 void reset_caches();
00070 void reset_caches(int level);
00071
00072 void caches_stat();
00073
00074 double gis_step;
00075
00076 double zeta(ngram ng,int size);
00077
00078 int discount(ngram ng,int size,double& fstar,double& lambda,int cv=0);
00079
00080 int bodiscount(ngram ng,int size,double& fstar,double& lambda,double& bo);
00081
00082 int compute_backoff()
00083 {
00084 cerr << "compute backoff probabilities ...";
00085
00086 if (m_save_per_level){
00087 cerr << " per level ...";
00088 return compute_backoff_per_level();
00089 }else{
00090 cerr << " per word ...";
00091 return compute_backoff_per_word();
00092 }
00093 }
00094 int compute_backoff_per_level();
00095 int compute_backoff_per_word();
00096
00097 double backunig(ngram ng);
00098
00099 double foreunig(ngram ng);
00100
00101 int adapt(char* ngtfile,int alev=1,double gis_step=0.4);
00102
00103 int scalefact(char* ngtfile);
00104
00105 int savescalefactor(char* filename);
00106
00107 double scalefact(ngram ng);
00108
00109 double prob(ngram ng,int size);
00110 double prob(ngram ng,int size,double& fstar,double& lambda, double& bo);
00111
00112 double prob2(ngram ng,int size,double & fstar);
00113
00114 double txclprob(ngram ng,int size);
00115
00116 int saveASR(char *filename,int backoff,char* subdictfile=NULL);
00117 int saveMT(char *filename,int backoff,char* subdictfile=NULL,int resolution=10000000,double decay=0.999900);
00118
00119 int saveARPA(char *filename,int backoff=0,char* subdictfile=NULL){
00120 if (m_save_per_level){
00121 cerr << " per level ...";
00122 return saveARPA_per_level(filename, backoff, subdictfile);
00123 }else{
00124 cerr << " per word ...";
00125 return saveARPA_per_word(filename, backoff, subdictfile);
00126 }
00127 }
00128 int saveBIN(char *filename,int backoff=0,char* subdictfile=NULL,int mmap=0){
00129 if (m_save_per_level){
00130 cerr << " per level ...";
00131 return saveBIN_per_level(filename, backoff, subdictfile, mmap);
00132 }else{
00133 cerr << " per word ...";
00134 return saveBIN_per_word(filename, backoff, subdictfile, mmap);
00135 }
00136 }
00137
00138 inline void save_per_level(bool value){ m_save_per_level=value; }
00139 inline bool save_per_level(){ return m_save_per_level; }
00140
00141 int netsize();
00142
00143 ~mdiadaptlm();
00144
00145 double myround(double x) {
00146 long int value = (long int) x;
00147 return (x-value)>0.500?value+1.0:(double)value;
00148 }
00149
00150 inline bool is_train_cache_enabled(){
00151 #ifdef MDIADAPTLM_CACHE_ENABLE
00152 return true;
00153 #endif
00154 return false;
00155 }
00156
00157 };
00158
00159 #endif
00160
00161
00162
00163
00164
00165