00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <cstdlib>
00022 #include <cstring>
00023 #include <fstream>
00024 #include <iostream>
00025 #include <sstream>
00026 #include <string>
00027 #include "cmd.h"
00028 #include <lmtable.h>
00029 #include <n_gram.h>
00030
00031 void print_help(int TypeFlag=0){
00032 std::cerr << std::endl << "score-lm - scores sentences with a language model" << std::endl;
00033 std::cerr << std::endl << "USAGE:" << std::endl
00034 << " score-lm -lm <model> [options]" << std::endl;
00035 std::cerr << std::endl << "OPTIONS:" << std::endl;
00036 std::cerr << " -lm language model to use (must be specified)" << std::endl;
00037 std::cerr << " -dub dictionary upper bound (default: 10000000" << std::endl;
00038 std::cerr << " -level max level to load from the language models (default: 1000," << std::endl;
00039 std::cerr << " meaning the actual LM order)" << std::endl;
00040 std::cerr << " -mm 1 memory-mapped access to lm (default: 0)" << std::endl;
00041 std::cerr << std::endl;
00042
00043 FullPrintParams(TypeFlag, 0, 1, stderr);
00044 }
00045
00046 void usage(const char *msg = 0)
00047 {
00048 if (msg){
00049 std::cerr << msg << std::endl;
00050 }
00051 else{
00052 print_help();
00053 }
00054 exit(1);
00055 }
00056
00057 int main(int argc, char **argv)
00058 {
00059 int mmap = 0;
00060 int dub = 10000000;
00061 int requiredMaxlev = 1000;
00062 char *lm = NULL;
00063
00064 bool help=false;
00065
00066 DeclareParams((char*)
00067 "lm", CMDSTRINGTYPE|CMDMSG, &lm, "language model to use (must be specified)",
00068 "DictionaryUpperBound", CMDINTTYPE|CMDMSG, &dub, "dictionary upperbound to compute OOV word penalty: default 10^7",
00069 "dub", CMDINTTYPE|CMDMSG, &dub, "dictionary upperbound to compute OOV word penalty: default 10^7",
00070 "memmap", CMDINTTYPE|CMDMSG, &mmap, "uses memory map to read a binary LM",
00071 "mm", CMDINTTYPE|CMDMSG, &mmap, "uses memory map to read a binary LM",
00072 "level", CMDINTTYPE|CMDMSG, &requiredMaxlev, "maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken",
00073 "lev", CMDINTTYPE|CMDMSG, &requiredMaxlev, "maximum level to load from the LM; if value is larger than the actual LM order, the latter is taken",
00074
00075 "Help", CMDBOOLTYPE|CMDMSG, &help, "print this help",
00076 "h", CMDBOOLTYPE|CMDMSG, &help, "print this help",
00077
00078 (char *)NULL
00079 );
00080
00081 if (argc == 1){
00082 usage();
00083 }
00084
00085 GetParams(&argc, &argv, (char*) NULL);
00086
00087 if (help){
00088 usage();
00089 }
00090
00091
00092 if(lm == NULL){
00093 usage("Missing parameter: please, specify the LM to use (-lm)");
00094 }
00095
00096 std::ifstream lmstr(lm);
00097 lmtable lmt;
00098 lmt.setMaxLoadedLevel(requiredMaxlev);
00099 lmt.load(lmstr, lm, NULL, mmap);
00100 lmt.setlogOOVpenalty(dub);
00101
00102 for(;;) {
00103 std::string line;
00104 std::getline(std::cin, line);
00105 if(!std::cin.good())
00106 return !std::cin.eof();
00107
00108 std::istringstream linestr(line);
00109 ngram ng(lmt.dict);
00110
00111 double logprob = .0;
00112 while((linestr >> ng))
00113 logprob += lmt.lprob(ng);
00114
00115 std::cout << logprob << std::endl;
00116 }
00117 }