00001
00002 #include "ug_mm_ttrack.h"
00003 #include "ug_mm_tsa.h"
00004 #include "tpt_tokenindex.h"
00005 #include "ug_corpus_token.h"
00006 #include <string>
00007 #include <vector>
00008 #include <cassert>
00009 #include <boost/unordered_map.hpp>
00010 #include <boost/foreach.hpp>
00011 #include <iomanip>
00012 #include "ug_typedefs.h"
00013 #include "tpt_pickler.h"
00014 #include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"
00015 #include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
00016 #include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
00017 #include <algorithm>
00018 #include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h"
00019
00020 using namespace std;
00021 using namespace ugdiss;
00022 using namespace Moses;
00023 typedef sapt::L2R_Token<sapt::SimpleWordId> Token; // token type used for the track and the suffix array below
00024 typedef sapt::mmTSA<Token>::tree_iterator iter; // shorthand for the suffix array's tree iterator (not referenced in the visible code)
00025 typedef boost::unordered_map<pair<size_t,size_t>,size_t> phrase_counter_t; // (size_t,size_t) pair -> size_t map (not referenced in the visible code)
00026
00027 #define CACHING_THRESHOLD 1000 // not referenced in the visible code
00028
00029 sapt::mmTtrack<Token> T; // opened by main() from <bname>.mct; handed to I.open()
00030 sapt::TokenIndex V; // opened by main() from <bname>.tdx; turns each input line into an id sequence
00031 sapt::mmTSA<Token> I; // opened by main() from <bname>.sfa on top of T; queried once per input line
00032
00033 void interpret_args(int ac, char* av[]); // command line parser, defined after main()
00034 string bname; // base name of the input files; bound to the positional "bname" argument
00035 bool echo; // set by --echo/-e: prefix each result with the phrase that was looked up
00036 int main(int argc, char* argv[])
00037 {
00038 interpret_args(argc,argv);
00039
00040 T.open(bname+".mct");
00041 V.open(bname+".tdx"); V.iniReverseIndex();
00042 I.open(bname+".sfa",&T);
00043 string line;
00044 while (getline(cin,line))
00045 {
00046 vector<id_type> phr;
00047 V.fillIdSeq(line,phr);
00048 TSA<Token>::tree_iterator m(&I);
00049 size_t i = 0;
00050 while (i < phr.size() && m.extend(phr[i])) ++i;
00051 if (echo) cout << line << ": ";
00052 if (i < phr.size()) cout << 0 << endl;
00053 else cout << m.rawCnt() << endl;
00054 }
00055 exit(0);
00056 }
00057
00058 void
00059 interpret_args(int ac, char* av[])
00060 {
00061 namespace po=boost::program_options;
00062 po::variables_map vm;
00063 po::options_description o("Options");
00064 po::options_description h("Hidden Options");
00065 po::positional_options_description a;
00066
00067 o.add_options()
00068 ("help,h", "print this message")
00069 ("echo,e", po::bool_switch(&echo), "repeat lookup phrases")
00070 ;
00071
00072 h.add_options()
00073 ("bname", po::value<string>(&bname), "base name")
00074 ;
00075 a.add("bname",1);
00076 get_options(ac,av,h.add(o),a,vm);
00077 }