00001
00002
00003 #include "ug_mm_ttrack.h"
00004 #include "tpt_tokenindex.h"
00005 #include "ug_corpus_token.h"
00006 #include <string>
00007 #include <vector>
00008 #include <cassert>
00009 #include <boost/unordered_map.hpp>
00010 #include <boost/foreach.hpp>
00011 #include <iomanip>
00012 #include "ug_typedefs.h"
00013 #include "tpt_pickler.h"
00014
00015
00016
00017 #include <algorithm>
00018 #include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h"
00019
00020 using namespace std;
00021 using namespace sapt;
00022 using namespace ugdiss;
00023 using namespace Moses;
// Token type stored in the corpus track that this program reads.
00024 typedef L2R_Token<SimpleWordId> Token;
00025
// Counter keyed by a pair of size_t values.
// NOTE(review): not referenced anywhere in the visible part of this file.
00026 typedef boost::unordered_map<pair<size_t,size_t>,size_t> phrase_counter_t;
00027
// NOTE(review): not referenced anywhere in the visible part of this file.
00028 #define CACHING_THRESHOLD 1000
00029
// Token track; main() opens it from "<bname>.mct".
00030 mmTtrack<Token> T;
// Token index; main() opens it from "<bname>.tdx" and uses it to map ids to
// their printable form.
00031 TokenIndex V;
00032
00033
// Parses the command line; defined after main().
00034 void interpret_args(int ac, char* av[]);
// Base name of the input files; bound to the positional "bname" argument in
// interpret_args().
00035 string bname;
// NOTE(review): never read or written in the visible code.
00036 bool echo;
00037 int main(int argc, char* argv[])
00038 {
00039 interpret_args(argc,argv);
00040 T.open(bname+".mct");
00041 V.open(bname+".tdx");
00042 vector<size_t> cnt(V.ksize(),0);
00043 for (size_t sid = 0; sid < T.size(); ++sid)
00044 {
00045 Token const* stop = T.sntEnd(sid);
00046 for (Token const* t = T.sntStart(sid); t < stop; ++cnt[(t++)->id()]);
00047 }
00048 for (size_t wid = 2; wid < V.ksize(); ++wid)
00049 cout << V[wid] << " " << cnt[wid] << endl;
00050 exit(0);
00051 }
00052
00053 void
00054 interpret_args(int ac, char* av[])
00055 {
00056 namespace po=boost::program_options;
00057 po::variables_map vm;
00058 po::options_description o("Options");
00059 po::options_description h("Hidden Options");
00060 po::positional_options_description a;
00061
00062 o.add_options()
00063 ("help,h", "print this message")
00064 ;
00065
00066 h.add_options()
00067 ("bname", po::value<string>(&bname), "base name")
00068 ;
00069 a.add("bname",1);
00070 get_options(ac,av,h.add(o),a,vm);
00071 }