00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <queue>
00011 #include <iomanip>
00012 #include <vector>
00013 #include <iterator>
00014 #include <sstream>
00015 #include <algorithm>
00016
00017 #include <boost/program_options.hpp>
00018 #include <boost/dynamic_bitset.hpp>
00019 #include <boost/shared_ptr.hpp>
00020 #include <boost/foreach.hpp>
00021 #include <boost/thread.hpp>
00022 #include <boost/math/distributions/binomial.hpp>
00023 #include <boost/unordered_map.hpp>
00024 #include <boost/unordered_set.hpp>
00025
00026 #include "moses/TranslationModel/UG/generic/program_options/ug_get_options.h"
00027 #include "ug_mm_2d_table.h"
00028 #include "ug_mm_ttrack.h"
00029 #include "ug_corpus_token.h"
00030
00031 using namespace std;
00032 using namespace sapt;
00033 using namespace ugdiss;
00034 using namespace boost::math;
00035
00036 typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> LEX_t;
00037 typedef SimpleWordId Token;
00038
00039
00040 void interpret_args(int ac, char* av[]);
00041
00042 string swrd,twrd,L1,L2,bname;
00043 TokenIndex V1,V2;
00044 LEX_t LEX;
00045
00046
00047 void
00048 lookup_source(ostream& out, id_type r)
00049 {
00050 vector<LEX_t::Cell> foo(LEX[r].start,LEX[r].stop);
00051 sort(foo.begin(),foo.end(),LEX_t::Cell::SortDescendingByValue());
00052 out << V1[r] << " " << LEX.m1(r) << endl;
00053 BOOST_FOREACH(LEX_t::Cell const& c, foo)
00054 {
00055 out << setw(10) << float(c.val)/LEX.m1(r) << " "
00056 << setw(10) << float(c.val)/LEX.m2(c.id) << " "
00057 << V2[c.id] << " " << c.val << "/" << LEX.m2(c.id) << endl;
00058 }
00059 }
00060
00061 void
00062 lookup_target(ostream& out, id_type c)
00063 {
00064 vector<LEX_t::Cell> foo;
00065 LEX_t::Cell cell;
00066 for (size_t r = 0; r < LEX.numRows; ++r)
00067 {
00068 size_t j = LEX[r][c];
00069 if (j)
00070 {
00071 cell.id = r;
00072 cell.val = j;
00073 foo.push_back(cell);
00074 }
00075 }
00076 sort(foo.begin(),foo.end(),LEX_t::Cell::SortDescendingByValue());
00077 out << V2[c] << " " << LEX.m2(c) << endl;
00078 BOOST_FOREACH(LEX_t::Cell const& r, foo)
00079 {
00080 out << setw(10) << float(r.val)/LEX.m2(c) << " "
00081 << setw(10) << float(r.val)/LEX.m1(r.id) << " "
00082 << V1[r.id] << " " << r.val << "/" << LEX.m1(r.id) << endl;
00083 }
00084 }
00085
00086 void
00087 dump(ostream& out)
00088 {
00089 for (size_t r = 0; r < LEX.numRows; ++r)
00090 lookup_source(out,r);
00091 out << endl;
00092 }
00093
00094
00095 int
00096 main(int argc, char* argv[])
00097 {
00098 interpret_args(argc,argv);
00099 char c = *bname.rbegin();
00100 if (c != '/' && c != '.') bname += '.';
00101 V1.open(bname+L1+".tdx");
00102 V2.open(bname+L2+".tdx");
00103 LEX.open(bname+L1+"-"+L2+".lex");
00104
00105 cout.precision(2);
00106 id_type swid = V1[swrd];
00107 id_type twid = V2[twrd];
00108 if (swid != 1 && twid != 1)
00109 {
00110 cout << swrd << " " << twrd << " "
00111 << LEX.m1(swid) << " / "
00112 << LEX[swid][twid] << " / "
00113 << LEX.m2(twid) << endl;
00114 }
00115 else if (swid != 1)
00116 lookup_source(cout,swid);
00117 else if (twid != 1)
00118 lookup_target(cout,twid);
00119 else
00120 dump(cout);
00121 }
00122
00123 void
00124 interpret_args(int ac, char* av[])
00125 {
00126 namespace po=boost::program_options;
00127 po::variables_map vm;
00128 po::options_description o("Options");
00129 po::options_description h("Hidden Options");
00130 po::positional_options_description a;
00131
00132 o.add_options()
00133 ("help,h", "print this message")
00134 ("source,s",po::value<string>(&swrd),"source word")
00135 ("target,t",po::value<string>(&twrd),"target word")
00136 ;
00137
00138 h.add_options()
00139 ("bname", po::value<string>(&bname), "base name")
00140 ("L1", po::value<string>(&L1),"L1 tag")
00141 ("L2", po::value<string>(&L2),"L2 tag")
00142 ;
00143 a.add("bname",1);
00144 a.add("L1",1);
00145 a.add("L2",1);
00146 get_options(ac,av,h.add(o),a,vm,"cfg");
00147
00148 }
00149
00150