00001 #include "mmsapt.h"
00002 #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
00003 #include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
00004 #include <boost/foreach.hpp>
00005 #include <boost/format.hpp>
00006 #include <boost/tokenizer.hpp>
00007 #include <boost/shared_ptr.hpp>
00008 #include <algorithm>
00009 #include <iostream>
00010
00011 using namespace Moses;
00012 using namespace sapt;
00013 using namespace std;
00014 using namespace boost;
00015
// NOTE(review): the leading 000NN numbers on the lines below look like residue
// of a numbered source listing rather than code -- confirm and strip if so.
//
// Token type used by this tool: L2R_Token instantiated over SimpleWordId.
00016 typedef L2R_Token<SimpleWordId> Token;
// Bitext flavours for that token type. Presumably "mm" = memory-mapped and
// "im" = in-memory -- TODO confirm against the bitext headers.
00017 typedef mmBitext<Token> mmbitext;
00018 typedef imBitext<Token> imbitext;
// Iterator type nested in Bitext<Token>; show() and dump() take it by reference.
00019 typedef Bitext<Token>::iter iter;
00020
// File-scope bitext that main() opens from argv[1..3]. show() reads its I2
// index and V1 vocabulary; main() passes its V1/V2 to the imbitext constructor.
// (The name presumably stands for "background" -- confirm.)
00021 mmbitext bg;
00022
00023 void
00024 show(ostream& out, iter& f)
00025 {
00026 iter b(bg.I2.get(),f.getToken(0),f.size());
00027 if (b.size() == f.size())
00028 out << setw(12) << int(round(b.approxOccurrenceCount()));
00029 else
00030 out << string(12,' ');
00031 out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " ";
00032 out << f.str(bg.V1.get()) << endl;
00033 }
00034
00035
00036 void
00037 dump(ostream& out, iter& f)
00038 {
00039 float cnt = f.size() ? f.approxOccurrenceCount() : 0;
00040 if (f.down())
00041 {
00042 cnt = f.approxOccurrenceCount();
00043 do { dump(out,f); }
00044 while (f.over());
00045 f.up();
00046 }
00047 if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1)
00048 show(out,f);
00049 }
00050
00051
// Append every line of the text file `fname` to `dest`.
//
// Lines are read with std::getline, so the line terminator is not stored.
// Existing elements of `dest` are kept; new lines go after them.
// If the file cannot be opened, a warning is written to std::cerr and `dest`
// is left untouched (previously this case was silently treated as an empty
// file).
//
// fname: path of the file to read.
// dest:  vector that receives one element per line.
void
read_data(const std::string& fname, std::vector<std::string>& dest)
{
  std::ifstream in(fname.c_str());
  if (!in)
    {
      std::cerr << "WARNING: cannot open '" << fname
                << "' for reading" << std::endl;
      return;
    }

  std::string line;
  while (std::getline(in,line))
    dest.push_back(line);
  // No explicit close(): the ifstream destructor closes the file.
}
00060
00061 int main(int argc, char* argv[])
00062 {
00063 bg.open(argv[1],argv[2],argv[3]);
00064 sptr<imbitext> fg(new imbitext(bg.V1,bg.V2));
00065 vector<string> src,trg,aln;
00066 read_data(argv[4],src);
00067 read_data(argv[5],trg);
00068 read_data(argv[6],aln);
00069 fg = fg->add(src,trg,aln);
00070 iter mfg(fg->I1.get());
00071 dump(cout,mfg);
00072 exit(0);
00073 }
00074
00075
00076