00001 #include "mmsapt.h" 00002 #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h" 00003 #include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h" 00004 #include <boost/foreach.hpp> 00005 #include <boost/format.hpp> 00006 #include <boost/tokenizer.hpp> 00007 #include <boost/shared_ptr.hpp> 00008 #include <algorithm> 00009 #include <iostream> 00010 00011 using namespace Moses; 00012 using namespace sapt; 00013 using namespace std; 00014 using namespace boost; 00015 00016 typedef L2R_Token<SimpleWordId> Token; 00017 typedef mmBitext<Token> mmbitext; 00018 typedef imBitext<Token> imbitext; 00019 typedef Bitext<Token>::iter iter; 00020 00021 mmbitext bg; 00022 00023 void 00024 show(ostream& out, iter& f) 00025 { 00026 iter b(bg.I2.get(),f.getToken(0),f.size()); 00027 if (b.size() == f.size()) 00028 out << setw(12) << int(round(b.approxOccurrenceCount())); 00029 else 00030 out << string(12,' '); 00031 out << " " << setw(5) << int(round(f.approxOccurrenceCount())) << " "; 00032 out << f.str(bg.V1.get()) << endl; 00033 } 00034 00035 00036 void 00037 dump(ostream& out, iter& f) 00038 { 00039 float cnt = f.size() ? f.approxOccurrenceCount() : 0; 00040 if (f.down()) 00041 { 00042 cnt = f.approxOccurrenceCount(); 00043 do { dump(out,f); } 00044 while (f.over()); 00045 f.up(); 00046 } 00047 if (f.size() && cnt < f.approxOccurrenceCount() && f.approxOccurrenceCount() > 1) 00048 show(out,f); 00049 } 00050 00051 00052 void 00053 read_data(string fname, vector<string>& dest) 00054 { 00055 ifstream in(fname.c_str()); 00056 string line; 00057 while (getline(in,line)) dest.push_back(line); 00058 in.close(); 00059 } 00060 00061 int main(int argc, char* argv[]) 00062 { 00063 bg.open(argv[1],argv[2],argv[3]); 00064 sptr<imbitext> fg(new imbitext(bg.V1,bg.V2)); 00065 vector<string> src,trg,aln; 00066 read_data(argv[4],src); 00067 read_data(argv[5],trg); 00068 read_data(argv[6],aln); 00069 fg = fg->add(src,trg,aln); 00070 iter mfg(fg->I1.get()); 00071 dump(cout,mfg); 00072 exit(0); 00073 } 00074 00075 00076