00001 #if 0
00002
00003 #include "mmsapt.h"
00004 #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
00005 #include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h"
00006 #include <boost/algorithm/string/predicate.hpp>
00007 #include <boost/foreach.hpp>
00008 #include <boost/format.hpp>
00009 #include <boost/tokenizer.hpp>
00010 #include <boost/shared_ptr.hpp>
00011 #include <algorithm>
00012 #include <iostream>
00013
00014 using namespace Moses;
00015 using namespace sapt;
00016 using namespace std;
00017 using namespace boost;
00018 using namespace boost::algorithm;
00019
00020 vector<FactorType> fo(1,FactorType(0));
00021
00022 class SimplePhrase : public Moses::Phrase
00023 {
00024 vector<FactorType> const m_fo;
00025 public:
00026 SimplePhrase(): m_fo(1,FactorType(0)) {}
00027
00028 void init(string const& s)
00029 {
00030 istringstream buf(s); string w;
00031 while (buf >> w)
00032 {
00033 Word wrd;
00034 this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false);
00035 }
00036 }
00037 };
00038
00039 class TargetPhraseIndexSorter
00040 {
00041 TargetPhraseCollection const& my_tpc;
00042 CompareTargetPhrase cmp;
00043 public:
00044 TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {}
00045 bool operator()(size_t a, size_t b) const
00046 {
00047
00048 return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() >
00049 my_tpc[b]->GetScoreBreakdown().GetWeightedScore());
00050 }
00051 };
00052
00053 int main(int argc, char* argv[])
00054 {
00055
00056 string vlevel = "alt";
00057 vector<pair<string,int> > argfilter(5);
00058 argfilter[0] = std::make_pair(string("--spe-src"),1);
00059 argfilter[1] = std::make_pair(string("--spe-trg"),1);
00060 argfilter[2] = std::make_pair(string("--spe-aln"),1);
00061 argfilter[3] = std::make_pair(string("--spe-show"),1);
00062
00063 char** my_args; int my_acnt;
00064 char** mo_args; int mo_acnt;
00065 filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter);
00066
00067 ifstream spe_src,spe_trg,spe_aln;
00068
00069 for (int i = 0; i < my_acnt; i += 2)
00070 {
00071 if (!strcmp(my_args[i],"--spe-src"))
00072 spe_src.open(my_args[i+1]);
00073 else if (!strcmp(my_args[i],"--spe-trg"))
00074 spe_trg.open(my_args[i+1]);
00075 else if (!strcmp(my_args[i],"--spe-aln"))
00076 spe_aln.open(my_args[i+1]);
00077 else if (!strcmp(my_args[i],"--spe-show"))
00078 vlevel = my_args[i+1];
00079 }
00080
00081 Parameter params;
00082 if (!params.LoadParam(mo_acnt,mo_args) ||
00083 !StaticData::LoadDataStatic(¶ms, mo_args[0]))
00084 exit(1);
00085
00086 StaticData const& global = StaticData::Instance();
00087 global.SetVerboseLevel(0);
00088 vector<FactorType> ifo = global.GetInputFactorOrder();
00089
00090 PhraseDictionary* PT = PhraseDictionary::GetColl()[0];
00091 Mmsapt* mmsapt = dynamic_cast<Mmsapt*>(PT);
00092 if (!mmsapt)
00093 {
00094 cerr << "Phrase table implementation not supported by this utility." << endl;
00095 exit(1);
00096 }
00097 mmsapt->SetTableLimit(0);
00098
00099 string srcline,trgline,alnline;
00100 cout.precision(2);
00101 vector<string> fname = mmsapt->GetFeatureNames();
00102 while (getline(spe_src,srcline))
00103 {
00104 UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE
00105 << ": missing data for online updates.");
00106 UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE
00107 << ": missing data for online updates.");
00108 cout << string(80,'-') << "\n" << srcline << "\n" << trgline << "\n" << endl;
00109
00110
00111 Sentence snt;
00112 istringstream buf(srcline+"\n");
00113 if (!snt.Read(buf,ifo)) break;
00114
00115 int dynprovidx = -1;
00116 for (size_t i = 0; i < fname.size(); ++i)
00117 {
00118 if (starts_with(fname[i], "prov-1."))
00119 dynprovidx = i;
00120 }
00121 cout << endl;
00122 for (size_t i = 0; i < snt.GetSize(); ++i)
00123 {
00124 for (size_t k = i; k < snt.GetSize(); ++k)
00125 {
00126 Phrase p = snt.GetSubString(Range(i,k));
00127 if (!mmsapt->PrefixExists(p)) break;
00128 TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p);
00129 if (!trg || !trg->GetSize()) continue;
00130
00131 bool header_done = false;
00132 bool has_dynamic_match = vlevel == "all" || vlevel == "ALL";
00133 vector<size_t> order; order.reserve(trg->GetSize());
00134 size_t stop = trg->GetSize();
00135
00136 vector<size_t> o2(trg->GetSize());
00137 for (size_t i = 0; i < stop; ++i) o2[i] = i;
00138 sort(o2.begin(),o2.end(),TargetPhraseIndexSorter(*trg));
00139
00140 for (size_t r = 0; r < stop; ++r)
00141 {
00142 if (vlevel != "ALL")
00143 {
00144 Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
00145 ostringstream buf; buf << phr;
00146 string tphrase = buf.str();
00147 tphrase.erase(tphrase.size()-1);
00148 size_t s = trgline.find(tphrase);
00149 if (s == string::npos) continue;
00150 size_t e = s + tphrase.size();
00151 if ((s && trgline[s-1] != ' ') || (e < trgline.size() && trgline[e] != ' '))
00152 continue;
00153 }
00154 order.push_back(r);
00155 if (!has_dynamic_match)
00156 {
00157 ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown();
00158 ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
00159 FVector const& scores = scc.GetScoresVector();
00160 has_dynamic_match = scores[idx.first + dynprovidx] > 0;
00161 }
00162 }
00163 if ((vlevel == "alt" || vlevel == "new") && !has_dynamic_match)
00164 continue;
00165
00166
00167 BOOST_FOREACH(size_t const& r, order)
00168 {
00169 ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown();
00170 ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
00171 FVector const& scores = scc.GetScoresVector();
00172 float wscore = scc.GetWeightedScore();
00173 if (vlevel == "new" && scores[idx.first + dynprovidx] == 0)
00174 continue;
00175 if (!header_done)
00176 {
00177 cout << endl;
00178 if (trg->GetSize() == 1)
00179 cout << p << " (1 translation option)" << endl;
00180 else
00181 cout << p << " (" << trg->GetSize() << " translation options)" << endl;
00182 header_done = true;
00183 }
00184 Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]);
00185 cout << setw(3) << r+1 << " " << phr << endl;
00186 cout << " ";
00187 BOOST_FOREACH(string const& fn, fname)
00188 cout << " " << format("%10.10s") % fn;
00189 cout << endl;
00190 cout << " ";
00191 for (size_t x = idx.first; x < idx.second; ++x)
00192 {
00193 size_t j = x-idx.first;
00194 float f = (mmsapt && mmsapt->isLogVal(j)) ? exp(scores[x]) : scores[x];
00195 string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
00196 if (starts_with(fname[j], "lex")) fmt = "%10.3e";
00197 else if (starts_with(fname[j], "prov-1."))
00198 {
00199 f = round(f/(1-f));
00200 fmt = "%10d";
00201 }
00202 cout << " " << format(fmt) % (mmsapt->isInteger(j) ? round(f) : f);
00203 }
00204 cout << " " << format("%10.3e") % exp(wscore)
00205 << " " << format("%10.3e") % exp((*trg)[o2[r]]->GetFutureScore()) << endl;
00206 }
00207 mmsapt->Release(trg);
00208 continue;
00209 }
00210 }
00211 mmsapt->add(srcline,trgline,alnline);
00212 }
00213
00214 exit(0);
00215 }
00216 #endif
00217
00218