00001 #include <sstream>
00002 #include "Manager.h"
00003 #include "PVertex.h"
00004 #include "moses/OutputCollector.h"
00005 #include "moses/Util.h"
00006 
00007 namespace Moses
00008 {
00009 namespace Syntax
00010 {
00011 
00012 Manager::Manager(ttasksptr const& ttask)
00013   : Moses::BaseManager(ttask)
00014 { }
00015 
00016 void Manager::OutputBest(OutputCollector *collector) const
00017 {
00018   if (!collector) {
00019     return;
00020   }
00021   std::ostringstream out;
00022   FixPrecision(out);
00023   const SHyperedge *best = GetBestSHyperedge();
00024   if (best == NULL) {
00025     VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
00026     if (options()->output.ReportHypoScore) {
00027       out << "0 ";
00028     }
00029     out << '\n';
00030   } else {
00031     if (options()->output.ReportHypoScore) {
00032       out << best->label.futureScore << " ";
00033     }
00034     Phrase yield = GetOneBestTargetYield(*best);
00035     
00036     UTIL_THROW_IF2(yield.GetSize() < 2,
00037                    "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
00038     yield.RemoveWord(0);
00039     yield.RemoveWord(yield.GetSize()-1);
00040     out << yield.GetStringRep(options()->output.factor_order);
00041     out << '\n';
00042   }
00043   collector->Write(m_source.GetTranslationId(), out.str());
00044 }
00045 
00046 void Manager::OutputNBest(OutputCollector *collector) const
00047 {
00048   if (collector) {
00049     long translationId = m_source.GetTranslationId();
00050     KBestExtractor::KBestVec nBestList;
00051     ExtractKBest(options()->nbest.nbest_size, nBestList,
00052                  options()->nbest.only_distinct);
00053     OutputNBestList(collector, nBestList, translationId);
00054   }
00055 }
00056 
00057 void Manager::OutputUnknowns(OutputCollector *collector) const
00058 {
00059   if (collector) {
00060     long translationId = m_source.GetTranslationId();
00061 
00062     std::ostringstream out;
00063     for (boost::unordered_set<Moses::Word>::const_iterator p = m_oovs.begin();
00064          p != m_oovs.end(); ++p) {
00065       out << *p;
00066     }
00067     out << std::endl;
00068     collector->Write(translationId, out.str());
00069   }
00070 }
00071 
00072 void Manager::OutputNBestList(OutputCollector *collector,
00073                               const KBestExtractor::KBestVec &nBestList,
00074                               long translationId) const
00075 {
00076   const std::vector<FactorType> &outputFactorOrder = options()->output.factor_order;
00077 
00078   std::ostringstream out;
00079 
00080   if (collector->OutputIsCout()) {
00081     
00082     
00083     FixPrecision(out);
00084   }
00085 
00086   bool includeWordAlignment = options()->nbest.include_alignment_info;
00087   bool PrintNBestTrees = options()->nbest.print_trees; 
00088 
00089   for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
00090        p != nBestList.end(); ++p) {
00091     const KBestExtractor::Derivation &derivation = **p;
00092 
00093     
00094     Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);
00095 
00096     
00097     UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
00098                    "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
00099     outputPhrase.RemoveWord(0);
00100     outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
00101 
00102     
00103     out << translationId << " ||| ";
00104     OutputSurface(out, outputPhrase); 
00105     out << " ||| ";
00106     bool with_labels = options()->nbest.include_feature_labels;
00107     derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);
00108     out << " ||| " << derivation.score;
00109 
00110     
00111     if (includeWordAlignment) {
00112       out << " ||| ";
00113       Alignments align;
00114       OutputAlignmentNBest(align, derivation, 0);
00115       for (Alignments::const_iterator q = align.begin(); q != align.end();
00116            ++q) {
00117         out << q->first << "-" << q->second << " ";
00118       }
00119     }
00120 
00121     
00122     if (PrintNBestTrees) {
00123       TreePointer tree = KBestExtractor::GetOutputTree(derivation);
00124       out << " ||| " << tree->GetString();
00125     }
00126 
00127     out << std::endl;
00128   }
00129 
00130   assert(collector);
00131   collector->Write(translationId, out.str());
00132 }
00133 
00134 std::size_t Manager::OutputAlignmentNBest(
00135   Alignments &retAlign,
00136   const KBestExtractor::Derivation &derivation,
00137   std::size_t startTarget) const
00138 {
00139   const SHyperedge ­peredge = derivation.edge->shyperedge;
00140 
00141   std::size_t totalTargetSize = 0;
00142   std::size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
00143 
00144   const TargetPhrase &tp = *(shyperedge.label.translation);
00145 
00146   std::size_t thisSourceSize = CalcSourceSize(derivation);
00147 
00148   
00149   
00150   std::vector<std::size_t> sourceOffsets(thisSourceSize, 0);
00151   std::vector<std::size_t> targetOffsets(tp.GetSize(), 0);
00152 
00153   const AlignmentInfo &aiNonTerm =
00154     shyperedge.label.translation->GetAlignNonTerm();
00155   std::vector<std::size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
00156   const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd =
00157     aiNonTerm.GetNonTermIndexMap();
00158 
00159   UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
00160                  "Error");
00161 
00162   std::size_t targetInd = 0;
00163   for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
00164     if (tp.GetWord(targetPos).IsNonTerminal()) {
00165       UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
00166       std::size_t sourceInd = targetPos2SourceInd[targetPos];
00167       std::size_t sourcePos = sourceInd2pos[sourceInd];
00168 
00169       const KBestExtractor::Derivation &subderivation =
00170         *derivation.subderivations[sourceInd];
00171 
00172       
00173       std::size_t sourceSize =
00174         subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
00175       sourceOffsets[sourcePos] = sourceSize;
00176 
00177       
00178       
00179       std::size_t currStartTarget = startTarget + totalTargetSize;
00180       std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
00181                                currStartTarget);
00182       targetOffsets[targetPos] = targetSize;
00183 
00184       totalTargetSize += targetSize;
00185       ++targetInd;
00186     } else {
00187       ++totalTargetSize;
00188     }
00189   }
00190 
00191   
00192   
00193   ShiftOffsets(sourceOffsets, startSource);
00194   ShiftOffsets(targetOffsets, startTarget);
00195 
00196   
00197   const AlignmentInfo &aiTerm = shyperedge.label.translation->GetAlignTerm();
00198 
00199   
00200   AlignmentInfo::const_iterator iter;
00201   for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
00202     const std::pair<std::size_t, std::size_t> &align = *iter;
00203     std::size_t relSource = align.first;
00204     std::size_t relTarget = align.second;
00205     std::size_t absSource = sourceOffsets[relSource];
00206     std::size_t absTarget = targetOffsets[relTarget];
00207 
00208     std::pair<std::size_t, std::size_t> alignPoint(absSource, absTarget);
00209     std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
00210     UTIL_THROW_IF2(!ret.second, "Error");
00211   }
00212 
00213   return totalTargetSize;
00214 }
00215 
00216 std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const
00217 {
00218   const SHyperedge ­peredge = d.edge->shyperedge;
00219   std::size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
00220   for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
00221     std::size_t childSize =
00222       shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
00223     ret -= (childSize - 1);
00224   }
00225   return ret;
00226 }
00227 
00228 }  
00229 }