00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <iostream>
00034 #include <stack>
00035 #include <boost/algorithm/string.hpp>
00036 #include <boost/foreach.hpp>
00037
00038 #include "moses/Syntax/KBestExtractor.h"
00039 #include "moses/Syntax/PVertex.h"
00040 #include "moses/Syntax/S2T/DerivationWriter.h"
00041
00042 #include "moses/Hypothesis.h"
00043 #include "moses/TrellisPathList.h"
00044 #include "moses/StaticData.h"
00045 #include "moses/InputFileStream.h"
00046 #include "moses/FF/StatefulFeatureFunction.h"
00047 #include "moses/TreeInput.h"
00048 #include "moses/ForestInput.h"
00049 #include "moses/ConfusionNet.h"
00050 #include "moses/WordLattice.h"
00051 #include "moses/ChartManager.h"
00052
00053 #include "IOWrapper.h"
00054
00055 #include <boost/filesystem.hpp>
00056 #include <boost/iostreams/device/file.hpp>
00057 #include <boost/iostreams/filter/bzip2.hpp>
00058 #include <boost/iostreams/filter/gzip.hpp>
00059 #include <boost/iostreams/filtering_stream.hpp>
00060
00061 using namespace std;
00062
00063 namespace Moses
00064 {
00065
00066 IOWrapper::IOWrapper(AllOptions const& opts)
00067 : m_options(new AllOptions(opts))
00068 , m_nBestStream(NULL)
00069 , m_surpressSingleBestOutput(false)
00070 , m_look_ahead(0)
00071 , m_look_back(0)
00072 , m_buffered_ahead(0)
00073 , spe_src(NULL)
00074 , spe_trg(NULL)
00075 , spe_aln(NULL)
00076 {
00077 const StaticData &staticData = StaticData::Instance();
00078 Parameter const& P = staticData.GetParameter();
00079
00080
00081 m_look_ahead = m_options->context.look_ahead;
00082 m_look_back = m_options->context.look_back;
00083 m_inputType = m_options->input.input_type;
00084
00085 UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput,
00086 "Context-sensitive decoding currently works only with sentence input.");
00087
00088 m_currentLine = m_options->output.start_translation_id;
00089 m_inputFactorOrder = &m_options->input.factor_order;
00090
00091 size_t nBestSize = m_options->nbest.nbest_size;
00092 string nBestFilePath = m_options->nbest.output_file_path;
00093
00094 staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
00095 if (m_inputFilePath.empty()) {
00096 m_inputFile = NULL;
00097 m_inputStream = &cin;
00098 } else {
00099 VERBOSE(2,"IO from File" << endl);
00100 m_inputFile = new InputFileStream(m_inputFilePath);
00101 m_inputStream = m_inputFile;
00102 }
00103
00104 if (nBestSize > 0) {
00105 m_nBestOutputCollector.reset(new Moses::OutputCollector(nBestFilePath));
00106 if (m_nBestOutputCollector->OutputIsCout()) {
00107 m_surpressSingleBestOutput = true;
00108 }
00109 }
00110
00111 std::string path;
00112 P.SetParameter<std::string>(path, "output-search-graph-extended", "");
00113 if (!path.size()) P.SetParameter<std::string>(path, "output-search-graph", "");
00114 if (path.size()) m_searchGraphOutputCollector.reset(new OutputCollector(path));
00115
00116 P.SetParameter<std::string>(path, "output-unknowns", "");
00117 if (path.size()) m_unknownsCollector.reset(new OutputCollector(path));
00118
00119 P.SetParameter<std::string>(path, "alignment-output-file", "");
00120 if (path.size()) m_alignmentInfoCollector.reset(new OutputCollector(path));
00121
00122 P.SetParameter<string>(path, "translation-details", "");
00123 if (path.size()) m_detailedTranslationCollector.reset(new OutputCollector(path));
00124
00125 P.SetParameter<string>(path, "tree-translation-details", "");
00126 if (path.size()) m_detailTreeFragmentsOutputCollector.reset(new OutputCollector(path));
00127
00128 P.SetParameter<string>(path, "output-word-graph", "");
00129 if (path.size()) m_wordGraphCollector.reset(new OutputCollector(path));
00130
00131 size_t latticeSamplesSize = staticData.options()->output.lattice_sample_size;
00132 string latticeSamplesFile = staticData.options()->output.lattice_sample_filepath;
00133 if (latticeSamplesSize) {
00134 m_latticeSamplesCollector.reset(new OutputCollector(latticeSamplesFile));
00135 if (m_latticeSamplesCollector->OutputIsCout()) {
00136 m_surpressSingleBestOutput = true;
00137 }
00138 }
00139
00140 if (!m_surpressSingleBestOutput) {
00141 m_singleBestOutputCollector.reset(new Moses::OutputCollector(&std::cout));
00142 }
00143
00144
00145 char const* key = "output-search-graph-hypergraph";
00146 PARAM_VEC const* p = staticData.GetParameter().GetParam(key);
00147 std::string& fmt = m_hypergraph_output_filepattern;
00148
00149 if (p && p->size() > 2) fmt = p->at(2);
00150 else if (nBestFilePath.size() && nBestFilePath != "-" &&
00151 ! boost::starts_with(nBestFilePath, "/dev/stdout")) {
00152 fmt = boost::filesystem::path(nBestFilePath).parent_path().string();
00153 if (fmt.empty()) fmt = ".";
00154 } else fmt = boost::filesystem::current_path().string() + "/hypergraph";
00155 if (*fmt.rbegin() != '/') fmt += "/";
00156 std::string extension = (p && p->size() > 1 ? p->at(1) : std::string("txt"));
00157 UTIL_THROW_IF2(extension != "txt" && extension != "gz" && extension != "bz2",
00158 "Unknown compression type '" << extension
00159 << "' for hypergraph output!");
00160 fmt += string("%d.") + extension;
00161
00162
00163 if (staticData.GetParameter().GetParam("spe-src")) {
00164 spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
00165 spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str());
00166 spe_aln = new ifstream(staticData.GetParameter().GetParam("spe-aln")->at(0).c_str());
00167 }
00168 }
00169
00170 IOWrapper::~IOWrapper()
00171 {
00172 if (m_inputFile != NULL)
00173 delete m_inputFile;
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185 }
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199 boost::shared_ptr<InputType>
00200 IOWrapper::
00201 GetBufferedInput()
00202 {
00203 switch(m_inputType) {
00204 case SentenceInput:
00205 return BufferInput<Sentence>();
00206 case ConfusionNetworkInput:
00207 return BufferInput<ConfusionNet>();
00208 case WordLatticeInput:
00209 return BufferInput<WordLattice>();
00210 case TreeInputType:
00211 return BufferInput<TreeInput>();
00212 case TabbedSentenceInput:
00213 return BufferInput<TabbedSentence>();
00214 case ForestInputType:
00215 return BufferInput<ForestInput>();
00216 default:
00217 TRACE_ERR("Unknown input type: " << m_inputType << "\n");
00218 return boost::shared_ptr<InputType>();
00219 }
00220
00221 }
00222
00223 boost::shared_ptr<InputType>
00224 IOWrapper::
00225 ReadInput(boost::shared_ptr<std::vector<std::string> >* cw)
00226 {
00227 #ifdef WITH_THREADS
00228 boost::lock_guard<boost::mutex> lock(m_lock);
00229 #endif
00230 boost::shared_ptr<InputType> source = GetBufferedInput();
00231 if (source) {
00232 source->SetTranslationId(m_currentLine++);
00233
00234
00235 if (m_past_input.size() && m_look_back != std::numeric_limits<size_t>::max()) {
00236 list<boost::shared_ptr<InputType> >::iterator m = m_past_input.end();
00237 for (size_t cnt = 0; cnt < m_look_back && --m != m_past_input.begin();)
00238 cnt += (*m)->GetSize();
00239 while (m_past_input.begin() != m) m_past_input.pop_front();
00240 }
00241
00242 if (m_look_back)
00243 m_past_input.push_back(source);
00244 }
00245 if (cw) *cw = GetCurrentContextWindow();
00246 return source;
00247 }
00248
00249 boost::shared_ptr<std::vector<std::string> >
00250 IOWrapper::
00251 GetCurrentContextWindow() const
00252 {
00253 boost::shared_ptr<std::vector<string> > context(new std::vector<string>);
00254 BOOST_FOREACH(boost::shared_ptr<InputType> const& i, m_past_input)
00255 context->push_back(i->ToString());
00256 BOOST_FOREACH(boost::shared_ptr<InputType> const& i, m_future_input)
00257 context->push_back(i->ToString());
00258 return context;
00259 }
00260
00261
00262
00263 std::string
00264 IOWrapper::
00265 GetHypergraphOutputFileName(size_t const id) const
00266 {
00267 return str(boost::format(m_hypergraph_output_filepattern) % id);
00268 }
00269
00270
00271 }
00272