00001 // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- 00002 // $Id$ 00003 00004 /*********************************************************************** 00005 Moses - factored phrase-based language decoder 00006 Copyright (c) 2006 University of Edinburgh 00007 All rights reserved. 00008 00009 Redistribution and use in source and binary forms, with or without modification, 00010 are permitted provided that the following conditions are met: 00011 00012 * Redistributions of source code must retain the above copyright notice, 00013 this list of conditions and the following disclaimer. 00014 * Redistributions in binary form must reproduce the above copyright notice, 00015 this list of conditions and the following disclaimer in the documentation 00016 and/or other materials provided with the distribution. 00017 * Neither the name of the University of Edinburgh nor the names of its contributors 00018 may be used to endorse or promote products derived from this software 00019 without specific prior written permission. 00020 00021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00022 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00023 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00024 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 00025 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00026 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00027 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00028 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER 00029 IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00030 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00031 POSSIBILITY OF SUCH DAMAGE. 00032 ***********************************************************************/ 00033 00034 // example file on how to use moses library 00035 00036 #pragma once 00037 00038 #ifdef WITH_THREADS 00039 #include <boost/thread.hpp> 00040 #endif 00041 00042 #include <cassert> 00043 #include <fstream> 00044 #include <ostream> 00045 #include <vector> 00046 #include <list> 00047 #include <iomanip> 00048 #include <limits> 00049 00050 #include "moses/TypeDef.h" 00051 #include "moses/Sentence.h" 00052 #include "moses/TabbedSentence.h" 00053 #include "moses/FactorTypeSet.h" 00054 #include "moses/FactorCollection.h" 00055 #include "moses/Hypothesis.h" 00056 #include "moses/OutputCollector.h" 00057 #include "moses/TrellisPathList.h" 00058 #include "moses/InputFileStream.h" 00059 #include "moses/InputType.h" 00060 #include "moses/WordLattice.h" 00061 #include "moses/LatticeMBR.h" 00062 #include "moses/ChartKBestExtractor.h" 00063 #include "moses/Syntax/KBestExtractor.h" 00064 #include "moses/parameters/AllOptions.h" 00065 00066 #include <boost/format.hpp> 00067 #include <boost/shared_ptr.hpp> 00068 00069 namespace Moses 00070 { 00071 class ScoreComponentCollection; 00072 class Hypothesis; 00073 class ChartHypothesis; 00074 class Factor; 00075 class TranslationTask; 00076 namespace Syntax 00077 { 00078 struct SHyperedge; 00079 } 00080 00083 class IOWrapper 00084 { 00085 protected: 00086 boost::shared_ptr<AllOptions const> m_options; 00087 const std::vector<Moses::FactorType> *m_inputFactorOrder; 00088 std::string m_inputFilePath; 00089 Moses::InputFileStream *m_inputFile; 00090 std::istream *m_inputStream; 00091 std::ostream *m_nBestStream; 00092 // std::ostream *m_outputWordGraphStream; 00093 // std::auto_ptr<std::ostream> m_outputSearchGraphStream; 00094 // std::ostream *m_detailedTranslationReportingStream; 00095 std::ostream *m_unknownsStream; 00096 // std::ostream *m_detailedTreeFragmentsTranslationReportingStream; 00097 std::ofstream *m_alignmentInfoStream; 00098 std::ofstream *m_latticeSamplesStream; 00099 00100 std::auto_ptr<Moses::OutputCollector> m_singleBestOutputCollector; 00101 std::auto_ptr<Moses::OutputCollector> m_nBestOutputCollector; 00102 std::auto_ptr<Moses::OutputCollector> m_unknownsCollector; 00103 std::auto_ptr<Moses::OutputCollector> m_alignmentInfoCollector; 00104 std::auto_ptr<Moses::OutputCollector> m_searchGraphOutputCollector; 00105 std::auto_ptr<Moses::OutputCollector> m_detailedTranslationCollector; 00106 std::auto_ptr<Moses::OutputCollector> m_wordGraphCollector; 00107 std::auto_ptr<Moses::OutputCollector> m_latticeSamplesCollector; 00108 std::auto_ptr<Moses::OutputCollector> m_detailTreeFragmentsOutputCollector; 00109 00110 bool m_surpressSingleBestOutput; 00111 00112 #ifdef WITH_THREADS 00113 boost::mutex m_lock; 00114 #endif 00115 size_t m_currentLine; /* line counter, initialized from static data at construction 00116 * incremented with every call to ReadInput */ 00117 00118 InputTypeEnum m_inputType; // initialized from StaticData at construction 00119 std::list<boost::shared_ptr<InputType> > m_past_input; 00120 std::list<boost::shared_ptr<InputType> > m_future_input; 00121 size_t m_look_ahead; 00122 size_t m_look_back; 00123 size_t m_buffered_ahead; 00124 // For context-sensitive decoding: 00125 // Number of context words ahead and before the current sentence. 00126 00127 std::string m_hypergraph_output_filepattern; 00128 00129 public: 00130 IOWrapper(AllOptions const& opts); 00131 ~IOWrapper(); 00132 00133 // Moses::InputType* GetInput(Moses::InputType *inputType); 00134 00135 boost::shared_ptr<InputType> 00136 ReadInput(boost::shared_ptr<std::vector<std::string> >* cw = NULL); 00137 00138 Moses::OutputCollector *GetSingleBestOutputCollector() { 00139 return m_singleBestOutputCollector.get(); 00140 } 00141 00142 void SetOutputStream2SingleBestOutputCollector(std::ostream* outStream) { 00143 if (m_singleBestOutputCollector.get()) 00144 m_singleBestOutputCollector->SetOutputStream(outStream); 00145 else 00146 m_singleBestOutputCollector.reset(new Moses::OutputCollector(outStream)); 00147 } 00148 00149 Moses::OutputCollector *GetNBestOutputCollector() { 00150 return m_nBestOutputCollector.get(); 00151 } 00152 00153 Moses::OutputCollector *GetUnknownsCollector() { 00154 return m_unknownsCollector.get(); 00155 } 00156 00157 Moses::OutputCollector *GetAlignmentInfoCollector() { 00158 return m_alignmentInfoCollector.get(); 00159 } 00160 00161 Moses::OutputCollector *GetSearchGraphOutputCollector() { 00162 return m_searchGraphOutputCollector.get(); 00163 } 00164 00165 Moses::OutputCollector *GetDetailedTranslationCollector() { 00166 return m_detailedTranslationCollector.get(); 00167 } 00168 00169 Moses::OutputCollector *GetWordGraphCollector() { 00170 return m_wordGraphCollector.get(); 00171 } 00172 00173 Moses::OutputCollector *GetLatticeSamplesCollector() { 00174 return m_latticeSamplesCollector.get(); 00175 } 00176 00177 Moses::OutputCollector *GetDetailTreeFragmentsOutputCollector() { 00178 return m_detailTreeFragmentsOutputCollector.get(); 00179 } 00180 00181 void SetInputStreamFromString(std::istringstream &input) { 00182 m_inputStream = &input; 00183 } 00184 00185 std::string GetHypergraphOutputFileName(size_t const id) const; 00186 00187 // post editing 00188 std::ifstream *spe_src, *spe_trg, *spe_aln; 00189 00190 std::list<boost::shared_ptr<InputType> > const& GetPastInput() const { 00191 return m_past_input; 00192 } 00193 00194 std::list<boost::shared_ptr<InputType> > const& GetFutureInput() const { 00195 return m_future_input; 00196 } 00197 size_t GetLookAhead() const { 00198 return m_look_ahead; 00199 } 00200 00201 size_t GetLookBack() const { 00202 return m_look_back; 00203 } 00204 00205 private: 00206 template<class itype> 00207 boost::shared_ptr<InputType> 00208 BufferInput(); 00209 00210 boost::shared_ptr<InputType> 00211 GetBufferedInput(); 00212 00213 boost::shared_ptr<std::vector<std::string> > 00214 GetCurrentContextWindow() const; 00215 }; 00216 00217 template<class itype> 00218 boost::shared_ptr<InputType> 00219 IOWrapper:: 00220 BufferInput() 00221 { 00222 boost::shared_ptr<itype> source; 00223 boost::shared_ptr<InputType> ret; 00224 if (m_future_input.size()) { 00225 ret = m_future_input.front(); 00226 m_future_input.pop_front(); 00227 m_buffered_ahead -= ret->GetSize(); 00228 } else { 00229 source.reset(new itype(m_options)); 00230 if (!source->Read(*m_inputStream)) 00231 return ret; 00232 ret = source; 00233 } 00234 while (m_buffered_ahead < m_look_ahead) { 00235 source.reset(new itype(m_options)); 00236 if (!source->Read(*m_inputStream)) 00237 break; 00238 m_future_input.push_back(source); 00239 m_buffered_ahead += source->GetSize(); 00240 } 00241 return ret; 00242 } 00243 00244 00245 } 00246