00001 #include "PostprocessEgretForests.h"
00002
00003 #include <cassert>
00004 #include <cstdlib>
00005 #include <fstream>
00006 #include <iostream>
00007 #include <iterator>
00008 #include <string>
00009 #include <sstream>
00010 #include <vector>
00011
00012 #include <boost/program_options.hpp>
00013 #include <boost/scoped_ptr.hpp>
00014
00015 #include "syntax-common/exception.h"
00016
00017 #include "Forest.h"
00018 #include "ForestParser.h"
00019 #include "ForestWriter.h"
00020 #include "Options.h"
00021 #include "SplitPoint.h"
00022 #include "SplitPointFileParser.h"
00023
00024 namespace MosesTraining
00025 {
00026 namespace Syntax
00027 {
00028 namespace PostprocessEgretForests
00029 {
00030
00031 int PostprocessEgretForests::Main(int argc, char *argv[])
00032 {
00033 try {
00034
00035 Options options;
00036 ProcessOptions(argc, argv, options);
00037
00038
00039 boost::scoped_ptr<SplitPointFileParser> splitPointParser;
00040 std::ifstream splitPointFileStream;
00041 if (!options.splitPointsFile.empty()) {
00042 OpenInputFileOrDie(options.splitPointsFile, splitPointFileStream);
00043 splitPointParser.reset(new SplitPointFileParser(splitPointFileStream));
00044 }
00045
00046 ProcessForest(std::cin, std::cout, splitPointParser.get(), options);
00047 } catch (const MosesTraining::Syntax::Exception &e) {
00048 Error(e.msg());
00049 }
00050 return 0;
00051 }
00052
00053 void PostprocessEgretForests::ProcessForest(
00054 std::istream &in, std::ostream &out, SplitPointFileParser *splitPointParser,
00055 const Options &options)
00056 {
00057 std::size_t sentNum = 0;
00058 ForestWriter writer(options, out);
00059 ForestParser end;
00060 for (ForestParser p(in); p != end; ++p) {
00061 ++sentNum;
00062 if (splitPointParser) {
00063 if (*splitPointParser == SplitPointFileParser()) {
00064 throw Exception("prematurely reached end of split point file");
00065 }
00066 if (!p->forest.vertices.empty()) {
00067 try {
00068 MarkSplitPoints((*splitPointParser)->splitPoints, p->forest);
00069 MarkSplitPoints((*splitPointParser)->splitPoints, p->sentence);
00070 } catch (const Exception &e) {
00071 std::ostringstream msg;
00072 msg << "failed to mark split point for sentence " << sentNum << ": "
00073 << e.msg();
00074 throw Exception(msg.str());
00075 }
00076 }
00077 ++(*splitPointParser);
00078 }
00079 writer.Write(p->sentence, p->forest, p->sentNum);
00080 }
00081 }
00082
00083 void PostprocessEgretForests::ProcessOptions(int argc, char *argv[],
00084 Options &options) const
00085 {
00086 namespace po = boost::program_options;
00087 namespace cls = boost::program_options::command_line_style;
00088
00089
00090
00091 std::ostringstream usageTop;
00092 usageTop << "Usage: " << name()
00093 << " [OPTION]...\n\n"
00094 << "TODO\n\n"
00095 << "Options";
00096
00097
00098 std::ostringstream usageBottom;
00099 usageBottom << "TODO";
00100
00101
00102 po::options_description visible(usageTop.str());
00103 visible.add_options()
00104 ("Escape",
00105 "escape Moses special characters")
00106 ("MarkSplitPoints",
00107 po::value(&options.splitPointsFile),
00108 "read split points from named file and mark (using @) in output")
00109 ;
00110
00111
00112
00113 po::options_description hidden("Hidden options");
00114 hidden.add_options()
00115
00116 ;
00117
00118
00119 po::options_description cmdLineOptions;
00120 cmdLineOptions.add(visible).add(hidden);
00121
00122
00123 po::positional_options_description p;
00124
00125
00126
00127 po::variables_map vm;
00128 try {
00129 po::store(po::command_line_parser(argc, argv).style(MosesOptionStyle()).
00130 options(cmdLineOptions).positional(p).run(), vm);
00131 po::notify(vm);
00132 } catch (const std::exception &e) {
00133 std::ostringstream msg;
00134 msg << e.what() << "\n\n" << visible << usageBottom.str();
00135 Error(msg.str());
00136 }
00137
00138 if (vm.count("help")) {
00139 std::cout << visible << usageBottom.str() << std::endl;
00140 std::exit(0);
00141 }
00142
00143
00144 if (vm.count("Escape")) {
00145 options.escape = true;
00146 }
00147 }
00148
00149 }
00150 }
00151 }