00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
#include "pcfg_extract.h"

#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include <boost/program_options.hpp>

#include "syntax-common/exception.h"
#include "syntax-common/pcfg.h"
#include "syntax-common/vocabulary.h"
#include "syntax-common/xml_tree_parser.h"

#include "SyntaxTree.h"

#include "options.h"
#include "rule_collection.h"
#include "rule_extractor.h"
00044 
00045 namespace MosesTraining
00046 {
00047 namespace Syntax
00048 {
00049 namespace PCFG
00050 {
00051 
00052 int PcfgExtract::Main(int argc, char *argv[])
00053 {
00054   
00055   Options options;
00056   ProcessOptions(argc, argv, options);
00057 
00058   
00059   Vocabulary non_term_vocab;
00060   RuleExtractor rule_extractor(non_term_vocab);
00061   RuleCollection rule_collection;
00062   XmlTreeParser parser;
00063   std::string line;
00064   std::size_t line_num = 0;
00065   std::auto_ptr<MosesTraining::SyntaxTree> tree;
00066   while (std::getline(std::cin, line)) {
00067     ++line_num;
00068     try {
00069       tree = parser.Parse(line);
00070     } catch (Exception &e) {
00071       std::ostringstream msg;
00072       msg << "line " << line_num << ": " << e.msg();
00073       Error(msg.str());
00074     }
00075     if (!tree.get()) {
00076       std::ostringstream msg;
00077       msg << "no tree at line " << line_num;
00078       Warn(msg.str());
00079       continue;
00080     }
00081     rule_extractor.Extract(*tree, rule_collection);
00082   }
00083 
00084   
00085   Pcfg pcfg;
00086   rule_collection.CreatePcfg(pcfg);
00087   pcfg.Write(non_term_vocab, std::cout);
00088 
00089   return 0;
00090 }
00091 
00092 void PcfgExtract::ProcessOptions(int argc, char *argv[],
00093                                  Options &options) const
00094 {
00095   namespace po = boost::program_options;
00096 
00097   std::ostringstream usage_top;
00098   usage_top << "Usage: " << name() << "\n\n" << "Options";
00099 
00100   
00101   po::options_description visible(usage_top.str());
00102   visible.add_options()
00103   ("help", "print help message and exit")
00104   ;
00105 
00106   
00107   
00108   po::options_description hidden("Hidden options");
00109   hidden.add_options();
00110 
00111   
00112   po::options_description cmd_line_options;
00113   cmd_line_options.add(visible).add(hidden);
00114 
00115   
00116   po::positional_options_description p;
00117 
00118   
00119   po::variables_map vm;
00120   try {
00121     po::store(po::command_line_parser(argc, argv).style(MosesOptionStyle()).
00122               options(cmd_line_options).positional(p).run(), vm);
00123     po::notify(vm);
00124   } catch (const std::exception &e) {
00125     std::ostringstream msg;
00126     msg << e.what() << "\n\n" << visible;
00127     Error(msg.str());
00128   }
00129 
00130   if (vm.count("help")) {
00131     std::cout << visible << std::endl;
00132     std::exit(0);
00133   }
00134 }
00135 
00136 }  
00137 }  
00138 }