00001 #pragma once
00002
00003 #include <istream>
00004 #include <string>
00005 #include <vector>
00006 #include <utility>
00007
00008 #include <boost/shared_ptr.hpp>
00009 #include <boost/unordered_set.hpp>
00010
00011 #include "util/string_piece.hh"
00012
00013 #include "Forest.h"
00014 #include "Symbol.h"
00015
00016 namespace MosesTraining
00017 {
00018 namespace Syntax
00019 {
00020 namespace PostprocessEgretForests
00021 {
00022
00023 class ForestParser
00024 {
00025 public:
00026 struct Entry {
00027 std::size_t sentNum;
00028 std::string sentence;
00029 Forest forest;
00030 };
00031
00032 ForestParser();
00033 ForestParser(std::istream &);
00034
00035 Entry &operator*() {
00036 return m_entry;
00037 }
00038 Entry *operator->() {
00039 return &m_entry;
00040 }
00041
00042 ForestParser &operator++();
00043
00044 friend bool operator==(const ForestParser &, const ForestParser &);
00045 friend bool operator!=(const ForestParser &, const ForestParser &);
00046
00047 private:
00048 typedef boost::shared_ptr<Forest::Vertex> VertexSP;
00049 typedef boost::shared_ptr<Forest::Hyperedge> HyperedgeSP;
00050
00051 struct VertexSetHash {
00052 std::size_t operator()(const VertexSP &v) const {
00053 std::size_t seed = 0;
00054 boost::hash_combine(seed, v->symbol);
00055 boost::hash_combine(seed, v->start);
00056 boost::hash_combine(seed, v->end);
00057 return seed;
00058 }
00059 };
00060
00061 struct VertexSetPred {
00062 bool operator()(const VertexSP &v, const VertexSP &w) const {
00063 return v->symbol == w->symbol && v->start == w->start && v->end == w->end;
00064 }
00065 };
00066
00067 typedef boost::unordered_set<VertexSP, VertexSetHash,
00068 VertexSetPred> VertexSet;
00069
00070
00071 ForestParser(const ForestParser &);
00072 ForestParser &operator=(const ForestParser &);
00073
00074 VertexSP AddVertex(const VertexSP &);
00075 void ParseHyperedgeLine(const std::string &, Forest &);
00076 void ParseSentenceNumLine(const std::string &, std::size_t &);
00077 VertexSP ParseVertex(const StringPiece &);
00078
00079 Entry m_entry;
00080 std::istream *m_input;
00081 std::string m_tmpLine;
00082 VertexSet m_vertexSet;
00083 };
00084
00085 }
00086 }
00087 }