00001 #include "ForestWriter.h"
00002
00003 #include <cassert>
00004 #include <vector>
00005
00006 #include "TopologicalSorter.h"
00007
00008 namespace MosesTraining
00009 {
00010 namespace Syntax
00011 {
00012 namespace PostprocessEgretForests
00013 {
00014
00015 void ForestWriter::Write(const std::string &sentence, const Forest &forest,
00016 std::size_t sentNum)
00017 {
00018 m_out << "sentence " << sentNum << " :" << std::endl;
00019 m_out << PossiblyEscape(sentence) << std::endl;
00020
00021
00022 if (forest.vertices.empty()) {
00023 m_out << std::endl << std::endl;
00024 return;
00025 }
00026
00027
00028 std::vector<const Forest::Vertex *> vertices;
00029 TopologicalSorter sorter;
00030 sorter.Sort(forest, vertices);
00031 for (std::vector<const Forest::Vertex *>::const_iterator p = vertices.begin();
00032 p != vertices.end(); ++p) {
00033 const Forest::Vertex &v = **p;
00034 for (std::vector<boost::shared_ptr<Forest::Hyperedge> >::const_iterator
00035 q = v.incoming.begin(); q != v.incoming.end(); ++q) {
00036 WriteHyperedgeLine(**q);
00037 }
00038 }
00039
00040
00041 m_out << std::endl;
00042 }
00043
00044 void ForestWriter::WriteHyperedgeLine(const Forest::Hyperedge &e)
00045 {
00046 WriteVertex(*e.head);
00047 m_out << " =>";
00048 for (std::vector<Forest::Vertex *>::const_iterator p = e.tail.begin();
00049 p != e.tail.end(); ++p) {
00050 m_out << " ";
00051 WriteVertex(**p);
00052 }
00053 m_out << " ||| " << e.weight << std::endl;
00054 }
00055
00056 void ForestWriter::WriteVertex(const Forest::Vertex &v)
00057 {
00058 m_out << PossiblyEscape(v.symbol.value);
00059 if (!v.incoming.empty()) {
00060 m_out << "[" << v.start << "," << v.end << "]";
00061 }
00062 }
00063
00064 std::string ForestWriter::PossiblyEscape(const std::string &s) const
00065 {
00066 if (m_options.escape) {
00067 return Escape(s);
00068 } else {
00069 return s;
00070 }
00071 }
00072
00073
00074 std::string ForestWriter::Escape(const std::string &s) const
00075 {
00076 std::string t;
00077 std::size_t len = s.size();
00078 t.reserve(len);
00079 for (std::size_t i = 0; i < len; ++i) {
00080 if (s[i] == '<') {
00081 t += "<";
00082 } else if (s[i] == '>') {
00083 t += ">";
00084 } else if (s[i] == '[') {
00085 t += "[";
00086 } else if (s[i] == ']') {
00087 t += "]";
00088 } else if (s[i] == '|') {
00089 t += "|";
00090 } else if (s[i] == '&') {
00091 t += "&";
00092 } else if (s[i] == '\'') {
00093 t += "'";
00094 } else if (s[i] == '"') {
00095 t += """;
00096 } else {
00097 t += s[i];
00098 }
00099 }
00100 return t;
00101 }
00102
00103 }
00104 }
00105 }