00001 #include "InputTreeBuilder.h"
00002
00003 #include "moses/StaticData.h"
00004
00005 namespace Moses
00006 {
00007 namespace Syntax
00008 {
00009 namespace T2S
00010 {
00011
00012 InputTreeBuilder::InputTreeBuilder(std::vector<FactorType> const& oFactors)
00013 : m_outputFactorOrder(oFactors)
00014 {
00015 }
00016
00017 void InputTreeBuilder::Build(const TreeInput &in,
00018 const std::string &topLevelLabel,
00019 InputTree &out)
00020 {
00021 CreateNodes(in, topLevelLabel, out);
00022 ConnectNodes(out);
00023 }
00024
00025
00026 void InputTreeBuilder::CreateNodes(const TreeInput &in,
00027 const std::string &topLevelLabel,
00028 InputTree &out)
00029 {
00030
00031 const std::size_t numWords = in.GetSize();
00032
00033
00034
00035
00036 std::vector<XMLParseOutput> xmlNodes = in.GetLabelledSpans();
00037
00038
00039
00040
00041
00042
00043 SortXmlNodesIntoPostOrder(xmlNodes);
00044
00045
00046
00047 std::vector<XMLParseOutput> nonTerms;
00048 nonTerms.reserve(xmlNodes.size()+1);
00049 for (std::vector<XMLParseOutput>::const_iterator p = xmlNodes.begin();
00050 p != xmlNodes.end(); ++p) {
00051 std::size_t start = p->m_range.GetStartPos();
00052 std::size_t end = p->m_range.GetEndPos();
00053 nonTerms.push_back(XMLParseOutput(p->m_label, Range(start+1, end+1)));
00054 }
00055
00056 nonTerms.push_back(XMLParseOutput(topLevelLabel, Range(0, numWords-1)));
00057
00058
00059
00060
00061
00062 out.nodes.reserve(numWords + nonTerms.size());
00063 out.nodesAtPos.resize(numWords);
00064
00065
00066 int prevStart = -1;
00067 int prevEnd = -1;
00068 for (std::vector<XMLParseOutput>::const_iterator p = nonTerms.begin();
00069 p != nonTerms.end(); ++p) {
00070 int start = static_cast<int>(p->m_range.GetStartPos());
00071 int end = static_cast<int>(p->m_range.GetEndPos());
00072
00073
00074 if (start != prevStart && end != prevEnd) {
00075
00076
00077 for (int i = prevEnd+1; i <= end; ++i) {
00078 PVertex v(Range(i, i), in.GetWord(i));
00079 out.nodes.push_back(InputTree::Node(v));
00080 out.nodesAtPos[i].push_back(&out.nodes.back());
00081 }
00082 }
00083
00084 Word w(true);
00085 w.CreateFromString(Moses::Output, m_outputFactorOrder, p->m_label, true);
00086 PVertex v(Range(start, end), w);
00087 out.nodes.push_back(InputTree::Node(v));
00088 out.nodesAtPos[start].push_back(&out.nodes.back());
00089
00090 prevStart = start;
00091 prevEnd = end;
00092 }
00093 }
00094
00095
00096 void InputTreeBuilder::ConnectNodes(InputTree &out)
00097 {
00098
00099 std::vector<InputTree::Node*> parents(out.nodes.size(), NULL);
00100 for (std::size_t i = 0; i < out.nodes.size()-1; ++i) {
00101 const InputTree::Node &node = out.nodes[i];
00102 std::size_t start = node.pvertex.span.GetStartPos();
00103 std::size_t end = node.pvertex.span.GetEndPos();
00104
00105 std::size_t j = i+1;
00106 while (true) {
00107 const InputTree::Node &succ = out.nodes[j];
00108 std::size_t succStart = succ.pvertex.span.GetStartPos();
00109 std::size_t succEnd = succ.pvertex.span.GetEndPos();
00110 if (succStart <= start && succEnd >= end) {
00111 break;
00112 }
00113 ++j;
00114 }
00115 parents[i] = &(out.nodes[j]);
00116 }
00117
00118
00119 for (std::size_t i = 0; i < out.nodes.size()-1; ++i) {
00120 InputTree::Node &child = out.nodes[i];
00121 InputTree::Node &parent = *(parents[i]);
00122 parent.children.push_back(&child);
00123 }
00124 }
00125
00126 void InputTreeBuilder::SortXmlNodesIntoPostOrder(
00127 std::vector<XMLParseOutput> &nodes)
00128 {
00129
00130
00131 std::vector<std::pair<XMLParseOutput *, int> > pairs;
00132 pairs.reserve(nodes.size());
00133 for (std::size_t i = 0; i < nodes.size(); ++i) {
00134 pairs.push_back(std::make_pair(&(nodes[i]), i));
00135 }
00136
00137
00138 std::sort(pairs.begin(), pairs.end(), PostOrderComp);
00139
00140
00141 std::vector<XMLParseOutput> tmp;
00142 tmp.reserve(nodes.size());
00143 for (std::size_t i = 0; i < pairs.size(); ++i) {
00144 tmp.push_back(nodes[pairs[i].second]);
00145 }
00146 nodes.swap(tmp);
00147 }
00148
00149
00150 bool InputTreeBuilder::PostOrderComp(const std::pair<XMLParseOutput *, int> &x,
00151 const std::pair<XMLParseOutput *, int> &y)
00152 {
00153 std::size_t xStart = x.first->m_range.GetStartPos();
00154 std::size_t xEnd = x.first->m_range.GetEndPos();
00155 std::size_t yStart = y.first->m_range.GetStartPos();
00156 std::size_t yEnd = y.first->m_range.GetEndPos();
00157
00158 if (xEnd == yEnd) {
00159 if (xStart == yStart) {
00160 return x.second < y.second;
00161 } else {
00162 return xStart > yStart;
00163 }
00164 } else {
00165 return xEnd < yEnd;
00166 }
00167 }
00168
00169 }
00170 }
00171 }