00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #pragma once
00021 #ifndef EXTRACT_GHKM_NODE_H_
00022 #define EXTRACT_GHKM_NODE_H_
00023
00024 #include <cassert>
00025 #include <iterator>
00026 #include <string>
00027 #include <vector>
00028
00029 #include "Span.h"
00030
00031 namespace MosesTraining
00032 {
00033 namespace Syntax
00034 {
00035 namespace GHKM
00036 {
00037
00038 class Subgraph;
00039
00040 enum NodeType { SOURCE, TARGET, TREE };
00041
00042 class Node
00043 {
00044 public:
00045 Node(const std::string &label, NodeType type)
00046 : m_label(label)
00047 , m_type(type)
00048 , m_pcfgScore(0.0f) {}
00049
00050 ~Node();
00051
00052 const std::string &GetLabel() const {
00053 return m_label;
00054 }
00055 NodeType GetType() const {
00056 return m_type;
00057 }
00058 const std::vector<Node*> &GetChildren() const {
00059 return m_children;
00060 }
00061 const std::vector<Node*> &GetParents() const {
00062 return m_parents;
00063 }
00064 float GetPcfgScore() const {
00065 return m_pcfgScore;
00066 }
00067 const Span &GetSpan() const {
00068 return m_span;
00069 }
00070 const Span &GetComplementSpan() const {
00071 return m_complementSpan;
00072 }
00073 const std::vector<const Subgraph*> &GetRules() const {
00074 return m_rules;
00075 }
00076
00077 void SetChildren(const std::vector<Node*> &c) {
00078 m_children = c;
00079 }
00080 void SetParents(const std::vector<Node*> &p) {
00081 m_parents = p;
00082 }
00083 void SetPcfgScore(float s) {
00084 m_pcfgScore = s;
00085 }
00086 void SetSpan(const Span &s) {
00087 m_span = s;
00088 }
00089 void SetComplementSpan(const Span &cs) {
00090 m_complementSpan = cs;
00091 }
00092
00093 void AddChild(Node *c) {
00094 m_children.push_back(c);
00095 }
00096 void AddParent(Node *p) {
00097 m_parents.push_back(p);
00098 }
00099 void AddRule(const Subgraph *s) {
00100 m_rules.push_back(s);
00101 }
00102
00103 bool IsSink() const {
00104 return m_children.empty();
00105 }
00106 bool IsPreterminal() const;
00107
00108 void PropagateIndex(int);
00109
00110 std::vector<std::string> GetTargetWords() const;
00111
00112
00113
00114
00115 template<typename OutputIterator>
00116 void GetTreeAncestors(OutputIterator result, bool includeSelf=false);
00117
00118
00119
00120 template<typename InputIterator>
00121 static Node *LowestCommonAncestor(InputIterator first, InputIterator last);
00122
00123 private:
00124
00125 Node(const Node &);
00126 Node &operator=(const Node &);
00127
00128 void GetTargetWords(std::vector<std::string> &) const;
00129
00130 std::string m_label;
00131 NodeType m_type;
00132 std::vector<Node*> m_children;
00133 std::vector<Node*> m_parents;
00134 float m_pcfgScore;
00135 Span m_span;
00136 Span m_complementSpan;
00137 std::vector<const Subgraph*> m_rules;
00138 };
00139
00140 template<typename OutputIterator>
00141 void Node::GetTreeAncestors(OutputIterator result, bool includeSelf)
00142 {
00143
00144 assert(m_type == TARGET || m_type == TREE);
00145
00146 if (includeSelf) {
00147 *result++ = this;
00148 }
00149
00150 Node *ancestor = !(m_parents.empty()) ? m_parents[0] : 0;
00151 while (ancestor != 0) {
00152 *result++ = ancestor;
00153 ancestor = !(ancestor->m_parents.empty()) ? ancestor->m_parents[0] : 0;
00154 }
00155 }
00156
00157 template<typename InputIterator>
00158 Node *Node::LowestCommonAncestor(InputIterator first, InputIterator last)
00159 {
00160
00161 if (first == last) {
00162 return 0;
00163 }
00164
00165
00166
00167 InputIterator p = first;
00168 Node *lca = *p++;
00169 for (; p != last; ++p) {
00170 Node *node = *p;
00171 assert(node->m_type != SOURCE);
00172 if (node != lca) {
00173 lca = 0;
00174 }
00175 }
00176 if (lca) {
00177 return lca;
00178 }
00179
00180
00181 size_t minPathLength = 0;
00182 std::vector<std::vector<Node *> > paths;
00183 for (p = first; p != last; ++p) {
00184 paths.resize(paths.size()+1);
00185 (*p)->GetTreeAncestors(std::back_inserter(paths.back()), true);
00186 size_t pathLength = paths.back().size();
00187 assert(pathLength > 0);
00188 if (paths.size() == 1 || pathLength < minPathLength) {
00189 minPathLength = pathLength;
00190 }
00191 }
00192
00193
00194
00195 for (size_t i = 0; i < minPathLength; ++i) {
00196 bool match = true;
00197 for (size_t j = 0; j < paths.size(); ++j) {
00198 size_t index = paths[j].size() - minPathLength + i;
00199 assert(index >= 0);
00200 assert(index < paths[j].size());
00201 if (j == 0) {
00202 lca = paths[j][index];
00203 assert(lca);
00204 } else if (lca != paths[j][index]) {
00205 match = false;
00206 break;
00207 }
00208 }
00209 if (match) {
00210 return lca;
00211 }
00212 }
00213
00214
00215 assert(false);
00216 return 0;
00217 }
00218
00219 }
00220 }
00221 }
00222
00223 #endif