00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <iostream>
00021
00022 #include "Node.h"
00023 #include "Subgraph.h"
00024
00025 namespace MosesTraining
00026 {
00027 namespace Syntax
00028 {
00029 namespace GHKM
00030 {
00031
00032 void Subgraph::GetTargetLeaves(std::vector<const Node *> &result) const
00033 {
00034 result.clear();
00035 GetTargetLeaves(m_root, result);
00036 }
00037
00038 void Subgraph::GetTargetLeaves(const Node *root,
00039 std::vector<const Node *> &result) const
00040 {
00041 if (root->GetType() == TARGET || m_leaves.find(root) != m_leaves.end()) {
00042 result.push_back(root);
00043 } else {
00044 const std::vector<Node*> &children = root->GetChildren();
00045 for (std::vector<Node *>::const_iterator p(children.begin());
00046 p != children.end(); ++p) {
00047 GetTargetLeaves(*p, result);
00048 }
00049 }
00050 }
00051
00052 int Subgraph::CountNodes(const Node *n) const
00053 {
00054 if (n->GetType() != TREE) {
00055 return 0;
00056 }
00057 if (IsTrivial()) {
00058 return 1;
00059 }
00060 int count = 1;
00061 const std::vector<Node*> &children = n->GetChildren();
00062 for (std::vector<Node *>::const_iterator p = children.begin();
00063 p != children.end(); ++p) {
00064 const Node *child = *p;
00065 if (m_leaves.find(child) == m_leaves.end()) {
00066 count += CountNodes(child);
00067 } else if (child->GetType() == TREE) {
00068 ++count;
00069 }
00070 }
00071 return count;
00072 }
00073
00074 int Subgraph::CalcSize(const Node *n) const
00075 {
00076 if (n->GetType() != TREE || n->IsPreterminal()) {
00077 return 0;
00078 }
00079 if (IsTrivial()) {
00080 return 1;
00081 }
00082 int count = 1;
00083 const std::vector<Node*> &children = n->GetChildren();
00084 for (std::vector<Node *>::const_iterator p = children.begin();
00085 p != children.end(); ++p) {
00086 if (m_leaves.find(*p) == m_leaves.end()) {
00087 count += CalcSize(*p);
00088 }
00089 }
00090 return count;
00091 }
00092
00093 int Subgraph::CalcDepth(const Node *n) const
00094 {
00095 if (n->GetType() != TREE || n->IsPreterminal() || m_leaves.empty()) {
00096 return 0;
00097 }
00098 int maxChildDepth = 0;
00099 const std::vector<Node*> &children = n->GetChildren();
00100 for (std::vector<Node *>::const_iterator p = children.begin();
00101 p != children.end(); ++p) {
00102 if (m_leaves.find(*p) == m_leaves.end()) {
00103 maxChildDepth = std::max(maxChildDepth, CalcDepth(*p));
00104 }
00105 }
00106 return maxChildDepth + 1;
00107 }
00108
00109 float Subgraph::CalcPcfgScore() const
00110 {
00111 if (m_root->GetType() != TREE || m_leaves.empty()) {
00112 return 0.0f;
00113 }
00114 float score = m_root->GetPcfgScore();
00115 for (std::set<const Node *>::const_iterator p = m_leaves.begin();
00116 p != m_leaves.end(); ++p) {
00117 const Node *leaf = *p;
00118 if (leaf->GetType() == TREE) {
00119 score -= leaf->GetPcfgScore();
00120 }
00121 }
00122 return score;
00123 }
00124
00125 void Subgraph::PrintTree(std::ostream &out) const
00126 {
00127 RecursivelyPrintTree(m_root,out);
00128 }
00129
00130 void Subgraph::RecursivelyPrintTree(const Node *n, std::ostream &out) const
00131 {
00132 NodeType nodeType = n->GetType();
00133 if (nodeType == TREE) {
00134 out << "[" << n->GetLabel();
00135 if (m_leaves.find(n) == m_leaves.end()) {
00136 const std::vector<Node *> &children = n->GetChildren();
00137 for (std::vector<Node *>::const_iterator p(children.begin());
00138 p != children.end(); ++p) {
00139 Node *child = *p;
00140 if (child->GetType() == SOURCE) {
00141
00142
00143 continue;
00144 }
00145 out << " ";
00146 RecursivelyPrintTree(child,out);
00147 }
00148 }
00149 out << "]";
00150 } else if (nodeType == TARGET) {
00151 out << n->GetLabel();
00152 }
00153 }
00154
00155 void Subgraph::PrintPartsOfSpeech(std::ostream &out) const
00156 {
00157 RecursivelyPrintPartsOfSpeech(m_root,out);
00158 }
00159
00160 void Subgraph::RecursivelyPrintPartsOfSpeech(const Node *n, std::ostream &out) const
00161 {
00162 NodeType nodeType = n->GetType();
00163 if (nodeType == TREE) {
00164 if (m_leaves.find(n) == m_leaves.end()) {
00165 const std::vector<Node *> &children = n->GetChildren();
00166 for (std::vector<Node *>::const_iterator p(children.begin());
00167 p != children.end(); ++p) {
00168 Node *child = *p;
00169 if (child->GetType() == TARGET) {
00170 out << " " << n->GetLabel();
00171 } else {
00172 RecursivelyPrintPartsOfSpeech(child,out);
00173 }
00174 }
00175 }
00176 }
00177 }
00178
00179 void Subgraph::GetPartsOfSpeech(std::vector<std::string> &out) const
00180 {
00181 out.clear();
00182 RecursivelyGetPartsOfSpeech(m_root,out);
00183 }
00184
00185 void Subgraph::RecursivelyGetPartsOfSpeech(const Node *n, std::vector<std::string> &out) const
00186 {
00187 NodeType nodeType = n->GetType();
00188 if (nodeType == TREE) {
00189 if (m_leaves.find(n) == m_leaves.end()) {
00190 const std::vector<Node *> &children = n->GetChildren();
00191 for (std::vector<Node *>::const_iterator p(children.begin());
00192 p != children.end(); ++p) {
00193 Node *child = *p;
00194 if (child->GetType() == TARGET) {
00195 out.push_back(n->GetLabel());
00196 } else {
00197 RecursivelyGetPartsOfSpeech(child,out);
00198 }
00199 }
00200 }
00201 }
00202 }
00203
00204 }
00205 }
00206 }