00001 #include "TreeTsgFilter.h"
00002
00003 namespace MosesTraining
00004 {
00005 namespace Syntax
00006 {
00007 namespace FilterRuleTable
00008 {
00009
00010 TreeTsgFilter::TreeTsgFilter(
00011 const std::vector<boost::shared_ptr<SyntaxTree> > &sentences)
00012 {
00013
00014 m_sentences.reserve(sentences.size());
00015 for (std::vector<boost::shared_ptr<SyntaxTree> >::const_iterator p =
00016 sentences.begin(); p != sentences.end(); ++p) {
00017 m_sentences.push_back(boost::shared_ptr<IdTree>(SyntaxTreeToIdTree(**p)));
00018 }
00019
00020 m_labelToTree.resize(m_testVocab.Size());
00021
00022 for (std::vector<boost::shared_ptr<IdTree> >::const_iterator p =
00023 m_sentences.begin(); p != m_sentences.end(); ++p) {
00024 AddNodesToMap(**p);
00025 }
00026 }
00027
00028 TreeTsgFilter::IdTree *TreeTsgFilter::SyntaxTreeToIdTree(const SyntaxTree &s)
00029 {
00030 IdTree *t = new IdTree(m_testVocab.Insert(s.value().label));
00031 const std::vector<SyntaxTree*> &sChildren = s.children();
00032 std::vector<IdTree*> &tChildren = t->children();
00033 tChildren.reserve(sChildren.size());
00034 for (std::vector<SyntaxTree*>::const_iterator p = sChildren.begin();
00035 p != sChildren.end(); ++p) {
00036 IdTree *child = SyntaxTreeToIdTree(**p);
00037 child->parent() = t;
00038 tChildren.push_back(child);
00039 }
00040 return t;
00041 }
00042
00043 void TreeTsgFilter::AddNodesToMap(const IdTree &tree)
00044 {
00045 m_labelToTree[tree.value()].push_back(&tree);
00046 const std::vector<IdTree*> &children = tree.children();
00047 for (std::vector<IdTree*>::const_iterator p = children.begin();
00048 p != children.end(); ++p) {
00049 AddNodesToMap(**p);
00050 }
00051 }
00052
00053 bool TreeTsgFilter::MatchFragment(const IdTree &fragment,
00054 const std::vector<IdTree *> &leaves)
00055 {
00056 typedef std::vector<const IdTree *> TreeVec;
00057
00058
00059
00060
00061
00062
00063 const IdTree *rarestLeaf = leaves[0];
00064 std::size_t lowestCount = m_labelToTree[rarestLeaf->value()].size();
00065 for (std::size_t i = 1; i < leaves.size(); ++i) {
00066 const IdTree *leaf = leaves[i];
00067 std::size_t count = m_labelToTree[leaf->value()].size();
00068 if (count < lowestCount) {
00069 lowestCount = count;
00070 rarestLeaf = leaf;
00071 }
00072 }
00073
00074
00075 const std::size_t depth = rarestLeaf->Depth();
00076
00077
00078
00079 TreeVec &nodes = m_labelToTree[rarestLeaf->value()];
00080 for (TreeVec::const_iterator p = nodes.begin(); p != nodes.end(); ++p) {
00081
00082
00083 const IdTree *t = *p;
00084 std::size_t d = depth;
00085 while (d && t->parent()) {
00086 t = t->parent();
00087 --d;
00088 }
00089 if (d > 0) {
00090
00091 continue;
00092 }
00093 if (MatchFragment(fragment, *t)) {
00094 return true;
00095 }
00096 }
00097 return false;
00098 }
00099
00100 bool TreeTsgFilter::MatchFragment(const IdTree &fragment, const IdTree &tree)
00101 {
00102 if (fragment.value() != tree.value()) {
00103 return false;
00104 }
00105 const std::vector<IdTree*> &fragChildren = fragment.children();
00106 const std::vector<IdTree*> &treeChildren = tree.children();
00107 if (!fragChildren.empty() && fragChildren.size() != treeChildren.size()) {
00108 return false;
00109 }
00110 for (std::size_t i = 0; i < fragChildren.size(); ++i) {
00111 if (!MatchFragment(*fragChildren[i], *treeChildren[i])) {
00112 return false;
00113 }
00114 }
00115 return true;
00116 }
00117
00118 }
00119 }
00120 }