00001 #include "TreeTsgFilter.h"
00002 
00003 namespace MosesTraining
00004 {
00005 namespace Syntax
00006 {
00007 namespace FilterRuleTable
00008 {
00009 
00010 TreeTsgFilter::TreeTsgFilter(
00011   const std::vector<boost::shared_ptr<SyntaxTree> > &sentences)
00012 {
00013   
00014   m_sentences.reserve(sentences.size());
00015   for (std::vector<boost::shared_ptr<SyntaxTree> >::const_iterator p =
00016          sentences.begin(); p != sentences.end(); ++p) {
00017     m_sentences.push_back(boost::shared_ptr<IdTree>(SyntaxTreeToIdTree(**p)));
00018   }
00019 
00020   m_labelToTree.resize(m_testVocab.Size());
00021   
00022   for (std::vector<boost::shared_ptr<IdTree> >::const_iterator p =
00023          m_sentences.begin(); p != m_sentences.end(); ++p) {
00024     AddNodesToMap(**p);
00025   }
00026 }
00027 
00028 TreeTsgFilter::IdTree *TreeTsgFilter::SyntaxTreeToIdTree(const SyntaxTree &s)
00029 {
00030   IdTree *t = new IdTree(m_testVocab.Insert(s.value().label));
00031   const std::vector<SyntaxTree*> &sChildren = s.children();
00032   std::vector<IdTree*> &tChildren = t->children();
00033   tChildren.reserve(sChildren.size());
00034   for (std::vector<SyntaxTree*>::const_iterator p = sChildren.begin();
00035        p != sChildren.end(); ++p) {
00036     IdTree *child = SyntaxTreeToIdTree(**p);
00037     child->parent() = t;
00038     tChildren.push_back(child);
00039   }
00040   return t;
00041 }
00042 
00043 void TreeTsgFilter::AddNodesToMap(const IdTree &tree)
00044 {
00045   m_labelToTree[tree.value()].push_back(&tree);
00046   const std::vector<IdTree*> &children = tree.children();
00047   for (std::vector<IdTree*>::const_iterator p = children.begin();
00048        p != children.end(); ++p) {
00049     AddNodesToMap(**p);
00050   }
00051 }
00052 
00053 bool TreeTsgFilter::MatchFragment(const IdTree &fragment,
00054                                   const std::vector<IdTree *> &leaves)
00055 {
00056   typedef std::vector<const IdTree *> TreeVec;
00057 
00058   
00059   
00060   
00061   
00062   
00063   const IdTree *rarestLeaf = leaves[0];
00064   std::size_t lowestCount = m_labelToTree[rarestLeaf->value()].size();
00065   for (std::size_t i = 1; i < leaves.size(); ++i) {
00066     const IdTree *leaf = leaves[i];
00067     std::size_t count = m_labelToTree[leaf->value()].size();
00068     if (count < lowestCount) {
00069       lowestCount = count;
00070       rarestLeaf = leaf;
00071     }
00072   }
00073 
00074   
00075   const std::size_t depth = rarestLeaf->Depth();
00076 
00077   
00078   
00079   TreeVec &nodes = m_labelToTree[rarestLeaf->value()];
00080   for (TreeVec::const_iterator p = nodes.begin(); p != nodes.end(); ++p) {
00081     
00082     
00083     const IdTree *t = *p;
00084     std::size_t d = depth;
00085     while (d && t->parent()) {
00086       t = t->parent();
00087       --d;
00088     }
00089     if (d > 0) {
00090       
00091       continue;
00092     }
00093     if (MatchFragment(fragment, *t)) {
00094       return true;
00095     }
00096   }
00097   return false;
00098 }
00099 
00100 bool TreeTsgFilter::MatchFragment(const IdTree &fragment, const IdTree &tree)
00101 {
00102   if (fragment.value() != tree.value()) {
00103     return false;
00104   }
00105   const std::vector<IdTree*> &fragChildren = fragment.children();
00106   const std::vector<IdTree*> &treeChildren = tree.children();
00107   if (!fragChildren.empty() && fragChildren.size() != treeChildren.size()) {
00108     return false;
00109   }
00110   for (std::size_t i = 0; i < fragChildren.size(); ++i) {
00111     if (!MatchFragment(*fragChildren[i], *treeChildren[i])) {
00112       return false;
00113     }
00114   }
00115   return true;
00116 }
00117 
00118 }  
00119 }  
00120 }