00001 // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- 00002 // (c) 2007-2012 Ulrich Germann 00003 // Stuff related to dependency trees 00004 00005 #ifndef __ug_deptree_h 00006 #define __ug_deptree_h 00007 00008 #include <string> 00009 #include <iostream> 00010 00011 #include "tpt_tokenindex.h" 00012 #include "ug_ttrack_base.h" 00013 00014 #include "ug_conll_record.h" 00015 #include "ug_conll_bottom_up_token.h" 00016 #include "ug_typedefs.h" 00017 00018 namespace sapt 00019 { 00020 00021 // Fills the std::vector v with pointers to the internal root r_x for the 00022 // stretch [start,x] for all x: start <= x < stop. If the stretch 00023 // is incoherent, r_x is NULL 00024 template<typename T> 00025 void 00026 fill_L2R_roots(T const* start,T const* stop, std::vector<T const*>& v) 00027 { 00028 assert(stop>start); 00029 v.resize(stop-start); 00030 v[0] = start; 00031 bitvector isR(v.size()); 00032 std::vector<T const*> root(v.size()); 00033 isR.set(0); 00034 root[0] = start+start->parent; 00035 for (T const* x = start+1; x < stop; ++x) 00036 { 00037 size_t p = x-start; 00038 root[p] = x+x->parent; 00039 for (size_t i = isR.find_first(); i < isR.size(); i = isR.find_next(i)) 00040 if (root[i]==x) 00041 isR.reset(i); 00042 if (root[p] < start || root[p] >= stop) 00043 isR.set(x-start); 00044 v[p] = (isR.count()==1) ? start+isR.find_first() : NULL; 00045 } 00046 } 00047 00048 // return the root of the tree if the span [start,stop) constitutes a 00049 // tree, NULL otherwise 00050 template<typename T> 00051 T const* 00052 findInternalRoot(T const* start, T const* stop) 00053 { 00054 int outOfRange=0; 00055 T const* root = NULL; 00056 for (T const* t = start; t < stop && outOfRange <= 1; t++) 00057 { 00058 T const* n = reinterpret_cast<T const*>(t->up()); 00059 if (!n || n < start || n >=stop) 00060 { 00061 outOfRange++; 00062 root = t; 00063 } 00064 } 00065 assert(outOfRange); 00066 return outOfRange == 1 ? root : NULL; 00067 } 00068 00069 // return the governor of the tree given by [start,stop) if the span 00070 // constitutes a tree, NULL otherwise 00071 template<typename T> 00072 T const* 00073 findExternalRoot(T const* start, T const* stop) 00074 { 00075 int numRoots=0; 00076 T const* root = NULL; 00077 for (T const* t = start; t < stop && numRoots <= 1; t++) 00078 { 00079 T const* n = reinterpret_cast<T const*>(t->up()); 00080 if (!n || n < start || n >=stop) 00081 { 00082 if (root && n != root) 00083 numRoots++; 00084 else 00085 { 00086 root = n; 00087 if (!numRoots) numRoots++; 00088 } 00089 } 00090 } 00091 assert(numRoots); 00092 return numRoots == 1 ? root : NULL; 00093 } 00094 00095 template<typename T> 00096 T const* 00097 findInternalRoot(std::vector<T> const& v) 00098 { 00099 T const* a = as<T>(&(*v.begin())); 00100 T const* b = as<T>(&(*v.end())); 00101 return (a==b) ? NULL : findInternalRoot<T>(a,b); 00102 } 00103 00104 #if 1 00105 class DTNode 00106 { 00107 public: 00108 Conll_Record const* rec; // pointer to the record (see below) for this node 00109 DTNode* parent; // pointer to my parent 00110 std::vector<DTNode*> children; // children (in the order they appear in the sentence) 00111 DTNode(Conll_Record const* p); 00112 }; 00113 00115 class 00116 DependencyTree 00117 { 00118 public: 00119 std::vector<DTNode> w; 00120 DependencyTree(Conll_Record const* first, Conll_Record const* last); 00121 }; 00122 #endif 00123 00124 class 00125 Conll_Lemma : public Conll_Record 00126 { 00127 public: 00128 Conll_Lemma(); 00129 Conll_Lemma(id_type _id); 00130 id_type id() const; 00131 int cmp(Conll_Record const& other) const; 00132 }; 00133 00134 class 00135 Conll_Sform : public Conll_Record 00136 { 00137 public: 00138 Conll_Sform(); 00139 Conll_Sform(id_type _id); 00140 id_type id() const; 00141 int cmp(Conll_Record const& other) const; 00142 }; 00143 00144 class 00145 Conll_MajPos : public Conll_Record 00146 { 00147 public: 00148 Conll_MajPos(); 00149 Conll_MajPos(id_type _id); 00150 id_type id() const; 00151 int cmp(Conll_Record const& other) const; 00152 }; 00153 00154 00155 class 00156 Conll_MinPos : public Conll_Record 00157 { 00158 public: 00159 Conll_MinPos(); 00160 Conll_MinPos(id_type _id); 00161 id_type id() const; 00162 int cmp(Conll_Record const& other) const; 00163 }; 00164 00165 class 00166 Conll_MinPos_Lemma : public Conll_Record 00167 { 00168 public: 00169 Conll_MinPos_Lemma(); 00170 id_type id() const; 00171 int cmp(Conll_Record const& other) const; 00172 }; 00173 00174 class 00175 Conll_AllFields : public Conll_Record 00176 { 00177 public: 00178 Conll_AllFields(); 00179 int cmp(Conll_Record const& other) const; 00180 bool operator==(Conll_AllFields const& other) const; 00181 }; 00182 00183 class 00184 Conll_WildCard : public Conll_Record 00185 { 00186 public: 00187 Conll_WildCard(); 00188 int cmp(Conll_Record const& other) const; 00189 }; 00190 00193 bool 00194 isCoherent(Conll_Record const* start, Conll_Record const* const stop); 00195 00196 00199 template<typename T> 00200 T const* topNode(T const* start , T const* stop) 00201 { 00202 T const* ret = NULL; 00203 for (T const* x = start; x < stop; ++x) 00204 { 00205 T const* n = reinterpret_cast<T const*>(x->up()); 00206 if (!n || n < start || n >= stop) 00207 { 00208 if (ret) return NULL; 00209 else ret = x; 00210 } 00211 } 00212 return ret; 00213 } 00214 00215 } 00216 #endif