00001 // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- 00002 #ifndef __ug_conll_record_h 00003 #define __ug_conll_record_h 00004 #include "ug_typedefs.h" 00005 #include <stdint.h> 00006 // Base class for dependency tree corpora with POS and Lemma annotations 00007 00008 namespace sapt 00009 { 00010 00011 using tpt::id_type; 00012 using tpt::uchar; 00013 class 00014 Conll_Record 00015 { 00016 public: 00017 id_type sform; // surface form 00018 id_type lemma; // lemma 00019 uchar majpos; // major part of speech 00020 uchar minpos; // minor part of speech 00021 short parent; // id of parent 00022 uchar dtype; // dependency type 00023 uchar info[3]; /* additional information (depends on the part of speech) 00024 * a place holder for the time being, to ensure proper 00025 * alignment in memory */ 00026 Conll_Record(); 00027 Conll_Record const* up(int length=1) const; 00028 00029 Conll_Record& operator=(Conll_Record const& other); 00030 00031 bool isDescendentOf(Conll_Record const* other) const; 00032 00033 // virtual bool operator==(Conll_Record const& other) const; 00034 // virtual bool operator<(Conll_Record const& other) const; 00035 Conll_Record remap(std::vector<id_type const*> const& m) const; 00036 00037 #if 0 00038 00044 Conll_Record(string const& line, 00045 TokenIndex const& SF, TokenIndex const& LM, 00046 TokenIndex const& PS, TokenIndex const& DT); 00047 00049 void store(ostream& out); 00050 #endif 00051 }; 00052 00053 template<typename T> 00054 T const* as(Conll_Record const* p) 00055 { 00056 return reinterpret_cast<T const*>(p); 00057 } 00058 00059 template<typename T> 00060 T const* up(T const* p,int length=1) 00061 { 00062 return as<T>(p->up(length)); 00063 } 00064 00065 // this is for contigous word sequences extracted from longer sequences 00066 // adjust parent pointers to 0 (no parent) if they point out of the 00067 // subsequence 00068 void 00069 fixParse(Conll_Record* start, Conll_Record* stop); 00070 00071 } // end of namespace ugdiss 00072 00073 #endif