00001 // -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- 00002 00003 // This code is part of the re-factorization of the earlier 00004 // non-template implementation of "corpus tracks" and suffix and 00005 // prefix arrays over them as template classes. 00006 00007 // (c) 2007-2009 Ulrich Germann 00008 00009 #ifndef __ug_corpus_token_h 00010 #define __ug_corpus_token_h 00011 00012 // This file defines a few simple token classes for use with the Ttrack/TSA template classes 00013 // - SimpleWordId is a simple wrapper around an integer ID 00014 // - L2R_Token defines next() for building suffix arrays 00015 // - R2L_Token defines next() for building prefix arrays 00016 00017 00018 #include "tpt_typedefs.h" 00019 #include "ug_typedefs.h" 00020 #include "ug_ttrack_base.h" 00021 00022 namespace sapt 00023 { 00026 class SimpleWordId 00027 { 00028 id_type theID; 00029 public: 00030 SimpleWordId(id_type const& id); 00031 id_type const& id() const; 00032 int cmp(SimpleWordId const& other) const; 00033 bool operator==(SimpleWordId const& other) const; 00034 id_type remap(std::vector<id_type const*> const& m) const; 00035 }; 00036 00038 template<typename T> 00039 class 00040 L2R_Token : public T 00041 { 00042 public: 00043 typedef T Token; 00044 00045 L2R_Token() : T() {}; 00046 L2R_Token(id_type id) : T(id) {}; 00047 00048 L2R_Token const* next(int n=1) const { return this+n; } 00049 00052 template<typename TTRACK_TYPE> 00053 L2R_Token const* stop(TTRACK_TYPE const& C, id_type sid) const 00054 { 00055 return reinterpret_cast<L2R_Token<T> const*>(C.sntEnd(sid)); 00056 } 00057 00058 L2R_Token const* stop(L2R_Token const* seqStart, L2R_Token const* seqEnd) const 00059 { 00060 return seqEnd; 00061 } 00062 00063 bool operator<(T const& other) const { return this->cmp(other) < 0; } 00064 bool operator>(T const& other) const { return this->cmp(other) > 0; } 00065 bool operator==(T const& other) const { return this->cmp(other) == 0; } 00066 bool operator!=(T const& other) const { return this->cmp(other) != 0; } 00067 }; 00068 00070 template<typename T> 00071 class 00072 R2L_Token : public T 00073 { 00074 public: 00075 typedef T Token; 00076 00077 R2L_Token() : T() {}; 00078 R2L_Token(id_type id) : T(id) {}; 00079 00080 R2L_Token const* next(int n = 1) const { return this - n; } 00081 00082 template<typename TTRACK_TYPE> 00083 R2L_Token const* stop(TTRACK_TYPE const& C, id_type sid) const 00084 { 00085 return reinterpret_cast<R2L_Token<T> const*>(C.sntStart(sid) - 1); 00086 } 00087 00088 R2L_Token const* stop(R2L_Token const* seqStart, R2L_Token const* seqEnd) const 00089 { 00090 assert(seqStart); 00091 return seqStart - 1; 00092 } 00093 00094 bool operator<(T const& other) const { return this->cmp(other) < 0; } 00095 bool operator>(T const& other) const { return this->cmp(other) > 0; } 00096 bool operator==(T const& other) const { return this->cmp(other) == 0; } 00097 bool operator!=(T const& other) const { return this->cmp(other) != 0; } 00098 }; 00099 00100 } 00101 #endif