#include <ug_tsa_base.h>
Public Types | |
typedef TSA_tree_iterator< TKN > | tree_iterator |
typedef tsa::ArrayEntry | ArrayEntry |
typedef boost::shared_ptr < bitvector > | bitset_pointer |
typedef TKN | Token |
typedef BitSetCache< TSA< TKN > > | BSC_t |
Public Member Functions | |
virtual | ~TSA () |
char const * | arrayStart () const |
char const * | arrayEnd () const |
char const * | lower_bound (typename std::vector< TKN >::const_iterator const &keyStart, typename std::vector< TKN >::const_iterator const &keyStop) const |
char const * | lower_bound (TKN const *keyStart, TKN const *keyStop) const |
char const * | lower_bound (TKN const *keyStart, int keyLen) const |
char const * | upper_bound (typename std::vector< TKN >::const_iterator const &keyStart, typename std::vector< TKN >::const_iterator const &keyStop) const |
char const * | upper_bound (TKN const *keyStart, int keyLength) const |
void | dump (std::ostream &out, TokenIndex const &T) const |
count_type | fillBitSet (std::vector< TKN > const &phrase, bdBitset &dest) const |
count_type | fillBitSet (TKN const *key, size_t keyLen, bdBitset &dest) const |
count_type | setBits (char const *startRange, char const *endRange, boost::dynamic_bitset< uint64_t > &bs) const |
void | setTokenBits (char const *startRange, char const *endRange, size_t len, bitvector &bs) const |
virtual char const * | readSid (char const *p, char const *q, id_type &sid) const =0 |
virtual char const * | readSid (char const *p, char const *q,::uint64_t &sid) const =0 |
virtual char const * | readOffset (char const *p, char const *q, uint16_t &offset) const =0 |
virtual char const * | readOffset (char const *p, char const *q,::uint64_t &offset) const =0 |
count_type | sntCnt (char const *p, char const *const q) const |
count_type | rawCnt2 (TKN const *keyStart, size_t keyLen) const |
virtual count_type | rawCnt (char const *p, char const *const q) const =0 |
virtual void | getCounts (char const *p, char const *const q, count_type &sids, count_type &raw) const =0 |
std::string | suffixAt (char const *p, TokenIndex const *V=NULL, size_t maxlen=0) const |
std::string | suffixAt (ArrayEntry const &I, TokenIndex const *V=NULL, size_t maxlen=0) const |
tsa::ArrayEntry & | readEntry (char const *p, tsa::ArrayEntry &I) const |
char const * | dataEnd () const |
bool | sanityCheck1 () const |
::uint64_t | getSequenceId (typename std::vector< TKN >::const_iterator const &pstart, typename std::vector< TKN >::const_iterator const &pstop) const |
::uint64_t | getSequenceId (TKN const *t, ushort plen) const |
std::string | getSequence (::uint64_t pid, TokenIndex const &V) const |
std::vector< TKN > | getSequence (::uint64_t pid) const |
TKN const * | getSequenceStart (::uint64_t) const |
ushort | getSequenceLength (::uint64_t) const |
size_t | getCorpusSize () const |
Ttrack< TKN > const * | getCorpus () const |
bitset_pointer | getBitSet (TKN const *startKey, size_t keyLen) const |
find all instances of the tree described by [treeStart, treeEnd) | |
boost::shared_ptr< bitvector > | findTree (TKN const *treeStart, TKN const *treeEnd, bitvector const *filter) const |
size_t | markOccurrences (char const *lo, char const *up, size_t len, bitvector &bitset, bool markOnlyStartPosition) const |
bool | findBranches (TKN const *base, bitvector const &terminals, std::vector< tree_iterator > &dest) const |
double | aveIndexEntrySize () const |
SPTR< TSA_tree_iterator< TKN > > | find (TKN const *start, size_t len) const |
Public Attributes | |
boost::shared_ptr< BSC_t > | bsc |
Protected Member Functions | |
virtual char const * | index_jump (char const *startRange, char const *stopRange, float fraction) const =0 |
char const * | find_start (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const |
char const * | find_end (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const |
char const * | find_longer (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const |
virtual char const * | getLowerBound (id_type id) const =0 |
virtual char const * | getUpperBound (id_type id) const =0 |
Protected Attributes | |
boost::shared_ptr< Ttrack< TKN > const > | corpus |
char const * | startArray |
char const * | endArray |
size_t | corpusSize |
id_type | numTokens |
id_type | indexSize |
size_t | BitSetCachingThreshold |
Friends | |
class | TSA_tree_iterator< TKN > |
Token types (TKN) must provide a number of functions; see the class SimpleWordId (a simple example of a "core token base class") and the template class L2R_Token (a class derived from its template parameter, e.g. SimpleWordId, that handles the ordering of sequences). Both are declared/defined in ug_corpus_token.{h|cc}.
Definition at line 44 of file ug_tsa_base.h.
typedef tsa::ArrayEntry sapt::TSA< TKN >::ArrayEntry |
Definition at line 50 of file ug_tsa_base.h.
typedef boost::shared_ptr<bitvector> sapt::TSA< TKN >::bitset_pointer |
Definition at line 54 of file ug_tsa_base.h.
typedef BitSetCache<TSA<TKN> > sapt::TSA< TKN >::BSC_t |
Definition at line 56 of file ug_tsa_base.h.
Definition at line 55 of file ug_tsa_base.h.
typedef TSA_tree_iterator<TKN> sapt::TSA< TKN >::tree_iterator |
Reimplemented in sapt::mmTSA< TOKEN >, sapt::imTSA< TOKEN >, and sapt::mmTSA< Token >.
Definition at line 47 of file ug_tsa_base.h.
Definition at line 47 of file ug_tsa_base.h.
char const* sapt::TSA< TKN >::arrayEnd | ( | ) | const [inline] |
Definition at line 143 of file ug_tsa_base.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), and sapt::TSA< TKN >::upper_bound().
char const* sapt::TSA< TKN >::arrayStart | ( | ) | const [inline] |
Definition at line 142 of file ug_tsa_base.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down().
double sapt::TSA< TKN >::aveIndexEntrySize | ( | ) | const [inline] |
Definition at line 311 of file ug_tsa_base.h.
Referenced by sapt::TSA_tree_iterator< Token >::ca(), and sapt::TSA_tree_iterator< TKN >::randomSample().
char const* sapt::TSA< TKN >::dataEnd | ( | ) | const |
return pointer to the end of the data block
void sapt::TSA< TKN >::dump | ( | std::ostream & | out, | |
TokenIndex const & | T | |||
) | const |
dump all suffixes in order to /out/
count_type sapt::TSA< TKN >::fillBitSet | ( | TKN const * | key, | |
size_t | keyLen, | |||
bdBitset & | dest | |||
) | const [inline] |
fill the dynamic bitset with information as to which sentences the phrase occurs in
Definition at line 373 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::lower_bound(), sapt::TSA< TKN >::setBits(), sapt::up(), and sapt::TSA< TKN >::upper_bound().
count_type sapt::TSA< TKN >::fillBitSet | ( | std::vector< TKN > const & | key, | |
bdBitset & | dest | |||
) | const [inline] |
fill the dynamic bit set with true for all sentences that contain /phrase/.
Definition at line 357 of file ug_tsa_base.h.
Referenced by sapt::TSA< TKN >::getBitSet().
SPTR<TSA_tree_iterator<TKN> > sapt::TSA< TKN >::find | ( | TKN const * | start, | |
size_t | len | |||
) | const [inline] |
Definition at line 319 of file ug_tsa_base.h.
char const * sapt::TSA< TKN >::find_end | ( | char const * | lo, | |
char const *const | upX, | |||
TKN const *const | refStart, | |||
int | refLen, | |||
size_t | d | |||
) | const [inline, protected] |
return the index position of the first item that is greater than [refStart,refStart+refLen) and does not include it as a prefix
return the upper bound (first entry beyond) of the token range matching [startKey,endKey)
Definition at line 488 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus, I, sapt::TSA< TKN >::index_jump(), NULL, sapt::TSA< TKN >::readEntry(), and sapt::up().
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), and sapt::TSA< TKN >::upper_bound().
char const * sapt::TSA< TKN >::find_longer | ( | char const * | lo, | |
char const *const | upX, | |||
TKN const *const | refStart, | |||
int | refLen, | |||
size_t | d | |||
) | const [inline, protected] |
return the index position of the first item that is longer than [refStart,refStart+refLen) and includes it as a prefix
return the first entry that has the prefix [refStart,refStart+refLen) but continues on
Definition at line 523 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus, I, sapt::TSA< TKN >::index_jump(), NULL, sapt::TSA< TKN >::readEntry(), and sapt::up().
Referenced by sapt::TSA_tree_iterator< TKN >::down().
char const * sapt::TSA< TKN >::find_start | ( | char const * | lo, | |
char const *const | upX, | |||
TKN const *const | refStart, | |||
int | refLen, | |||
size_t | d | |||
) | const [inline, protected] |
return the index position of the first item that is equal to or includes [refStart,refStart+refLen) as a prefix
return the lower bound (first matching entry) of the token range matching [startKey,endKey)
Definition at line 455 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus, I, sapt::TSA< TKN >::index_jump(), NULL, sapt::TSA< TKN >::readEntry(), and sapt::up().
Referenced by sapt::TSA_tree_iterator< TKN >::extend(), and sapt::TSA< TKN >::lower_bound().
bool sapt::TSA< TKN >::findBranches | ( | TKN const * | base, | |
bitvector const & | terminals, | |||
std::vector< tree_iterator > & | dest | |||
) | const [inline] |
Definition at line 806 of file ug_tsa_base.h.
References k, sort(), and sorter.
boost::shared_ptr<bitvector> sapt::TSA< TKN >::findTree | ( | TKN const * | treeStart, | |
TKN const * | treeEnd, | |||
bitvector const * | filter | |||
) | const |
TSA< TKN >::bitset_pointer sapt::TSA< TKN >::getBitSet | ( | TKN const * | startKey, | |
size_t | keyLen | |||
) | const [inline] |
find all instances of the tree described by [treeStart, treeEnd)
Definition at line 762 of file ug_tsa_base.h.
References sapt::TSA< TKN >::bsc, sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::fillBitSet(), and NULL.
Definition at line 736 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus.
Referenced by fill(), sapt::BitSetCache< TSA >::get(), and sapt::BitSetCache< TSA >::get2().
size_t sapt::TSA< TKN >::getCorpusSize | ( | ) | const [inline] |
Definition at line 726 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpusSize.
Referenced by sapt::TSA_tree_iterator< TKN >::rawCnt().
virtual void sapt::TSA< TKN >::getCounts | ( | char const * | p, | |
char const *const | q, | |||
count_type & | sids, | |||
count_type & | raw | |||
) | const [pure virtual] |
get both sentence and word counts.
Avoids having to go over the byte range representing the range of suffixes in question twice when dealing with memory-mapped suffix arrays.
Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.
virtual char const* sapt::TSA< TKN >::getLowerBound | ( | id_type | id | ) | const [protected, pure virtual] |
Returns a char const* pointing to the position in the data block where the first item starting with token /id/ is located.
Referenced by sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA< TKN >::lower_bound(), and sapt::TSA< TKN >::upper_bound().
std::vector< TKN > sapt::TSA< TKN >::getSequence | ( | ::uint64_t | pid | ) | const [inline] |
Return the phrase represented by phrase ID /pid/.
Definition at line 671 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus.
std::string sapt::TSA< TKN >::getSequence | ( | ::uint64_t | pid, | |
TokenIndex const & | V | |||
) | const [inline] |
Return the phrase represented by phrase ID /pid/.
Definition at line 688 of file ug_tsa_base.h.
References sapt::TSA< TKN >::getSequenceLength(), and sapt::TSA< TKN >::getSequenceStart().
uint64_t sapt::TSA< TKN >::getSequenceId | ( | TKN const * | t, | |
ushort | plen | |||
) | const [inline] |
Definition at line 652 of file ug_tsa_base.h.
References I, sapt::TSA< TKN >::lower_bound(), and sapt::TSA< TKN >::readEntry().
uint64_t sapt::TSA< TKN >::getSequenceId | ( | typename std::vector< TKN >::const_iterator const & | pstart, | |
typename std::vector< TKN >::const_iterator const & | pstop | |||
) | const [inline] |
Return an ID that represents a given phrase. This should NEVER be 0! Structure of a phrase ID: leftmost 32 bits: sentence ID in the corpus; next 16 bits: offset from the start of the sentence; next 16 bits: length of the phrase.
Definition at line 641 of file ug_tsa_base.h.
ushort sapt::TSA< TKN >::getSequenceLength | ( | ::uint64_t | pid | ) | const [inline] |
Definition at line 716 of file ug_tsa_base.h.
Referenced by sapt::TSA< TKN >::getSequence().
TKN const * sapt::TSA< TKN >::getSequenceStart | ( | ::uint64_t | pid | ) | const [inline] |
Definition at line 705 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus.
Referenced by sapt::TSA< TKN >::getSequence().
virtual char const* sapt::TSA< TKN >::getUpperBound | ( | id_type | id | ) | const [protected, pure virtual] |
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA< TKN >::lower_bound(), and sapt::TSA< TKN >::upper_bound().
virtual char const* sapt::TSA< TKN >::index_jump | ( | char const * | startRange, | |
char const * | stopRange, | |||
float | fraction | |||
) | const [protected, pure virtual] |
Referenced by sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::find_longer(), and sapt::TSA< TKN >::find_start().
char const * sapt::TSA< TKN >::lower_bound | ( | TKN const * | keyStart, | |
int | keyLen | |||
) | const [inline] |
Definition at line 582 of file ug_tsa_base.h.
References sapt::TSA< TKN >::find_start(), sapt::TSA< TKN >::getLowerBound(), sapt::TSA< TKN >::getUpperBound(), and sapt::TSA< TKN >::startArray.
char const * sapt::TSA< TKN >::lower_bound | ( | TKN const * | keyStart, | |
TKN const * | keyStop | |||
) | const [inline] |
returns the start position in the byte array representing the tightly packed sorted list of corpus positions for the given search phrase
Definition at line 573 of file ug_tsa_base.h.
References sapt::TSA< TKN >::lower_bound().
char const * sapt::TSA< TKN >::lower_bound | ( | typename std::vector< TKN >::const_iterator const & | keyStart, | |
typename std::vector< TKN >::const_iterator const & | keyStop | |||
) | const [inline] |
Definition at line 556 of file ug_tsa_base.h.
Referenced by sapt::TSA< TKN >::fillBitSet(), sapt::BitSetCache< TSA >::get(), sapt::BitSetCache< TSA >::get2(), sapt::TSA< TKN >::getSequenceId(), sapt::TSA< TKN >::lower_bound(), and sapt::TSA< TKN >::rawCnt2().
size_t sapt::TSA< TKN >::markOccurrences | ( | char const * | lo, | |
char const * | up, | |||
size_t | len, | |||
bitvector & | bitset, | |||
bool | markOnlyStartPosition | |||
) | const [inline] |
Definition at line 780 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), and sid.
virtual count_type sapt::TSA< TKN >::rawCnt | ( | char const * | p, | |
char const *const | q | |||
) | const [pure virtual] |
Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.
Referenced by sapt::TSA_tree_iterator< TKN >::rawCnt(), and sapt::TSA< TKN >::rawCnt2().
count_type sapt::TSA< TKN >::rawCnt2 | ( | TKN const * | keyStart, | |
size_t | keyLen | |||
) | const [inline] |
Definition at line 628 of file ug_tsa_base.h.
References sapt::TSA< TKN >::lower_bound(), sapt::TSA< TKN >::rawCnt(), sapt::up(), and sapt::TSA< TKN >::upper_bound().
tsa::ArrayEntry & sapt::TSA< TKN >::readEntry | ( | char const * | p, | |
tsa::ArrayEntry & | I | |||
) | const [inline] |
Definition at line 746 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::endArray, sapt::TSA< TKN >::readOffset(), and sapt::TSA< TKN >::readSid().
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::find_longer(), sapt::TSA< TKN >::find_start(), sapt::TSA_tree_iterator< TKN >::getSequenceId(), sapt::TSA< TKN >::getSequenceId(), sapt::TSA_tree_iterator< TKN >::randomSample(), sapt::TSA< TKN >::setTokenBits(), and sapt::TSA_tree_iterator< TKN >::tfAndRoot().
virtual char const* sapt::TSA< TKN >::readOffset | ( | char const * | p, | |
char const * | q, | |||
::uint64_t & | offset | |||
) | const [pure virtual] |
Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.
virtual char const* sapt::TSA< TKN >::readOffset | ( | char const * | p, | |
char const * | q, | |||
uint16_t & | offset | |||
) | const [pure virtual] |
read the offset part of the index entry into /offset/
Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.
Referenced by sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA< TKN >::readEntry(), sapt::TSA< TKN >::setBits(), and sapt::TSA< TKN >::sntCnt().
virtual char const* sapt::TSA< TKN >::readSid | ( | char const * | p, | |
char const * | q, | |||
::uint64_t & | sid | |||
) | const [pure virtual] |
Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.
virtual char const* sapt::TSA< TKN >::readSid | ( | char const * | p, | |
char const * | q, | |||
id_type & | sid | |||
) | const [pure virtual] |
read the sentence ID into /sid/
Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.
Referenced by sapt::TSA_tree_iterator< TKN >::getSid(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA< TKN >::readEntry(), sapt::TSA< TKN >::setBits(), and sapt::TSA< TKN >::sntCnt().
bool sapt::TSA< TKN >::sanityCheck1 | ( | ) | const |
count_type sapt::TSA< TKN >::setBits | ( | char const * | startRange, | |
char const * | endRange, | |||
boost::dynamic_bitset< uint64_t > & | bs | |||
) | const [inline] |
Definition at line 388 of file ug_tsa_base.h.
References sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), and sid.
Referenced by sapt::TSA< TKN >::fillBitSet().
void sapt::TSA< TKN >::setTokenBits | ( | char const * | startRange, | |
char const * | endRange, | |||
size_t | len, | |||
bitvector & | bs | |||
) | const [inline] |
Definition at line 410 of file ug_tsa_base.h.
References sapt::TSA< TKN >::corpus, I, sapt::L2R_Token< T >::next(), sapt::TSA< TKN >::readEntry(), sapt::L2R_Token< T >::stop(), and stop.
count_type sapt::TSA< TKN >::sntCnt | ( | char const * | p, | |
char const *const | q | |||
) | const [inline] |
Reimplemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.
Definition at line 434 of file ug_tsa_base.h.
References sapt::check(), sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), and sid.
std::string sapt::TSA< TKN >::suffixAt | ( | ArrayEntry const & | I, | |
TokenIndex const * | V = NULL , |
|||
size_t | maxlen = 0 | |||
) | const |
std::string sapt::TSA< TKN >::suffixAt | ( | char const * | p, | |
TokenIndex const * | V = NULL , |
|||
size_t | maxlen = 0 | |||
) | const |
char const * sapt::TSA< TKN >::upper_bound | ( | TKN const * | keyStart, | |
int | keyLength | |||
) | const [inline] |
returns the upper bound in the byte array representing the tightly packed sorted list of corpus positions for the given search phrase (i.e., points just beyond the range)
Definition at line 615 of file ug_tsa_base.h.
References sapt::TSA< TKN >::arrayEnd(), sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::getLowerBound(), and sapt::TSA< TKN >::getUpperBound().
char const * sapt::TSA< TKN >::upper_bound | ( | typename std::vector< TKN >::const_iterator const & | keyStart, | |
typename std::vector< TKN >::const_iterator const & | keyStop | |||
) | const [inline] |
Definition at line 598 of file ug_tsa_base.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA< TKN >::fillBitSet(), sapt::BitSetCache< TSA >::get(), sapt::BitSetCache< TSA >::get2(), and sapt::TSA< TKN >::rawCnt2().
friend class TSA_tree_iterator< TKN > [friend] |
Definition at line 60 of file ug_tsa_base.h.
size_t sapt::TSA< TKN >::BitSetCachingThreshold [protected] |
Definition at line 90 of file ug_tsa_base.h.
Definition at line 63 of file ug_tsa_base.h.
Referenced by sapt::TSA_tree_iterator< Token >::ca(), sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA< TKN >::fillBitSet(), sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::find_longer(), sapt::TSA< TKN >::find_start(), sapt::TSA< TKN >::getBitSet(), sapt::TSA< TKN >::getCorpus(), sapt::TSA< TKN >::getSequence(), sapt::TSA< TKN >::getSequenceStart(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA< TKN >::readEntry(), sapt::TSA< TKN >::setTokenBits(), sapt::TSA< TKN >::sntCnt(), and sapt::TSA_tree_iterator< TKN >::tfAndRoot().
size_t sapt::TSA< TKN >::corpusSize [protected] |
Definition at line 65 of file ug_tsa_base.h.
Referenced by sapt::TSA< TOKEN >::arrayEnd(), sapt::TSA< TOKEN >::aveIndexEntrySize(), sapt::TSA_tree_iterator< TKN >::getSid(), and sapt::TSA< TKN >::readEntry().
size (in number of tokens) of the corpus underlying the sequence array.
ATTENTION: This number may differ from corpus->numTokens(), namely when the suffix array is based on a subset of the sentences of /corpus/.
Definition at line 87 of file ug_tsa_base.h.
size (in number of sentences) of the corpus underlying the sequence array.
ATTENTION: This number may differ from corpus->size(), namely when the suffix array is based on a subset of the sentences of /corpus/.
Definition at line 78 of file ug_tsa_base.h.
Referenced by sapt::TSA< TOKEN >::aveIndexEntrySize().
char const* sapt::TSA< TKN >::startArray [protected] |
Definition at line 64 of file ug_tsa_base.h.
Referenced by sapt::TSA< TOKEN >::arrayStart(), sapt::TSA< TOKEN >::aveIndexEntrySize(), sapt::TSA_tree_iterator< TKN >::getSid(), and sapt::TSA< TKN >::lower_bound().