sapt::TSA< TKN > Class Template Reference

#include <ug_tsa_base.h>

Inheritance diagram for sapt::TSA< TKN >:

Inheritance graph
[legend]

List of all members.

Public Types

typedef TSA_tree_iterator< TKN > tree_iterator
typedef tsa::ArrayEntry ArrayEntry
typedef boost::shared_ptr< bitvector > bitset_pointer
typedef TKN Token
typedef BitSetCache< TSA< TKN > > BSC_t

Public Member Functions

virtual ~TSA ()
char const * arrayStart () const
char const * arrayEnd () const
char const * lower_bound (typename std::vector< TKN >::const_iterator const &keyStart, typename std::vector< TKN >::const_iterator const &keyStop) const
char const * lower_bound (TKN const *keyStart, TKN const *keyStop) const
char const * lower_bound (TKN const *keyStart, int keyLen) const
char const * upper_bound (typename std::vector< TKN >::const_iterator const &keyStart, typename std::vector< TKN >::const_iterator const &keyStop) const
char const * upper_bound (TKN const *keyStart, int keyLength) const
void dump (std::ostream &out, TokenIndex const &T) const
count_type fillBitSet (std::vector< TKN > const &phrase, bdBitset &dest) const
count_type fillBitSet (TKN const *key, size_t keyLen, bdBitset &dest) const
count_type setBits (char const *startRange, char const *endRange, boost::dynamic_bitset< uint64_t > &bs) const
void setTokenBits (char const *startRange, char const *endRange, size_t len, bitvector &bs) const
virtual char const * readSid (char const *p, char const *q, id_type &sid) const =0
virtual char const * readSid (char const *p, char const *q,::uint64_t &sid) const =0
virtual char const * readOffset (char const *p, char const *q, uint16_t &offset) const =0
virtual char const * readOffset (char const *p, char const *q,::uint64_t &offset) const =0
count_type sntCnt (char const *p, char const *const q) const
count_type rawCnt2 (TKN const *keyStart, size_t keyLen) const
virtual count_type rawCnt (char const *p, char const *const q) const =0
virtual void getCounts (char const *p, char const *const q, count_type &sids, count_type &raw) const =0
std::string suffixAt (char const *p, TokenIndex const *V=NULL, size_t maxlen=0) const
std::string suffixAt (ArrayEntry const &I, TokenIndex const *V=NULL, size_t maxlen=0) const
tsa::ArrayEntry & readEntry (char const *p, tsa::ArrayEntry &I) const
char const * dataEnd () const
bool sanityCheck1 () const
::uint64_t getSequenceId (typename std::vector< TKN >::const_iterator const &pstart, typename std::vector< TKN >::const_iterator const &pstop) const
::uint64_t getSequenceId (TKN const *t, ushort plen) const
std::string getSequence (::uint64_t pid, TokenIndex const &V) const
std::vector< TKN > getSequence (::uint64_t pid) const
TKN const * getSequenceStart (::uint64_t) const
ushort getSequenceLength (::uint64_t) const
size_t getCorpusSize () const
Ttrack< TKN > const * getCorpus () const
bitset_pointer getBitSet (TKN const *startKey, size_t keyLen) const
 find all instances of the tree described by [treeStart, treeEnd)
boost::shared_ptr< bitvector > findTree (TKN const *treeStart, TKN const *treeEnd, bitvector const *filter) const
size_t markOccurrences (char const *lo, char const *up, size_t len, bitvector &bitset, bool markOnlyStartPosition) const
bool findBranches (TKN const *base, bitvector const &terminals, std::vector< tree_iterator > &dest) const
double aveIndexEntrySize () const
SPTR< TSA_tree_iterator< TKN > > find (TKN const *start, size_t len) const

Public Attributes

boost::shared_ptr< BSC_t > bsc

Protected Member Functions

virtual char const * index_jump (char const *startRange, char const *stopRange, float fraction) const =0
char const * find_start (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const
char const * find_end (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const
char const * find_longer (char const *lo, char const *const upX, TKN const *const refStart, int refLen, size_t d) const
virtual char const * getLowerBound (id_type id) const =0
virtual char const * getUpperBound (id_type id) const =0

Protected Attributes

boost::shared_ptr< Ttrack< TKN > const > corpus
char const * startArray
char const * endArray
size_t corpusSize
id_type numTokens
id_type indexSize
size_t BitSetCachingThreshold

Friends

class TSA_tree_iterator< TKN >


Detailed Description

template<typename TKN>
class sapt::TSA< TKN >

Base class for [T]oken [S]equence [A]rrays, a generalization of suffix arrays.

Token types (TKN) must provide a number of functions; see the class SimpleWordId (a simple example of a "core token base class") and the template class L2R_Token (a class derived from its template parameter, e.g. SimpleWordId, that handles the ordering of sequences). Both are declared/defined in ug_corpus_token.{h|cc}.

Definition at line 44 of file ug_tsa_base.h.


Member Typedef Documentation

template<typename TKN>
typedef tsa::ArrayEntry sapt::TSA< TKN >::ArrayEntry

Definition at line 50 of file ug_tsa_base.h.

template<typename TKN>
typedef boost::shared_ptr<bitvector> sapt::TSA< TKN >::bitset_pointer

Definition at line 54 of file ug_tsa_base.h.

template<typename TKN>
typedef BitSetCache<TSA<TKN> > sapt::TSA< TKN >::BSC_t

Definition at line 56 of file ug_tsa_base.h.

template<typename TKN>
typedef TKN sapt::TSA< TKN >::Token

Definition at line 55 of file ug_tsa_base.h.

template<typename TKN>
typedef TSA_tree_iterator<TKN> sapt::TSA< TKN >::tree_iterator

Reimplemented in sapt::mmTSA< TOKEN >, sapt::imTSA< TOKEN >, and sapt::mmTSA< Token >.

Definition at line 47 of file ug_tsa_base.h.


Constructor & Destructor Documentation

template<typename TKN>
virtual sapt::TSA< TKN >::~TSA (  )  [inline, virtual]

Definition at line 47 of file ug_tsa_base.h.


Member Function Documentation

template<typename TKN>
char const* sapt::TSA< TKN >::arrayEnd (  )  const [inline]

Definition at line 143 of file ug_tsa_base.h.

Referenced by sapt::TSA_tree_iterator< TKN >::down(), and sapt::TSA< TKN >::upper_bound().

Here is the caller graph for this function:

template<typename TKN>
char const* sapt::TSA< TKN >::arrayStart (  )  const [inline]

Definition at line 142 of file ug_tsa_base.h.

Referenced by sapt::TSA_tree_iterator< TKN >::down().

Here is the caller graph for this function:

template<typename TKN>
double sapt::TSA< TKN >::aveIndexEntrySize (  )  const [inline]

Definition at line 311 of file ug_tsa_base.h.

Referenced by sapt::TSA_tree_iterator< Token >::ca(), and sapt::TSA_tree_iterator< TKN >::randomSample().

Here is the caller graph for this function:

template<typename TKN>
char const* sapt::TSA< TKN >::dataEnd (  )  const

return pointer to the end of the data block

template<typename TKN>
void sapt::TSA< TKN >::dump ( std::ostream &  out,
TokenIndex const &  T 
) const

dump all suffixes in order to /out/

template<typename TKN>
count_type sapt::TSA< TKN >::fillBitSet ( TKN const *  key,
size_t  keyLen,
bdBitset &  dest 
) const [inline]

fill the dynamic bitset with information as to which sentences the phrase occurs in

Returns:
number of total occurrences of the phrase in the corpus

Definition at line 373 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::lower_bound(), sapt::TSA< TKN >::setBits(), sapt::up(), and sapt::TSA< TKN >::upper_bound().

Here is the call graph for this function:

template<typename TKN>
count_type sapt::TSA< TKN >::fillBitSet ( std::vector< TKN > const &  key,
bdBitset &  dest 
) const [inline]

fill the dynamic bit set with true for all sentences that contain /phrase/.

Returns:
the raw number of occurrences.
fill the dynamic bitset with information as to which sentences the phrase occurs in
Returns:
number of total occurrences of the phrase in the corpus

Definition at line 357 of file ug_tsa_base.h.

Referenced by sapt::TSA< TKN >::getBitSet().

Here is the caller graph for this function:

template<typename TKN>
SPTR<TSA_tree_iterator<TKN> > sapt::TSA< TKN >::find ( TKN const *  start,
size_t  len 
) const [inline]

Definition at line 319 of file ug_tsa_base.h.

template<typename TKN>
char const * sapt::TSA< TKN >::find_end ( char const *  lo,
char const *const   upX,
TKN const *const   refStart,
int  refLen,
size_t  d 
) const [inline, protected]

return the index position of the first item that is greater than [refStart,refStart+refLen) and does not include it as a prefix

return the upper bound (first entry beyond) of the token range matching [startKey,endKey)

Definition at line 488 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus, I, sapt::TSA< TKN >::index_jump(), NULL, sapt::TSA< TKN >::readEntry(), and sapt::up().

Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), and sapt::TSA< TKN >::upper_bound().

Here is the call graph for this function:

Here is the caller graph for this function:

template<typename TKN>
char const * sapt::TSA< TKN >::find_longer ( char const *  lo,
char const *const   upX,
TKN const *const   refStart,
int  refLen,
size_t  d 
) const [inline, protected]

return the index position of the first item that is longer than [refStart,refStart+refLen) and includes it as a prefix

return the first entry that has the prefix [refStart,refStart+refLen) but continues on

Definition at line 523 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus, I, sapt::TSA< TKN >::index_jump(), NULL, sapt::TSA< TKN >::readEntry(), and sapt::up().

Referenced by sapt::TSA_tree_iterator< TKN >::down().

Here is the call graph for this function:

Here is the caller graph for this function:

template<typename TKN>
char const * sapt::TSA< TKN >::find_start ( char const *  lo,
char const *const   upX,
TKN const *const   refStart,
int  refLen,
size_t  d 
) const [inline, protected]

return the index position of the first item that is equal to or includes [refStart,refStart+refLen) as a prefix

return the lower bound (first matching entry) of the token range matching [startKey,endKey)

Definition at line 455 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus, I, sapt::TSA< TKN >::index_jump(), NULL, sapt::TSA< TKN >::readEntry(), and sapt::up().

Referenced by sapt::TSA_tree_iterator< TKN >::extend(), and sapt::TSA< TKN >::lower_bound().

Here is the call graph for this function:

Here is the caller graph for this function:

template<typename TKN>
bool sapt::TSA< TKN >::findBranches ( TKN const *  base,
bitvector const &  terminals,
std::vector< tree_iterator > &  dest 
) const [inline]

Definition at line 806 of file ug_tsa_base.h.

References k, sort(), and sorter.

Here is the call graph for this function:

template<typename TKN>
boost::shared_ptr<bitvector> sapt::TSA< TKN >::findTree ( TKN const *  treeStart,
TKN const *  treeEnd,
bitvector const *  filter 
) const

template<typename TKN>
TSA< TKN >::bitset_pointer sapt::TSA< TKN >::getBitSet ( TKN const *  startKey,
size_t  keyLen 
) const [inline]

find all instances of the tree described by [treeStart, treeEnd)

Definition at line 762 of file ug_tsa_base.h.

References sapt::TSA< TKN >::bsc, sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::fillBitSet(), and NULL.

Here is the call graph for this function:

template<typename TKN >
Ttrack< TKN > const * sapt::TSA< TKN >::getCorpus (  )  const [inline]

Definition at line 736 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus.

Referenced by fill(), sapt::BitSetCache< TSA >::get(), and sapt::BitSetCache< TSA >::get2().

Here is the caller graph for this function:

template<typename TKN >
size_t sapt::TSA< TKN >::getCorpusSize (  )  const [inline]

Definition at line 726 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpusSize.

Referenced by sapt::TSA_tree_iterator< TKN >::rawCnt().

Here is the caller graph for this function:

template<typename TKN>
virtual void sapt::TSA< TKN >::getCounts ( char const *  p,
char const *const   q,
count_type &  sids,
count_type &  raw 
) const [pure virtual]

get both sentence and word counts.

Avoids having to go over the byte range representing the range of suffixes in question twice when dealing with memory-mapped suffix arrays.

Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.

template<typename TKN>
virtual char const* sapt::TSA< TKN >::getLowerBound ( id_type  id  )  const [protected, pure virtual]

Returns a char const* pointing to the position in the data block where the first item starting with token /id/ is located.

Referenced by sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA< TKN >::lower_bound(), and sapt::TSA< TKN >::upper_bound().

Here is the caller graph for this function:

template<typename TKN >
std::vector< TKN > sapt::TSA< TKN >::getSequence ( ::uint64_t  pid  )  const [inline]

Return the phrase represented by phrase ID /pid/.

Definition at line 671 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus.

template<typename TKN >
std::string sapt::TSA< TKN >::getSequence ( ::uint64_t  pid,
TokenIndex const &  V 
) const [inline]

Return the phrase represented by phrase ID /pid/.

Definition at line 688 of file ug_tsa_base.h.

References sapt::TSA< TKN >::getSequenceLength(), and sapt::TSA< TKN >::getSequenceStart().

Here is the call graph for this function:

template<typename TKN>
uint64_t sapt::TSA< TKN >::getSequenceId ( TKN const *  t,
ushort  plen 
) const [inline]

Definition at line 652 of file ug_tsa_base.h.

References I, sapt::TSA< TKN >::lower_bound(), and sapt::TSA< TKN >::readEntry().

Here is the call graph for this function:

template<typename TKN>
uint64_t sapt::TSA< TKN >::getSequenceId ( typename std::vector< TKN >::const_iterator const &  pstart,
typename std::vector< TKN >::const_iterator const &  pstop 
) const [inline]

Return an ID that represents a given phrase. This should NEVER be 0! Structure of a phrase ID: the leftmost 32 bits are the sentence ID in the corpus; the next 16 bits are the offset from the start of the sentence; the next 16 bits are the length of the phrase.

Definition at line 641 of file ug_tsa_base.h.

template<typename TKN >
ushort sapt::TSA< TKN >::getSequenceLength ( ::uint64_t  pid  )  const [inline]

Definition at line 716 of file ug_tsa_base.h.

Referenced by sapt::TSA< TKN >::getSequence().

Here is the caller graph for this function:

template<typename TKN >
TKN const * sapt::TSA< TKN >::getSequenceStart ( ::uint64_t  pid  )  const [inline]

Definition at line 705 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus.

Referenced by sapt::TSA< TKN >::getSequence().

Here is the caller graph for this function:

template<typename TKN>
virtual char const* sapt::TSA< TKN >::getUpperBound ( id_type  id  )  const [protected, pure virtual]

template<typename TKN>
virtual char const* sapt::TSA< TKN >::index_jump ( char const *  startRange,
char const *  stopRange,
float  fraction 
) const [protected, pure virtual]

Returns:
an index position approximately /fraction/ between /startRange/ and /stopRange/.

Referenced by sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::find_longer(), and sapt::TSA< TKN >::find_start().

Here is the caller graph for this function:

template<typename TKN>
char const * sapt::TSA< TKN >::lower_bound ( TKN const *  keyStart,
int  keyLen 
) const [inline]

Definition at line 582 of file ug_tsa_base.h.

References sapt::TSA< TKN >::find_start(), sapt::TSA< TKN >::getLowerBound(), sapt::TSA< TKN >::getUpperBound(), and sapt::TSA< TKN >::startArray.

Here is the call graph for this function:

template<typename TKN>
char const * sapt::TSA< TKN >::lower_bound ( TKN const *  keyStart,
TKN const *  keyStop 
) const [inline]

returns the start position in the byte array representing the tightly packed sorted list of corpus positions for the given search phrase

Definition at line 573 of file ug_tsa_base.h.

References sapt::TSA< TKN >::lower_bound().

Here is the call graph for this function:

template<typename TKN>
char const * sapt::TSA< TKN >::lower_bound ( typename std::vector< TKN >::const_iterator const &  keyStart,
typename std::vector< TKN >::const_iterator const &  keyStop 
) const [inline]

Returns:
a pointer to the beginning of the index entry range covering [keyStart,keyStop)
returns the start position in the byte array representing the tightly packed sorted list of corpus positions for the given search phrase

Definition at line 556 of file ug_tsa_base.h.

Referenced by sapt::TSA< TKN >::fillBitSet(), sapt::BitSetCache< TSA >::get(), sapt::BitSetCache< TSA >::get2(), sapt::TSA< TKN >::getSequenceId(), sapt::TSA< TKN >::lower_bound(), and sapt::TSA< TKN >::rawCnt2().

Here is the caller graph for this function:

template<typename TKN >
size_t sapt::TSA< TKN >::markOccurrences ( char const *  lo,
char const *  up,
size_t  len,
bitvector &  bitset,
bool  markOnlyStartPosition 
) const [inline]

Definition at line 780 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), and sid.

Here is the call graph for this function:

template<typename TKN>
virtual count_type sapt::TSA< TKN >::rawCnt ( char const *  p,
char const *const   q 
) const [pure virtual]

Returns:
raw occurrence count
Depending on the subclass, this is constant time (imTSA) or linear in the number of occurrences (mmTSA).

Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.

Referenced by sapt::TSA_tree_iterator< TKN >::rawCnt(), and sapt::TSA< TKN >::rawCnt2().

Here is the caller graph for this function:

template<typename TKN>
count_type sapt::TSA< TKN >::rawCnt2 ( TKN const *  keyStart,
size_t  keyLen 
) const [inline]

Definition at line 628 of file ug_tsa_base.h.

References sapt::TSA< TKN >::lower_bound(), sapt::TSA< TKN >::rawCnt(), sapt::up(), and sapt::TSA< TKN >::upper_bound().

Here is the call graph for this function:

template<typename TKN >
tsa::ArrayEntry & sapt::TSA< TKN >::readEntry ( char const *  p,
tsa::ArrayEntry &  I 
) const [inline]

template<typename TKN>
virtual char const* sapt::TSA< TKN >::readOffset ( char const *  p,
char const *  q,
::uint64_t &  offset 
) const [pure virtual]

template<typename TKN>
virtual char const* sapt::TSA< TKN >::readOffset ( char const *  p,
char const *  q,
uint16_t &  offset 
) const [pure virtual]

read the offset part of the index entry into /offset/

Returns:
position of the next entry in the index.
The function provides an abstraction that uses the right interpretation of the position based on the subclass (memory-mapped or in-memory).

Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.

Referenced by sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA< TKN >::readEntry(), sapt::TSA< TKN >::setBits(), and sapt::TSA< TKN >::sntCnt().

Here is the caller graph for this function:

template<typename TKN>
virtual char const* sapt::TSA< TKN >::readSid ( char const *  p,
char const *  q,
::uint64_t &  sid 
) const [pure virtual]

template<typename TKN>
virtual char const* sapt::TSA< TKN >::readSid ( char const *  p,
char const *  q,
id_type &  sid 
) const [pure virtual]

read the sentence ID into /sid/

Returns:
position of associated offset.
The function provides an abstraction that uses the right interpretation of the position based on the subclass (memory-mapped or in-memory).

Implemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.

Referenced by sapt::TSA_tree_iterator< TKN >::getSid(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA< TKN >::readEntry(), sapt::TSA< TKN >::setBits(), and sapt::TSA< TKN >::sntCnt().

Here is the caller graph for this function:

template<typename TKN>
bool sapt::TSA< TKN >::sanityCheck1 (  )  const

template<typename TKN >
count_type sapt::TSA< TKN >::setBits ( char const *  startRange,
char const *  endRange,
boost::dynamic_bitset< uint64_t > &  bs 
) const [inline]

Definition at line 388 of file ug_tsa_base.h.

References sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), and sid.

Referenced by sapt::TSA< TKN >::fillBitSet().

Here is the call graph for this function:

Here is the caller graph for this function:

template<typename TKN >
void sapt::TSA< TKN >::setTokenBits ( char const *  startRange,
char const *  endRange,
size_t  len,
bitvector &  bs 
) const [inline]

Definition at line 410 of file ug_tsa_base.h.

References sapt::TSA< TKN >::corpus, I, sapt::L2R_Token< T >::next(), sapt::TSA< TKN >::readEntry(), sapt::L2R_Token< T >::stop(), and stop.

Here is the call graph for this function:

template<typename TKN >
count_type sapt::TSA< TKN >::sntCnt ( char const *  p,
char const *const   q 
) const [inline]

Returns:
sentence count

Reimplemented in sapt::imTSA< TOKEN >, sapt::mmTSA< TOKEN >, and sapt::mmTSA< Token >.

Definition at line 434 of file ug_tsa_base.h.

References sapt::check(), sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), and sid.

Here is the call graph for this function:

template<typename TKN>
std::string sapt::TSA< TKN >::suffixAt ( ArrayEntry const &  I,
TokenIndex const *  V = NULL,
size_t  maxlen = 0 
) const

template<typename TKN>
std::string sapt::TSA< TKN >::suffixAt ( char const *  p,
TokenIndex const *  V = NULL,
size_t  maxlen = 0 
) const

template<typename TKN>
char const * sapt::TSA< TKN >::upper_bound ( TKN const *  keyStart,
int  keyLength 
) const [inline]

returns the upper bound in the byte array representing the tightly packed sorted list of corpus positions for the given search phrase (i.e., points just beyond the range)

Definition at line 615 of file ug_tsa_base.h.

References sapt::TSA< TKN >::arrayEnd(), sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::getLowerBound(), and sapt::TSA< TKN >::getUpperBound().

Here is the call graph for this function:

template<typename TKN>
char const * sapt::TSA< TKN >::upper_bound ( typename std::vector< TKN >::const_iterator const &  keyStart,
typename std::vector< TKN >::const_iterator const &  keyStop 
) const [inline]

Returns:
a pointer to the end point of the index entry range covering [keyStart,keyStop)
returns the upper bound in the byte array representing the tightly packed sorted list of corpus positions for the given search phrase (i.e., points just beyond the range)

Definition at line 598 of file ug_tsa_base.h.

Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA< TKN >::fillBitSet(), sapt::BitSetCache< TSA >::get(), sapt::BitSetCache< TSA >::get2(), and sapt::TSA< TKN >::rawCnt2().

Here is the caller graph for this function:


Friends And Related Function Documentation

template<typename TKN>
friend class TSA_tree_iterator< TKN > [friend]

Definition at line 60 of file ug_tsa_base.h.


Member Data Documentation

template<typename TKN>
size_t sapt::TSA< TKN >::BitSetCachingThreshold [protected]

Definition at line 90 of file ug_tsa_base.h.

template<typename TKN>
boost::shared_ptr<BSC_t> sapt::TSA< TKN >::bsc

Definition at line 140 of file ug_tsa_base.h.

Referenced by sapt::TSA< TKN >::getBitSet().

template<typename TKN>
boost::shared_ptr<Ttrack<TKN> const> sapt::TSA< TKN >::corpus [protected]

template<typename TKN>
size_t sapt::TSA< TKN >::corpusSize [protected]

Definition at line 68 of file ug_tsa_base.h.

Referenced by sapt::TSA< TKN >::getCorpusSize().

template<typename TKN>
char const* sapt::TSA< TKN >::endArray [protected]

template<typename TKN>
id_type sapt::TSA< TKN >::indexSize [protected]

Size (in number of tokens) of the corpus underlying the sequence array.

ATTENTION: This number may differ from corpus->numTokens(), namely when the suffix array is based on a subset of the sentences of /corpus/.

Definition at line 87 of file ug_tsa_base.h.

template<typename TKN>
id_type sapt::TSA< TKN >::numTokens [protected]

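Size (in number of sentences) of the corpus underlying the sequence array. NOTE(review): the member name suggests a token count rather than a sentence count; this description may have been attached to the wrong member by the documentation generator — verify against ug_tsa_base.h.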

ATTENTION: This number may differ from corpus->size(), namely when the suffix array is based on a subset of the sentences of /corpus/.

Definition at line 78 of file ug_tsa_base.h.

Referenced by sapt::TSA< TOKEN >::aveIndexEntrySize().

template<typename TKN>
char const* sapt::TSA< TKN >::startArray [protected]


The documentation for this class was generated from the following file:

ug_tsa_base.h

Generated on Thu Jul 6 00:34:28 2017 for Moses by  doxygen 1.5.9