#include <ug_tsa_tree_iterator.h>
Classes | |
struct | SortByApproximateCount |
Public Types | |
typedef TKN | Token |
Public Member Functions | |
virtual | ~TSA_tree_iterator () |
TSA_tree_iterator (TSA< Token > const *s) | |
TSA_tree_iterator (TSA< Token > const *s, TSA_tree_iterator< Token > const &other) | |
TSA_tree_iterator (TSA< Token > const *r, id_type const *s, size_t const len) | |
TSA_tree_iterator (TSA< Token > const *s, Token const *kstart, size_t const len, bool full_match_only=true) | |
TSA_tree_iterator (TSA< Token > const *s, Token const *kstart, Token const *kend, bool full_match_only=true) | |
TSA_tree_iterator (TSA< Token > const *s, TokenIndex const &V, std::string const &key) | |
char const * | lower_bound (int p) const |
char const * | upper_bound (int p) const |
size_t | size () const |
Token const * | getToken (int p) const |
id_type | getSid () const |
ushort | getOffset (int p) const |
size_t | sntCnt (int p=-1) const |
size_t | rawCnt (int p=-1) const |
uint64_t | getPid (int p=-1) const |
virtual bool | extend (Token const &id) |
virtual bool | extend (id_type id) |
virtual bool | down () |
virtual bool | over () |
virtual bool | up () |
std::string | str (TokenIndex const *V=NULL, int start=0, int stop=0) const |
bool | match (Token const *start, Token const *stop) const |
bool | match (id_type sid) const |
count_type | fillBitSet (boost::dynamic_bitset< uint64_t > &bitset) const |
count_type | markEndOfSequence (Token const *start, Token const *stop, boost::dynamic_bitset< uint64_t > &dest) const |
count_type | markSequence (Token const *start, Token const *stop, bitvector &dest) const |
count_type | markSentences (boost::dynamic_bitset< uint64_t > &bitset) const |
count_type | markOccurrences (boost::dynamic_bitset< uint64_t > &bitset, bool markOnlyStartPosition=false) const |
count_type | markOccurrences (std::vector< ushort > &dest) const |
::uint64_t | getSequenceId () const |
bitvector & | filterSentences (bitvector &foo) const |
void | tfAndRoot (bitvector const &ref, bitvector const &snt, bitvector &dest) const |
A special auxiliary function for finding trees. | |
size_t | arrayByteSpanSize (int p=-1) const |
double | ca (int p=-1) const |
double | approxOccurrenceCount (int p=-1) const |
size_t | grow (Token const *t, Token const *stop) |
size_t | grow (Token const *snt, bitvector const &cov) |
SPTR< std::vector< typename ttrack::Position > > | randomSample (int level, size_t N) const |
Randomly selects up to N occurrences of the sequence. | |
Public Attributes | |
TSA< Token > const * | root |
Protected Member Functions | |
void | showBounds (std::ostream &out) const |
Protected Attributes | |
std::vector< char const * > | lower |
std::vector< char const * > | upper |
Definition at line 43 of file ug_tsa_tree_iterator.h.
typedef TKN sapt::TSA_tree_iterator< TKN >::Token |
Definition at line 53 of file ug_tsa_tree_iterator.h.
virtual sapt::TSA_tree_iterator< TKN >::~TSA_tree_iterator | ( | ) | [inline, virtual] |
Definition at line 55 of file ug_tsa_tree_iterator.h.
sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s | ) | [inline] |
Definition at line 336 of file ug_tsa_tree_iterator.h.
sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
TSA_tree_iterator< Token > const & | other | |||
) | [inline] |
Definition at line 342 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::getToken(), and sapt::TSA_tree_iterator< TKN >::size().
sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | r, | |
id_type const * | s, | |||
size_t const | len | |||
) | [inline] |
Definition at line 355 of file ug_tsa_tree_iterator.h.
sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
Token const * | kstart, | |||
size_t const | len, | |||
bool | full_match_only = true | |||
) | [inline] |
Definition at line 420 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.
sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
Token const * | kstart, | |||
Token const * | kend, | |||
bool | full_match_only = true | |||
) | [inline] |
Definition at line 439 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::lower, and sapt::TSA_tree_iterator< TKN >::upper.
sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
TokenIndex const & | V, | |||
std::string const & | key | |||
) | [inline] |
Definition at line 368 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::lower, and sapt::TSA_tree_iterator< TKN >::upper.
double sapt::TSA_tree_iterator< TKN >::approxOccurrenceCount | ( | int | p = -1 |
) | const [inline] |
Definition at line 173 of file ug_tsa_tree_iterator.h.
Referenced by dump(), getoccs(), lookup_phrases(), sapt::Bitext< TKN >::prep2(), and show().
size_t sapt::TSA_tree_iterator< TKN >::arrayByteSpanSize | ( | int | p = -1 |
) | const [inline] |
Definition at line 136 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::SortByApproximateCount::operator()().
double sapt::TSA_tree_iterator< TKN >::ca | ( | int | p = -1 |
) | const [inline] |
Definition at line 156 of file ug_tsa_tree_iterator.h.
Referenced by show().
bool sapt::TSA_tree_iterator< TSA_TYPE >::down | ( | ) | [inline, virtual] |
Definition at line 204 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::arrayEnd(), sapt::TSA< TKN >::arrayStart(), sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::find_longer(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA< TKN >::getUpperBound(), sapt::TSA_tree_iterator< TKN >::lower, sapt::next(), sapt::TSA< TKN >::readEntry(), sapt::TSA_tree_iterator< TKN >::root, sapt::TSA_tree_iterator< TKN >::size(), u(), sapt::TSA_tree_iterator< TKN >::up(), sapt::TSA_tree_iterator< TKN >::upper, and sapt::TSA< TKN >::upper_bound().
Referenced by dump().
bool sapt::TSA_tree_iterator< Token >::extend | ( | id_type | id | ) | [inline, virtual] |
Definition at line 460 of file ug_tsa_tree_iterator.h.
bool sapt::TSA_tree_iterator< Token >::extend | ( | Token const & | id | ) | [inline, virtual] |
Definition at line 469 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::find_start(), sapt::TSA< TKN >::getLowerBound(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA< TKN >::getUpperBound(), I, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readEntry(), sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.
Referenced by lookup_phrases(), and sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator().
count_type sapt::TSA_tree_iterator< Token >::fillBitSet | ( | boost::dynamic_bitset< uint64_t > & | bitset | ) | const [inline] |
Definition at line 627 of file ug_tsa_tree_iterator.h.
bitvector & sapt::TSA_tree_iterator< Token >::filterSentences | ( | bitvector & | foo | ) | const [inline] |
Definition at line 888 of file ug_tsa_tree_iterator.h.
ushort sapt::TSA_tree_iterator< TKN >::getOffset | ( | int | p | ) | const |
uint64_t sapt::TSA_tree_iterator< Token >::getPid | ( | int | p = -1 |
) | const [inline] |
Definition at line 531 of file ug_tsa_tree_iterator.h.
References sid.
Referenced by lookup_phrases(), and sapt::Bitext< TKN >::prep2().
uint64_t sapt::TSA_tree_iterator< Token >::getSequenceId | ( | ) | const [inline] |
Definition at line 770 of file ug_tsa_tree_iterator.h.
References I, and sapt::TSA< TKN >::readEntry().
id_type sapt::TSA_tree_iterator< Token >::getSid | ( | ) | const [inline] |
Definition at line 517 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::endArray, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readSid(), sapt::TSA_tree_iterator< TKN >::root, sid, sapt::TSA< TKN >::startArray, and sapt::TSA_tree_iterator< TKN >::upper.
Token const * sapt::TSA_tree_iterator< Token >::getToken | ( | int | p | ) | const [inline] |
Definition at line 575 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, sapt::TSA_tree_iterator< TKN >::lower, sapt::next(), NULL, and sapt::TSA_tree_iterator< TKN >::root.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::markSequence(), sapt::TSA_tree_iterator< TKN >::match(), show(), sapt::TSA_tree_iterator< TKN >::str(), and sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator().
size_t sapt::TSA_tree_iterator< TKN >::grow | ( | Token const * | snt, | |
bitvector const & | cov | |||
) | [inline] |
Definition at line 184 of file ug_tsa_tree_iterator.h.
size_t sapt::TSA_tree_iterator< TKN >::grow | ( | Token const * | t, | |
Token const * | stop | |||
) | [inline] |
Definition at line 178 of file ug_tsa_tree_iterator.h.
char const * sapt::TSA_tree_iterator< Token >::lower_bound | ( | int | p | ) | const [inline] |
Definition at line 548 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::lower.
Referenced by getoccs().
count_type sapt::TSA_tree_iterator< Token >::markEndOfSequence | ( | Token const * | start, | |
Token const * | stop, | |||
boost::dynamic_bitset< uint64_t > & | dest | |||
) | const [inline] |
Definition at line 710 of file ug_tsa_tree_iterator.h.
count_type sapt::TSA_tree_iterator< Token >::markOccurrences | ( | std::vector< ushort > & | dest | ) | const [inline] |
Definition at line 680 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), sapt::TSA_tree_iterator< TKN >::root, sid, and sapt::TSA_tree_iterator< TKN >::upper.
count_type sapt::TSA_tree_iterator< Token >::markOccurrences | ( | boost::dynamic_bitset< uint64_t > & | bitset, | |
bool | markOnlyStartPosition = false | |||
) | const [inline] |
Definition at line 664 of file ug_tsa_tree_iterator.h.
count_type sapt::TSA_tree_iterator< Token >::markSentences | ( | boost::dynamic_bitset< uint64_t > & | bitset | ) | const [inline] |
Definition at line 637 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), sapt::TSA_tree_iterator< TKN >::root, sid, and sapt::TSA_tree_iterator< TKN >::upper.
count_type sapt::TSA_tree_iterator< Token >::markSequence | ( | Token const * | start, | |
Token const * | stop, | |||
bitvector & | dest | |||
) | const [inline] |
Definition at line 742 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::getToken(), and sapt::TSA_tree_iterator< TKN >::size().
bool sapt::TSA_tree_iterator< Token >::match | ( | id_type | sid | ) | const [inline] |
Definition at line 853 of file ug_tsa_tree_iterator.h.
bool sapt::TSA_tree_iterator< Token >::match | ( | Token const * | start, | |
Token const * | stop | |||
) | const [inline] |
Definition at line 828 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA_tree_iterator< TKN >::lower, and sapt::L2R_Token< T >::next().
bool sapt::TSA_tree_iterator< Token >::over | ( | ) | [inline, virtual] |
Definition at line 253 of file ug_tsa_tree_iterator.h.
Referenced by dump().
SPTR< std::vector< typename ttrack::Position > > sapt::TSA_tree_iterator< Token >::randomSample | ( | int | level, | |
size_t | N | |||
) | const [inline] |
Randomly selects up to N occurrences of the sequence.
Definition at line 912 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::aveIndexEntrySize(), I, sapt::TSA_tree_iterator< TKN >::lower, m, util::rand_excl(), sapt::TSA< TKN >::readEntry(), sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.
size_t sapt::TSA_tree_iterator< Token >::rawCnt | ( | int | p = -1 |
) | const [inline] |
Definition at line 614 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::getCorpusSize(), sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::rawCnt(), sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.
void sapt::TSA_tree_iterator< TKN >::showBounds | ( | std::ostream & | out | ) | const [protected] |
size_t sapt::TSA_tree_iterator< Token >::size | ( | ) | const [inline] |
Definition at line 507 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), dump(), sapt::TSA_tree_iterator< TKN >::markSequence(), sapt::TSA_tree_iterator< TKN >::SortByApproximateCount::operator()(), show(), sapt::TSA_tree_iterator< TKN >::str(), and sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator().
size_t sapt::TSA_tree_iterator< Token >::sntCnt | ( | int | p = -1 |
) | const [inline] |
Definition at line 600 of file ug_tsa_tree_iterator.h.
std::string sapt::TSA_tree_iterator< Token >::str | ( | TokenIndex const * | V = NULL, | |
int | start = 0, | |||
int | stop = 0 | |||
) | const [inline] |
Definition at line 782 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::getToken(), and sapt::TSA_tree_iterator< TKN >::size().
Referenced by show().
void sapt::TSA_tree_iterator< Token >::tfAndRoot | ( | bitvector const & | ref, | |
bitvector const & | snt, | |||
bitvector & | dest | |||
) | const [inline] |
A special auxiliary function for finding trees.
Definition at line 864 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, I, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readEntry(), sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.
bool sapt::TSA_tree_iterator< Token >::up | ( | ) | [inline, virtual] |
Definition at line 318 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::lower, and sapt::TSA_tree_iterator< TKN >::upper.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), and dump().
char const * sapt::TSA_tree_iterator< Token >::upper_bound | ( | int | p | ) | const [inline] |
Definition at line 560 of file ug_tsa_tree_iterator.h.
Referenced by getoccs().
std::vector<char const*> sapt::TSA_tree_iterator< TKN >::lower [protected] |
Definition at line 47 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::getSid(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA_tree_iterator< TKN >::lower_bound(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA_tree_iterator< TKN >::match(), sapt::TSA_tree_iterator< TKN >::randomSample(), sapt::TSA_tree_iterator< TKN >::rawCnt(), sapt::TSA_tree_iterator< TKN >::tfAndRoot(), sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator(), and sapt::TSA_tree_iterator< TKN >::up().
TSA<Token> const* sapt::TSA_tree_iterator< TKN >::root |
Definition at line 55 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), getoccs(), sapt::TSA_tree_iterator< TKN >::getSid(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::Bitext< TKN >::prep2(), sapt::TSA_tree_iterator< TKN >::randomSample(), sapt::TSA_tree_iterator< TKN >::rawCnt(), sapt::TSA_tree_iterator< TKN >::tfAndRoot(), and sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator().
std::vector<char const*> sapt::TSA_tree_iterator< TKN >::upper [protected] |
Definition at line 48 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::getSid(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA_tree_iterator< TKN >::randomSample(), sapt::TSA_tree_iterator< TKN >::rawCnt(), sapt::TSA_tree_iterator< TKN >::tfAndRoot(), sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator(), and sapt::TSA_tree_iterator< TKN >::up().