#include <ug_tsa_tree_iterator.h>


Classes | |
| struct | SortByApproximateCount |
Public Types | |
| typedef TKN | Token |
Public Member Functions | |
| virtual | ~TSA_tree_iterator () |
| TSA_tree_iterator (TSA< Token > const *s) | |
| TSA_tree_iterator (TSA< Token > const *s, TSA_tree_iterator< Token > const &other) | |
| TSA_tree_iterator (TSA< Token > const *r, id_type const *s, size_t const len) | |
| TSA_tree_iterator (TSA< Token > const *s, Token const *kstart, size_t const len, bool full_match_only=true) | |
| TSA_tree_iterator (TSA< Token > const *s, Token const *kstart, Token const *kend, bool full_match_only=true) | |
| TSA_tree_iterator (TSA< Token > const *s, TokenIndex const &V, std::string const &key) | |
| char const * | lower_bound (int p) const |
| char const * | upper_bound (int p) const |
| size_t | size () const |
| Token const * | getToken (int p) const |
| id_type | getSid () const |
| ushort | getOffset (int p) const |
| size_t | sntCnt (int p=-1) const |
| size_t | rawCnt (int p=-1) const |
| uint64_t | getPid (int p=-1) const |
| virtual bool | extend (Token const &id) |
| virtual bool | extend (id_type id) |
| virtual bool | down () |
| virtual bool | over () |
| virtual bool | up () |
| std::string | str (TokenIndex const *V=NULL, int start=0, int stop=0) const |
| bool | match (Token const *start, Token const *stop) const |
| bool | match (id_type sid) const |
| count_type | fillBitSet (boost::dynamic_bitset< uint64_t > &bitset) const |
| count_type | markEndOfSequence (Token const *start, Token const *stop, boost::dynamic_bitset< uint64_t > &dest) const |
| count_type | markSequence (Token const *start, Token const *stop, bitvector &dest) const |
| count_type | markSentences (boost::dynamic_bitset< uint64_t > &bitset) const |
| count_type | markOccurrences (boost::dynamic_bitset< uint64_t > &bitset, bool markOnlyStartPosition=false) const |
| count_type | markOccurrences (std::vector< ushort > &dest) const |
| ::uint64_t | getSequenceId () const |
| bitvector & | filterSentences (bitvector &foo) const |
| void | tfAndRoot (bitvector const &ref, bitvector const &snt, bitvector &dest) const |
| A special auxiliary function for finding trees. | |
| size_t | arrayByteSpanSize (int p=-1) const |
| double | ca (int p=-1) const |
| double | approxOccurrenceCount (int p=-1) const |
| size_t | grow (Token const *t, Token const *stop) |
| size_t | grow (Token const *snt, bitvector const &cov) |
| SPTR< std::vector< typename ttrack::Position > > | randomSample (int level, size_t N) const |
| Randomly select up to N occurrences of the sequence. | |
Public Attributes | |
| TSA< Token > const * | root |
Protected Member Functions | |
| void | showBounds (std::ostream &out) const |
Protected Attributes | |
| std::vector< char const * > | lower |
| std::vector< char const * > | upper |
Definition at line 43 of file ug_tsa_tree_iterator.h.
| typedef TKN sapt::TSA_tree_iterator< TKN >::Token |
Definition at line 53 of file ug_tsa_tree_iterator.h.
| virtual sapt::TSA_tree_iterator< TKN >::~TSA_tree_iterator | ( | ) | [inline, virtual] |
Definition at line 55 of file ug_tsa_tree_iterator.h.
| sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s | ) | [inline] |
Definition at line 336 of file ug_tsa_tree_iterator.h.
| sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
| TSA_tree_iterator< Token > const & | other | |||
| ) | [inline] |
Definition at line 342 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::getToken(), and sapt::TSA_tree_iterator< TKN >::size().

| sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | r, | |
| id_type const * | s, | |||
| size_t const | len | |||
| ) | [inline] |
Definition at line 355 of file ug_tsa_tree_iterator.h.
| sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
| Token const * | kstart, | |||
| size_t const | len, | |||
| bool | full_match_only = true | |||
| ) | [inline] |
Definition at line 420 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.

| sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
| Token const * | kstart, | |||
| Token const * | kend, | |||
| bool | full_match_only = true | |||
| ) | [inline] |
Definition at line 439 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::lower, and sapt::TSA_tree_iterator< TKN >::upper.

| sapt::TSA_tree_iterator< Token >::TSA_tree_iterator | ( | TSA< Token > const * | s, | |
| TokenIndex const & | V, | |||
| std::string const & | key | |||
| ) | [inline] |
Definition at line 368 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::lower, and sapt::TSA_tree_iterator< TKN >::upper.

| double sapt::TSA_tree_iterator< TKN >::approxOccurrenceCount | ( | int | p = -1 |
) | const [inline] |
Definition at line 173 of file ug_tsa_tree_iterator.h.
Referenced by dump(), getoccs(), lookup_phrases(), sapt::Bitext< TKN >::prep2(), and show().

| size_t sapt::TSA_tree_iterator< TKN >::arrayByteSpanSize | ( | int | p = -1 |
) | const [inline] |
Definition at line 136 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::SortByApproximateCount::operator()().

| double sapt::TSA_tree_iterator< TKN >::ca | ( | int | p = -1 |
) | const [inline] |
Definition at line 156 of file ug_tsa_tree_iterator.h.
Referenced by show().

| bool sapt::TSA_tree_iterator< TSA_TYPE >::down | ( | ) | [inline, virtual] |
Definition at line 204 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::arrayEnd(), sapt::TSA< TKN >::arrayStart(), sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::find_longer(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA< TKN >::getUpperBound(), sapt::TSA_tree_iterator< TKN >::lower, sapt::next(), sapt::TSA< TKN >::readEntry(), sapt::TSA_tree_iterator< TKN >::root, sapt::TSA_tree_iterator< TKN >::size(), u(), sapt::TSA_tree_iterator< TKN >::up(), sapt::TSA_tree_iterator< TKN >::upper, and sapt::TSA< TKN >::upper_bound().
Referenced by dump().


| bool sapt::TSA_tree_iterator< Token >::extend | ( | id_type | id | ) | [inline, virtual] |
Definition at line 460 of file ug_tsa_tree_iterator.h.
| bool sapt::TSA_tree_iterator< Token >::extend | ( | Token const & | id | ) | [inline, virtual] |
Definition at line 469 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, sapt::TSA< TKN >::find_end(), sapt::TSA< TKN >::find_start(), sapt::TSA< TKN >::getLowerBound(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA< TKN >::getUpperBound(), I, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readEntry(), sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.
Referenced by lookup_phrases(), and sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator().


| count_type sapt::TSA_tree_iterator< Token >::fillBitSet | ( | boost::dynamic_bitset< uint64_t > & | bitset | ) | const [inline] |
Definition at line 627 of file ug_tsa_tree_iterator.h.
| bitvector & sapt::TSA_tree_iterator< Token >::filterSentences | ( | bitvector & | foo | ) | const [inline] |
Definition at line 888 of file ug_tsa_tree_iterator.h.
| ushort sapt::TSA_tree_iterator< TKN >::getOffset | ( | int | p | ) | const |
| uint64_t sapt::TSA_tree_iterator< Token >::getPid | ( | int | p = -1 |
) | const [inline] |
Definition at line 531 of file ug_tsa_tree_iterator.h.
References sid.
Referenced by lookup_phrases(), and sapt::Bitext< TKN >::prep2().

| uint64_t sapt::TSA_tree_iterator< Token >::getSequenceId | ( | ) | const [inline] |
Definition at line 770 of file ug_tsa_tree_iterator.h.
References I, and sapt::TSA< TKN >::readEntry().

| id_type sapt::TSA_tree_iterator< Token >::getSid | ( | ) | const [inline] |
Definition at line 517 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::endArray, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readSid(), sapt::TSA_tree_iterator< TKN >::root, sid, sapt::TSA< TKN >::startArray, and sapt::TSA_tree_iterator< TKN >::upper.

| Token const * sapt::TSA_tree_iterator< Token >::getToken | ( | int | p | ) | const [inline] |
Definition at line 575 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, sapt::TSA_tree_iterator< TKN >::lower, sapt::next(), NULL, and sapt::TSA_tree_iterator< TKN >::root.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::markSequence(), sapt::TSA_tree_iterator< TKN >::match(), show(), sapt::TSA_tree_iterator< TKN >::str(), and sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator().


| size_t sapt::TSA_tree_iterator< TKN >::grow | ( | Token const * | snt, | |
| bitvector const & | cov | |||
| ) | [inline] |
Definition at line 184 of file ug_tsa_tree_iterator.h.
| size_t sapt::TSA_tree_iterator< TKN >::grow | ( | Token const * | t, | |
| Token const * | stop | |||
| ) | [inline] |
Definition at line 178 of file ug_tsa_tree_iterator.h.
| char const * sapt::TSA_tree_iterator< Token >::lower_bound | ( | int | p | ) | const [inline] |
Definition at line 548 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::lower.
Referenced by getoccs().

| count_type sapt::TSA_tree_iterator< Token >::markEndOfSequence | ( | Token const * | start, | |
| Token const * | stop, | |||
| boost::dynamic_bitset< uint64_t > & | dest | |||
| ) | const [inline] |
Definition at line 710 of file ug_tsa_tree_iterator.h.
| count_type sapt::TSA_tree_iterator< Token >::markOccurrences | ( | std::vector< ushort > & | dest | ) | const [inline] |
Definition at line 680 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), sapt::TSA_tree_iterator< TKN >::root, sid, and sapt::TSA_tree_iterator< TKN >::upper.

| count_type sapt::TSA_tree_iterator< Token >::markOccurrences | ( | boost::dynamic_bitset< uint64_t > & | bitset, | |
| bool | markOnlyStartPosition = false | |||
| ) | const [inline] |
Definition at line 664 of file ug_tsa_tree_iterator.h.
| count_type sapt::TSA_tree_iterator< Token >::markSentences | ( | boost::dynamic_bitset< uint64_t > & | bitset | ) | const [inline] |
Definition at line 637 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readOffset(), sapt::TSA< TKN >::readSid(), sapt::TSA_tree_iterator< TKN >::root, sid, and sapt::TSA_tree_iterator< TKN >::upper.

| count_type sapt::TSA_tree_iterator< Token >::markSequence | ( | Token const * | start, | |
| Token const * | stop, | |||
| bitvector & | dest | |||
| ) | const [inline] |
Definition at line 742 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::getToken(), and sapt::TSA_tree_iterator< TKN >::size().

| bool sapt::TSA_tree_iterator< Token >::match | ( | id_type | sid | ) | const [inline] |
Definition at line 853 of file ug_tsa_tree_iterator.h.
| bool sapt::TSA_tree_iterator< Token >::match | ( | Token const * | start, | |
| Token const * | stop | |||
| ) | const [inline] |
Definition at line 828 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA_tree_iterator< TKN >::lower, and sapt::L2R_Token< T >::next().

| bool sapt::TSA_tree_iterator< Token >::over | ( | ) | [inline, virtual] |
Definition at line 253 of file ug_tsa_tree_iterator.h.
Referenced by dump().

| SPTR< std::vector< typename ttrack::Position > > sapt::TSA_tree_iterator< Token >::randomSample | ( | int | level, | |
| size_t | N | |||
| ) | const [inline] |
Randomly select up to N occurrences of the sequence.
Definition at line 912 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::aveIndexEntrySize(), I, sapt::TSA_tree_iterator< TKN >::lower, m, util::rand_excl(), sapt::TSA< TKN >::readEntry(), sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.

| size_t sapt::TSA_tree_iterator< Token >::rawCnt | ( | int | p = -1 |
) | const [inline] |
Definition at line 614 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::getCorpusSize(), sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::rawCnt(), sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.

| void sapt::TSA_tree_iterator< TKN >::showBounds | ( | std::ostream & | out | ) | const [protected] |
| size_t sapt::TSA_tree_iterator< Token >::size | ( | ) | const [inline] |
Definition at line 507 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), dump(), sapt::TSA_tree_iterator< TKN >::markSequence(), sapt::TSA_tree_iterator< TKN >::SortByApproximateCount::operator()(), show(), sapt::TSA_tree_iterator< TKN >::str(), and sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator().

| size_t sapt::TSA_tree_iterator< Token >::sntCnt | ( | int | p = -1 |
) | const [inline] |
Definition at line 600 of file ug_tsa_tree_iterator.h.
| std::string sapt::TSA_tree_iterator< Token >::str | ( | TokenIndex const * | V = NULL, | |
| int | start = 0, | |||
| int | stop = 0 | |||
| ) | const [inline] |
Definition at line 782 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::getToken(), and sapt::TSA_tree_iterator< TKN >::size().
Referenced by show().


| void sapt::TSA_tree_iterator< Token >::tfAndRoot | ( | bitvector const & | ref, | |
| bitvector const & | snt, | |||
| bitvector & | dest | |||
| ) | const [inline] |
A special auxiliary function for finding trees.
Definition at line 864 of file ug_tsa_tree_iterator.h.
References sapt::TSA< TKN >::corpus, I, sapt::TSA_tree_iterator< TKN >::lower, sapt::TSA< TKN >::readEntry(), sapt::TSA_tree_iterator< TKN >::root, and sapt::TSA_tree_iterator< TKN >::upper.

| bool sapt::TSA_tree_iterator< Token >::up | ( | ) | [inline, virtual] |
Definition at line 318 of file ug_tsa_tree_iterator.h.
References sapt::TSA_tree_iterator< TKN >::lower, and sapt::TSA_tree_iterator< TKN >::upper.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), and dump().

| char const * sapt::TSA_tree_iterator< Token >::upper_bound | ( | int | p | ) | const [inline] |
Definition at line 560 of file ug_tsa_tree_iterator.h.
Referenced by getoccs().

| std::vector<char const*> sapt::TSA_tree_iterator< TKN >::lower [protected] |
Definition at line 47 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::getSid(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA_tree_iterator< TKN >::lower_bound(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA_tree_iterator< TKN >::match(), sapt::TSA_tree_iterator< TKN >::randomSample(), sapt::TSA_tree_iterator< TKN >::rawCnt(), sapt::TSA_tree_iterator< TKN >::tfAndRoot(), sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator(), and sapt::TSA_tree_iterator< TKN >::up().
| TSA<Token> const* sapt::TSA_tree_iterator< TKN >::root |
Definition at line 55 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), getoccs(), sapt::TSA_tree_iterator< TKN >::getSid(), sapt::TSA_tree_iterator< TKN >::getToken(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::Bitext< TKN >::prep2(), sapt::TSA_tree_iterator< TKN >::randomSample(), sapt::TSA_tree_iterator< TKN >::rawCnt(), sapt::TSA_tree_iterator< TKN >::tfAndRoot(), and sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator().
| std::vector<char const*> sapt::TSA_tree_iterator< TKN >::upper [protected] |
Definition at line 48 of file ug_tsa_tree_iterator.h.
Referenced by sapt::TSA_tree_iterator< TKN >::down(), sapt::TSA_tree_iterator< TKN >::extend(), sapt::TSA_tree_iterator< TKN >::getSid(), sapt::TSA_tree_iterator< TKN >::markOccurrences(), sapt::TSA_tree_iterator< TKN >::markSentences(), sapt::TSA_tree_iterator< TKN >::randomSample(), sapt::TSA_tree_iterator< TKN >::rawCnt(), sapt::TSA_tree_iterator< TKN >::tfAndRoot(), sapt::TSA_tree_iterator< TKN >::TSA_tree_iterator(), and sapt::TSA_tree_iterator< TKN >::up().
1.5.9