#include <ug_im_tsa.h>


Classes | |
| class | tree_iterator |
Public Member Functions | |
| imTSA () | |
| imTSA (boost::shared_ptr< Ttrack< TOKEN > const > c, bdBitset const *filt, std::ostream *log=NULL, size_t threads=0) | |
| imTSA (imTSA< TOKEN > const &prior, boost::shared_ptr< imTtrack< TOKEN > const > const &crp, std::vector< id_type > const &newsids, size_t const vsize) | |
| count_type | sntCnt (char const *p, char const *const q) const |
| count_type | rawCnt (char const *p, char const *const q) const |
| void | getCounts (char const *p, char const *const q, count_type &sids, count_type &raw) const |
| char const * | readSid (char const *p, char const *q, id_type &sid) const |
| char const * | readSid (char const *p, char const *q,::uint64_t &sid) const |
| char const * | readOffset (char const *p, char const *q, uint16_t &offset) const |
| char const * | readOffset (char const *p, char const *q,::uint64_t &offset) const |
| void | sanityCheck () const |
| void | save_as_mm_tsa (std::string fname) const |
Friends | |
| class | tree_iterator |
Definition at line 57 of file ug_im_tsa.h.
| sapt::imTSA< TOKEN >::imTSA | ( | ) | [inline] |
Definition at line 154 of file ug_im_tsa.h.
References sapt::TSA< TOKEN >::BitSetCachingThreshold, sapt::TSA< TOKEN >::corpusSize, sapt::TSA< TOKEN >::endArray, sapt::TSA< TOKEN >::indexSize, NULL, and sapt::TSA< TOKEN >::startArray.
| sapt::imTSA< TOKEN >::imTSA | ( | boost::shared_ptr< Ttrack< TOKEN > const > | c, | |
| bdBitset const * | filt, | |||
| std::ostream * | log = NULL, | |||
| size_t | threads = 0 | |||
| ) | [inline] |
Definition at line 168 of file ug_im_tsa.h.
References sapt::TSA< TOKEN >::corpus, sapt::TSA< TOKEN >::corpusSize, sapt::TSA< TOKEN >::endArray, index, sapt::TSA< TOKEN >::indexSize, k, m, sapt::TSA< TOKEN >::numTokens, sapt::ttrack::Position::offset, sapt::ttrack::Position::sid, sid, sorter, sapt::TSA< TOKEN >::startArray, stop, and util::WallTime().

| sapt::imTSA< TOKEN >::imTSA | ( | imTSA< TOKEN > const & | prior, | |
| boost::shared_ptr< imTtrack< TOKEN > const > const & | crp, | |||
| std::vector< id_type > const & | newsids, | |||
| size_t const | vsize | |||
| ) | [inline] |
Definition at line 408 of file ug_im_tsa.h.
References sapt::TSA< TOKEN >::corpus, sapt::TSA< TOKEN >::corpusSize, sapt::TSA< TOKEN >::endArray, index, k, n, sapt::TSA< TOKEN >::numTokens, sapt::ttrack::Position::offset, sapt::ttrack::Position::sid, sid, sort(), sorter, and sapt::TSA< TOKEN >::startArray.

| void sapt::imTSA< TOKEN >::getCounts | ( | char const * | p, | |
| char const *const | q, | |||
| count_type & | sids, | |||
| count_type & | raw | |||
| ) | const [inline, virtual] |
Get both sentence and word counts.
Avoids traversing the byte range that represents the suffixes in question twice when dealing with memory-mapped suffix arrays.
Implements sapt::TSA< TOKEN >.
Definition at line 360 of file ug_im_tsa.h.
References sapt::check(), sapt::TSA< TOKEN >::corpus, sapt::ttrack::Position::sid, and sid.

| count_type sapt::imTSA< TOKEN >::rawCnt | ( | char const * | p, | |
| char const *const | q | |||
| ) | const [inline, virtual] |
Implements sapt::TSA< TOKEN >.
Definition at line 350 of file ug_im_tsa.h.
| char const * sapt::imTSA< TOKEN >::readOffset | ( | char const * | p, | |
| char const * | q, | |||
| ::uint64_t & | offset | |||
| ) | const [inline, virtual] |
| char const * sapt::imTSA< TOKEN >::readOffset | ( | char const * | p, | |
| char const * | q, | |||
| uint16_t & | offset | |||
| ) | const [inline, virtual] |
Read the offset part of the index entry into `offset`.
Implements sapt::TSA< TOKEN >.
Definition at line 328 of file ug_im_tsa.h.
| char const * sapt::imTSA< TOKEN >::readSid | ( | char const * | p, | |
| char const * | q, | |||
| ::uint64_t & | sid | |||
| ) | const [inline, virtual] |
| char const * sapt::imTSA< TOKEN >::readSid | ( | char const * | p, | |
| char const * | q, | |||
| id_type & | sid | |||
| ) | const [inline, virtual] |
Read the sentence ID into `sid`.
Implements sapt::TSA< TOKEN >.
Definition at line 306 of file ug_im_tsa.h.
| void sapt::imTSA< TOKEN >::sanityCheck | ( | ) | const |
| void sapt::imTSA< TOKEN >::save_as_mm_tsa | ( | std::string | fname | ) | const [inline] |
Definition at line 380 of file ug_im_tsa.h.
References index, k, tpt::numwrite(), sid, and tpt::tightwrite().
Referenced by build_mmTSA().


| count_type sapt::imTSA< TOKEN >::sntCnt | ( | char const * | p, | |
| char const *const | q | |||
| ) | const |
| friend class tree_iterator | [friend] |
1.5.9