#include <ug_mm_tsa.h>
Public Types | |
typedef TSA< TOKEN >::tree_iterator | tree_iterator |
Public Member Functions | |
mmTSA () | |
mmTSA (std::string fname, Ttrack< TOKEN > const *c) | |
void | open (std::string fname, typename boost::shared_ptr< Ttrack< TOKEN > const > c) |
count_type | sntCnt (char const *p, char const *const q) const |
count_type | rawCnt (char const *p, char const *const q) const |
void | getCounts (char const *p, char const *const q, count_type &sids, count_type &raw) const |
char const * | readSid (char const *p, char const *q, id_type &sid) const |
char const * | readSid (char const *p, char const *q, ::uint64_t &sid) const |
char const * | readOffset (char const *p, char const *q, uint16_t &offset) const |
char const * | readOffset (char const *p, char const *q, ::uint64_t &offset) const |
void | sanityCheck () const |
Public Attributes | |
filepos_type const * | index |
Friends | |
class | TSA_tree_iterator< TOKEN > |
Definition at line 25 of file ug_mm_tsa.h.
typedef TSA<TOKEN>::tree_iterator sapt::mmTSA< TOKEN >::tree_iterator
sapt::mmTSA< TOKEN >::mmTSA () [inline]
Definition at line 100 of file ug_mm_tsa.h.
References sapt::TSA< TOKEN >::BitSetCachingThreshold, sapt::TSA< TOKEN >::endArray, NULL, and sapt::TSA< TOKEN >::startArray.
sapt::mmTSA< TOKEN >::mmTSA (std::string fname, Ttrack< TOKEN > const *c) [inline]
Definition at line 111 of file ug_mm_tsa.h.
References sapt::mmTSA< TOKEN >::open().
void sapt::mmTSA< TOKEN >::getCounts (char const *p, char const *const q, count_type &sids, count_type &raw) const [inline, virtual]
Get both sentence and word counts.
This avoids having to scan the byte range that represents the suffixes in question twice when dealing with memory-mapped suffix arrays.
Implements sapt::TSA< TOKEN >.
Definition at line 240 of file ug_mm_tsa.h.
References sapt::check(), sapt::TSA< TOKEN >::corpus, sid, and tpt::tightread().
void sapt::mmTSA< TOKEN >::open (std::string fname, typename boost::shared_ptr< Ttrack< TOKEN > const > c) [inline]
Definition at line 121 of file ug_mm_tsa.h.
References sapt::TSA< TOKEN >::bsc, sapt::TSA< TOKEN >::corpus, sapt::TSA< TOKEN >::corpusSize, sapt::TSA< TOKEN >::endArray, sapt::mmTSA< TOKEN >::index, sapt::TSA< TOKEN >::indexSize, tpt::numread(), sapt::TSA< TOKEN >::numTokens, Moses::prime(), and sapt::TSA< TOKEN >::startArray.
Referenced by main(), sapt::mmTSA< TOKEN >::mmTSA(), sapt::mmBitext< TKN >::open(), and sapt::imBitext< TKN >::open().
count_type sapt::mmTSA< TOKEN >::rawCnt (char const *p, char const *const q) const [inline, virtual]
Implements sapt::TSA< TOKEN >.
Definition at line 222 of file ug_mm_tsa.h.
References sid, and tpt::tightread().
char const * sapt::mmTSA< TOKEN >::readOffset (char const *p, char const *q, ::uint64_t &offset) const [inline, virtual]
Implements sapt::TSA< TOKEN >.
Definition at line 212 of file ug_mm_tsa.h.
References tpt::tightread().
char const * sapt::mmTSA< TOKEN >::readOffset (char const *p, char const *q, uint16_t &offset) const [inline, virtual]
Read the offset part of the index entry into `offset`.
Implements sapt::TSA< TOKEN >.
Definition at line 201 of file ug_mm_tsa.h.
References tpt::tightread().
char const * sapt::mmTSA< TOKEN >::readSid (char const *p, char const *q, ::uint64_t &sid) const [inline, virtual]
Implements sapt::TSA< TOKEN >.
Definition at line 190 of file ug_mm_tsa.h.
References tpt::tightread().
char const * sapt::mmTSA< TOKEN >::readSid (char const *p, char const *q, id_type &sid) const [inline, virtual]
Read the sentence ID into `sid`.
Implements sapt::TSA< TOKEN >.
Definition at line 180 of file ug_mm_tsa.h.
References tpt::tightread().
void sapt::mmTSA< TOKEN >::sanityCheck () const
count_type sapt::mmTSA< TOKEN >::sntCnt (char const *p, char const *const q) const
friend class TSA_tree_iterator< TOKEN > [friend]
Definition at line 29 of file ug_mm_tsa.h.
filepos_type const* sapt::mmTSA< TOKEN >::index