#include <ug_bitext.h>


Classes | |
| class | agenda |
Public Types | |
| typedef TKN | Token |
| typedef TSA< Token >::tree_iterator | iter |
| typedef std::vector < PhrasePair< Token > > | vec_ppair |
| typedef lru_cache::LRU_Cache < uint64_t, vec_ppair > | pplist_cache_t |
| typedef TSA< Token > | tsa |
Public Member Functions | |
| bool | find_trg_phr_bounds (PhraseExtractionRecord &rec) const |
| Given the source phrase sid[start:stop], find the bounds of the corresponding target phrase. | |
| bool | find_trg_phr_bounds (size_t const sid, size_t const start, size_t const stop, size_t &s1, size_t &s2, size_t &e1, size_t &e2, int &po_fwd, int &po_bwd, std::vector< unsigned char > *core_alignment, bitvector *full_alignment, bool const flip) const |
| SPTR< pstats > | prep2 (iter const &phrase, int max_sample=-1) const |
| SPTR< pstats > | prep2 (ttasksptr const &ttask, iter const &phrase, bool const track_sids, int max_sample=-1) const |
| virtual void | open (std::string const base, std::string const L1, std::string const L2)=0 |
| SPTR< pstats > | lookup (iter const &phrase, int max_sample=-1) const |
| void | prep (iter const &phrase) const |
| SPTR< pstats > | lookup (ttasksptr const &ttask, iter const &phrase, int max_sample=-1) const |
| void | prep (ttasksptr const &ttask, iter const &phrase, bool const track_sids) const |
| void | setDefaultSampleSize (size_t const max_samples) |
| size_t | getDefaultSampleSize () const |
| std::string | toString (uint64_t pid, int isL2) const |
| virtual size_t | revision () const |
| SPTR< SentenceBias > | loadSentenceBias (std::string const &fname) const |
| SPTR< DocumentBias > | SetupDocumentBias (std::string const &bserver, std::string const &text, std::ostream *log) const |
| SPTR< DocumentBias > | SetupDocumentBias (std::map< std::string, float > context_weights, std::ostream *log) const |
| void | mark_match (Token const *start, Token const *end, iter const &m, bitvector &check) const |
| void | write_yawat_alignment (id_type const sid, iter const *m1, iter const *m2, std::ostream &out) const |
| std::string | sid2docname (id_type const sid) const |
| std::string | docid2name (id_type const sid) const |
| int | docname2docid (std::string const &name) const |
| std::vector< id_type > const * | sid2did () const |
| int | sid2did (uint32_t sid) const |
Public Attributes | |
| SPTR< Ttrack< char > > | Tx |
| SPTR< Ttrack< Token > > | T1 |
| SPTR< Ttrack< Token > > | T2 |
| SPTR< TokenIndex > | V1 |
| SPTR< TokenIndex > | V2 |
| SPTR< TSA< Token > > | I1 |
| SPTR< TSA< Token > > | I2 |
Protected Member Functions | |
| Bitext (size_t const max_sample=1000, size_t const xnum_workers=16) | |
| Bitext (Ttrack< Token > *const t1, Ttrack< Token > *const t2, Ttrack< char > *const tx, TokenIndex *const v1, TokenIndex *const v2, TSA< Token > *const i1, TSA< Token > *const i2, size_t const max_sample=1000, size_t const xnum_workers=16) | |
Protected Attributes | |
| boost::shared_mutex | m_lock |
| SPTR< agenda > | ag |
| size_t | m_num_workers |
| size_t | m_default_sample_size |
| size_t | m_pstats_cache_threshold |
| SPTR< pstats::cache_t > | m_cache1 |
| SPTR< pstats::cache_t > | m_cache2 |
| std::vector< std::string > | m_docname |
| std::map< std::string, id_type > | m_docname2docid |
| SPTR< std::vector< id_type > > | m_sid2docid |
| pplist_cache_t | m_pplist_cache1 |
| pplist_cache_t | m_pplist_cache2 |
Friends | |
| class | BitextSampler |
| class | Moses::Mmsapt |
Definition at line 105 of file ug_bitext.h.
| typedef TSA<Token>::tree_iterator sapt::Bitext< TKN >::iter |
Definition at line 110 of file ug_bitext.h.
| typedef lru_cache::LRU_Cache<uint64_t, vec_ppair> sapt::Bitext< TKN >::pplist_cache_t |
Definition at line 112 of file ug_bitext.h.
| typedef TKN sapt::Bitext< TKN >::Token |
Definition at line 109 of file ug_bitext.h.
| typedef TSA<Token> sapt::Bitext< TKN >::tsa |
Definition at line 113 of file ug_bitext.h.
| typedef std::vector<PhrasePair<Token> > sapt::Bitext< TKN >::vec_ppair |
Definition at line 111 of file ug_bitext.h.
| sapt::Bitext< Token >::Bitext | ( | size_t const | max_sample = 1000, | |
| size_t const | xnum_workers = 16 | |||
| ) | [inline, protected] |
Definition at line 876 of file ug_bitext.h.
| sapt::Bitext< Token >::Bitext | ( | Ttrack< Token > *const | t1, | |
| Ttrack< Token > *const | t2, | |||
| Ttrack< char > *const | tx, | |||
| TokenIndex *const | v1, | |||
| TokenIndex *const | v2, | |||
| TSA< Token > *const | i1, | |||
| TSA< Token > *const | i2, | |||
| size_t const | max_sample = 1000, | |||
| size_t const | xnum_workers = 16 | |||
| ) | [inline, protected] |
Definition at line 886 of file ug_bitext.h.
| std::string sapt::Bitext< Token >::docid2name | ( | id_type const | sid | ) | const [inline] |
Definition at line 782 of file ug_bitext.h.
Referenced by print_evidence_list().

| int sapt::Bitext< Token >::docname2docid | ( | std::string const & | name | ) | const [inline] |
Definition at line 771 of file ug_bitext.h.
| bool sapt::Bitext< Token >::find_trg_phr_bounds | ( | size_t const | sid, | |
| size_t const | start, | |||
| size_t const | stop, | |||
| size_t & | s1, | |||
| size_t & | s2, | |||
| size_t & | e1, | |||
| size_t & | e2, | |||
| int & | po_fwd, | |||
| int & | po_bwd, | |||
| std::vector< unsigned char > * | core_alignment, | |||
| bitvector * | full_alignment, | |||
| bool const | flip | |||
| ) | const [inline] |
Definition at line 936 of file ug_bitext.h.
| bool sapt::Bitext< Token >::find_trg_phr_bounds | ( | PhraseExtractionRecord & | rec | ) | const [inline] |
| size_t sapt::Bitext< Token >::getDefaultSampleSize | ( | ) | const [inline] |
Definition at line 856 of file ug_bitext.h.
| SPTR< SentenceBias > sapt::Bitext< Token >::loadSentenceBias | ( | std::string const & | fname | ) | const [inline] |
Definition at line 823 of file ug_bitext.h.
| SPTR< pstats > sapt::Bitext< Token >::lookup | ( | ttasksptr const & | ttask, | |
| iter const & | phrase, | |||
| int | max_sample = -1 | |||
| ) | const [inline] |
| SPTR<pstats> sapt::Bitext< TKN >::lookup | ( | iter const & | phrase, | |
| int | max_sample = -1 | |||
| ) | const |
| void sapt::Bitext< Token >::mark_match | ( | Token const * | start, | |
| Token const * | end, | |||
| iter const & | m, | |||
| bitvector & | check | |||
| ) | const [inline] |
Definition at line 1193 of file ug_bitext.h.
| virtual void sapt::Bitext< TKN >::open | ( | std::string const | base, | |
| std::string const | L1, | |||
| std::string const | L2 | |||
| ) | [pure virtual] |
Implemented in sapt::imBitext< TKN >, and sapt::mmBitext< TKN >.
| void sapt::Bitext< Token >::prep | ( | ttasksptr const & | ttask, | |
| iter const & | phrase, | |||
| bool const | track_sids | |||
| ) | const [inline] |
Definition at line 33 of file ug_bitext_moses.h.
References sapt::Bitext< TKN >::m_default_sample_size, and sapt::Bitext< TKN >::prep2().

| void sapt::Bitext< Token >::prep | ( | iter const & | phrase | ) | const [inline] |
Definition at line 1076 of file ug_bitext.h.
| SPTR< pstats > sapt::Bitext< Token >::prep2 | ( | ttasksptr const & | ttask, | |
| iter const & | phrase, | |||
| bool const | track_sids, | |||
| int | max_sample = -1 | |||
| ) | const [inline] |
Definition at line 47 of file ug_bitext_moses.h.
References sapt::TSA_tree_iterator< TKN >::approxOccurrenceCount(), sapt::TSA_tree_iterator< TKN >::getPid(), I1, NULL, sapt::TSA_tree_iterator< TKN >::root, and UTIL_THROW_IF2.

| SPTR< pstats > sapt::Bitext< Token >::prep2 | ( | iter const & | phrase, | |
| int | max_sample = -1 | |||
| ) | const [inline] |
Definition at line 1091 of file ug_bitext.h.
Referenced by sapt::Bitext< TKN >::prep().

| virtual size_t sapt::Bitext< TKN >::revision | ( | ) | const [inline, virtual] |
| void sapt::Bitext< Token >::setDefaultSampleSize | ( | size_t const | max_samples | ) | [inline] |
Definition at line 863 of file ug_bitext.h.
| SPTR< DocumentBias > sapt::Bitext< Token >::SetupDocumentBias | ( | std::map< std::string, float > | context_weights, | |
| std::ostream * | log | |||
| ) | const [inline] |
Definition at line 1063 of file ug_bitext.h.
| SPTR< DocumentBias > sapt::Bitext< Token >::SetupDocumentBias | ( | std::string const & | bserver, | |
| std::string const & | text, | |||
| std::ostream * | log | |||
| ) | const [inline] |
Definition at line 1049 of file ug_bitext.h.
| int sapt::Bitext< Token >::sid2did | ( | uint32_t | sid | ) | const [inline] |
Definition at line 812 of file ug_bitext.h.
| std::vector< id_type > const * sapt::Bitext< Token >::sid2did | ( | ) | const [inline] |
Definition at line 804 of file ug_bitext.h.
Referenced by main().

| std::string sapt::Bitext< Token >::sid2docname | ( | id_type const | sid | ) | const [inline] |
Definition at line 793 of file ug_bitext.h.
Referenced by main().

| std::string sapt::Bitext< Token >::toString | ( | uint64_t | pid, | |
| int | isL2 | |||
| ) | const [inline] |
Definition at line 838 of file ug_bitext.h.
| void sapt::Bitext< Token >::write_yawat_alignment | ( | id_type const | sid, | |
| iter const * | m1, | |||
| iter const * | m2, | |||
| std::ostream & | out | |||
| ) | const [inline] |
Definition at line 1223 of file ug_bitext.h.
friend class BitextSampler [friend] |
Definition at line 108 of file ug_bitext.h.
friend class Moses::Mmsapt [friend] |
Definition at line 114 of file ug_bitext.h.
SPTR<agenda> sapt::Bitext< TKN >::ag [mutable, protected] |
Definition at line 118 of file ug_bitext.h.
| SPTR<TSA<Token> > sapt::Bitext< TKN >::I1 |
| SPTR<TSA<Token> > sapt::Bitext< TKN >::I2 |
SPTR<pstats::cache_t> sapt::Bitext< TKN >::m_cache1 [protected] |
Definition at line 124 of file ug_bitext.h.
SPTR<pstats::cache_t> sapt::Bitext< TKN >::m_cache2 [protected] |
Definition at line 124 of file ug_bitext.h.
size_t sapt::Bitext< TKN >::m_default_sample_size [protected] |
std::vector<std::string> sapt::Bitext< TKN >::m_docname [protected] |
Definition at line 126 of file ug_bitext.h.
std::map<std::string,id_type> sapt::Bitext< TKN >::m_docname2docid [protected] |
Definition at line 127 of file ug_bitext.h.
boost::shared_mutex sapt::Bitext< TKN >::m_lock [mutable, protected] |
Definition at line 116 of file ug_bitext.h.
size_t sapt::Bitext< TKN >::m_num_workers [protected] |
Definition at line 120 of file ug_bitext.h.
pplist_cache_t sapt::Bitext< TKN >::m_pplist_cache1 [mutable, protected] |
Definition at line 130 of file ug_bitext.h.
pplist_cache_t sapt::Bitext< TKN >::m_pplist_cache2 [mutable, protected] |
Definition at line 130 of file ug_bitext.h.
size_t sapt::Bitext< TKN >::m_pstats_cache_threshold [protected] |
Definition at line 123 of file ug_bitext.h.
SPTR<std::vector<id_type> > sapt::Bitext< TKN >::m_sid2docid [protected] |
| SPTR<Ttrack<Token> > sapt::Bitext< TKN >::T1 |
Definition at line 135 of file ug_bitext.h.
Referenced by sapt::PScoreLex1< Token >::operator()(), sapt::PScoreLengthRatio< Token >::operator()(), and sapt::Bitext< TKN >::agenda::worker::worker().
| SPTR<Ttrack<Token> > sapt::Bitext< TKN >::T2 |
Definition at line 136 of file ug_bitext.h.
Referenced by sapt::PScoreLex1< Token >::operator()(), and sapt::PScoreLengthRatio< Token >::operator()().
| SPTR<Ttrack<char> > sapt::Bitext< TKN >::Tx |
Definition at line 134 of file ug_bitext.h.
| SPTR<TokenIndex> sapt::Bitext< TKN >::V1 |
Definition at line 137 of file ug_bitext.h.
Referenced by main(), sapt::PScoreLex1< Token >::operator()(), show(), and show_pair().
| SPTR<TokenIndex> sapt::Bitext< TKN >::V2 |
Definition at line 138 of file ug_bitext.h.
Referenced by main(), sapt::PScoreLex1< Token >::operator()(), and show_pair().
1.5.9