00001
00002 #pragma once
00003 #include "ug_bitext.h"
00004
00005 namespace sapt
00006 {
00007 template<typename TKN>
00008 class imBitext : public Bitext<TKN>
00009 {
00010 SPTR<imTtrack<char> > myTx;
00011 SPTR<imTtrack<TKN> > myT1;
00012 SPTR<imTtrack<TKN> > myT2;
00013 SPTR<imTSA<TKN> > myI1;
00014 SPTR<imTSA<TKN> > myI2;
00015 static Moses::ThreadSafeCounter my_revision;
00016 public:
00017 size_t revision() const { return my_revision; }
00018 void open(std::string const base, std::string const L1, std::string L2);
00019 imBitext(SPTR<TokenIndex> const& V1,
00020 SPTR<TokenIndex> const& V2,
00021 size_t max_sample = 5000, size_t num_workers=4);
00022 imBitext(size_t max_sample = 5000, size_t num_workers=4);
00023 imBitext(imBitext const& other);
00024
00025
00026
00027
00028 SPTR<imBitext<TKN> >
00029 add(std::vector<std::string> const& s1,
00030 std::vector<std::string> const& s2,
00031 std::vector<std::string> const& a) const;
00032
00033 };
00034
00035 template<typename TKN>
00036 Moses::ThreadSafeCounter
00037 imBitext<TKN>::my_revision;
00038
00039 template<typename TKN>
00040 imBitext<TKN>::
00041 imBitext(size_t max_sample, size_t num_workers)
00042 : Bitext<TKN>(max_sample, num_workers)
00043 {
00044 this->m_default_sample_size = max_sample;
00045 this->V1.reset(new TokenIndex());
00046 this->V2.reset(new TokenIndex());
00047 this->V1->setDynamic(true);
00048 this->V2->setDynamic(true);
00049 ++my_revision;
00050 }
00051
00052 template<typename TKN>
00053 imBitext<TKN>::
00054 imBitext(SPTR<TokenIndex> const& v1,
00055 SPTR<TokenIndex> const& v2,
00056 size_t max_sample, size_t num_workers)
00057 : Bitext<TKN>(max_sample, num_workers)
00058 {
00059
00060 this->V1 = v1;
00061 this->V2 = v2;
00062 this->V1->setDynamic(true);
00063 this->V2->setDynamic(true);
00064 ++my_revision;
00065 }
00066
00067
00068 template<typename TKN>
00069 imBitext<TKN>::
00070 imBitext(imBitext<TKN> const& other)
00071 {
00072 this->myTx = other.myTx;
00073 this->myT1 = other.myT1;
00074 this->myT2 = other.myT2;
00075 this->myI1 = other.myI1;
00076 this->myI2 = other.myI2;
00077 this->Tx = this->myTx;
00078 this->T1 = this->myT1;
00079 this->T2 = this->myT2;
00080 this->I1 = this->myI1;
00081 this->I2 = this->myI2;
00082 this->V1 = other.V1;
00083 this->V2 = other.V2;
00084 this->m_default_sample_size = other.m_default_sample_size;
00085 this->m_num_workers = other.m_num_workers;
00086 ++my_revision;
00087 }
00088
00089 template<>
00090 SPTR<imBitext<L2R_Token<SimpleWordId> > >
00091 imBitext<L2R_Token<SimpleWordId> >::
00092 add(std::vector<std::string> const& s1,
00093 std::vector<std::string> const& s2,
00094 std::vector<std::string> const& aln) const;
00095
00096 template<typename TKN>
00097 SPTR<imBitext<TKN> >
00098 imBitext<TKN>::
00099 add(std::vector<std::string> const& s1,
00100 std::vector<std::string> const& s2,
00101 std::vector<std::string> const& aln) const
00102 {
00103 throw "Not yet implemented";
00104 }
00105
00106
00107 template<typename TKN>
00108 void
00109 imBitext<TKN>::
00110 open(std::string const base, std::string const L1, std::string L2)
00111 {
00112 mmTtrack<TKN>& t1 = *reinterpret_cast<mmTtrack<TKN>*>(this->T1.get());
00113 mmTtrack<TKN>& t2 = *reinterpret_cast<mmTtrack<TKN>*>(this->T2.get());
00114 mmTtrack<char>& tx = *reinterpret_cast<mmTtrack<char>*>(this->Tx.get());
00115 t1.open(base+L1+".mct");
00116 t2.open(base+L2+".mct");
00117 tx.open(base+L1+"-"+L2+".mam");
00118 this->V1->open(base+L1+".tdx"); this->V1->iniReverseIndex();
00119 this->V2->open(base+L2+".tdx"); this->V2->iniReverseIndex();
00120 mmTSA<TKN>& i1 = *reinterpret_cast<mmTSA<TKN>*>(this->I1.get());
00121 mmTSA<TKN>& i2 = *reinterpret_cast<mmTSA<TKN>*>(this->I2.get());
00122 i1.open(base+L1+".sfa", this->T1);
00123 i2.open(base+L2+".sfa", this->T2);
00124 assert(this->T1->size() == this->T2->size());
00125 }
00126
00127 }
00128