00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_StringVectorTemp_h
00023 #define moses_StringVectorTemp_h
00024
00025 #include <vector>
00026 #include <algorithm>
00027 #include <string>
00028 #include <iterator>
00029 #include <cstdio>
00030 #include <cassert>
00031
00032 #include <boost/iterator/iterator_facade.hpp>
00033
00034 #include "ThrowingFwrite.h"
00035 #include "StringVector.h"
00036
00037 #include "MmapAllocator.h"
00038
00039 namespace Moses
00040 {
00041
00042
00043
00044
00045 template <typename ValueT = unsigned char, typename PosT = unsigned int,
00046 template <typename> class Allocator = std::allocator>
00047 class StringVectorTemp
00048 {
00049 protected:
00050 bool m_sorted;
00051 bool m_memoryMapped;
00052
00053 std::vector<ValueT, Allocator<ValueT> >* m_charArray;
00054 std::vector<PosT> m_positions;
00055
00056 virtual const ValueT* value_ptr(PosT i) const;
00057
00058 public:
00059
00060 typedef ValueIteratorRange<const ValueT *> range;
00061
00062
00063
00064 class RangeIterator : public boost::iterator_facade<RangeIterator,
00065 range, std::random_access_iterator_tag, range, PosT>
00066 {
00067
00068 private:
00069 PosT m_index;
00070 StringVectorTemp<ValueT, PosT, Allocator>* m_container;
00071
00072 public:
00073 RangeIterator();
00074 RangeIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index=0);
00075
00076 PosT get_index();
00077
00078 private:
00079 friend class boost::iterator_core_access;
00080
00081 range dereference() const;
00082 bool equal(RangeIterator const& other) const;
00083 void increment();
00084 void decrement();
00085 void advance(PosT n);
00086
00087 PosT distance_to(RangeIterator const& other) const;
00088 };
00089
00090
00091
00092 class StringIterator : public boost::iterator_facade<StringIterator,
00093 std::string, std::random_access_iterator_tag, const std::string, PosT>
00094 {
00095
00096 private:
00097 PosT m_index;
00098 StringVectorTemp<ValueT, PosT, Allocator>* m_container;
00099
00100 public:
00101 StringIterator();
00102 StringIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index=0);
00103
00104 PosT get_index();
00105
00106 private:
00107 friend class boost::iterator_core_access;
00108
00109 const std::string dereference() const;
00110 bool equal(StringIterator const& other) const;
00111 void increment();
00112 void decrement();
00113 void advance(PosT n);
00114 PosT distance_to(StringIterator const& other) const;
00115 };
00116
00117 typedef RangeIterator iterator;
00118 typedef StringIterator string_iterator;
00119
00120 StringVectorTemp();
00121 StringVectorTemp(Allocator<ValueT> alloc);
00122
00123 virtual ~StringVectorTemp() {
00124 delete m_charArray;
00125 }
00126
00127 void swap(StringVectorTemp<ValueT, PosT, Allocator> &c) {
00128 m_positions.swap(c.m_positions);
00129 m_charArray->swap(*c.m_charArray);
00130
00131 bool temp = m_sorted;
00132 m_sorted = c.m_sorted;
00133 c.m_sorted = temp;
00134 }
00135
00136 bool is_sorted() const;
00137 PosT size() const;
00138 virtual PosT size2() const;
00139
00140 template<class Iterator> Iterator begin() const;
00141 template<class Iterator> Iterator end() const;
00142
00143 iterator begin() const;
00144 iterator end() const;
00145
00146 PosT length(PosT i) const;
00147
00148
00149 const ValueT* begin(PosT i) const;
00150 const ValueT* end(PosT i) const;
00151
00152 void clear() {
00153 m_charArray->clear();
00154 m_sorted = true;
00155 m_positions.clear();
00156 }
00157
00158 range at(PosT i) const;
00159 range operator[](PosT i) const;
00160 range back() const;
00161
00162 template <typename StringT>
00163 void push_back(StringT s);
00164 void push_back(const char* c);
00165
00166 template <typename StringT>
00167 PosT find(StringT &s) const;
00168 PosT find(const char* c) const;
00169 };
00170
00171
00172
00173
00174
00175 template<typename ValueT, typename PosT, template <typename> class Allocator>
00176 StringVectorTemp<ValueT, PosT, Allocator>::StringVectorTemp()
00177 : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >()) { }
00178
00179 template<typename ValueT, typename PosT, template <typename> class Allocator>
00180 StringVectorTemp<ValueT, PosT, Allocator>::StringVectorTemp(Allocator<ValueT> alloc)
00181 : m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >(alloc)) { }
00182
00183 template<typename ValueT, typename PosT, template <typename> class Allocator>
00184 template <typename StringT>
00185 void StringVectorTemp<ValueT, PosT, Allocator>::push_back(StringT s)
00186 {
00187 if(is_sorted() && size() && !(back() < s))
00188 m_sorted = false;
00189
00190 m_positions.push_back(size2());
00191 std::copy(s.begin(), s.end(), std::back_inserter(*m_charArray));
00192 }
00193
00194 template<typename ValueT, typename PosT, template <typename> class Allocator>
00195 void StringVectorTemp<ValueT, PosT, Allocator>::push_back(const char* c)
00196 {
00197 std::string dummy(c);
00198 push_back(dummy);
00199 }
00200
00201 template<typename ValueT, typename PosT, template <typename> class Allocator>
00202 template <typename Iterator>
00203 Iterator StringVectorTemp<ValueT, PosT, Allocator>::begin() const
00204 {
00205 return Iterator(const_cast<StringVectorTemp<ValueT, PosT, Allocator>&>(*this), 0);
00206 }
00207
00208 template<typename ValueT, typename PosT, template <typename> class Allocator>
00209 template <typename Iterator>
00210 Iterator StringVectorTemp<ValueT, PosT, Allocator>::end() const
00211 {
00212 return Iterator(const_cast<StringVectorTemp<ValueT, PosT, Allocator>&>(*this), size());
00213 }
00214
00215 template<typename ValueT, typename PosT, template <typename> class Allocator>
00216 typename StringVectorTemp<ValueT, PosT, Allocator>::iterator StringVectorTemp<ValueT, PosT, Allocator>::begin() const
00217 {
00218 return begin<iterator>();
00219 };
00220
00221 template<typename ValueT, typename PosT, template <typename> class Allocator>
00222 typename StringVectorTemp<ValueT, PosT, Allocator>::iterator StringVectorTemp<ValueT, PosT, Allocator>::end() const
00223 {
00224 return end<iterator>();
00225 };
00226
00227 template<typename ValueT, typename PosT, template <typename> class Allocator>
00228 bool StringVectorTemp<ValueT, PosT, Allocator>::is_sorted() const
00229 {
00230 return m_sorted;
00231 }
00232
00233 template<typename ValueT, typename PosT, template <typename> class Allocator>
00234 PosT StringVectorTemp<ValueT, PosT, Allocator>::size() const
00235 {
00236 return m_positions.size();
00237 }
00238
00239 template<typename ValueT, typename PosT, template <typename> class Allocator>
00240 PosT StringVectorTemp<ValueT, PosT, Allocator>::size2() const
00241 {
00242 return m_charArray->size();
00243 }
00244
00245 template<typename ValueT, typename PosT, template <typename> class Allocator>
00246 typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::at(PosT i) const
00247 {
00248 return range(begin(i), end(i));
00249 }
00250
00251 template<typename ValueT, typename PosT, template <typename> class Allocator>
00252 typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::operator[](PosT i) const
00253 {
00254 return at(i);
00255 }
00256
00257 template<typename ValueT, typename PosT, template <typename> class Allocator>
00258 typename StringVectorTemp<ValueT, PosT, Allocator>::range StringVectorTemp<ValueT, PosT, Allocator>::back() const
00259 {
00260 return at(size()-1);
00261 }
00262
00263 template<typename ValueT, typename PosT, template <typename> class Allocator>
00264 PosT StringVectorTemp<ValueT, PosT, Allocator>::length(PosT i) const
00265 {
00266 if(i+1 < size())
00267 return m_positions[i+1] - m_positions[i];
00268 else
00269 return size2() - m_positions[i];
00270 }
00271
00272 template<typename ValueT, typename PosT, template <typename> class Allocator>
00273 const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::value_ptr(PosT i) const
00274 {
00275 return &(*m_charArray)[m_positions[i]];
00276 }
00277
00278 template<typename ValueT, typename PosT, template <typename> class Allocator>
00279
00280 const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::begin(PosT i) const
00281 {
00282
00283 return value_ptr(i);
00284 }
00285
00286 template<typename ValueT, typename PosT, template <typename> class Allocator>
00287
00288 const ValueT* StringVectorTemp<ValueT, PosT, Allocator>::end(PosT i) const
00289 {
00290
00291 return value_ptr(i) + length(i);
00292 }
00293
00294 template<typename ValueT, typename PosT, template <typename> class Allocator>
00295 template <typename StringT>
00296 PosT StringVectorTemp<ValueT, PosT, Allocator>::find(StringT &s) const
00297 {
00298 if(m_sorted)
00299 return std::distance(begin(), std::lower_bound(begin(), end(), s));
00300 return std::distance(begin(), std::find(begin(), end(), s));
00301 }
00302
00303 template<typename ValueT, typename PosT, template <typename> class Allocator>
00304 PosT StringVectorTemp<ValueT, PosT, Allocator>::find(const char* c) const
00305 {
00306 std::string s(c);
00307 return find(s);
00308 }
00309
00310
00311
00312 template<typename ValueT, typename PosT, template <typename> class Allocator>
00313 StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::RangeIterator() : m_index(0), m_container(0) { }
00314
00315 template<typename ValueT, typename PosT, template <typename> class Allocator>
00316 StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::RangeIterator(StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index)
00317 : m_index(index), m_container(&sv) { }
00318
00319 template<typename ValueT, typename PosT, template <typename> class Allocator>
00320 PosT StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::get_index()
00321 {
00322 return m_index;
00323 }
00324
00325 template<typename ValueT, typename PosT, template <typename> class Allocator>
00326 typename StringVectorTemp<ValueT, PosT, Allocator>::range
00327 StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::dereference() const
00328 {
00329 return typename StringVectorTemp<ValueT, PosT, Allocator>::range(
00330 m_container->begin(m_index),
00331 m_container->end(m_index)
00332 );
00333 }
00334
00335 template<typename ValueT, typename PosT, template <typename> class Allocator>
00336 bool StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::equal(
00337 StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator const& other) const
00338 {
00339 return m_index == other.m_index && m_container == other.m_container;
00340 }
00341
00342 template<typename ValueT, typename PosT, template <typename> class Allocator>
00343 void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::increment()
00344 {
00345 m_index++;
00346 }
00347
00348 template<typename ValueT, typename PosT, template <typename> class Allocator>
00349 void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::decrement()
00350 {
00351 m_index--;
00352 }
00353
00354 template<typename ValueT, typename PosT, template <typename> class Allocator>
00355 void StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::advance(PosT n)
00356 {
00357 m_index += n;
00358 }
00359
00360 template<typename ValueT, typename PosT, template <typename> class Allocator>
00361 PosT StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator::distance_to(
00362 StringVectorTemp<ValueT, PosT, Allocator>::RangeIterator const& other) const
00363 {
00364 return other.m_index - m_index;
00365 }
00366
00367
00368
00369 template<typename ValueT, typename PosT, template <typename> class Allocator>
00370 StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::StringIterator()
00371 : m_index(0), m_container(0) { }
00372
00373 template<typename ValueT, typename PosT, template <typename> class Allocator>
00374 StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::StringIterator(
00375 StringVectorTemp<ValueT, PosT, Allocator> &sv, PosT index) : m_index(index),
00376 m_container(&sv) { }
00377
00378 template<typename ValueT, typename PosT, template <typename> class Allocator>
00379 PosT StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::get_index()
00380 {
00381 return m_index;
00382 }
00383
00384 template<typename ValueT, typename PosT, template <typename> class Allocator>
00385 const std::string StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::dereference() const
00386 {
00387 return StringVectorTemp<ValueT, PosT, Allocator>::range(m_container->begin(m_index),
00388 m_container->end(m_index)).str();
00389 }
00390
00391 template<typename ValueT, typename PosT, template <typename> class Allocator>
00392 bool StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::equal(
00393 StringVectorTemp<ValueT, PosT, Allocator>::StringIterator const& other) const
00394 {
00395 return m_index == other.m_index && m_container == other.m_container;
00396 }
00397
00398 template<typename ValueT, typename PosT, template <typename> class Allocator>
00399 void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::increment()
00400 {
00401 m_index++;
00402 }
00403
00404 template<typename ValueT, typename PosT, template <typename> class Allocator>
00405 void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::decrement()
00406 {
00407 m_index--;
00408 }
00409
00410 template<typename ValueT, typename PosT, template <typename> class Allocator>
00411 void StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::advance(PosT n)
00412 {
00413 m_index += n;
00414 }
00415
00416 template<typename ValueT, typename PosT, template <typename> class Allocator>
00417 PosT StringVectorTemp<ValueT, PosT, Allocator>::StringIterator::distance_to(
00418 StringVectorTemp<ValueT, PosT, Allocator>::StringIterator const& other) const
00419 {
00420 return other.m_index - m_index;
00421 }
00422
00423
00424
00425 typedef StringVectorTemp<unsigned char, unsigned int> MediumStringVectorTemp;
00426 typedef StringVectorTemp<unsigned char, unsigned long> LongStringVectorTemp;
00427
00428 }
00429
00430 #endif