00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_MonotonicVector_h
00023 #define moses_MonotonicVector_h
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <vector>
00033 #include <limits>
00034 #include <algorithm>
00035 #include <cstdio>
00036 #include <cassert>
00037
00038 #include "ThrowingFwrite.h"
00039 #include "ListCoders.h"
00040 #include "MmapAllocator.h"
00041
00042 namespace Moses
00043 {
00044
00045 template<typename PosT = size_t, typename NumT = size_t, PosT stepSize = 32,
00046 template <typename> class Allocator = std::allocator>
00047 class MonotonicVector
00048 {
00049 private:
00050 typedef std::vector<NumT, Allocator<NumT> > Anchors;
00051 typedef std::vector<unsigned int, Allocator<unsigned int> > Diffs;
00052
00053 Anchors m_anchors;
00054 Diffs m_diffs;
00055 std::vector<unsigned int> m_tempDiffs;
00056
00057 size_t m_size;
00058 PosT m_last;
00059 bool m_final;
00060
00061 public:
00062 typedef PosT value_type;
00063
00064 MonotonicVector() : m_size(0), m_last(0), m_final(false) {}
00065
00066 size_t size() const {
00067 return m_size + m_tempDiffs.size();
00068 }
00069
00070 PosT at(size_t i) const {
00071 PosT s = stepSize;
00072 PosT j = m_anchors[i / s];
00073 PosT r = i % s;
00074
00075 typename Diffs::const_iterator it = m_diffs.begin() + j;
00076
00077 PosT k = 0;
00078 k += VarInt32::DecodeAndSum(it, m_diffs.end(), 1);
00079 if(i < m_size)
00080 k += Simple9::DecodeAndSum(it, m_diffs.end(), r);
00081 else if(i < m_size + m_tempDiffs.size())
00082 for(size_t l = 0; l < r; l++)
00083 k += m_tempDiffs[l];
00084
00085 return k;
00086 }
00087
00088 PosT operator[](PosT i) const {
00089 return at(i);
00090 }
00091
00092 PosT back() const {
00093 return at(size()-1);
00094 }
00095
00096 void push_back(PosT i) {
00097 assert(m_final != true);
00098
00099 if(m_anchors.size() == 0 && m_tempDiffs.size() == 0) {
00100 m_anchors.push_back(0);
00101 VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs));
00102 m_last = i;
00103 m_size++;
00104
00105 return;
00106 }
00107
00108 if(m_tempDiffs.size() == stepSize-1) {
00109 Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
00110 std::back_inserter(m_diffs));
00111 m_anchors.push_back(m_diffs.size());
00112 VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs));
00113
00114 m_size += m_tempDiffs.size() + 1;
00115 m_tempDiffs.clear();
00116 } else {
00117 PosT last = m_last;
00118 PosT diff = i - last;
00119 m_tempDiffs.push_back(diff);
00120 }
00121 m_last = i;
00122 }
00123
00124 void commit() {
00125 assert(m_final != true);
00126 Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
00127 std::back_inserter(m_diffs));
00128 m_size += m_tempDiffs.size();
00129 m_tempDiffs.clear();
00130 m_final = true;
00131 }
00132
00133 size_t usage() {
00134 return m_diffs.size() * sizeof(unsigned int)
00135 + m_anchors.size() * sizeof(NumT);
00136 }
00137
00138 size_t load(std::FILE* in, bool map = false) {
00139 size_t byteSize = 0;
00140
00141 byteSize += fread(&m_final, sizeof(bool), 1, in) * sizeof(bool);
00142 byteSize += fread(&m_size, sizeof(size_t), 1, in) * sizeof(size_t);
00143 byteSize += fread(&m_last, sizeof(PosT), 1, in) * sizeof(PosT);
00144
00145 byteSize += loadVector(m_diffs, in, map);
00146 byteSize += loadVector(m_anchors, in, map);
00147
00148 return byteSize;
00149 }
00150
00151 template <typename ValueT>
00152 size_t loadVector(std::vector<ValueT, std::allocator<ValueT> >& v,
00153 std::FILE* in, bool map = false) {
00154
00155 assert(map == false);
00156
00157 size_t byteSize = 0;
00158
00159 size_t valSize;
00160 byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
00161
00162 v.resize(valSize, 0);
00163 byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
00164
00165 return byteSize;
00166 }
00167
00168 template <typename ValueT>
00169 size_t loadVector(std::vector<ValueT, MmapAllocator<ValueT> >& v,
00170 std::FILE* in, bool map = false) {
00171 size_t byteSize = 0;
00172
00173 size_t valSize;
00174 byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
00175
00176 if(map == false) {
00177
00178
00179
00180 v.resize(valSize, 0);
00181 byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
00182 } else {
00183
00184
00185
00186 size_t valPos = std::ftell(in);
00187
00188 Allocator<ValueT> alloc(in, valPos);
00189 std::vector<ValueT, Allocator<ValueT> > vTemp(alloc);
00190 vTemp.resize(valSize);
00191 v.swap(vTemp);
00192
00193 std::fseek(in, valSize * sizeof(ValueT), SEEK_CUR);
00194 byteSize += valSize * sizeof(ValueT);
00195 }
00196
00197 return byteSize;
00198 }
00199
00200 size_t save(std::FILE* out) {
00201 if(!m_final)
00202 commit();
00203
00204 bool byteSize = 0;
00205 byteSize += ThrowingFwrite(&m_final, sizeof(bool), 1, out) * sizeof(bool);
00206 byteSize += ThrowingFwrite(&m_size, sizeof(size_t), 1, out) * sizeof(size_t);
00207 byteSize += ThrowingFwrite(&m_last, sizeof(PosT), 1, out) * sizeof(PosT);
00208
00209 size_t size = m_diffs.size();
00210 byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
00211 byteSize += ThrowingFwrite(&m_diffs[0], sizeof(unsigned int), size, out) * sizeof(unsigned int);
00212
00213 size = m_anchors.size();
00214 byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
00215 byteSize += ThrowingFwrite(&m_anchors[0], sizeof(NumT), size, out) * sizeof(NumT);
00216
00217 return byteSize;
00218 }
00219
00220 void swap(MonotonicVector<PosT, NumT, stepSize, Allocator> &mv) {
00221 if(!m_final)
00222 commit();
00223
00224 m_diffs.swap(mv.m_diffs);
00225 m_anchors.swap(mv.m_anchors);
00226 }
00227 };
00228
00229 }
00230 #endif