00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_WordsBitmap_h
00023 #define moses_WordsBitmap_h
00024
00025 #include <algorithm>
00026 #include <limits>
00027 #include <vector>
00028 #include <iostream>
00029 #include <cstring>
00030 #include <cmath>
00031 #include <cstdlib>
00032 #include "TypeDef.h"
00033 #include "Range.h"
00034
00035 namespace Moses
00036 {
00037 typedef unsigned long WordsBitmapID;
00038
00050 class Bitmap
00051 {
00052 friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap);
00053 private:
00054 std::vector<char> m_bitmap;
00055 size_t m_firstGap;
00056 size_t m_numWordsCovered;
00057
00058 Bitmap();
00059 Bitmap& operator= (const Bitmap& other);
00060
00062 void UpdateFirstGap(size_t startPos, size_t endPos, bool value) {
00063 if (value) {
00064
00065 if (startPos <= m_firstGap && m_firstGap <= endPos) {
00066 m_firstGap = NOT_FOUND;
00067 for (size_t i = endPos + 1 ; i < m_bitmap.size(); ++i) {
00068 if (!m_bitmap[i]) {
00069 m_firstGap = i;
00070 break;
00071 }
00072 }
00073 }
00074
00075 } else {
00076
00077 if (startPos < m_firstGap) {
00078 m_firstGap = startPos;
00079 }
00080 }
00081 }
00082
00084 void
00085 SetValueNonOverlap(Range const& range) {
00086 size_t startPos = range.GetStartPos();
00087 size_t endPos = range.GetEndPos();
00088
00089 for(size_t pos = startPos ; pos <= endPos ; pos++) {
00090 m_bitmap[pos] = true;
00091 }
00092
00093 m_numWordsCovered += range.GetNumWordsCovered();
00094 UpdateFirstGap(startPos, endPos, true);
00095 }
00096
00097 public:
00099 explicit Bitmap(size_t size, const std::vector<bool>& initializer);
00100
00102 explicit Bitmap(size_t size);
00103
00105 explicit Bitmap(const Bitmap ©);
00106
00107 explicit Bitmap(const Bitmap ©, const Range &range);
00108
00110 size_t GetNumWordsCovered() const {
00111 return m_numWordsCovered;
00112 }
00113
00115 size_t GetFirstGapPos() const {
00116 return m_firstGap;
00117 }
00118
00119
00121 size_t GetLastGapPos() const {
00122 for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) {
00123 if (!m_bitmap[pos]) {
00124 return pos;
00125 }
00126 }
00127
00128 return NOT_FOUND;
00129 }
00130
00131
00133 size_t GetLastPos() const {
00134 for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) {
00135 if (m_bitmap[pos]) {
00136 return pos;
00137 }
00138 }
00139
00140 return NOT_FOUND;
00141 }
00142
00144 bool GetValue(size_t pos) const {
00145 return bool(m_bitmap[pos]);
00146 }
00148 void SetValue( size_t pos, bool value ) {
00149 bool origValue = m_bitmap[pos];
00150 if (origValue == value) {
00151
00152 } else {
00153 m_bitmap[pos] = value;
00154 UpdateFirstGap(pos, pos, value);
00155 if (value) {
00156 ++m_numWordsCovered;
00157 } else {
00158 --m_numWordsCovered;
00159 }
00160 }
00161 }
00162
00164 bool IsComplete() const {
00165 return GetSize() == GetNumWordsCovered();
00166 }
00168 bool Overlap(const Range &compare) const {
00169 for (size_t pos = compare.GetStartPos() ; pos <= compare.GetEndPos() ; pos++) {
00170 if (m_bitmap[pos])
00171 return true;
00172 }
00173 return false;
00174 }
00176 size_t GetSize() const {
00177 return m_bitmap.size();
00178 }
00179
00180 inline size_t GetEdgeToTheLeftOf(size_t l) const {
00181 if (l == 0) return l;
00182 while (l && !m_bitmap[l-1]) {
00183 --l;
00184 }
00185 return l;
00186 }
00187
00188 inline size_t GetEdgeToTheRightOf(size_t r) const {
00189 if (r+1 == m_bitmap.size()) return r;
00190 return (
00191 std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) -
00192 m_bitmap.begin()
00193 ) - 1;
00194 }
00195
00196
00198 WordsBitmapID GetID() const {
00199 assert(m_bitmap.size() < (1<<16));
00200
00201 size_t start = GetFirstGapPos();
00202 if (start == NOT_FOUND) start = m_bitmap.size();
00203
00204 size_t end = GetLastPos();
00205 if (end == NOT_FOUND) end = 0;
00206
00207 assert(end < start || end-start <= 16);
00208 WordsBitmapID id = 0;
00209 for(size_t pos = end; pos > start; pos--) {
00210 id = id*2 + (int) GetValue(pos);
00211 }
00212 return id + (1<<16) * start;
00213 }
00214
00216 WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const {
00217 assert(m_bitmap.size() < (1<<16));
00218
00219 size_t start = GetFirstGapPos();
00220 if (start == NOT_FOUND) start = m_bitmap.size();
00221
00222 size_t end = GetLastPos();
00223 if (end == NOT_FOUND) end = 0;
00224
00225 if (start == startPos) start = endPos+1;
00226 if (end < endPos) end = endPos;
00227
00228 assert(end < start || end-start <= 16);
00229 WordsBitmapID id = 0;
00230 for(size_t pos = end; pos > start; pos--) {
00231 id = id*2;
00232 if (GetValue(pos) || (startPos<=pos && pos<=endPos))
00233 id++;
00234 }
00235 return id + (1<<16) * start;
00236 }
00237
00238
00239 size_t hash() const;
00240 bool operator==(const Bitmap& other) const;
00241 bool operator!=(const Bitmap& other) const {
00242 return !(*this == other);
00243 }
00244
00245 TO_STRING();
00246 };
00247
00248 }
00249 #endif