00001 #ifndef UTIL_TOKENIZE_PIECE_H
00002 #define UTIL_TOKENIZE_PIECE_H
00003
00004 #include "util/exception.hh"
00005 #include "util/string_piece.hh"
00006
00007 #include <boost/iterator/iterator_facade.hpp>
00008
00009 #include <algorithm>
00010 #include <cstring>
00011
00012 namespace util {
00013
00014
00015 class OutOfTokens : public Exception {
00016 public:
00017 OutOfTokens() throw() {}
00018 ~OutOfTokens() throw() {}
00019 };
00020
00021 class SingleCharacter {
00022 public:
00023 SingleCharacter() {}
00024 explicit SingleCharacter(char delim) : delim_(delim) {}
00025
00026 StringPiece Find(const StringPiece &in) const {
00027 return StringPiece(std::find(in.data(), in.data() + in.size(), delim_), 1);
00028 }
00029
00030 private:
00031 char delim_;
00032 };
00033
00034 class MultiCharacter {
00035 public:
00036 MultiCharacter() {}
00037
00038 explicit MultiCharacter(const StringPiece &delimiter) : delimiter_(delimiter) {}
00039
00040 StringPiece Find(const StringPiece &in) const {
00041 return StringPiece(std::search(in.data(), in.data() + in.size(), delimiter_.data(), delimiter_.data() + delimiter_.size()), delimiter_.size());
00042 }
00043
00044 private:
00045 StringPiece delimiter_;
00046 };
00047
00048 class AnyCharacter {
00049 public:
00050 AnyCharacter() {}
00051 explicit AnyCharacter(const StringPiece &chars) : chars_(chars) {}
00052
00053 StringPiece Find(const StringPiece &in) const {
00054 return StringPiece(std::find_first_of(in.data(), in.data() + in.size(), chars_.data(), chars_.data() + chars_.size()), 1);
00055 }
00056
00057 private:
00058 StringPiece chars_;
00059 };
00060
00061 class BoolCharacter {
00062 public:
00063 BoolCharacter() {}
00064
00065 explicit BoolCharacter(const bool *delimiter) { delimiter_ = delimiter; }
00066
00067 StringPiece Find(const StringPiece &in) const {
00068 for (const char *i = in.data(); i != in.data() + in.size(); ++i) {
00069 if (delimiter_[static_cast<unsigned char>(*i)]) return StringPiece(i, 1);
00070 }
00071 return StringPiece(in.data() + in.size(), 0);
00072 }
00073
00074 template <unsigned Length> static void Build(const char (&characters)[Length], bool (&out)[256]) {
00075 memset(out, 0, sizeof(out));
00076 for (const char *i = characters; i != characters + Length; ++i) {
00077 out[static_cast<unsigned char>(*i)] = true;
00078 }
00079 }
00080
00081 private:
00082 const bool *delimiter_;
00083 };
00084
00085 class AnyCharacterLast {
00086 public:
00087 AnyCharacterLast() {}
00088
00089 explicit AnyCharacterLast(const StringPiece &chars) : chars_(chars) {}
00090
00091 StringPiece Find(const StringPiece &in) const {
00092 return StringPiece(std::find_end(in.data(), in.data() + in.size(), chars_.data(), chars_.data() + chars_.size()), 1);
00093 }
00094
00095 private:
00096 StringPiece chars_;
00097 };
00098
00099 template <class Find, bool SkipEmpty = false> class TokenIter : public boost::iterator_facade<TokenIter<Find, SkipEmpty>, const StringPiece, boost::forward_traversal_tag> {
00100 public:
00101 TokenIter() {}
00102
00103 template <class Construct> TokenIter(const StringPiece &str, const Construct &construct) : after_(str), finder_(construct) {
00104 increment();
00105 }
00106
00107 bool operator!() const {
00108 return current_.data() == 0;
00109 }
00110 operator bool() const {
00111 return current_.data() != 0;
00112 }
00113
00114 static TokenIter<Find, SkipEmpty> end() {
00115 return TokenIter<Find, SkipEmpty>();
00116 }
00117
00118 private:
00119 friend class boost::iterator_core_access;
00120
00121 void increment() {
00122 do {
00123 StringPiece found(finder_.Find(after_));
00124 current_ = StringPiece(after_.data(), found.data() - after_.data());
00125 if (found.data() == after_.data() + after_.size()) {
00126 after_ = StringPiece(NULL, 0);
00127 } else {
00128 after_ = StringPiece(found.data() + found.size(), after_.data() - found.data() + after_.size() - found.size());
00129 }
00130 } while (SkipEmpty && current_.data() && current_.empty());
00131 }
00132
00133 bool equal(const TokenIter<Find, SkipEmpty> &other) const {
00134 return current_.data() == other.current_.data();
00135 }
00136
00137 const StringPiece &dereference() const {
00138 UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens");
00139 return current_;
00140 }
00141
00142 StringPiece current_;
00143 StringPiece after_;
00144
00145 Find finder_;
00146 };
00147
00148 }
00149
00150 #endif // UTIL_TOKENIZE_PIECE_H