00001 #ifndef UTIL_FILE_PIECE_H
00002 #define UTIL_FILE_PIECE_H
00003
00004 #include "util/ersatz_progress.hh"
00005 #include "util/exception.hh"
00006 #include "util/file.hh"
00007 #include "util/mmap.hh"
00008 #include "util/read_compressed.hh"
00009 #include "util/string_piece.hh"
00010
00011 #include <cstddef>
00012 #include <iosfwd>
00013 #include <string>
00014 #include <cassert>
00015 #include <stdint.h>
00016
00017 namespace util {
00018
00019 class ParseNumberException : public Exception {
00020 public:
00021 explicit ParseNumberException(StringPiece value) throw();
00022 ~ParseNumberException() throw() {}
00023 };
00024
// Lookup table with one flag per possible unsigned char value; the table
// itself is defined elsewhere.  FilePiece uses it as the default `delim`
// argument and indexes it with static_cast<unsigned char>(c): a true entry
// means character c is treated as a delimiter.
// NOTE(review): the name suggests the true entries are whitespace
// characters -- confirm against the definition.
extern const bool kSpaces[256];
00026
00027
00028 class FilePiece {
00029 public:
00030
00031 explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
00032
00033 explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
00034
00035
00036
00037
00038
00039
00040 explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);
00041
00042 ~FilePiece();
00043
00044 char get() {
00045 if (position_ == position_end_) {
00046 Shift();
00047 if (at_end_) throw EndOfFileException();
00048 }
00049 return *(position_++);
00050 }
00051
00052
00053 StringPiece ReadDelimited(const bool *delim = kSpaces) {
00054 SkipSpaces(delim);
00055 return Consume(FindDelimiterOrEOF(delim));
00056 }
00057
00059 bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) {
00060 assert(delim[static_cast<unsigned char>('\n')]);
00061
00062 for (; ; ++position_) {
00063 if (position_ == position_end_) {
00064 try {
00065 Shift();
00066 } catch (const util::EndOfFileException &e) { return false; }
00067
00068 if (position_ == position_end_) return false;
00069 }
00070 if (!delim[static_cast<unsigned char>(*position_)]) break;
00071 if (*position_ == '\n') return false;
00072 }
00073
00074 to = Consume(FindDelimiterOrEOF(delim));
00075 return true;
00076 }
00077
00090 StringPiece ReadLine(char delim = '\n', bool strip_cr = true);
00091
00101 bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true);
00102
00103 float ReadFloat();
00104 double ReadDouble();
00105 long int ReadLong();
00106 unsigned long int ReadULong();
00107
00108
00109 void SkipSpaces(const bool *delim = kSpaces) {
00110 assert(position_ <= position_end_);
00111 for (; ; ++position_) {
00112 if (position_ == position_end_) {
00113 Shift();
00114
00115 if (position_ == position_end_) return;
00116 }
00117 assert(position_ < position_end_);
00118 if (!delim[static_cast<unsigned char>(*position_)]) return;
00119 }
00120 }
00121
00122 uint64_t Offset() const {
00123 return position_ - data_.begin() + mapped_offset_;
00124 }
00125
00126 const std::string &FileName() const { return file_name_; }
00127
00128 private:
00129 void InitializeNoRead(const char *name, std::size_t min_buffer);
00130
00131 void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
00132
00133 template <class T> T ReadNumber();
00134
00135 StringPiece Consume(const char *to) {
00136 assert(to >= position_);
00137 StringPiece ret(position_, to - position_);
00138 position_ = to;
00139 return ret;
00140 }
00141
00142 const char *FindDelimiterOrEOF(const bool *delim = kSpaces);
00143
00144 void Shift();
00145
00146 void MMapShift(uint64_t desired_begin);
00147
00148 void TransitionToRead();
00149 void ReadShift();
00150
00151 const char *position_, *last_space_, *position_end_;
00152
00153 scoped_fd file_;
00154 const uint64_t total_size_;
00155 const uint64_t page_;
00156
00157 std::size_t default_map_size_;
00158 uint64_t mapped_offset_;
00159
00160
00161 scoped_memory data_;
00162
00163 bool at_end_;
00164 bool fallback_to_read_;
00165
00166 ErsatzProgress progress_;
00167
00168 std::string file_name_;
00169
00170 ReadCompressed fell_back_;
00171 };
00172
00173 }
00174
00175 #endif // UTIL_FILE_PIECE_H