00001 #include "lm/filter/arpa_io.hh" 00002 #include "util/file_piece.hh" 00003 #include "util/string_stream.hh" 00004 00005 #include <iostream> 00006 #include <ostream> 00007 #include <string> 00008 #include <vector> 00009 00010 #include <cctype> 00011 #include <cerrno> 00012 #include <cstring> 00013 00014 namespace lm { 00015 00016 ARPAInputException::ARPAInputException(const StringPiece &message) throw() { 00017 *this << message; 00018 } 00019 00020 ARPAInputException::ARPAInputException(const StringPiece &message, const StringPiece &line) throw() { 00021 *this << message << " in line " << line; 00022 } 00023 00024 ARPAInputException::~ARPAInputException() throw() {} 00025 00026 // Seeking is the responsibility of the caller. 00027 template <class Stream> void WriteCounts(Stream &out, const std::vector<uint64_t> &number) { 00028 out << "\n\\data\\\n"; 00029 for (unsigned int i = 0; i < number.size(); ++i) { 00030 out << "ngram " << i+1 << "=" << number[i] << '\n'; 00031 } 00032 out << '\n'; 00033 } 00034 00035 size_t SizeNeededForCounts(const std::vector<uint64_t> &number) { 00036 util::StringStream stream; 00037 WriteCounts(stream, number); 00038 return stream.str().size(); 00039 } 00040 00041 bool IsEntirelyWhiteSpace(const StringPiece &line) { 00042 for (size_t i = 0; i < static_cast<size_t>(line.size()); ++i) { 00043 if (!isspace(line.data()[i])) return false; 00044 } 00045 return true; 00046 } 00047 00048 ARPAOutput::ARPAOutput(const char *name, size_t buffer_size) 00049 : file_backing_(util::CreateOrThrow(name)), file_(file_backing_.get(), buffer_size) {} 00050 00051 void ARPAOutput::ReserveForCounts(std::streampos reserve) { 00052 for (std::streampos i = 0; i < reserve; i += std::streampos(1)) { 00053 file_ << '\n'; 00054 } 00055 } 00056 00057 void ARPAOutput::BeginLength(unsigned int length) { 00058 file_ << '\\' << length << "-grams:" << '\n'; 00059 } 00060 00061 void ARPAOutput::EndLength(unsigned int length) { 00062 file_ << '\n'; 00063 if (length > counts_.size()) { 00064 counts_.resize(length); 00065 } 00066 counts_[length - 1] = fast_counter_; 00067 } 00068 00069 void ARPAOutput::Finish() { 00070 file_ << "\\end\\\n"; 00071 file_.seekp(0); 00072 WriteCounts(file_, counts_); 00073 file_.flush(); 00074 } 00075 00076 } // namespace lm