00001 #include "pcfg.h"
00002
00003 #include <cassert>
00004
00005 #include <boost/algorithm/string.hpp>
00006 #include <boost/lexical_cast.hpp>
00007
00008 #include "syntax-common/exception.h"
00009
00010 namespace MosesTraining {
00011 namespace Syntax {
00012
00013 void Pcfg::Add(const Key &key, double score) {
00014 rules_[key] = score;
00015 }
00016
00017 bool Pcfg::Lookup(const Key &key, double &score) const {
00018 Map::const_iterator p = rules_.find(key);
00019 if (p == rules_.end()) {
00020 return false;
00021 }
00022 score = p->second;
00023 return true;
00024 }
00025
00026 void Pcfg::Read(std::istream &input, Vocabulary &vocab) {
00027 std::string line;
00028 std::string lhs_string;
00029 std::vector<std::string> rhs_strings;
00030 std::string score_string;
00031 Key key;
00032 while (std::getline(input, line)) {
00033
00034 std::size_t pos = line.find("|||");
00035 if (pos == std::string::npos) {
00036 throw Exception("missing first delimiter");
00037 }
00038 lhs_string = line.substr(0, pos);
00039 boost::trim(lhs_string);
00040
00041
00042 std::size_t begin = pos+3;
00043 pos = line.find("|||", begin);
00044 if (pos == std::string::npos) {
00045 throw Exception("missing second delimiter");
00046 }
00047 std::string rhs_text = line.substr(begin, pos-begin);
00048 boost::trim(rhs_text);
00049 rhs_strings.clear();
00050 boost::split(rhs_strings, rhs_text, boost::algorithm::is_space(),
00051 boost::algorithm::token_compress_on);
00052
00053
00054 score_string = line.substr(pos+3);
00055 boost::trim(score_string);
00056
00057
00058 key.clear();
00059 key.reserve(rhs_strings.size()+1);
00060 key.push_back(vocab.Insert(lhs_string));
00061 for (std::vector<std::string>::const_iterator p = rhs_strings.begin();
00062 p != rhs_strings.end(); ++p) {
00063 key.push_back(vocab.Insert(*p));
00064 }
00065
00066
00067 double score = boost::lexical_cast<double>(score_string);
00068 Add(key, score);
00069 }
00070 }
00071
00072 void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const {
00073 for (const_iterator p = begin(); p != end(); ++p) {
00074 const Key &key = p->first;
00075 double score = p->second;
00076 std::vector<std::size_t>::const_iterator q = key.begin();
00077 std::vector<std::size_t>::const_iterator end = key.end();
00078 output << vocab.Lookup(*q++) << " |||";
00079 while (q != end) {
00080 output << " " << vocab.Lookup(*q++);
00081 }
00082 output << " ||| " << score << std::endl;
00083 }
00084 }
00085
00086 }
00087 }