00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef MERT_FEATURE_DATA_H_
00010 #define MERT_FEATURE_DATA_H_
00011
00012 #include <vector>
00013 #include <iostream>
00014 #include <stdexcept>
00015 #include <boost/lexical_cast.hpp>
00016 #include "FeatureArray.h"
00017
00018 namespace MosesTuning
00019 {
00020
00021
00022 class FeatureData
00023 {
00024 private:
00025 std::size_t m_num_features;
00026 std::string m_features;
00027 std::map<std::string, std::size_t> m_feature_name_to_index;
00028 std::map<std::size_t, std::string> m_index_to_feature_name;
00029 featdata_t m_array;
00030 idx2name m_index_to_array_name;
00031 name2idx m_array_name_to_index;
00032
00033 public:
00034 FeatureData();
00035 ~FeatureData() {}
00036
00037 void clear() {
00038 m_array.clear();
00039 }
00040
00041 FeatureArray& get(size_t idx) {
00042 return m_array.at(idx);
00043 }
00044 const FeatureArray& get(size_t idx) const {
00045 return m_array.at(idx);
00046 }
00047
00048 inline bool exists(int sent_idx) const {
00049 return existsInternal(getIndex(sent_idx));
00050 }
00051
00052 inline bool existsInternal(int sent_idx) const {
00053 return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
00054 }
00055
00056 inline FeatureStats& get(std::size_t i, std::size_t j) {
00057 return m_array.at(i).get(j);
00058 }
00059
00060 inline const FeatureStats& get(std::size_t i, std::size_t j) const {
00061 return m_array.at(i).get(j);
00062 }
00063
00064 void add(FeatureArray& e);
00065 void add(FeatureStats& e, int sent_idx);
00066
00067 std::size_t size() const {
00068 return m_array.size();
00069 }
00070
00071 std::size_t NumberOfFeatures() const {
00072 return m_num_features;
00073 }
00074 void NumberOfFeatures(std::size_t v) {
00075 m_num_features = v;
00076 }
00077
00078 std::string Features() const {
00079 return m_features;
00080 }
00081 void Features(const std::string& f) {
00082 m_features = f;
00083 }
00084
00085 void save(const std::string &file, bool bin=false);
00086 void save(std::ostream* os, bool bin=false);
00087 void save(bool bin=false);
00088
00089 void load(std::istream* is, const SparseVector& sparseWeights);
00090 void load(const std::string &file, const SparseVector& sparseWeights);
00091
00092 bool check_consistency() const;
00093
00094 void setIndex();
00095
00096 inline int getIndex(int idx) const {
00097 name2idx::const_iterator i = m_array_name_to_index.find(idx);
00098 if (i != m_array_name_to_index.end())
00099 return i->second;
00100 else
00101 return -1;
00102 }
00103
00104 inline int getName(std::size_t idx) const {
00105 idx2name::const_iterator i = m_index_to_array_name.find(idx);
00106 if (i != m_index_to_array_name.end())
00107 throw std::runtime_error("there is no entry at index " + boost::lexical_cast<std::string>(idx));
00108 return i->second;
00109 }
00110
00111 bool existsFeatureNames() const {
00112 return (m_index_to_feature_name.size() > 0) ? true : false;
00113 }
00114
00115 std::string getFeatureName(std::size_t idx) const {
00116 if (idx >= m_index_to_feature_name.size())
00117 throw std::runtime_error("Error: you required an too big index");
00118 std::map<std::size_t, std::string>::const_iterator it = m_index_to_feature_name.find(idx);
00119 if (it == m_index_to_feature_name.end()) {
00120 throw std::runtime_error("Error: specified id is unknown: " + boost::lexical_cast<std::string>(idx));
00121 } else {
00122 return it->second;
00123 }
00124 }
00125
00126 std::size_t getFeatureIndex(const std::string& name) const {
00127 std::map<std::string, std::size_t>::const_iterator it = m_feature_name_to_index.find(name);
00128 if (it == m_feature_name_to_index.end()) {
00129 std::string msg = "Error: feature " + name + " is unknown. Known features: ";
00130 for (std::map<std::string, std::size_t>::const_iterator cit = m_feature_name_to_index.begin();
00131 cit != m_feature_name_to_index.end(); cit++) {
00132 msg += cit->first;
00133 msg += ", ";
00134 }
00135
00136 throw std::runtime_error(msg);
00137 }
00138 return it->second;
00139 }
00140
00141 void setFeatureMap(const std::string& feat);
00142
00143
00144 std::string ToString() const;
00145 };
00146
00147 }
00148
00149 #endif // MERT_FEATURE_DATA_H_