00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "FeatureData.h"
00010
00011 #include <limits>
00012 #include "FileStream.h"
00013 #include "Util.h"
00014
00015 using namespace std;
00016
00017 namespace MosesTuning
00018 {
00019
00020
00021
00022 FeatureData::FeatureData()
00023 : m_num_features(0) {}
00024
00025 void FeatureData::save(ostream* os, bool bin)
00026 {
00027 for (featdata_t::iterator i = m_array.begin(); i != m_array.end(); i++)
00028 i->save(os, bin);
00029 }
00030
00031 void FeatureData::save(const string &file, bool bin)
00032 {
00033 if (file.empty()) return;
00034 TRACE_ERR("saving the array into " << file << endl);
00035 ofstream ofs(file.c_str(), ios::out);
00036 ostream* os = &ofs;
00037 save(os, bin);
00038 ofs.close();
00039 }
00040
00041 void FeatureData::save(bool bin)
00042 {
00043 save(&cout, bin);
00044 }
00045
00046 void FeatureData::load(istream* is, const SparseVector& sparseWeights)
00047 {
00048 FeatureArray entry;
00049
00050 while (!is->eof()) {
00051
00052 if (!is->good()) {
00053 cerr << "ERROR FeatureData::load inFile.good()" << endl;
00054 }
00055
00056 entry.clear();
00057 entry.load(is, sparseWeights);
00058
00059 if (entry.size() == 0)
00060 break;
00061
00062 if (size() == 0)
00063 setFeatureMap(entry.Features());
00064
00065 add(entry);
00066 }
00067 }
00068
00069
00070 void FeatureData::load(const string &file, const SparseVector& sparseWeights)
00071 {
00072 TRACE_ERR("loading feature data from " << file << endl);
00073 inputfilestream input_stream(file);
00074 if (!input_stream) {
00075 throw runtime_error("Unable to open feature file: " + file);
00076 }
00077 istream* is = &input_stream;
00078 load(is, sparseWeights);
00079 input_stream.close();
00080 }
00081
00082 void FeatureData::add(FeatureArray& e)
00083 {
00084 if (exists(e.getIndex())) {
00085
00086 size_t pos = getIndex(e.getIndex());
00087 m_array.at(pos).merge(e);
00088 } else {
00089 m_array.push_back(e);
00090 setIndex();
00091 }
00092 }
00093
00094 void FeatureData::add(FeatureStats& e, int sent_idx)
00095 {
00096 if (exists(sent_idx)) {
00097
00098 size_t pos = getIndex(sent_idx);
00099
00100 m_array.at(pos).add(e);
00101 } else {
00102
00103 FeatureArray a;
00104 a.NumberOfFeatures(m_num_features);
00105 a.Features(m_features);
00106 a.setIndex(sent_idx);
00107 a.add(e);
00108 add(a);
00109 }
00110 }
00111
00112 bool FeatureData::check_consistency() const
00113 {
00114 if (m_array.size() == 0)
00115 return true;
00116
00117 for (featdata_t::const_iterator i = m_array.begin(); i != m_array.end(); i++)
00118 if (!i->check_consistency()) return false;
00119
00120 return true;
00121 }
00122
00123 void FeatureData::setIndex()
00124 {
00125 size_t j=0;
00126 for (featdata_t::iterator i = m_array.begin(); i !=m_array.end(); i++) {
00127 m_index_to_array_name[j]=(*i).getIndex();
00128 m_array_name_to_index[(*i).getIndex()] = j;
00129 j++;
00130 }
00131 }
00132
00133 void FeatureData::setFeatureMap(const string& feat)
00134 {
00135 m_num_features = 0;
00136 m_features = feat;
00137
00138 vector<string> buf;
00139 Tokenize(feat.c_str(), ' ', &buf);
00140 for (vector<string>::const_iterator it = buf.begin();
00141 it != buf.end(); ++it) {
00142 const size_t size = m_index_to_feature_name.size();
00143 m_feature_name_to_index[*it] = size;
00144 m_index_to_feature_name[size] = *it;
00145 ++m_num_features;
00146 }
00147 }
00148
00149 string FeatureData::ToString() const
00150 {
00151 string res;
00152
00153 {
00154 stringstream ss;
00155 ss << "number of features: " << m_num_features
00156 << ", features: " << m_features;
00157 res.append(ss.str());
00158 }
00159
00160 res.append("feature_id_map = { ");
00161 for (map<string, size_t>::const_iterator it = m_feature_name_to_index.begin();
00162 it != m_feature_name_to_index.end(); ++it) {
00163 stringstream ss;
00164 ss << it->first << " => " << it->second << ", ";
00165 res.append(ss.str());
00166 }
00167 res.append("}");
00168
00169 return res;
00170 }
00171
00172 }