00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef MERT_DATA_H_
00010 #define MERT_DATA_H_
00011
00012 #include <vector>
00013 #include <boost/shared_ptr.hpp>
00014
00015 #include "Util.h"
00016 #include "FeatureData.h"
00017 #include "ScoreData.h"
00018
00019 namespace MosesTuning
00020 {
00021
00022 class Scorer;
00023
00024 typedef boost::shared_ptr<ScoreData> ScoreDataHandle;
00025 typedef boost::shared_ptr<FeatureData> FeatureDataHandle;
00026
00027
00028
00029 class Data
00030 {
00031 private:
00032 Scorer* m_scorer;
00033 std::string m_score_type;
00034 std::size_t m_num_scores;
00035 ScoreDataHandle m_score_data;
00036 FeatureDataHandle m_feature_data;
00037 SparseVector m_sparse_weights;
00038
00039 public:
00040 explicit Data(Scorer* scorer, const std::string& sparseweightsfile="");
00041
00042 void clear() {
00043 m_score_data->clear();
00044 m_feature_data->clear();
00045 }
00046
00047 ScoreDataHandle getScoreData() {
00048 return m_score_data;
00049 }
00050
00051 FeatureDataHandle getFeatureData() {
00052 return m_feature_data;
00053 }
00054
00055 Scorer* getScorer() {
00056 return m_scorer;
00057 }
00058
00059 std::size_t NumberOfFeatures() const {
00060 return m_feature_data->NumberOfFeatures();
00061 }
00062
00063 std::string Features() const {
00064 return m_feature_data->Features();
00065 }
00066 void Features(const std::string &f) {
00067 m_feature_data->Features(f);
00068 }
00069
00070 void loadNBest(const std::string &file, bool oneBest=false);
00071
00072 void load(const std::string &featfile, const std::string &scorefile);
00073
00074 void save(const std::string &featfile, const std::string &scorefile, bool bin=false);
00075
00076
00077 void removeDuplicates();
00078
00079
00080 inline bool existsFeatureNames() const {
00081 return m_feature_data->existsFeatureNames();
00082 }
00083
00084 inline std::string getFeatureName(std::size_t idx) const {
00085 return m_feature_data->getFeatureName(idx);
00086 }
00087
00088 inline std::size_t getFeatureIndex(const std::string& name) const {
00089 return m_feature_data->getFeatureIndex(name);
00090 }
00091
00098 void createShards(std::size_t shard_count, float shard_size, const std::string& scorerconfig,
00099 std::vector<Data>& shards);
00100
00101
00102 void InitFeatureMap(const std::string& str);
00103 void AddFeatures(const std::string& str,
00104 int sentence_index);
00105 };
00106
00107 }
00108
00109 #endif // MERT_DATA_H_