00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <boost/algorithm/string/predicate.hpp>
00021 #include "ScoreFeature.h"
00022 #include "DomainFeature.h"
00023 #include "InternalStructFeature.h"
00024
00025 using namespace std;
00026 using namespace boost::algorithm;
00027
00028 namespace MosesTraining
00029 {
00030
00031
00032 const string& ScoreFeatureManager::usage() const
00033 {
00034 const static string& usage = "[--[Sparse]Domain[Indicator|Ratio|Subset|Bin] domain-file [bins]]" ;
00035 return usage;
00036 }
00037
00038 void ScoreFeatureManager::configure(const std::vector<std::string> args)
00039 {
00040 bool domainAdded = false;
00041 bool sparseDomainAdded = false;
00042
00043 for (size_t i = 0; i < args.size(); ++i) {
00044 if (args[i] == "--IgnoreSentenceId") {
00045 m_includeSentenceId = true;
00046 } else if (starts_with(args[i], "--Domain")) {
00047 string type = args[i].substr(8);
00048 ++i;
00049 UTIL_THROW_IF(i == args.size(), ScoreFeatureArgumentException, "Missing domain file");
00050 string domainFile = args[i];
00051 UTIL_THROW_IF(domainAdded, ScoreFeatureArgumentException,
00052 "Only allowed one domain feature");
00053 if (type == "Subset") {
00054 m_features.push_back(ScoreFeaturePtr(new SubsetDomainFeature(domainFile)));
00055 } else if (type == "Ratio") {
00056 m_features.push_back(ScoreFeaturePtr(new RatioDomainFeature(domainFile)));
00057 } else if (type == "Indicator") {
00058 m_features.push_back(ScoreFeaturePtr(new IndicatorDomainFeature(domainFile)));
00059 } else {
00060 UTIL_THROW(ScoreFeatureArgumentException, "Unknown domain feature type " << type);
00061 }
00062 domainAdded = true;
00063 m_includeSentenceId = true;
00064 } else if (starts_with(args[i], "--SparseDomain")) {
00065 string type = args[i].substr(14);
00066 ++i;
00067 UTIL_THROW_IF(i == args.size(), ScoreFeatureArgumentException, "Missing domain file");
00068 string domainFile = args[i];
00069 UTIL_THROW_IF(sparseDomainAdded, ScoreFeatureArgumentException,
00070 "Only allowed one sparse domain feature");
00071 if (type == "Subset") {
00072 m_features.push_back(ScoreFeaturePtr(new SparseSubsetDomainFeature(domainFile)));
00073 } else if (type == "Ratio") {
00074 m_features.push_back(ScoreFeaturePtr(new SparseRatioDomainFeature(domainFile)));
00075 } else if (type == "Indicator") {
00076 m_features.push_back(ScoreFeaturePtr(new SparseIndicatorDomainFeature(domainFile)));
00077 } else {
00078 UTIL_THROW(ScoreFeatureArgumentException, "Unknown domain feature type " << type);
00079 }
00080 sparseDomainAdded = true;
00081 m_includeSentenceId = true;
00082 } else if(args[i] == "--TreeFeatureSparse") {
00083
00084 m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
00085 } else if(args[i] == "--TreeFeatureDense") {
00086
00087 m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
00088 } else {
00089 UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
00090 }
00091
00092 }
00093
00094 }
00095
00096 void ScoreFeatureManager::addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
00097 float count,
00098 int sentenceId) const
00099 {
00100 for (size_t i = 0; i < m_features.size(); ++i) {
00101 m_features[i]->addPropertiesToPhrasePair(phrasePair, count, sentenceId);
00102 }
00103 }
00104
00105 void ScoreFeatureManager::addFeatures(const ScoreFeatureContext& context,
00106 std::vector<float>& denseValues,
00107 std::map<std::string,float>& sparseValues) const
00108 {
00109 for (size_t i = 0; i < m_features.size(); ++i) {
00110 m_features[i]->add(context, denseValues, sparseValues);
00111 }
00112 }
00113 }
00114