00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef moses_ScoreComponentCollection_h
00022 #define moses_ScoreComponentCollection_h
00023
#include <algorithm>
#include <numeric>
#include <sstream>
#include <utility>

#ifdef MPI_ENABLE
#include <boost/serialization/access.hpp>
#include <boost/serialization/split_member.hpp>
#endif

#include "moses/FF/FeatureFunction.h"
#include "FeatureVector.h"
#include "TypeDef.h"
#include "Util.h"
#include "util/exception.hh"
00037
00038 namespace Moses
00039 {
00040
00044 struct ScorePair {
00045 friend std::ostream& operator<<(std::ostream& os, const ScorePair& rhs);
00046
00047 std::vector<float> denseScores;
00048 std::map<StringPiece, float> sparseScores;
00049
00050 ScorePair() {
00051 }
00052 ScorePair(const std::vector<float> &other)
00053 :denseScores(other) {
00054 }
00055
00056 void PlusEquals(const ScorePair &other);
00057 void PlusEquals(const StringPiece &key, float value);
00058
00059 void PlusEquals(const std::vector<float> &other) {
00060 UTIL_THROW_IF2(denseScores.size() != other.size(), "Number of scores incorrect");
00061 std::transform(denseScores.begin(),
00062 denseScores.end(),
00063 other.begin(),
00064 denseScores.begin(),
00065 std::plus<float>());
00066 }
00067 };
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087 class ScoreComponentCollection
00088 {
00089 friend std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs);
00090 friend void swap(ScoreComponentCollection &first, ScoreComponentCollection &second);
00091
00092 private:
00093 FVector m_scores;
00094
00095 public:
00096
00097 private:
00098
00099
00100 static size_t s_denseVectorSize;
00101 public:
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115 public:
00116 static void ResetCounter() {
00117 s_denseVectorSize = 0;
00118 }
00119
00121 ScoreComponentCollection();
00122
00124 ScoreComponentCollection(const ScoreComponentCollection& rhs)
00125 : m_scores(rhs.m_scores) {
00126 }
00127
00128 ScoreComponentCollection& operator=( const ScoreComponentCollection& rhs ) {
00129 m_scores = rhs.m_scores;
00130 return *this;
00131 }
00132
00137 static void RegisterScoreProducer(FeatureFunction* scoreProducer);
00138
00140 bool Load(const std::string& filename) {
00141 return m_scores.load(filename);
00142 }
00143
00144 const FVector& GetScoresVector() const {
00145 return m_scores;
00146 }
00147
00148 const std::valarray<FValue> &getCoreFeatures() const {
00149 return m_scores.getCoreFeatures();
00150 }
00151
00152 size_t Size() const {
00153 return m_scores.size();
00154 }
00155
00156 void Resize() {
00157 if (m_scores.coreSize() != s_denseVectorSize) {
00158 m_scores.resize(s_denseVectorSize);
00159 }
00160 }
00161
00163 static FVector CreateFVector() {
00164 return FVector(s_denseVectorSize);
00165 }
00166
00167 void SetToBinaryOf(const ScoreComponentCollection& rhs) {
00168 m_scores.setToBinaryOf(rhs.m_scores);
00169 }
00170
00172 void ZeroAll() {
00173 m_scores.clear();
00174 }
00175
00176 void MultiplyEquals(float scalar);
00177 void DivideEquals(float scalar);
00178 void CoreDivideEquals(float scalar);
00179 void DivideEquals(const ScoreComponentCollection& rhs);
00180 void MultiplyEquals(const ScoreComponentCollection& rhs);
00181 void MultiplyEqualsBackoff(const ScoreComponentCollection& rhs, float backoff);
00182 void MultiplyEquals(float core_r0, float sparse_r0);
00183 void MultiplyEquals(const FeatureFunction* sp, float scalar);
00184
00185 size_t GetNumberWeights(const FeatureFunction* sp);
00186
00187 void CoreAssign(const ScoreComponentCollection& rhs) {
00188 m_scores.coreAssign(rhs.m_scores);
00189 }
00190
00192 void PlusEquals(const ScoreComponentCollection& rhs) {
00193 m_scores += rhs.m_scores;
00194 }
00195
00196
00197 void SparsePlusEquals(const ScoreComponentCollection& rhs) {
00198 m_scores.sparsePlusEquals(rhs.m_scores);
00199 }
00200
00201
00202 void CorePlusEquals(const ScoreComponentCollection& rhs) {
00203 m_scores.corePlusEquals(rhs.m_scores);
00204 }
00205
00206 void PlusEquals(const FVector& scores) {
00207 m_scores += scores;
00208 }
00209
00211 void MinusEquals(const ScoreComponentCollection& rhs) {
00212 m_scores -= rhs.m_scores;
00213 }
00214
00215
00216 void MinusEquals(const FeatureFunction*sp, const std::string& name, float score) {
00217 FName fname(sp->GetScoreProducerDescription(),name);
00218 m_scores[fname] -= score;
00219 }
00220
00221
00222 void SparseMinusEquals(const std::string& full_name, float score) {
00223 FName fname(full_name);
00224 m_scores[fname] -= score;
00225 }
00226
00230 void
00231 PlusEquals(const FeatureFunction* sp,
00232 const ScoreComponentCollection& scores) {
00233 size_t i = sp->GetIndex();
00234 size_t stop = i + sp->GetNumScoreComponents();
00235 for (; i < stop; ++i) m_scores[i] += scores.m_scores[i];
00236 }
00237
00241 void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
00242 UTIL_THROW_IF2(scores.size() != sp->GetNumScoreComponents(),
00243 "Number of scores is incorrect");
00244 size_t offset = sp->GetIndex();
00245 for (size_t i = 0; i < scores.size(); ++i) {
00246 m_scores[i + offset] += scores[i];
00247 }
00248 }
00249
00250 void PlusEquals(const FeatureFunction* sp, float scores[]) {
00251 size_t numScores = sp->GetNumScoreComponents();
00252 size_t offset = sp->GetIndex();
00253 for (size_t i = 0; i < numScores; ++i) {
00254 m_scores[i + offset] += scores[i];
00255 }
00256 }
00257
00261 void PlusEquals(const FeatureFunction* sp, float score) {
00262 UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
00263 "Number of scores is incorrect");
00264 m_scores[sp->GetIndex()] += score;
00265 }
00266
00267
00268 void PlusEquals(const FeatureFunction*sp, const StringPiece& name, float score) {
00269 FName fname(sp->GetScoreProducerDescription(),name);
00270 m_scores[fname] += score;
00271 }
00272
00273 void PlusEquals(const FeatureFunction* sp, const ScorePair &scorePair);
00274
00275
00276 void PlusEquals(size_t index, float score) {
00277 m_scores[index] += score;
00278 }
00279
00280
00281 void SparsePlusEquals(const std::string& full_name, float score) {
00282 FName fname(full_name);
00283 m_scores[fname] += score;
00284 }
00285
00286 void SparsePlusEquals(const FName& fname, float score) {
00287 m_scores[fname] += score;
00288 }
00289
00290 void Assign(const FeatureFunction* sp, const std::vector<float>& scores);
00291
00295 void Assign(const FeatureFunction* sp, float score) {
00296
00297 UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
00298 "Feature function must must only contain 1 score");
00299 m_scores[sp->GetIndex()] = score;
00300 }
00301
00302
00303 void Assign(size_t index, float score) {
00304 m_scores[index] = score;
00305 }
00306
00307 void Assign(const FeatureFunction*sp, const StringPiece &name, float score) {
00308 FName fname(sp->GetScoreProducerDescription(),name);
00309 m_scores[fname] = score;
00310 }
00311
00312
00313
00314 void Assign(const FeatureFunction* sp, const std::string &line);
00315
00316
00317 void Assign(const std::string name, float score) {
00318 FName fname(name);
00319 m_scores[fname] = score;
00320 }
00321
00322 float InnerProduct(const ScoreComponentCollection& rhs) const {
00323 return m_scores.inner_product(rhs.m_scores);
00324 }
00325
00326 float PartialInnerProduct(const FeatureFunction* sp, const std::vector<float>& rhs) const {
00327 std::vector<float> lhs = GetScoresForProducer(sp);
00328 UTIL_THROW_IF2(lhs.size() != rhs.size(),
00329 "Number of weights must match number of scores");
00330 return std::inner_product(lhs.begin(), lhs.end(), rhs.begin(), 0.0f);
00331 }
00332
00334 std::vector<float> GetScoresForProducer(const FeatureFunction* sp) const {
00335 size_t components = sp->GetNumScoreComponents();
00336
00337 std::vector<float> res(components);
00338 size_t offset = sp->GetIndex();
00339 for (size_t i = 0; i < res.size(); ++i) {
00340 res[i] = m_scores[i + offset];
00341 }
00342 return res;
00343 }
00344
00346 FVector GetVectorForProducer(const FeatureFunction* sp) const;
00347
00348 float GetSparseWeight(const FName& featureName) const {
00349 return m_scores[featureName];
00350 }
00351
00352 void PrintCoreFeatures() {
00353 m_scores.printCoreFeatures();
00354 }
00355
00356 void ThresholdScaling(float maxValue) {
00357
00358
00359
00360 m_scores.thresholdScale(maxValue);
00361 }
00362
00363 void CapMax(float maxValue) {
00364
00365 m_scores.capMax(maxValue);
00366 }
00367
00368 void CapMin(float minValue) {
00369
00370 m_scores.capMin(minValue);
00371 }
00372
00373
00374
00375
00376
00377
00380 float GetScoreForProducer(const FeatureFunction* sp) const {
00381 UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
00382 "Feature function must must only contain 1 score");
00383 return m_scores[sp->GetIndex()];
00384 }
00385
00386
00387 float GetScoreForProducer
00388 (const FeatureFunction* sp, const std::string& name) const {
00389 FName fname(sp->GetScoreProducerDescription(),name);
00390 return m_scores[fname];
00391 }
00392
00393 float GetWeightedScore() const;
00394
00395 void ZeroDenseFeatures(const FeatureFunction* sp);
00396 void InvertDenseFeatures(const FeatureFunction* sp);
00397 void L1Normalise();
00398 float GetL1Norm() const;
00399 float GetL2Norm() const;
00400 float GetLInfNorm() const;
00401 size_t L1Regularize(float lambda);
00402 void L2Regularize(float lambda);
00403 size_t SparseL1Regularize(float lambda);
00404 void SparseL2Regularize(float lambda);
00405 void Save(const std::string& filename) const;
00406 void Save(std::ostream&, bool multiline=true) const;
00407
00408 void IncrementSparseHopeFeatures() {
00409 m_scores.incrementSparseHopeFeatures();
00410 }
00411 void IncrementSparseFearFeatures() {
00412 m_scores.incrementSparseFearFeatures();
00413 }
00414 void PrintSparseHopeFeatureCounts(std::ofstream& out) {
00415 m_scores.printSparseHopeFeatureCounts(out);
00416 }
00417 void PrintSparseFearFeatureCounts(std::ofstream& out) {
00418 m_scores.printSparseFearFeatureCounts(out);
00419 }
00420 void PrintSparseHopeFeatureCounts() {
00421 m_scores.printSparseHopeFeatureCounts();
00422 }
00423 void PrintSparseFearFeatureCounts() {
00424 m_scores.printSparseFearFeatureCounts();
00425 }
00426 size_t PruneSparseFeatures(size_t threshold) {
00427 return m_scores.pruneSparseFeatures(threshold);
00428 }
00429 size_t PruneZeroWeightFeatures() {
00430 return m_scores.pruneZeroWeightFeatures();
00431 }
00432 void UpdateConfidenceCounts(ScoreComponentCollection &weightUpdate, bool signedCounts) {
00433 m_scores.updateConfidenceCounts(weightUpdate.m_scores, signedCounts);
00434 }
00435 void UpdateLearningRates(float decay_core, float decay_sparse, ScoreComponentCollection &confidenceCounts, float core_r0, float sparse_r0) {
00436 m_scores.updateLearningRates(decay_core, decay_sparse, confidenceCounts.m_scores, core_r0, sparse_r0);
00437 }
00438 void Merge(const ScoreComponentCollection &other) {
00439 m_scores.merge(other.m_scores);
00440 }
00441
00442 void OutputAllFeatureScores(std::ostream &out, bool with_labels) const;
00443 void OutputFeatureScores(std::ostream& out, Moses::FeatureFunction const* ff,
00444 std::string &lastName, bool with_labels) const;
00445
00446 #ifdef MPI_ENABLE
00447 public:
00448 friend class boost::serialization::access;
00449
00450 private:
00451
00452 template<class Archive>
00453 void save(Archive &ar, const unsigned int version) const {
00454 ar << m_scores;
00455 }
00456
00457 template<class Archive>
00458 void load(Archive &ar, const unsigned int version) {
00459 ar >> m_scores;
00460
00461 }
00462
00463 BOOST_SERIALIZATION_SPLIT_MEMBER()
00464
00465 #endif
00466
00467 };
00468
00469 struct SCCPlus {
00470 ScoreComponentCollection operator()
00471 (const ScoreComponentCollection& lhs,
00472 const ScoreComponentCollection& rhs) {
00473 ScoreComponentCollection sum(lhs);
00474 sum.PlusEquals(rhs);
00475 return sum;
00476 }
00477 };
00478
00479 inline void swap(ScoreComponentCollection &first, ScoreComponentCollection &second)
00480 {
00481 swap(first.m_scores, second.m_scores);
00482 }
00483
00484 }
00485 #endif