00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 #ifndef moses_ScoreComponentCollection_h
00022 #define moses_ScoreComponentCollection_h
00023 
00024 #include <numeric>
00025 #include <sstream>
00026 
00027 #ifdef MPI_ENABLE
00028 #include <boost/serialization/access.hpp>
00029 #include <boost/serialization/split_member.hpp>
00030 #endif
00031 
00032 #include "moses/FF/FeatureFunction.h"
00033 #include "FeatureVector.h"
00034 #include "TypeDef.h"
00035 #include "Util.h"
00036 #include "util/exception.hh"
00037 
00038 namespace Moses
00039 {
00040 
00044 struct ScorePair {
00045   friend std::ostream& operator<<(std::ostream& os, const ScorePair& rhs);
00046 
00047   std::vector<float> denseScores;
00048   std::map<StringPiece, float> sparseScores;
00049 
00050   ScorePair() {
00051   }
00052   ScorePair(const std::vector<float> &other)
00053     :denseScores(other) {
00054   }
00055 
00056   void PlusEquals(const ScorePair &other);
00057   void PlusEquals(const StringPiece &key, float value);
00058 
00059   void PlusEquals(const std::vector<float> &other) {
00060     UTIL_THROW_IF2(denseScores.size() != other.size(), "Number of scores incorrect");
00061     std::transform(denseScores.begin(),
00062                    denseScores.end(),
00063                    other.begin(),
00064                    denseScores.begin(),
00065                    std::plus<float>());
00066   }
00067 };
00068 
00069 
00070 
00071 
00072 
00073 
00074 
00075 
00076 
00077 
00078 
00079 
00080 
00081 
00082 
00083 
00084 
00085 
00086 
00087 class ScoreComponentCollection
00088 {
00089   friend std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs);
00090   friend void swap(ScoreComponentCollection &first, ScoreComponentCollection &second);
00091 
00092 private:
00093   FVector m_scores;
00094 
00095 public:
00096   
00097 private:
00098   
00099   
00100   static size_t s_denseVectorSize;
00101 public:
00102   
00103   
00104   
00105   
00106   
00107   
00108   
00109   
00110   
00111   
00112   
00113   
00114 
00115 public:
00116   static void ResetCounter() {
00117     s_denseVectorSize = 0;
00118   }
00119 
00121   ScoreComponentCollection();
00122 
00124   ScoreComponentCollection(const ScoreComponentCollection& rhs)
00125     : m_scores(rhs.m_scores) {
00126   }
00127 
00128   ScoreComponentCollection& operator=( const ScoreComponentCollection& rhs ) {
00129     m_scores = rhs.m_scores;
00130     return *this;
00131   }
00132 
00137   static void RegisterScoreProducer(FeatureFunction* scoreProducer);
00138 
00140   bool Load(const std::string& filename) {
00141     return m_scores.load(filename);
00142   }
00143 
00144   const FVector& GetScoresVector() const {
00145     return m_scores;
00146   }
00147 
00148   const std::valarray<FValue> &getCoreFeatures() const {
00149     return m_scores.getCoreFeatures();
00150   }
00151 
00152   size_t Size() const {
00153     return m_scores.size();
00154   }
00155 
00156   void Resize() {
00157     if (m_scores.coreSize() != s_denseVectorSize) {
00158       m_scores.resize(s_denseVectorSize);
00159     }
00160   }
00161 
00163   static FVector CreateFVector() {
00164     return FVector(s_denseVectorSize);
00165   }
00166 
00167   void SetToBinaryOf(const ScoreComponentCollection& rhs) {
00168     m_scores.setToBinaryOf(rhs.m_scores);
00169   }
00170 
00172   void ZeroAll() {
00173     m_scores.clear();
00174   }
00175 
00176   void MultiplyEquals(float scalar);
00177   void DivideEquals(float scalar);
00178   void CoreDivideEquals(float scalar);
00179   void DivideEquals(const ScoreComponentCollection& rhs);
00180   void MultiplyEquals(const ScoreComponentCollection& rhs);
00181   void MultiplyEqualsBackoff(const ScoreComponentCollection& rhs, float backoff);
00182   void MultiplyEquals(float core_r0, float sparse_r0);
00183   void MultiplyEquals(const FeatureFunction* sp, float scalar);
00184 
00185   size_t GetNumberWeights(const FeatureFunction* sp);
00186 
00187   void CoreAssign(const ScoreComponentCollection& rhs) {
00188     m_scores.coreAssign(rhs.m_scores);
00189   }
00190 
00192   void PlusEquals(const ScoreComponentCollection& rhs) {
00193     m_scores += rhs.m_scores;
00194   }
00195 
00196   
00197   void SparsePlusEquals(const ScoreComponentCollection& rhs) {
00198     m_scores.sparsePlusEquals(rhs.m_scores);
00199   }
00200 
00201   
00202   void CorePlusEquals(const ScoreComponentCollection& rhs) {
00203     m_scores.corePlusEquals(rhs.m_scores);
00204   }
00205 
00206   void PlusEquals(const FVector& scores) {
00207     m_scores += scores;
00208   }
00209 
00211   void MinusEquals(const ScoreComponentCollection& rhs) {
00212     m_scores -= rhs.m_scores;
00213   }
00214 
00215   
00216   void MinusEquals(const FeatureFunction*sp, const std::string& name, float score) {
00217     FName fname(sp->GetScoreProducerDescription(),name);
00218     m_scores[fname] -= score;
00219   }
00220 
00221   
00222   void SparseMinusEquals(const std::string& full_name, float score) {
00223     FName fname(full_name);
00224     m_scores[fname] -= score;
00225   }
00226 
00230   void
00231   PlusEquals(const FeatureFunction* sp,
00232              const ScoreComponentCollection& scores) {
00233     size_t i = sp->GetIndex();
00234     size_t stop = i + sp->GetNumScoreComponents();
00235     for (; i < stop; ++i) m_scores[i] += scores.m_scores[i];
00236   }
00237 
00241   void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
00242     UTIL_THROW_IF2(scores.size() != sp->GetNumScoreComponents(),
00243                    "Number of scores is incorrect");
00244     size_t offset = sp->GetIndex();
00245     for (size_t i = 0; i < scores.size(); ++i) {
00246       m_scores[i + offset] += scores[i];
00247     }
00248   }
00249 
00250   void PlusEquals(const FeatureFunction* sp, float scores[]) {
00251     size_t numScores = sp->GetNumScoreComponents();
00252     size_t offset = sp->GetIndex();
00253     for (size_t i = 0; i < numScores; ++i) {
00254       m_scores[i + offset] += scores[i];
00255     }
00256   }
00257 
00261   void PlusEquals(const FeatureFunction* sp, float score) {
00262     UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
00263                    "Number of scores is incorrect");
00264     m_scores[sp->GetIndex()] += score;
00265   }
00266 
00267   
00268   void PlusEquals(const FeatureFunction*sp, const StringPiece& name, float score) {
00269     FName fname(sp->GetScoreProducerDescription(),name);
00270     m_scores[fname] += score;
00271   }
00272 
00273   void PlusEquals(const FeatureFunction* sp, const ScorePair &scorePair);
00274 
00275   
00276   void PlusEquals(size_t index, float score) {
00277     m_scores[index] += score;
00278   }
00279 
00280   
00281   void SparsePlusEquals(const std::string& full_name, float score) {
00282     FName fname(full_name);
00283     m_scores[fname] += score;
00284   }
00285 
00286   void SparsePlusEquals(const FName& fname, float score) {
00287     m_scores[fname] += score;
00288   }
00289 
00290   void Assign(const FeatureFunction* sp, const std::vector<float>& scores);
00291 
00295   void Assign(const FeatureFunction* sp, float score) {
00296 
00297     UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
00298                    "Feature function must must only contain 1 score");
00299     m_scores[sp->GetIndex()] = score;
00300   }
00301 
00302   
00303   void Assign(size_t index, float score) {
00304     m_scores[index] = score;
00305   }
00306 
00307   void Assign(const FeatureFunction*sp, const StringPiece &name, float score) {
00308     FName fname(sp->GetScoreProducerDescription(),name);
00309     m_scores[fname] = score;
00310   }
00311 
00312 
00313   
00314   void Assign(const FeatureFunction* sp, const std::string &line);
00315 
00316   
00317   void Assign(const std::string name, float score) {
00318     FName fname(name);
00319     m_scores[fname] = score;
00320   }
00321 
00322   float InnerProduct(const ScoreComponentCollection& rhs) const {
00323     return m_scores.inner_product(rhs.m_scores);
00324   }
00325 
00326   float PartialInnerProduct(const FeatureFunction* sp, const std::vector<float>& rhs) const {
00327     std::vector<float> lhs = GetScoresForProducer(sp);
00328     UTIL_THROW_IF2(lhs.size() != rhs.size(),
00329                    "Number of weights must match number of scores");
00330     return std::inner_product(lhs.begin(), lhs.end(), rhs.begin(), 0.0f);
00331   }
00332 
00334   std::vector<float> GetScoresForProducer(const FeatureFunction* sp) const {
00335     size_t components = sp->GetNumScoreComponents();
00336 
00337     std::vector<float> res(components);
00338     size_t offset = sp->GetIndex();
00339     for (size_t i = 0; i < res.size(); ++i) {
00340       res[i] = m_scores[i + offset];
00341     }
00342     return res;
00343   }
00344 
00346   FVector GetVectorForProducer(const FeatureFunction* sp) const;
00347 
00348   float GetSparseWeight(const FName& featureName) const {
00349     return m_scores[featureName];
00350   }
00351 
00352   void PrintCoreFeatures() {
00353     m_scores.printCoreFeatures();
00354   }
00355 
00356   void ThresholdScaling(float maxValue) {
00357     
00358     
00359     
00360     m_scores.thresholdScale(maxValue);
00361   }
00362 
00363   void CapMax(float maxValue) {
00364     
00365     m_scores.capMax(maxValue);
00366   }
00367 
00368   void CapMin(float minValue) {
00369     
00370     m_scores.capMin(minValue);
00371   }
00372 
00373   
00374   
00375   
00376   
00377 
00380   float GetScoreForProducer(const FeatureFunction* sp) const {
00381     UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
00382                    "Feature function must must only contain 1 score");
00383     return m_scores[sp->GetIndex()];
00384   }
00385 
00386   
00387   float GetScoreForProducer
00388   (const FeatureFunction* sp, const std::string& name) const {
00389     FName fname(sp->GetScoreProducerDescription(),name);
00390     return m_scores[fname];
00391   }
00392 
00393   float GetWeightedScore() const;
00394 
00395   void ZeroDenseFeatures(const FeatureFunction* sp);
00396   void InvertDenseFeatures(const FeatureFunction* sp);
00397   void L1Normalise();
00398   float GetL1Norm() const;
00399   float GetL2Norm() const;
00400   float GetLInfNorm() const;
00401   size_t L1Regularize(float lambda);
00402   void L2Regularize(float lambda);
00403   size_t SparseL1Regularize(float lambda);
00404   void SparseL2Regularize(float lambda);
00405   void Save(const std::string& filename) const;
00406   void Save(std::ostream&, bool multiline=true) const;
00407 
00408   void IncrementSparseHopeFeatures() {
00409     m_scores.incrementSparseHopeFeatures();
00410   }
00411   void IncrementSparseFearFeatures() {
00412     m_scores.incrementSparseFearFeatures();
00413   }
00414   void PrintSparseHopeFeatureCounts(std::ofstream& out) {
00415     m_scores.printSparseHopeFeatureCounts(out);
00416   }
00417   void PrintSparseFearFeatureCounts(std::ofstream& out) {
00418     m_scores.printSparseFearFeatureCounts(out);
00419   }
00420   void PrintSparseHopeFeatureCounts() {
00421     m_scores.printSparseHopeFeatureCounts();
00422   }
00423   void PrintSparseFearFeatureCounts() {
00424     m_scores.printSparseFearFeatureCounts();
00425   }
00426   size_t PruneSparseFeatures(size_t threshold) {
00427     return m_scores.pruneSparseFeatures(threshold);
00428   }
00429   size_t PruneZeroWeightFeatures() {
00430     return m_scores.pruneZeroWeightFeatures();
00431   }
00432   void UpdateConfidenceCounts(ScoreComponentCollection &weightUpdate, bool signedCounts) {
00433     m_scores.updateConfidenceCounts(weightUpdate.m_scores, signedCounts);
00434   }
00435   void UpdateLearningRates(float decay_core, float decay_sparse, ScoreComponentCollection &confidenceCounts, float core_r0, float sparse_r0) {
00436     m_scores.updateLearningRates(decay_core, decay_sparse, confidenceCounts.m_scores, core_r0, sparse_r0);
00437   }
00438   void Merge(const ScoreComponentCollection &other) {
00439     m_scores.merge(other.m_scores);
00440   }
00441 
00442   void OutputAllFeatureScores(std::ostream &out, bool with_labels) const;
00443   void OutputFeatureScores(std::ostream& out, Moses::FeatureFunction const* ff,
00444                            std::string &lastName, bool with_labels) const;
00445 
00446 #ifdef MPI_ENABLE
00447 public:
00448   friend class boost::serialization::access;
00449 
00450 private:
00451   
00452   template<class Archive>
00453   void save(Archive &ar, const unsigned int version) const {
00454     ar << m_scores;
00455   }
00456 
00457   template<class Archive>
00458   void load(Archive &ar, const unsigned int version) {
00459     ar >> m_scores;
00460 
00461   }
00462 
00463   BOOST_SERIALIZATION_SPLIT_MEMBER()
00464 
00465 #endif
00466 
00467 };
00468 
00469 struct SCCPlus {
00470   ScoreComponentCollection operator()
00471   (const ScoreComponentCollection& lhs,
00472    const ScoreComponentCollection& rhs) {
00473     ScoreComponentCollection sum(lhs);
00474     sum.PlusEquals(rhs);
00475     return sum;
00476   }
00477 };
00478 
00479 inline void swap(ScoreComponentCollection &first, ScoreComponentCollection &second)
00480 {
00481   swap(first.m_scores, second.m_scores);
00482 }
00483 
00484 }
00485 #endif