00001
00002
00003
00004
00005
00006
00007
00008
00009 #pragma once
00010
00011 #include "sapt_pscore_base.h"
00012 #include <boost/dynamic_bitset.hpp>
00013 #include <boost/math/distributions/binomial.hpp>
00014 #include "mm/ug_ttrack_base.h"
00015
00016 namespace sapt {
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 template<typename Token>
00036 class
00037 PScoreLengthRatio : public PhraseScorer<Token>
00038 {
00039 public:
00040 PScoreLengthRatio(std::string const& spec)
00041 {
00042 this->m_feature_names.push_back("lenrat");
00043 this->m_num_feats = this->m_feature_names.size();
00044 }
00045
00046 bool
00047 isIntegerValued(int i) const { return false; }
00048
00049 void
00050 operator()(Bitext<Token> const& bt,
00051 PhrasePair<Token>& pp,
00052 std::vector<float> * dest = NULL) const
00053 {
00054 if (!dest) dest = &pp.fvals;
00055 float p = float(bt.T1->numTokens());
00056 p /= bt.T1->numTokens() + bt.T2->numTokens();
00057 float len1 = sapt::len_from_pid(pp.p1);
00058 float len2 = sapt::len_from_pid(pp.p2);
00059
00060 boost::math::binomial binomi(len1 + len2, p);
00061 float& x = (*dest)[this->m_index];
00062 if (len2/(len1 + len2) < p)
00063 x = log(boost::math::cdf(binomi,len2));
00064 else
00065 x = log(boost::math::cdf(boost::math::complement(binomi,len2 - 1)));
00066 }
00067 };
00068 }
00069