00001 #ifndef moses_Diffs_h
00002 #define moses_Diffs_h
00003
00004 #include <cmath>
00005
00006 namespace Moses
00007 {
00008
00009 typedef char Diff;
00010 typedef std::vector<Diff> Diffs;
00011
00012 template <class Sequence, class Pred>
00013 void CreateDiffRec(size_t** c,
00014 const Sequence &s1,
00015 const Sequence &s2,
00016 size_t start,
00017 size_t i,
00018 size_t j,
00019 Diffs& diffs,
00020 Pred pred)
00021 {
00022 if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
00023 CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred);
00024 diffs.push_back(Diff('m'));
00025 } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
00026 CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred);
00027 diffs.push_back(Diff('i'));
00028 } else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) {
00029 CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred);
00030 diffs.push_back(Diff('d'));
00031 }
00032 }
00033
00034 template <class Sequence, class Pred>
00035 Diffs CreateDiff(const Sequence& s1,
00036 const Sequence& s2,
00037 Pred pred)
00038 {
00039
00040 Diffs diffs;
00041
00042 size_t n = s2.size();
00043
00044 int start = 0;
00045 int m_end = s1.size() - 1;
00046 int n_end = s2.size() - 1;
00047
00048 while(start <= m_end && start <= n_end && pred(s1[start], s2[start])) {
00049 diffs.push_back(Diff('m'));
00050 start++;
00051 }
00052 while(start <= m_end && start <= n_end && pred(s1[m_end], s2[n_end])) {
00053 m_end--;
00054 n_end--;
00055 }
00056
00057 size_t m_new = m_end - start + 1;
00058 size_t n_new = n_end - start + 1;
00059
00060 size_t** c = new size_t*[m_new + 1];
00061 for(size_t i = 0; i <= m_new; ++i) {
00062 c[i] = new size_t[n_new + 1];
00063 c[i][0] = 0;
00064 }
00065 for(size_t j = 0; j <= n_new; ++j)
00066 c[0][j] = 0;
00067 for(size_t i = 1; i <= m_new; ++i)
00068 for(size_t j = 1; j <= n_new; ++j)
00069 if(pred(s1[i - 1 + start], s2[j - 1 + start]))
00070 c[i][j] = c[i-1][j-1] + 1;
00071 else
00072 c[i][j] = c[i][j-1] > c[i-1][j] ? c[i][j-1] : c[i-1][j];
00073
00074 CreateDiffRec(c, s1, s2, start, m_new, n_new, diffs, pred);
00075
00076 for(size_t i = 0; i <= m_new; ++i)
00077 delete[] c[i];
00078 delete[] c;
00079
00080 for (size_t i = n_end + 1; i < n; ++i)
00081 diffs.push_back(Diff('m'));
00082
00083 return diffs;
00084 }
00085
00086 template <class Sequence>
00087 Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
00088 {
00089 return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
00090 }
00091
00092 template <class Sequence, class Sig, class Stats>
00093 void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
00094 {
00095 if(sig.size() != stats.size())
00096 throw "Signature size differs from score array size.";
00097
00098 size_t m = 0, d = 0, i = 0, s = 0;
00099 Diffs diff = CreateDiff(s1, s2);
00100
00101 for(int j = 0; j < (int)diff.size(); ++j) {
00102 if(diff[j] == 'm')
00103 m++;
00104 else if(diff[j] == 'd') {
00105 d++;
00106 int k = 0;
00107 while(j - k >= 0 && j + 1 + k < (int)diff.size() &&
00108 diff[j - k] == 'd' && diff[j + 1 + k] == 'i') {
00109 d--;
00110 s++;
00111 k++;
00112 }
00113 j += k;
00114 } else if(diff[j] == 'i')
00115 i++;
00116 }
00117
00118 for(size_t j = 0; j < sig.size(); ++j) {
00119 switch (sig[j]) {
00120 case 'l':
00121 stats[j] += d + i + s;
00122 break;
00123 case 'm':
00124 stats[j] += m;
00125 break;
00126 case 'd':
00127 stats[j] += d;
00128 break;
00129 case 'i':
00130 stats[j] += i;
00131 break;
00132 case 's':
00133 stats[j] += s;
00134 break;
00135 case 'r':
00136 float macc = 1;
00137 if (d + i + s + m)
00138 macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m);
00139 if(macc > 0)
00140 stats[j] += log(macc);
00141 else
00142 stats[j] += log(1.0/(float)(d + i + s + m + 1));
00143 break;
00144 }
00145 }
00146 }
00147
00148 }
00149
00150 #endif