00001 #ifndef moses_Diffs_h 00002 #define moses_Diffs_h 00003 00004 #include <cmath> 00005 00006 namespace Moses 00007 { 00008 00009 typedef char Diff; 00010 typedef std::vector<Diff> Diffs; 00011 00012 template <class Sequence, class Pred> 00013 void CreateDiffRec(size_t** c, 00014 const Sequence &s1, 00015 const Sequence &s2, 00016 size_t start, 00017 size_t i, 00018 size_t j, 00019 Diffs& diffs, 00020 Pred pred) 00021 { 00022 if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) { 00023 CreateDiffRec(c, s1, s2, start, i - 1, j - 1, diffs, pred); 00024 diffs.push_back(Diff('m')); 00025 } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) { 00026 CreateDiffRec(c, s1, s2, start, i, j-1, diffs, pred); 00027 diffs.push_back(Diff('i')); 00028 } else if(i > 0 && (j == 0 || c[i][j-1] < c[i-1][j])) { 00029 CreateDiffRec(c, s1, s2, start, i-1, j, diffs, pred); 00030 diffs.push_back(Diff('d')); 00031 } 00032 } 00033 00034 template <class Sequence, class Pred> 00035 Diffs CreateDiff(const Sequence& s1, 00036 const Sequence& s2, 00037 Pred pred) 00038 { 00039 00040 Diffs diffs; 00041 00042 size_t n = s2.size(); 00043 00044 int start = 0; 00045 int m_end = s1.size() - 1; 00046 int n_end = s2.size() - 1; 00047 00048 while(start <= m_end && start <= n_end && pred(s1[start], s2[start])) { 00049 diffs.push_back(Diff('m')); 00050 start++; 00051 } 00052 while(start <= m_end && start <= n_end && pred(s1[m_end], s2[n_end])) { 00053 m_end--; 00054 n_end--; 00055 } 00056 00057 size_t m_new = m_end - start + 1; 00058 size_t n_new = n_end - start + 1; 00059 00060 size_t** c = new size_t*[m_new + 1]; 00061 for(size_t i = 0; i <= m_new; ++i) { 00062 c[i] = new size_t[n_new + 1]; 00063 c[i][0] = 0; 00064 } 00065 for(size_t j = 0; j <= n_new; ++j) 00066 c[0][j] = 0; 00067 for(size_t i = 1; i <= m_new; ++i) 00068 for(size_t j = 1; j <= n_new; ++j) 00069 if(pred(s1[i - 1 + start], s2[j - 1 + start])) 00070 c[i][j] = c[i-1][j-1] + 1; 00071 else 00072 c[i][j] = c[i][j-1] > c[i-1][j] ? c[i][j-1] : c[i-1][j]; 00073 00074 CreateDiffRec(c, s1, s2, start, m_new, n_new, diffs, pred); 00075 00076 for(size_t i = 0; i <= m_new; ++i) 00077 delete[] c[i]; 00078 delete[] c; 00079 00080 for (size_t i = n_end + 1; i < n; ++i) 00081 diffs.push_back(Diff('m')); 00082 00083 return diffs; 00084 } 00085 00086 template <class Sequence> 00087 Diffs CreateDiff(const Sequence& s1, const Sequence& s2) 00088 { 00089 return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>()); 00090 } 00091 00092 template <class Sequence, class Sig, class Stats> 00093 void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats) 00094 { 00095 if(sig.size() != stats.size()) 00096 throw "Signature size differs from score array size."; 00097 00098 size_t m = 0, d = 0, i = 0, s = 0; 00099 Diffs diff = CreateDiff(s1, s2); 00100 00101 for(int j = 0; j < (int)diff.size(); ++j) { 00102 if(diff[j] == 'm') 00103 m++; 00104 else if(diff[j] == 'd') { 00105 d++; 00106 int k = 0; 00107 while(j - k >= 0 && j + 1 + k < (int)diff.size() && 00108 diff[j - k] == 'd' && diff[j + 1 + k] == 'i') { 00109 d--; 00110 s++; 00111 k++; 00112 } 00113 j += k; 00114 } else if(diff[j] == 'i') 00115 i++; 00116 } 00117 00118 for(size_t j = 0; j < sig.size(); ++j) { 00119 switch (sig[j]) { 00120 case 'l': 00121 stats[j] += d + i + s; 00122 break; 00123 case 'm': 00124 stats[j] += m; 00125 break; 00126 case 'd': 00127 stats[j] += d; 00128 break; 00129 case 'i': 00130 stats[j] += i; 00131 break; 00132 case 's': 00133 stats[j] += s; 00134 break; 00135 case 'r': 00136 float macc = 1; 00137 if (d + i + s + m) 00138 macc = 1.0 - (float)(d + i + s)/(float)(d + i + s + m); 00139 if(macc > 0) 00140 stats[j] += log(macc); 00141 else 00142 stats[j] += log(1.0/(float)(d + i + s + m + 1)); 00143 break; 00144 } 00145 } 00146 } 00147 00148 } 00149 00150 #endif