mosesdecoder/moses/FF/Diffs.h
2016-06-02 00:00:39 +01:00

151 lines
3.5 KiB
C++

#ifndef moses_Diffs_h
#define moses_Diffs_h
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <vector>
namespace Moses
{

/** One edit operation: 'm' (match), 'i' (insertion) or 'd' (deletion). */
typedef char Diff;

/** An edit script: the operations that turn one sequence into another, in order. */
typedef std::vector<Diff> Diffs;

/**
 * Backtracks through a filled longest-common-subsequence table and appends
 * the corresponding edit script to `diffs`.
 *
 * Despite the historical name, the walk is iterative, so its stack usage does
 * not grow with the sequence lengths.
 *
 * @param c      LCS table; `c[a][b]` must be readable for `0 <= a <= i` and
 *               `0 <= b <= j`.
 * @param s1     first sequence; element `a - 1 + start` corresponds to row `a`.
 * @param s2     second sequence; element `b - 1 + start` corresponds to column `b`.
 * @param start  offset of the table's origin inside both sequences.
 * @param i      row at which backtracking starts.
 * @param j      column at which backtracking starts.
 * @param diffs  output; operations are appended, existing content is kept.
 * @param pred   binary predicate deciding whether two elements match.
 */
template <class Sequence, class Pred>
void CreateDiffRec(size_t** c,
                   const Sequence &s1,
                   const Sequence &s2,
                   size_t start,
                   size_t i,
                   size_t j,
                   Diffs& diffs,
                   Pred pred)
{
  const Diffs::size_type first = diffs.size();

  // Walk from (i, j) back to (0, 0). The operations are discovered last-first,
  // so they are appended in reverse and flipped once at the end.
  for(;;) {
    if(i > 0 && j > 0 && pred(s1[i - 1 + start], s2[j - 1 + start])) {
      diffs.push_back(Diff('m'));
      --i;
      --j;
    } else if(j > 0 && (i == 0 || c[i][j-1] >= c[i-1][j])) {
      diffs.push_back(Diff('i'));
      --j;
    } else if(i > 0) {
      // Reached only when j == 0 or c[i][j-1] < c[i-1][j]: the insertion
      // branch above has already handled every other case.
      diffs.push_back(Diff('d'));
      --i;
    } else {
      break;
    }
  }

  std::reverse(diffs.begin() + static_cast<Diffs::difference_type>(first),
               diffs.end());
}

/**
 * Computes an edit script between `s1` and `s2` based on their longest common
 * subsequence under `pred`.
 *
 * The common prefix and suffix are stripped first; the LCS table is built
 * only for the differing middle part.
 *
 * @param s1    source sequence (needs `size()` and `operator[]`).
 * @param s2    target sequence (needs `size()` and `operator[]`).
 * @param pred  binary predicate deciding whether two elements match.
 * @return      sequence of 'm' / 'i' / 'd' operations.
 */
template <class Sequence, class Pred>
Diffs CreateDiff(const Sequence& s1,
                 const Sequence& s2,
                 Pred pred)
{
  Diffs diffs;

  const size_t len1 = s1.size();
  const size_t len2 = s2.size();

  // Common prefix: emitted directly as matches.
  size_t start = 0;
  while(start < len1 && start < len2 && pred(s1[start], s2[start])) {
    diffs.push_back(Diff('m'));
    ++start;
  }

  // Common suffix: [start, end1) and [start, end2) delimit the differing
  // middle parts. Half-open bounds avoid the signed "size() - 1" arithmetic.
  size_t end1 = len1;
  size_t end2 = len2;
  while(end1 > start && end2 > start && pred(s1[end1 - 1], s2[end2 - 1])) {
    --end1;
    --end2;
  }

  const size_t m_new = end1 - start;
  const size_t n_new = end2 - start;

  // LCS table in one zero-initialised buffer owned by a vector, so nothing
  // leaks if pred throws. The row pointers provide the size_t** view that
  // CreateDiffRec expects.
  const size_t width = n_new + 1;
  std::vector<size_t> cells((m_new + 1) * width, 0);
  std::vector<size_t*> rows(m_new + 1);
  for(size_t i = 0; i <= m_new; ++i)
    rows[i] = &cells[i * width];

  for(size_t i = 1; i <= m_new; ++i) {
    for(size_t j = 1; j <= n_new; ++j) {
      if(pred(s1[i - 1 + start], s2[j - 1 + start]))
        rows[i][j] = rows[i-1][j-1] + 1;
      else
        rows[i][j] = std::max(rows[i][j-1], rows[i-1][j]);
    }
  }

  CreateDiffRec(&rows[0], s1, s2, start, m_new, n_new, diffs, pred);

  // Common suffix: one match per stripped trailing element.
  diffs.insert(diffs.end(), len2 - end2, Diff('m'));
  return diffs;
}

/**
 * Computes an edit script between `s1` and `s2`, comparing elements with
 * `std::equal_to`.
 */
template <class Sequence>
Diffs CreateDiff(const Sequence& s1, const Sequence& s2)
{
  return CreateDiff(s1, s2, std::equal_to<typename Sequence::value_type>());
}

/**
 * Accumulates edit statistics for the pair (`s1`, `s2`) into `stats`.
 *
 * A deletion run immediately followed by an insertion run is paired up,
 * outwards from the boundary between the two runs, into substitutions.
 * `sig[j]` selects what is added to `stats[j]`:
 *   'l' deletions + insertions + substitutions, 'm' matches, 'd' deletions,
 *   'i' insertions, 's' substitutions, 'r' log of the match ratio
 *   (log(1 / (operations + 1)) when nothing matches).
 * Any other signature character leaves its slot untouched.
 *
 * @throws const char*  if `sig` and `stats` differ in size.
 */
template <class Sequence, class Sig, class Stats>
void AddStats(const Sequence& s1, const Sequence& s2, const Sig& sig, Stats& stats)
{
  if(sig.size() != stats.size())
    throw "Signature size differs from score array size.";

  size_t m = 0, d = 0, i = 0, s = 0;
  const Diffs diff = CreateDiff(s1, s2);
  const size_t len = diff.size();

  for(size_t j = 0; j < len; ++j) {
    if(diff[j] == 'm') {
      ++m;
    } else if(diff[j] == 'd') {
      ++d;
      // Pair deletions with the insertions that follow them, expanding
      // outwards from position j; each pair turns one deletion (already
      // counted) into a substitution.
      size_t k = 0;
      while(k <= j && j + 1 + k < len &&
            diff[j - k] == 'd' && diff[j + 1 + k] == 'i') {
        --d;
        ++s;
        ++k;
      }
      j += k; // skip the insertions consumed by the pairing
    } else if(diff[j] == 'i') {
      ++i;
    }
  }

  const size_t edits = d + i + s;
  const size_t total = edits + m;

  for(size_t j = 0; j < sig.size(); ++j) {
    switch (sig[j]) {
    case 'l':
      stats[j] += edits;
      break;
    case 'm':
      stats[j] += m;
      break;
    case 'd':
      stats[j] += d;
      break;
    case 'i':
      stats[j] += i;
      break;
    case 's':
      stats[j] += s;
      break;
    case 'r': {
      // Scoped so that the local does not block adding further cases.
      float macc = 1;
      if(total)
        macc = static_cast<float>(1.0 - static_cast<float>(edits) / static_cast<float>(total));
      // Logarithms are taken in double precision.
      if(macc > 0)
        stats[j] += std::log(static_cast<double>(macc));
      else
        stats[j] += std::log(1.0 / static_cast<float>(total + 1));
      break;
    }
    default:
      break;
    }
  }
}

}
#endif