2012-03-23 20:52:24 +04:00
|
|
|
#include "SemposOverlapping.h"
|
2012-03-24 19:07:47 +04:00
|
|
|
#include "SemposScorer.h"
|
2012-03-23 20:52:24 +04:00
|
|
|
|
2012-03-23 22:09:04 +04:00
|
|
|
#include <algorithm>
|
2012-03-23 20:52:24 +04:00
|
|
|
#include <stdexcept>
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace
|
|
|
|
{
|
2012-03-23 20:52:24 +04:00
|
|
|
|
2012-06-30 23:23:45 +04:00
|
|
|
MosesTuning::SemposOverlapping* g_overlapping = NULL;
|
2012-03-23 20:52:24 +04:00
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
2012-06-30 23:23:45 +04:00
|
|
|
namespace MosesTuning
|
|
|
|
{
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
|
|
|
|
/**
 * Creates the overlapping implementation selected by name.
 *
 * @param str     Name of the implementation: "cap-micro" or "cap-macro".
 * @param sempos  Scorer handed to the constructor of the created object.
 * @return Pointer to a newly allocated CapMicroOverlapping or
 *         CapMacroOverlapping. Nothing in this function releases it;
 *         presumably the caller takes ownership -- confirm at call sites.
 * @throws std::runtime_error if str is neither of the known names.
 */
SemposOverlapping* SemposOverlappingFactory::GetOverlapping(const string& str, const SemposScorer* sempos)
{
  if (str == "cap-micro") {
    return new CapMicroOverlapping(sempos);
  }

  if (str == "cap-macro") {
    return new CapMacroOverlapping(sempos);
  }

  // Anything else is a configuration error.
  throw runtime_error("Unknown overlapping: " + str);
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
/**
 * Records the given overlapping object in the file-local g_overlapping
 * pointer.
 *
 * The previously stored pointer is simply overwritten; it is not deleted
 * here.
 *
 * @param ovr  Pointer to store (may be null as far as this function is
 *             concerned; no check is made).
 */
void SemposOverlappingFactory::SetOverlapping(SemposOverlapping* ovr)
{
  g_overlapping = ovr;
}
|
|
|
|
|
|
|
|
/**
 * Computes the two statistics used by CapMicroOverlapping::calculateScore.
 *
 * @param cand  Candidate sentence items.
 * @param ref   Reference sentence items.
 * @return A vector of size 2:
 *         [0] sum of weights of the items present in both cand and ref,
 *         [1] sum of weights of all items in ref,
 *         each multiplied by 1000 and truncated to int.
 *
 * The weight of an item is semposScorer->weight(item.first).
 *
 * NOTE(review): std::set_intersection requires both input ranges to be
 * sorted; presumably sentence_t is an ordered container -- confirm against
 * its typedef.
 */
vector<int> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
{
  // Items that occur in both the candidate and the reference.
  sentence_t intersection;
  set_intersection(cand.begin(), cand.end(), ref.begin(), ref.end(),
                   inserter(intersection, intersection.begin()));

  // Weights are accumulated as floats and only scaled/truncated at the end.
  float interSum = 0;
  for (sentence_t::const_iterator it = intersection.begin(); it != intersection.end(); ++it) {
    interSum += semposScorer->weight(it->first);
  }

  // ref is a const reference, so it must be walked with const_iterator;
  // a plain iterator here only compiles when the container happens to make
  // the two iterator types interconvertible.
  float refSum = 0;
  for (sentence_t::const_iterator it = ref.begin(); it != ref.end(); ++it) {
    refSum += semposScorer->weight(it->first);
  }

  // The stats vector holds ints, so the float sums are stored as fixed-point
  // values with three decimal digits.
  const int multCoeff = 1000;

  vector<int> stats(2);
  stats[0] = static_cast<int>(multCoeff * interSum);
  stats[1] = static_cast<int>(multCoeff * refSum);
  return stats;
}
|
|
|
|
|
|
|
|
float CapMicroOverlapping::calculateScore(const vector<int>& stats) const
|
|
|
|
{
|
2012-03-23 23:12:33 +04:00
|
|
|
if (stats.size() != 2) {
|
2012-03-23 20:52:24 +04:00
|
|
|
throw std::runtime_error("Size of stats vector has to be 2");
|
|
|
|
}
|
|
|
|
if (stats[1] == 0) return 1.0f;
|
|
|
|
return stats[0] / static_cast<float>(stats[1]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Computes per-class statistics used by CapMacroOverlapping::calculateScore.
 *
 * @param cand  Candidate sentence items.
 * @param ref   Reference sentence items.
 * @return A vector of 2 * kMaxNOC ints. For a class index c (taken from
 *         item.second):
 *           [2 * c]     accumulates weights of items in both cand and ref,
 *           [2 * c + 1] accumulates weights of all items in ref.
 *         Each weight is semposScorer->weight(item.first) multiplied by 1000.
 *
 * The class index is used directly as an array index without a range check;
 * presumably callers guarantee 0 <= item.second < kMaxNOC -- confirm.
 */
vector<int> CapMacroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
{
  const int multCoeff = 1000;

  // Items shared by the candidate and the reference.
  sentence_t intersection;
  set_intersection(cand.begin(), cand.end(), ref.begin(), ref.end(),
                   inserter(intersection, intersection.begin()));

  // Two zero-initialised slots per class: matched weight, reference weight.
  vector<int> stats(2 * kMaxNOC, 0);

  for (sentence_t::const_iterator item = intersection.begin(); item != intersection.end(); ++item) {
    const int category = item->second;
    float w = semposScorer->weight(item->first);
    // The sum is formed in floating point and truncated back into the int
    // slot on every addition.
    stats[2 * category] += w * multCoeff;
  }

  for (sentence_t::const_iterator item = ref.begin(); item != ref.end(); ++item) {
    const int category = item->second;
    float w = semposScorer->weight(item->first);
    stats[2 * category + 1] += w * multCoeff;
  }

  return stats;
}
|
|
|
|
|
|
|
|
float CapMacroOverlapping::calculateScore(const vector<int>& stats) const
|
|
|
|
{
|
|
|
|
if (stats.size() != 2 * kMaxNOC) {
|
|
|
|
// TODO: Add some comments. The number "38" looks like a magic number.
|
|
|
|
throw std::runtime_error("Size of stats vector has to be 38");
|
|
|
|
}
|
|
|
|
|
|
|
|
int n = 0;
|
|
|
|
float sum = 0;
|
|
|
|
for (int i = 0; i < kMaxNOC; ++i) {
|
|
|
|
int clipped = stats[2 * i];
|
|
|
|
int refsize = stats[2 * i + 1];
|
|
|
|
if (refsize > 0) {
|
2012-03-23 23:12:33 +04:00
|
|
|
sum += clipped / static_cast<float>(refsize);
|
2012-03-23 20:52:24 +04:00
|
|
|
++n;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (n == 0) return 1;
|
|
|
|
return sum / n;
|
|
|
|
}
|
2012-06-30 23:23:45 +04:00
|
|
|
|
|
|
|
}
|