mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
Remove unnecessary headers; prefix private members with "m_".
- Add const. - Add a virtual destructor to abstract class. - Add some TODOs to some constant values. Create files for "overlapping" classes.
This commit is contained in:
parent
3319805102
commit
5e5f500254
@ -18,6 +18,7 @@ FeatureDataIterator.cpp
|
|||||||
Data.cpp
|
Data.cpp
|
||||||
BleuScorer.cpp
|
BleuScorer.cpp
|
||||||
SemposScorer.cpp
|
SemposScorer.cpp
|
||||||
|
SemposOverlapping.cpp
|
||||||
InterpolatedScorer.cpp
|
InterpolatedScorer.cpp
|
||||||
Point.cpp
|
Point.cpp
|
||||||
PerScorer.cpp
|
PerScorer.cpp
|
||||||
|
90
mert/SemposOverlapping.cpp
Normal file
90
mert/SemposOverlapping.cpp
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
#include "SemposOverlapping.h"

#include <algorithm>
#include <iterator>
#include <sstream>
#include <stdexcept>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
SemposOverlapping* g_overlapping = NULL;
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
SemposOverlapping* SemposOverlappingFactory::GetOverlapping(const string& str) {
|
||||||
|
if (str == "cap-micro") {
|
||||||
|
return new CapMicroOverlapping;
|
||||||
|
} else if (str == "cap-macro") {
|
||||||
|
return new CapMacroOverlapping;
|
||||||
|
} else {
|
||||||
|
throw runtime_error("Unknown overlapping: " + str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void SemposOverlappingFactory::SetOverlapping(SemposOverlapping* ovr) {
|
||||||
|
g_overlapping = ovr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the two statistics of the cap-micro formula:
 *   [0] number of items shared by candidate and reference,
 *   [1] number of items in the reference.
 */
vector<int> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
{
  // Items occurring in both multisets.
  sentence_t common;
  set_intersection(cand.begin(), cand.end(), ref.begin(), ref.end(),
                   inserter(common, common.begin()));

  vector<int> counts;
  counts.reserve(2);
  counts.push_back(static_cast<int>(common.size()));
  counts.push_back(static_cast<int>(ref.size()));
  return counts;
}
|
||||||
|
|
||||||
|
float CapMicroOverlapping::calculateScore(const vector<int>& stats) const
|
||||||
|
{
|
||||||
|
if (stats.size() != 2)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Size of stats vector has to be 2");
|
||||||
|
}
|
||||||
|
if (stats[1] == 0) return 1.0f;
|
||||||
|
return stats[0] / static_cast<float>(stats[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
vector<int> CapMacroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
|
||||||
|
{
|
||||||
|
vector<int> stats(2 * kMaxNOC);
|
||||||
|
sentence_t intersection;
|
||||||
|
|
||||||
|
set_intersection(cand.begin(), cand.end(), ref.begin(), ref.end(),
|
||||||
|
inserter(intersection, intersection.begin()));
|
||||||
|
|
||||||
|
for (int i = 0; i < 2 * kMaxNOC; ++i) stats[i] = 0;
|
||||||
|
for (sentence_t::const_iterator it = intersection.begin(); it != intersection.end(); ++it) {
|
||||||
|
const int sempos = it->second;
|
||||||
|
++stats[2 * sempos];
|
||||||
|
}
|
||||||
|
for (sentence_t::const_iterator it = ref.begin(); it != ref.end(); ++it) {
|
||||||
|
const int sempos = it->second;
|
||||||
|
++stats[2 * sempos + 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats;
|
||||||
|
}
|
||||||
|
|
||||||
|
float CapMacroOverlapping::calculateScore(const vector<int>& stats) const
|
||||||
|
{
|
||||||
|
if (stats.size() != 2 * kMaxNOC) {
|
||||||
|
// TODO: Add some comments. The number "38" looks like a magic number.
|
||||||
|
throw std::runtime_error("Size of stats vector has to be 38");
|
||||||
|
}
|
||||||
|
|
||||||
|
int n = 0;
|
||||||
|
float sum = 0;
|
||||||
|
for (int i = 0; i < kMaxNOC; ++i) {
|
||||||
|
int clipped = stats[2 * i];
|
||||||
|
int refsize = stats[2 * i + 1];
|
||||||
|
if (refsize > 0) {
|
||||||
|
sum += clipped / (float) refsize;
|
||||||
|
++n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (n == 0) return 1;
|
||||||
|
return sum / n;
|
||||||
|
}
|
86
mert/SemposOverlapping.h
Normal file
86
mert/SemposOverlapping.h
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
#ifndef MERT_SEMPOSOVERLAPPING_H_
|
||||||
|
#define MERT_SEMPOSOVERLAPPING_H_
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
// Upper bound on the number of distinct sempos classes; it also fixes the
// length (2 * kMaxNOC) of the statistics vector used by CapMacroOverlapping.
// TODO: document why the limit is 30.
const int kMaxNOC = 30;

// A sentence as pairs of strings, in input order.
typedef std::pair<std::string, std::string> str_item_t;
typedef std::vector<str_item_t> str_sentence_t;
typedef str_sentence_t::const_iterator str_sentence_it;

// A sentence as a multiset of integer pairs; the second member of each pair
// is used as the class index by CapMacroOverlapping.
typedef std::pair<int,int> item_t;
typedef std::multiset<item_t> sentence_t;
typedef sentence_t::const_iterator sentence_it;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An interface for classes representing overlapping formulas
|
||||||
|
*/
|
||||||
|
class SemposOverlapping
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~SemposOverlapping() {}
|
||||||
|
virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref) = 0;
|
||||||
|
virtual float calculateScore(const std::vector<int>& stats) const = 0;
|
||||||
|
virtual std::size_t NumberOfScores() const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class SemposOverlappingFactory {
|
||||||
|
public:
|
||||||
|
static SemposOverlapping* GetOverlapping(const std::string& str);
|
||||||
|
|
||||||
|
// dependency injection for unit testing.
|
||||||
|
static void SetOverlapping(SemposOverlapping* ovr);
|
||||||
|
|
||||||
|
private:
|
||||||
|
SemposOverlappingFactory() {}
|
||||||
|
~SemposOverlappingFactory() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Overlapping proposed by (Bojar and Machacek, WMT 2011)
|
||||||
|
*
|
||||||
|
* Please refer to the paper for details:
|
||||||
|
* http://aclweb.org/anthology-new/W/W11/W11-2108.pdf
|
||||||
|
*/
|
||||||
|
class CapMicroOverlapping : public SemposOverlapping
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CapMicroOverlapping() {}
|
||||||
|
~CapMicroOverlapping() {}
|
||||||
|
|
||||||
|
virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
|
||||||
|
virtual float calculateScore(const std::vector<int>& stats) const;
|
||||||
|
virtual std::size_t NumberOfScores() const { return 2; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// no copying allowed.
|
||||||
|
CapMicroOverlapping(const CapMicroOverlapping&);
|
||||||
|
CapMicroOverlapping& operator=(const CapMicroOverlapping&);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Overlapping proposed by (Kos and Bojar, 2009)
|
||||||
|
*/
|
||||||
|
class CapMacroOverlapping : public SemposOverlapping
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
CapMacroOverlapping() {}
|
||||||
|
~CapMacroOverlapping() {}
|
||||||
|
|
||||||
|
virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
|
||||||
|
virtual float calculateScore(const std::vector<int>& stats) const;
|
||||||
|
virtual std::size_t NumberOfScores() const { return kMaxNOC * 2; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
// no copying allowed.
|
||||||
|
CapMacroOverlapping(const CapMacroOverlapping&);
|
||||||
|
CapMacroOverlapping& operator=(const CapMacroOverlapping&);
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // MERT_SEMPOSOVERLAPPING_H_
|
@ -1,39 +1,30 @@
|
|||||||
#include <sys/types.h>
|
#include "SemposScorer.h"
|
||||||
#include <unistd.h>
|
|
||||||
#include <sstream>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <algorithm>
|
|
||||||
#include <set>
|
|
||||||
#include <map>
|
|
||||||
#include <iterator>
|
|
||||||
|
|
||||||
#include "SemposScorer.h"
|
|
||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
|
|
||||||
SemposScorer::SemposScorer(const string& config)
|
using namespace std;
|
||||||
: StatisticsBasedScorer("SEMPOS",config),
|
|
||||||
debug(false)
|
|
||||||
{
|
|
||||||
string debugSwitch = getConfig("debug", "0");
|
|
||||||
if (debugSwitch == "1") debug = true;
|
|
||||||
|
|
||||||
string overlapping = getConfig("overlapping", "cap-micro");
|
|
||||||
if (overlapping == "cap-micro") {
|
|
||||||
ovr = new CapMicroOverlapping();
|
|
||||||
} else if (overlapping == "cap-macro") {
|
|
||||||
ovr = new CapMacroOverlapping();
|
|
||||||
} else {
|
|
||||||
throw runtime_error("Unknown overlapping: " + overlapping);
|
|
||||||
}
|
|
||||||
|
|
||||||
semposMap.clear();
|
SemposScorer::SemposScorer(const string& config)
|
||||||
|
: StatisticsBasedScorer("SEMPOS", config),
|
||||||
|
m_ovr(SemposOverlappingFactory::GetOverlapping(getConfig("overlapping", "cap-micro"))),
|
||||||
|
m_enable_debug(false)
|
||||||
|
{
|
||||||
|
const string& debugSwitch = getConfig("debug", "0");
|
||||||
|
if (debugSwitch == "1") m_enable_debug = true;
|
||||||
|
|
||||||
|
m_semposMap.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SemposScorer::~SemposScorer() {}
|
||||||
|
|
||||||
void SemposScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
void SemposScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
||||||
{
|
{
|
||||||
//make sure reference data is clear
|
//make sure reference data is clear
|
||||||
ref_sentences.clear();
|
m_ref_sentences.clear();
|
||||||
|
|
||||||
//load reference data
|
//load reference data
|
||||||
for (size_t rid = 0; rid < referenceFiles.size(); ++rid) {
|
for (size_t rid = 0; rid < referenceFiles.size(); ++rid) {
|
||||||
@ -41,10 +32,10 @@ void SemposScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
|||||||
if (!refin) {
|
if (!refin) {
|
||||||
throw runtime_error("Unable to open: " + referenceFiles[rid]);
|
throw runtime_error("Unable to open: " + referenceFiles[rid]);
|
||||||
}
|
}
|
||||||
ref_sentences.push_back(vector<sentence_t>());
|
m_ref_sentences.push_back(vector<sentence_t>());
|
||||||
string line;
|
string line;
|
||||||
while (getline(refin,line)) {
|
while (getline(refin,line)) {
|
||||||
line = applyFactors(line);
|
line = applyFactors(line);
|
||||||
|
|
||||||
str_sentence_t sentence;
|
str_sentence_t sentence;
|
||||||
splitSentence(line, sentence);
|
splitSentence(line, sentence);
|
||||||
@ -52,68 +43,58 @@ void SemposScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
|||||||
sentence_t encodedSentence;
|
sentence_t encodedSentence;
|
||||||
encodeSentence(sentence, encodedSentence);
|
encodeSentence(sentence, encodedSentence);
|
||||||
|
|
||||||
ref_sentences[rid].push_back(encodedSentence);
|
m_ref_sentences[rid].push_back(encodedSentence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SemposScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
|
void SemposScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
|
||||||
{
|
{
|
||||||
vector<int> stats;
|
vector<ScoreStatsType> stats;
|
||||||
|
|
||||||
string sentence = applyFactors(text);
|
|
||||||
|
|
||||||
|
const string& sentence = applyFactors(text);
|
||||||
str_sentence_t splitCandSentence;
|
str_sentence_t splitCandSentence;
|
||||||
splitSentence(sentence, splitCandSentence);
|
splitSentence(sentence, splitCandSentence);
|
||||||
|
|
||||||
sentence_t encodedCandSentence;
|
sentence_t encodedCandSentence;
|
||||||
encodeSentence(splitCandSentence, encodedCandSentence);
|
encodeSentence(splitCandSentence, encodedCandSentence);
|
||||||
|
|
||||||
if (ref_sentences.size() == 1) {
|
if (m_ref_sentences.size() == 1) {
|
||||||
stats = ovr->prepareStats(encodedCandSentence, ref_sentences[0][sid]);
|
stats = m_ovr->prepareStats(encodedCandSentence, m_ref_sentences[0][sid]);
|
||||||
} else {
|
} else {
|
||||||
float max = -1;
|
float max = -1.0f;
|
||||||
for (size_t rid = 0; rid < ref_sentences.size(); ++rid) {
|
for (size_t rid = 0; rid < m_ref_sentences.size(); ++rid) {
|
||||||
vector<int> tmp = ovr->prepareStats(encodedCandSentence, ref_sentences[rid][sid]);
|
const vector<ScoreStatsType>& tmp = m_ovr->prepareStats(encodedCandSentence, m_ref_sentences[rid][sid]);
|
||||||
if (ovr->calculateScore(tmp) > max) {
|
if (m_ovr->calculateScore(tmp) > max) {
|
||||||
stats = tmp;
|
stats = tmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
entry.set(stats);
|
||||||
stringstream sout;
|
|
||||||
copy(stats.begin(),stats.end(),ostream_iterator<int>(sout," "));
|
|
||||||
string stats_str = sout.str();
|
|
||||||
entry.set(stats_str);
|
|
||||||
}
|
|
||||||
|
|
||||||
float SemposScorer::calculateScore(const vector<int>& comps) const
|
|
||||||
{
|
|
||||||
return ovr->calculateScore(comps);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void SemposScorer::splitSentence(const string& sentence, str_sentence_t& splitSentence)
|
void SemposScorer::splitSentence(const string& sentence, str_sentence_t& splitSentence)
|
||||||
{
|
{
|
||||||
splitSentence.clear();
|
splitSentence.clear();
|
||||||
|
|
||||||
vector<string> tokens;
|
vector<string> tokens;
|
||||||
split(sentence, ' ', tokens);
|
split(sentence, ' ', tokens);
|
||||||
for (vector<string>::iterator it = tokens.begin(); it != tokens.end(); ++it)
|
for (vector<string>::iterator it = tokens.begin(); it != tokens.end(); ++it)
|
||||||
{
|
{
|
||||||
vector<string> factors;
|
vector<string> factors;
|
||||||
split(*it, '|', factors);
|
split(*it, '|', factors);
|
||||||
if (factors.size() != 2) throw runtime_error("Sempos scorer accepts two factors (item|class)");
|
if (factors.size() != 2) throw runtime_error("Sempos scorer accepts two factors (item|class)");
|
||||||
string Item = factors[0];
|
const string& item = factors[0];
|
||||||
string Class = factors[1];
|
const string& klass = factors[1];
|
||||||
splitSentence.push_back(make_pair(Item, Class));
|
splitSentence.push_back(make_pair(item, klass));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SemposScorer::encodeSentence(const str_sentence_t& sentence, sentence_t& encodedSentence)
|
void SemposScorer::encodeSentence(const str_sentence_t& sentence, sentence_t& encodedSentence)
|
||||||
{
|
{
|
||||||
for (str_sentence_it it = sentence.begin(); it != sentence.end(); ++it) {
|
for (str_sentence_it it = sentence.begin(); it != sentence.end(); ++it) {
|
||||||
int tlemma = encodeString(it->first);
|
const int tlemma = encodeString(it->first);
|
||||||
int sempos = encodeSempos(it->second);
|
const int sempos = encodeSempos(it->second);
|
||||||
if (sempos >= 0) {
|
if (sempos >= 0) {
|
||||||
encodedSentence.insert(make_pair(tlemma,sempos));
|
encodedSentence.insert(make_pair(tlemma,sempos));
|
||||||
}
|
}
|
||||||
@ -122,11 +103,11 @@ void SemposScorer::encodeSentence(const str_sentence_t& sentence, sentence_t& en
|
|||||||
|
|
||||||
int SemposScorer::encodeString(const string& str)
|
int SemposScorer::encodeString(const string& str)
|
||||||
{
|
{
|
||||||
encoding_it encoding = stringMap.find(str);
|
encoding_it encoding = m_stringMap.find(str);
|
||||||
int encoded_str;
|
int encoded_str;
|
||||||
if (encoding == stringMap.end()) {
|
if (encoding == m_stringMap.end()) {
|
||||||
encoded_str = (int)stringMap.size();
|
encoded_str = static_cast<int>(m_stringMap.size());
|
||||||
stringMap[str] = encoded_str;
|
m_stringMap[str] = encoded_str;
|
||||||
} else {
|
} else {
|
||||||
encoded_str = encoding->second;
|
encoded_str = encoding->second;
|
||||||
}
|
}
|
||||||
@ -136,15 +117,15 @@ int SemposScorer::encodeString(const string& str)
|
|||||||
int SemposScorer::encodeSempos(const string& sempos)
|
int SemposScorer::encodeSempos(const string& sempos)
|
||||||
{
|
{
|
||||||
if (sempos == "-") return -1;
|
if (sempos == "-") return -1;
|
||||||
encoding_it it = semposMap.find(sempos);
|
encoding_it it = m_semposMap.find(sempos);
|
||||||
if (it == semposMap.end())
|
if (it == m_semposMap.end())
|
||||||
{
|
{
|
||||||
if (semposMap.size() == maxNOC)
|
if (m_semposMap.size() == kMaxNOC)
|
||||||
{
|
{
|
||||||
throw std::runtime_error("Number of classes is greater than maxNOC");
|
throw std::runtime_error("Number of classes is greater than kMaxNOC");
|
||||||
}
|
}
|
||||||
int classNumber = semposMap.size();
|
const int classNumber = static_cast<int>(m_semposMap.size());
|
||||||
semposMap[sempos] = classNumber;
|
m_semposMap[sempos] = classNumber;
|
||||||
return classNumber;
|
return classNumber;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -152,70 +133,3 @@ int SemposScorer::encodeSempos(const string& sempos)
|
|||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SemposScorer::~SemposScorer()
|
|
||||||
{
|
|
||||||
delete ovr;
|
|
||||||
}
|
|
||||||
|
|
||||||
vector<int> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
|
|
||||||
{
|
|
||||||
vector<int> stats(2);
|
|
||||||
sentence_t intersection;
|
|
||||||
|
|
||||||
set_intersection(cand.begin(),cand.end(),ref.begin(),ref.end(), inserter(intersection, intersection.begin()));
|
|
||||||
|
|
||||||
stats[0] = intersection.size();
|
|
||||||
stats[1] = ref.size();
|
|
||||||
return stats;
|
|
||||||
}
|
|
||||||
|
|
||||||
float CapMicroOverlapping::calculateScore(const vector<int>& stats)
|
|
||||||
{
|
|
||||||
if (stats.size() != 2)
|
|
||||||
{
|
|
||||||
throw std::runtime_error("Size of stats vector has to be 2");
|
|
||||||
}
|
|
||||||
if (stats[1] == 0) return (float) 1;
|
|
||||||
return stats[0]/(float)stats[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
vector<int> CapMacroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
|
|
||||||
{
|
|
||||||
vector<int> stats(2*maxNOC);
|
|
||||||
sentence_t intersection;
|
|
||||||
|
|
||||||
set_intersection(cand.begin(),cand.end(),ref.begin(),ref.end(), inserter(intersection, intersection.begin()));
|
|
||||||
|
|
||||||
for (int i = 0; i < 2*maxNOC; ++i) stats[i]=0;
|
|
||||||
for (sentence_t::const_iterator it = intersection.begin(); it != intersection.end(); ++it) {
|
|
||||||
int sempos = it->second;
|
|
||||||
++stats[2*sempos];
|
|
||||||
}
|
|
||||||
for (sentence_t::const_iterator it = ref.begin(); it != ref.end(); ++it) {
|
|
||||||
int sempos = it->second;
|
|
||||||
++stats[2*sempos+1];
|
|
||||||
}
|
|
||||||
|
|
||||||
return stats;
|
|
||||||
}
|
|
||||||
|
|
||||||
float CapMacroOverlapping::calculateScore(const vector<int>& stats)
|
|
||||||
{
|
|
||||||
if (stats.size() != 2*maxNOC) throw std::runtime_error("Size of stats vector has to be 38");
|
|
||||||
|
|
||||||
int n = 0;
|
|
||||||
float sum = 0;
|
|
||||||
for (int i = 0; i < maxNOC; ++i) {
|
|
||||||
int clipped = stats[2*i];
|
|
||||||
int refsize = stats[2*i+1];
|
|
||||||
if (refsize > 0) {
|
|
||||||
sum += clipped / (float) refsize;
|
|
||||||
++n;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (n == 0) return 1;
|
|
||||||
return sum / n;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
@ -1,101 +1,60 @@
|
|||||||
#ifndef __SEMPOSSCORER_H__
|
#ifndef MERT_SEMPOSSCORER_H_
|
||||||
#define __SEMPOSSCORER_H__
|
#define MERT_SEMPOSSCORER_H_
|
||||||
|
|
||||||
#include <algorithm>
|
#include <map>
|
||||||
#include <cmath>
|
|
||||||
#include <iostream>
|
|
||||||
#include <iterator>
|
|
||||||
#include <set>
|
|
||||||
#include <sstream>
|
|
||||||
#include <stdexcept>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <limits.h>
|
#include <boost/scoped_ptr.hpp>
|
||||||
|
|
||||||
#include "Types.h"
|
#include "Types.h"
|
||||||
#include "ScoreData.h"
|
#include "ScoreData.h"
|
||||||
#include "Scorer.h"
|
#include "Scorer.h"
|
||||||
|
|
||||||
using namespace std;
|
// NOTE: This header should be included in .cpp file
|
||||||
|
// because SemposScorer wants to know what actual SemposOverlapping type is
|
||||||
|
// when we implement the scorer in .cpp file.
|
||||||
|
// However, currently SemposScorer uses a bunch of typedefs, which are
|
||||||
|
// used in SemposScorer as well as inherited SemposOverlapping classes.
|
||||||
|
#include "SemposOverlapping.h"
|
||||||
|
|
||||||
const int maxNOC = 30;
|
/**
|
||||||
|
* This class represents sempos based metrics.
|
||||||
typedef pair<string,string> str_item_t;
|
*/
|
||||||
typedef vector<str_item_t> str_sentence_t;
|
|
||||||
typedef str_sentence_t::const_iterator str_sentence_it;
|
|
||||||
|
|
||||||
typedef pair<int,int> item_t;
|
|
||||||
typedef multiset<item_t> sentence_t;
|
|
||||||
typedef sentence_t::const_iterator sentence_it;
|
|
||||||
|
|
||||||
// Base class for classes representing overlapping formulas
|
|
||||||
class SemposOverlapping
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref) = 0;
|
|
||||||
virtual float calculateScore(const vector<int>& stats) = 0;
|
|
||||||
virtual size_t NumberOfScores() const = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Overlapping proposed by (Bojar and Machacek,2011);
|
|
||||||
class CapMicroOverlapping : public SemposOverlapping
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
|
|
||||||
virtual float calculateScore(const vector<int>& stats);
|
|
||||||
virtual size_t NumberOfScores() const {
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//Overlapping proposed by (Bojar and Kos,2009)
|
|
||||||
class CapMacroOverlapping : public SemposOverlapping
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
|
|
||||||
virtual float calculateScore(const vector<int>& stats);
|
|
||||||
virtual size_t NumberOfScores() const {
|
|
||||||
return maxNOC*2;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// This class represents sempos based metrics
|
|
||||||
class SemposScorer: public StatisticsBasedScorer
|
class SemposScorer: public StatisticsBasedScorer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
SemposScorer(const string& config);
|
explicit SemposScorer(const std::string& config);
|
||||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
|
||||||
virtual void prepareStats(size_t sindex, const string& text, ScoreStats& entry);
|
|
||||||
|
|
||||||
virtual size_t NumberOfScores() const {
|
|
||||||
return ovr->NumberOfScores();
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
~SemposScorer();
|
~SemposScorer();
|
||||||
|
|
||||||
virtual float calculateScore(const vector<int>& comps) const;
|
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||||
|
virtual void prepareStats(std::size_t sindex, const std::string& text, ScoreStats& entry);
|
||||||
|
virtual std::size_t NumberOfScores() const { return m_ovr->NumberOfScores(); }
|
||||||
|
virtual float calculateScore(const std::vector<int>& comps) const {
|
||||||
|
return m_ovr->calculateScore(comps);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool EnableDebug() const { return m_enable_debug; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SemposOverlapping* ovr;
|
boost::scoped_ptr<SemposOverlapping> m_ovr;
|
||||||
vector<vector<sentence_t> > ref_sentences;
|
std::vector<std::vector<sentence_t> > m_ref_sentences;
|
||||||
|
|
||||||
typedef map<string, int> encoding_t;
|
typedef std::map<std::string, int> encoding_t;
|
||||||
typedef encoding_t::iterator encoding_it;
|
typedef encoding_t::iterator encoding_it;
|
||||||
|
|
||||||
encoding_t semposMap;
|
encoding_t m_semposMap;
|
||||||
encoding_t stringMap;
|
encoding_t m_stringMap;
|
||||||
|
bool m_enable_debug;
|
||||||
|
|
||||||
void splitSentence(const string& sentence, str_sentence_t& splitSentence);
|
void splitSentence(const std::string& sentence, str_sentence_t& splitSentence);
|
||||||
void encodeSentence(const str_sentence_t& sentence, sentence_t& encodedSentence);
|
void encodeSentence(const str_sentence_t& sentence, sentence_t& encodedSentence);
|
||||||
int encodeString(const string& str);
|
int encodeString(const std::string& str);
|
||||||
int encodeSempos(const string& sempos);
|
int encodeSempos(const std::string& sempos);
|
||||||
|
|
||||||
//no copy
|
// no copying allowed.
|
||||||
SemposScorer(const SemposScorer&);
|
SemposScorer(const SemposScorer&);
|
||||||
SemposScorer& operator=(const SemposScorer&);
|
SemposScorer& operator=(const SemposScorer&);
|
||||||
|
|
||||||
bool debug;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif //__BLEUSCORER_H
|
#endif // MERT_SEMPOSSCORER_H_
|
||||||
|
Loading…
Reference in New Issue
Block a user