mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
Merge branch 'master' of github.com:moses-smt/mosesdecoder
This commit is contained in:
commit
04a717be2b
161
mert/InterpolatedScorer.cpp
Normal file
161
mert/InterpolatedScorer.cpp
Normal file
@ -0,0 +1,161 @@
|
||||
#include "Scorer.h"
|
||||
#include "ScorerFactory.h"
|
||||
#include "InterpolatedScorer.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
/**
 * Build an interpolated scorer out of a comma-separated list of scorer types.
 *
 * name    list of scorer types, e.g. "HAMMING,BLEU". One sub-scorer is
 *         requested from ScorerFactory::getScorer for every entry; all of
 *         them receive the same config string.
 * config  configuration string. Its optional "weights" entry holds the
 *         '+'-separated interpolation weights, e.g. "0.25+0.75".
 *
 * When no weights are configured every sub-scorer gets the uniform weight
 * 1 / (number of scorers). Configured weights are rescaled so they sum to 1.
 *
 * Throws runtime_error when no scorer could be created, when the number of
 * weights differs from the number of scorers, or when the configured weights
 * sum to zero (they could not be rescaled without dividing by zero).
 */
InterpolatedScorer::InterpolatedScorer (const string& name, const string& config): Scorer(name,config)
{
  // name would be: HAMMING,BLEU or similar
  // The loop relies on getNextPound removing the token it extracts from
  // `scorers`; otherwise it would never terminate.
  string scorers = name;
  while (scorers.length() > 0) {
    string scorertype = "";
    getNextPound(scorers,scorertype,",");
    Scorer *theScorer=ScorerFactory::getScorer(scorertype,config);
    _scorers.push_back(theScorer);
  }
  if (_scorers.size() == 0) {
    throw runtime_error("There are no scorers");
  }
  cerr << "Number of scorers: " << _scorers.size() << endl;

  //TODO debug this
  string wtype = getConfig("weights","");
  if (wtype.length() == 0) {
    // Default weights are uniform, i.e. two scorers get 0.5 each.
    const float weight = 1.0/_scorers.size();
    _scorerWeights.assign(_scorers.size(), weight);
  } else {
    float tot=0;
    while (wtype.length() > 0) {
      string scoreweight = "";
      getNextPound(wtype,scoreweight,"+");
      // atof yields 0 for text it cannot parse; such input is caught by the
      // zero-sum check below only if every weight is affected.
      float weight = atof(scoreweight.c_str());
      _scorerWeights.push_back(weight);
      tot += weight;
    }

    // Validate the count before touching the values so that a malformed
    // weight list is reported as such.
    if (_scorers.size() != _scorerWeights.size()) {
      throw runtime_error("The number of weights does not equal the number of scorers!");
    }

    // Rescaling divides by the sum; a zero sum would turn every weight into
    // inf or NaN and silently corrupt all later scores.
    if (tot == 0) {
      throw runtime_error("The weights of the interpolated scorers sum to zero");
    }

    // Weights should add up to 1.
    if (tot != float(1)) {
      for (vector<float>::iterator it = _scorerWeights.begin(); it != _scorerWeights.end(); ++it) {
        *it /= tot;
      }
    }
  }

  cerr << "The weights for the interpolated scorers are: " << endl;
  for (vector<float>::iterator it = _scorerWeights.begin(); it != _scorerWeights.end(); ++it) {
    cerr << *it << " " ;
  }
  cerr <<endl;
}
|
||||
|
||||
/**
 * Store the combined score data and hand every sub-scorer its own slice.
 *
 * Each ScoreStats in `data` is treated as the concatenation of the
 * statistics of all sub-scorers, in the order of _scorers. For every
 * sub-scorer a new ScoreData is built that contains, for each array and each
 * entry of `data`, only the NumberOfScores() statistics belonging to that
 * sub-scorer; the result is passed to the sub-scorer's setScoreData.
 */
void InterpolatedScorer::setScoreData(ScoreData* data)
{
  m_score_data = data;

  // Position of the first statistic that belongs to the current sub-scorer.
  size_t offset = 0;

  for (vector<Scorer*>::iterator scorerIt = _scorers.begin(); scorerIt != _scorers.end(); ++scorerIt) {
    Scorer* const subScorer = *scorerIt;
    int statCount = subScorer->NumberOfScores();

    // NOTE(review): this allocation is passed to the sub-scorer and is not
    // released anywhere in this function - confirm who owns it.
    ScoreData* subData = new ScoreData(*subScorer);

    for (size_t arrayIdx = 0; arrayIdx < data->size(); ++arrayIdx) {
      ScoreArray fullArray = data->get(arrayIdx);
      ScoreArray slicedArray;

      const size_t entryCount = fullArray.size();
      for (size_t entryIdx = 0; entryIdx < entryCount; ++entryIdx) {
        ScoreStats fullStats = data->get(arrayIdx, entryIdx);
        ScoreStats slicedStats;

        // Copy the statistics in [offset, offset + statCount).
        const size_t sliceEnd = size_t(statCount + offset);
        for (size_t statIdx = offset; statIdx < sliceEnd; ++statIdx) {
          ScoreStatsType value = fullStats.get(statIdx);
          slicedStats.add(value);
        }
        slicedArray.add(slicedStats);
      }

      // The sliced array is labelled with the decimal position of its source.
      std::stringstream indexText;
      indexText << arrayIdx;
      std::string indexLabel = indexText.str();
      slicedArray.setIndex(indexLabel);

      subData->add(slicedArray);
    }

    subScorer->setScoreData(subData);
    offset += statCount;
  }
}
|
||||
|
||||
|
||||
/** The interpolated scorer calls a vector of scorers and combines them with
|
||||
weights **/
|
||||
void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
|
||||
statscores_t& scores) const
|
||||
{
|
||||
|
||||
//cout << "*******InterpolatedScorer::score" << endl;
|
||||
size_t scorerNum = 0;
|
||||
for (vector<Scorer*>::const_iterator itsc = _scorers.begin(); itsc!=_scorers.end(); itsc++) {
|
||||
//int numScores = (*itsc)->NumberOfScores();
|
||||
statscores_t tscores;
|
||||
(*itsc)->score(candidates,diffs,tscores);
|
||||
size_t inc = 0;
|
||||
for (statscores_t::iterator itstatsc = tscores.begin(); itstatsc!=tscores.end(); itstatsc++) {
|
||||
//cout << "Scores " << (*itstatsc) << endl;
|
||||
float weight = _scorerWeights[scorerNum];
|
||||
if (weight == 0) {
|
||||
stringstream msg;
|
||||
msg << "No weights for scorer" << scorerNum ;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
if (scorerNum == 0) {
|
||||
scores.push_back(weight * (*itstatsc));
|
||||
} else {
|
||||
scores[inc] += weight * (*itstatsc);
|
||||
}
|
||||
//cout << "Scorer:" << scorerNum << " scoreNum:" << inc << " score: " << (*itstatsc) << " weight:" << weight << endl;
|
||||
inc++;
|
||||
|
||||
}
|
||||
scorerNum++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
||||
{
|
||||
for (vector<Scorer *>::iterator itsc = _scorers.begin(); itsc!=_scorers.end(); itsc++) {
|
||||
(*itsc)->setReferenceFiles(referenceFiles);
|
||||
}
|
||||
}
|
||||
|
||||
void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
|
||||
{
|
||||
stringstream buff;
|
||||
int i=0;
|
||||
for (vector<Scorer*>::iterator itsc = _scorers.begin(); itsc!=_scorers.end(); itsc++) {
|
||||
ScoreStats tempEntry;
|
||||
(*itsc)->prepareStats(sid, text, tempEntry);
|
||||
if (i > 0) buff << " ";
|
||||
buff << tempEntry;
|
||||
i++;
|
||||
}
|
||||
//cout << " Scores for interpolated: " << buff << endl;
|
||||
string str = buff.str();
|
||||
entry.set(str);
|
||||
}
|
||||
|
48
mert/InterpolatedScorer.h
Normal file
48
mert/InterpolatedScorer.h
Normal file
@ -0,0 +1,48 @@
|
||||
#ifndef __INTERPOLATED_SCORER_H__
|
||||
#define __INTERPOLATED_SCORER_H__
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "Types.h"
|
||||
#include "ScoreData.h"
|
||||
#include "Scorer.h"
|
||||
|
||||
/**
|
||||
* Class that includes other scorers eg.
|
||||
* Interpolated HAMMING and BLEU scorer **/
|
||||
class InterpolatedScorer : public Scorer
|
||||
{
|
||||
|
||||
public:
|
||||
// name would be: "HAMMING,BLEU" or similar
|
||||
InterpolatedScorer(const string& name, const string& config);
|
||||
virtual ~InterpolatedScorer() {};
|
||||
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
|
||||
statscores_t& scores) const;
|
||||
|
||||
void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
virtual size_t NumberOfScores() const {
|
||||
size_t sz=0;
|
||||
for (vector<Scorer*>::const_iterator itsc = _scorers.begin(); itsc < _scorers.end(); itsc++) {
|
||||
sz += (*itsc)->NumberOfScores();
|
||||
}
|
||||
return sz;
|
||||
};
|
||||
|
||||
virtual void setScoreData(ScoreData* data);
|
||||
|
||||
protected:
|
||||
vector<Scorer*> _scorers;
|
||||
vector<float> _scorerWeights;
|
||||
};
|
||||
|
||||
#endif //__INTERPOLATED_SCORER_H
|
@ -12,6 +12,7 @@ FeatureStats.cpp FeatureArray.cpp FeatureData.cpp
|
||||
FeatureDataIterator.cpp
|
||||
Data.cpp
|
||||
BleuScorer.cpp
|
||||
InterpolatedScorer.cpp
|
||||
Point.cpp
|
||||
PerScorer.cpp
|
||||
Scorer.cpp
|
||||
|
@ -24,6 +24,11 @@ public:
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
|
||||
virtual size_t NumberOfScores() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void whoami() const {
|
||||
cerr << "I AM MergeScorer" << endl;
|
||||
}
|
||||
|
@ -28,10 +28,7 @@ class Scorer
|
||||
/**
|
||||
* Return the number of statistics needed for the computation of the score.
|
||||
*/
|
||||
virtual size_t NumberOfScores() const {
|
||||
cerr << "Scorer: 0" << endl;
|
||||
return 0;
|
||||
}
|
||||
virtual size_t NumberOfScores() const = 0;
|
||||
|
||||
/**
|
||||
* Set the reference files. This must be called before prepareStats().
|
||||
@ -57,7 +54,9 @@ class Scorer
|
||||
* applying each in turn, and calculating a new score each time.
|
||||
*/
|
||||
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
|
||||
statscores_t& scores) const {
|
||||
statscores_t& scores) const = 0;
|
||||
/*
|
||||
{
|
||||
//dummy impl
|
||||
if (!m_score_data) {
|
||||
throw runtime_error("score data not loaded");
|
||||
@ -67,6 +66,7 @@ class Scorer
|
||||
scores.push_back(0);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Calculate the score of the sentences corresponding to the list of candidate
|
||||
@ -93,7 +93,7 @@ class Scorer
|
||||
/**
|
||||
* Set the score data, prior to scoring.
|
||||
*/
|
||||
void setScoreData(ScoreData* data) {
|
||||
virtual void setScoreData(ScoreData* data) {
|
||||
m_score_data = data;
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "TerScorer.h"
|
||||
#include "CderScorer.h"
|
||||
#include "MergeScorer.h"
|
||||
#include "InterpolatedScorer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -32,6 +33,11 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config) {
|
||||
} else if (type == "MERGE") {
|
||||
return (MergeScorer*) new MergeScorer(config);
|
||||
} else {
|
||||
throw runtime_error("Unknown scorer type: " + type);
|
||||
if (type.find(',') != string::npos) {
|
||||
return new InterpolatedScorer(type, config);
|
||||
}
|
||||
else {
|
||||
throw runtime_error("Unknown scorer type: " + type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -138,10 +138,19 @@ void usage()
|
||||
cerr << "[--help|-h] print this message and exit" << endl;
|
||||
cerr << endl;
|
||||
cerr << "Evaluator is able to compute more metrics at once. To do this," << endl;
|
||||
cerr << "separate scorers with semicolon (note that comma is used to separate" << endl;
|
||||
cerr << "scorers in the interpolated scorer)." << endl;
|
||||
cerr << "specify more --sctype arguments. You can also specify more --scconfig strings." << endl;
|
||||
cerr << endl;
|
||||
cerr << "If you specify only one metric and one candidate file, only the final score" << endl;
|
||||
cerr << "The example below prints BLEU score, PER score and interpolated" << endl;
|
||||
cerr << "score of CDER and PER with the given weights." << endl;
|
||||
cerr << endl;
|
||||
cerr << "./evaluator \\" << endl;
|
||||
cerr << "\t--sctype BLEU --scconfig reflen:closest \\" << endl;
|
||||
cerr << "\t--sctype PER \\" << endl;
|
||||
cerr << "\t--sctype CDER,PER --scconfig weights:0.25+0.75 \\" << endl;
|
||||
cerr << "\t--candidate CANDIDATE \\" << endl;
|
||||
cerr << "\t--reference REFERENCE" << endl;
|
||||
cerr << endl;
|
||||
cerr << "If you specify only one scorer and one candidate file, only the final score" << endl;
|
||||
cerr << "will be printed to stdout. Otherwise each line will contain metric name" << endl;
|
||||
cerr << "and/or filename and the final score. Since most of the metrics prints some" << endl;
|
||||
cerr << "debuging info, consider redirecting stderr to /dev/null." << endl;
|
||||
@ -161,8 +170,8 @@ static struct option long_options[] = {
|
||||
|
||||
// Options used in evaluator.
|
||||
struct ProgramOption {
|
||||
string scorer_type;
|
||||
string scorer_config;
|
||||
vector<string> scorer_types;
|
||||
vector<string> scorer_configs;
|
||||
string reference;
|
||||
string candidate;
|
||||
int bootstrap;
|
||||
@ -170,9 +179,7 @@ struct ProgramOption {
|
||||
bool has_seed;
|
||||
|
||||
ProgramOption()
|
||||
: scorer_type("BLEU"),
|
||||
scorer_config(""),
|
||||
reference(""),
|
||||
: reference(""),
|
||||
candidate(""),
|
||||
bootstrap(0),
|
||||
seed(0),
|
||||
@ -182,13 +189,16 @@ struct ProgramOption {
|
||||
void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
|
||||
int c;
|
||||
int option_index;
|
||||
int last_scorer_index = -1;
|
||||
while ((c = getopt_long(argc, argv, "s:c:R:C:b:r:h", long_options, &option_index)) != -1) {
|
||||
switch(c) {
|
||||
case 's':
|
||||
opt->scorer_type = string(optarg);
|
||||
opt->scorer_types.push_back(string(optarg));
|
||||
opt->scorer_configs.push_back(string(""));
|
||||
last_scorer_index++;
|
||||
break;
|
||||
case 'c':
|
||||
opt->scorer_config = string(optarg);
|
||||
opt->scorer_configs[last_scorer_index] = string(optarg);
|
||||
break;
|
||||
case 'R':
|
||||
opt->reference = string(optarg);
|
||||
@ -207,6 +217,13 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
|
||||
usage();
|
||||
}
|
||||
}
|
||||
|
||||
// Add default scorer if no scorer provided
|
||||
if (opt->scorer_types.size() == 0)
|
||||
{
|
||||
opt->scorer_types.push_back(string("BLEU"));
|
||||
opt->scorer_configs.push_back(string(""));
|
||||
}
|
||||
}
|
||||
|
||||
void InitSeed(const ProgramOption *opt) {
|
||||
@ -236,7 +253,6 @@ int main(int argc, char** argv)
|
||||
try {
|
||||
vector<string> refFiles;
|
||||
vector<string> candFiles;
|
||||
vector<string> scorerTypes;
|
||||
|
||||
if (option.reference.length() == 0) throw runtime_error("You have to specify at least one reference file.");
|
||||
split(option.reference, ',', refFiles);
|
||||
@ -244,17 +260,14 @@ int main(int argc, char** argv)
|
||||
if (option.candidate.length() == 0) throw runtime_error("You have to specify at least one candidate file.");
|
||||
split(option.candidate, ',', candFiles);
|
||||
|
||||
if (option.scorer_type.length() == 0) throw runtime_error("You have to specify at least one scorer.");
|
||||
split(option.scorer_type, ';', scorerTypes);
|
||||
|
||||
if (candFiles.size() > 1) g_has_more_files = true;
|
||||
if (scorerTypes.size() > 1) g_has_more_scorers = true;
|
||||
if (option.scorer_types.size() > 1) g_has_more_scorers = true;
|
||||
|
||||
for (vector<string>::const_iterator fileIt = candFiles.begin(); fileIt != candFiles.end(); ++fileIt)
|
||||
{
|
||||
for (vector<string>::const_iterator scorerIt = scorerTypes.begin(); scorerIt != scorerTypes.end(); ++scorerIt)
|
||||
for (size_t i = 0; i < option.scorer_types.size(); i++)
|
||||
{
|
||||
g_scorer = ScorerFactory::getScorer(*scorerIt, option.scorer_config);
|
||||
g_scorer = ScorerFactory::getScorer(option.scorer_types[i], option.scorer_configs[i]);
|
||||
g_scorer->setReferenceFiles(refFiles);
|
||||
EvaluatorUtil::evaluate(*fileIt, option.bootstrap);
|
||||
delete g_scorer;
|
||||
|
Loading…
Reference in New Issue
Block a user