Fix constructors of scorer classes and optimizer classes.

Using public const members is not a good idea.
They should be private members initialized by the constructors.
This commit is contained in:
Tetsuo Kiso 2011-11-12 10:16:31 +09:00
parent fdb83b0f6d
commit 43beb88df5
12 changed files with 130 additions and 115 deletions

View File

@ -1,6 +1,29 @@
#include "BleuScorer.h"
const int BleuScorer::LENGTH = 4;
// Builds a BLEU scorer from a scorer configuration string.
//
// The base class is constructed with the name "BLEU" and the given config,
// and kLENGTH is fixed to 4. The reference length strategy is then chosen
// from the "reflen" option, which must be one of "average", "shortest" or
// "closest"; any other value raises runtime_error.
// NOTE(review): "closest" is passed to getConfig as its second argument,
// presumably the fallback used when "reflen" is absent -- confirm against
// getConfig.
BleuScorer::BleuScorer(const string& config)
  : StatisticsBasedScorer("BLEU", config),
    kLENGTH(4),
    _refLengthStrategy(BLEU_CLOSEST)
{
  // Select the reference length strategy.
  static string reflenKey = "reflen";
  const string strategy = getConfig(reflenKey, "closest");

  if (strategy == "closest") {
    _refLengthStrategy = BLEU_CLOSEST;
  } else if (strategy == "shortest") {
    _refLengthStrategy = BLEU_SHORTEST;
  } else if (strategy == "average") {
    _refLengthStrategy = BLEU_AVERAGE;
  } else {
    throw runtime_error("Unknown reference length strategy: " + strategy);
  }
  // cerr << "Using reference length strategy: " << strategy << endl;
}
// Out-of-line destructor; its body is intentionally empty.
BleuScorer::~BleuScorer()
{
}
size_t BleuScorer::countNgrams(const string& line, counts_t& counts, unsigned int n)
{
@ -62,7 +85,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
throw runtime_error("File " + referenceFiles[i] + " has too many sentences");
}
counts_t counts;
size_t length = countNgrams(line,counts,LENGTH);
size_t length = countNgrams(line,counts,kLENGTH);
//for any counts larger than those already there, merge them in
for (counts_it ci = counts.begin(); ci != counts.end(); ++ci) {
counts_it oldcount_it = _refcounts[sid]->find(ci->first);
@ -99,8 +122,8 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
}
counts_t testcounts;
//stats for this line
vector<float> stats(LENGTH*2);;
size_t length = countNgrams(text,testcounts,LENGTH);
vector<float> stats(kLENGTH*2);;
size_t length = countNgrams(text,testcounts,kLENGTH);
//dump_counts(testcounts);
if (_refLengthStrategy == BLEU_SHORTEST) {
//cerr << reflengths.size() << " " << sid << endl;
@ -157,15 +180,15 @@ float BleuScorer::calculateScore(const vector<int>& comps)
//cerr << "BLEU: ";
//copy(comps.begin(),comps.end(), ostream_iterator<int>(cerr," "));
float logbleu = 0.0;
for (int i = 0; i < LENGTH; ++i) {
for (int i = 0; i < kLENGTH; ++i) {
if (comps[2*i] == 0) {
return 0.0;
}
logbleu += log(comps[2*i]) - log(comps[2*i+1]);
}
logbleu /= LENGTH;
float brevity = 1.0 - (float)comps[LENGTH*2]/comps[1];//reflength divided by test length
logbleu /= kLENGTH;
float brevity = 1.0 - (float)comps[kLENGTH*2]/comps[1];//reflength divided by test length
if (brevity < 0.0) {
logbleu += brevity;
}

View File

@ -27,33 +27,15 @@ enum BleuReferenceLengthStrategy { BLEU_AVERAGE, BLEU_SHORTEST, BLEU_CLOSEST };
class BleuScorer: public StatisticsBasedScorer
{
public:
explicit BleuScorer(const string& config = "") : StatisticsBasedScorer("BLEU",config),_refLengthStrategy(BLEU_CLOSEST) {
//configure regularisation
static string KEY_REFLEN = "reflen";
static string REFLEN_AVERAGE = "average";
static string REFLEN_SHORTEST = "shortest";
static string REFLEN_CLOSEST = "closest";
explicit BleuScorer(const string& config = "");
~BleuScorer();
string reflen = getConfig(KEY_REFLEN,REFLEN_CLOSEST);
if (reflen == REFLEN_AVERAGE) {
_refLengthStrategy = BLEU_AVERAGE;
} else if (reflen == REFLEN_SHORTEST) {
_refLengthStrategy = BLEU_SHORTEST;
} else if (reflen == REFLEN_CLOSEST) {
_refLengthStrategy = BLEU_CLOSEST;
} else {
throw runtime_error("Unknown reference length strategy: " + reflen);
}
// cerr << "Using reference length strategy: " << reflen << endl;
}
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
static const int LENGTH;
size_t NumberOfScores() {
// cerr << "BleuScorer: " << (2 * LENGTH + 1) << endl;
return (2 * LENGTH + 1);
return (2 * kLENGTH + 1);
}
@ -63,7 +45,6 @@ public:
private:
// no copying allowed
BleuScorer(const BleuScorer&);
~BleuScorer() {}
BleuScorer& operator=(const BleuScorer&);
//Used to construct the ngram map
@ -105,6 +86,8 @@ private:
}
cerr << endl;
}
const int kLENGTH;
BleuReferenceLengthStrategy _refLengthStrategy;
// data extracted from reference files

View File

@ -5,9 +5,9 @@
#include <algorithm>
CderScorer::CderScorer(const string& config)
: StatisticsBasedScorer("CDER",config)
{
}
: StatisticsBasedScorer("CDER",config) {}
CderScorer::~CderScorer() {}
void CderScorer::setReferenceFiles(const vector<string>& referenceFiles)
{

View File

@ -15,6 +15,8 @@ class CderScorer: public StatisticsBasedScorer
{
public:
explicit CderScorer(const string& config);
~CderScorer();
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
@ -46,7 +48,6 @@ private:
// no copying allowed
CderScorer(const CderScorer&);
~CderScorer() {}
CderScorer& operator=(const CderScorer&);
};

View File

@ -362,10 +362,8 @@ vector<statscore_t> Optimizer::GetIncStatScore(vector<unsigned> thefirst,vector<
}
float SimpleOptimizer::eps=0.0001;
statscore_t SimpleOptimizer::TrueRun(Point& P)const
{
statscore_t prevscore=0;
statscore_t bestscore=MIN_FLOAT;
Point best;
@ -417,7 +415,7 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const
P=best; //update the current vector with the best point on all line tested
if(verboselevel()>3)
cerr<<nrun<<"\t"<<P<<endl;
} while(bestscore-prevscore>eps);
} while(bestscore-prevscore>kEPS);
if(verboselevel()>2) {
cerr<<"end Powell Algo, nrun="<<nrun<<endl;
@ -427,8 +425,6 @@ statscore_t SimpleOptimizer::TrueRun(Point& P)const
return bestscore;
}
float RandomDirectionOptimizer::eps=0.0001;
statscore_t RandomDirectionOptimizer::TrueRun(Point& P)const
{
statscore_t prevscore=P.score;
@ -450,7 +446,7 @@ statscore_t RandomDirectionOptimizer::TrueRun(Point& P)const
cerr<<"\tending point: "<< P << " => " << score << endl;
}
if (score-prevscore > eps)
if (score-prevscore > kEPS)
nrun_no_change=0;
prevscore = score;
}

View File

@ -74,9 +74,9 @@ public:
class SimpleOptimizer: public Optimizer
{
private:
static float eps;
const float kEPS;
public:
SimpleOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom): Optimizer(dim, i2O, start,nrandom) {}
SimpleOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom): Optimizer(dim, i2O, start,nrandom), kEPS(0.0001) {}
virtual statscore_t TrueRun(Point&)const;
};
@ -86,9 +86,9 @@ public:
class RandomDirectionOptimizer: public Optimizer
{
private:
static float eps;
const float kEPS;
public:
RandomDirectionOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom): Optimizer(dim,i2O,start,nrandom) {}
RandomDirectionOptimizer(unsigned dim,vector<unsigned> i2O,vector<parameter_t> start,unsigned int nrandom): Optimizer(dim,i2O,start,nrandom), kEPS(0.0001) {}
virtual statscore_t TrueRun(Point&)const;
};

View File

@ -1,5 +1,9 @@
#include "PerScorer.h"
// Builds a PER scorer: forwards the name "PER" and the configuration string
// to the StatisticsBasedScorer base class and does nothing else.
PerScorer::PerScorer(const string& config)
  : StatisticsBasedScorer("PER", config)
{
}

// Out-of-line destructor; its body is intentionally empty.
PerScorer::~PerScorer()
{
}
void PerScorer::setReferenceFiles(const vector<string>& referenceFiles)
{

View File

@ -26,7 +26,9 @@ using namespace std;
class PerScorer: public StatisticsBasedScorer
{
public:
explicit PerScorer(const string& config = "") : StatisticsBasedScorer("PER",config) {}
explicit PerScorer(const string& config = "");
~PerScorer();
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
@ -46,7 +48,6 @@ public:
private:
// no copying allowed
PerScorer(const PerScorer&);
~PerScorer() {}
PerScorer& operator=(const PerScorer&);
// data extracted from reference files

View File

@ -1,5 +1,27 @@
#include "Scorer.h"
// Builds a scorer called `name` and parses its configuration string.
//
// `config` is a comma-separated list of "name:value" entries. Each entry is
// split at its first colon and stored in _config, and the pair is echoed to
// cerr. An entry without a colon (including an empty entry between two
// commas) raises runtime_error. An empty `config` stores nothing.
Scorer::Scorer(const string& name, const string& config)
  : _name(name), _scoreData(0), _preserveCase(true)
{
  // cerr << "Scorer config string: " << config << endl;
  for (size_t pos = 0; pos < config.size(); ) {
    // The current entry runs up to the next comma, or to the end of config.
    size_t comma = config.find(",", pos);
    if (comma == string::npos) {
      comma = config.size();
    }
    const string entry = config.substr(pos, comma - pos);

    const size_t colon = entry.find(":");
    if (colon == string::npos) {
      throw runtime_error("Missing colon when processing scorer config: " + config);
    }

    const string key = entry.substr(0, colon);
    const string val = entry.substr(colon + 1);
    cerr << "name: " << key << " value: " << val << endl;
    _config[key] = val;

    // Continue just past the comma.
    pos = comma + 1;
  }
}
//regularisation strategies
static float score_min(const statscores_t& scores, size_t start, size_t end)
{
@ -26,6 +48,43 @@ static float score_average(const statscores_t& scores, size_t start, size_t end)
return total / (end - start);
}
// Builds a statistics-based scorer and reads its regularisation and case
// options, after the Scorer base class has been constructed from `name` and
// `config`.
//
// Options read through getConfig, in this order:
//   "regtype" - "none", "average" or "min"; any other value raises
//               runtime_error. "none" is passed as getConfig's second argument.
//   "regwin"  - converted with atoi into _regularisationWindow; "0" is passed
//               as getConfig's second argument.
//   "case"    - "true" or "false" sets _preserveCase; any other value leaves
//               _preserveCase untouched. "true" is passed as getConfig's
//               second argument.
// NOTE(review): the second getConfig argument is presumably the value used
// when the option is absent -- confirm against getConfig.
StatisticsBasedScorer::StatisticsBasedScorer(const string& name, const string& config)
  : Scorer(name, config)
{
  static string regTypeKey = "regtype";
  static string regWindowKey = "regwin";
  static string caseKey = "case";

  // Regularisation strategy.
  const string regType = getConfig(regTypeKey, "none");
  if (regType == "min") {
    _regularisationStrategy = REG_MINIMUM;
  } else if (regType == "average") {
    _regularisationStrategy = REG_AVERAGE;
  } else if (regType == "none") {
    _regularisationStrategy = REG_NONE;
  } else {
    throw runtime_error("Unknown scorer regularisation strategy: " + regType);
  }
  // cerr << "Using scorer regularisation strategy: " << regType << endl;

  // Regularisation window.
  const string regWindow = getConfig(regWindowKey, "0");
  _regularisationWindow = atoi(regWindow.c_str());
  // cerr << "Using scorer regularisation window: " << _regularisationWindow << endl;

  // Case preservation: only the exact strings "true" and "false" are acted on.
  const string caseFlag = getConfig(caseKey, "true");
  if (caseFlag == "true" || caseFlag == "false") {
    _preserveCase = (caseFlag == "true");
  }
  // cerr << "Using case preservation: " << _preserveCase << endl;
}
void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
statscores_t& scores)
{

View File

@ -32,28 +32,7 @@ private:
string _name;
public:
Scorer(const string& name, const string& config): _name(name), _scoreData(0), _preserveCase(true) {
// cerr << "Scorer config string: " << config << endl;
size_t start = 0;
while (start < config.size()) {
size_t end = config.find(",",start);
if (end == string::npos) {
end = config.size();
}
string nv = config.substr(start,end-start);
size_t split = nv.find(":");
if (split == string::npos) {
throw runtime_error("Missing colon when processing scorer config: " + config);
}
string name = nv.substr(0,split);
string value = nv.substr(split+1,nv.size()-split-1);
cerr << "name: " << name << " value: " << value << endl;
_config[name] = value;
start = end+1;
}
}
Scorer(const string& name, const string& config);
virtual ~Scorer() {}
/**
@ -192,46 +171,9 @@ private:
*/
class StatisticsBasedScorer : public Scorer
{
public:
StatisticsBasedScorer(const string& name, const string& config): Scorer(name,config) {
//configure regularisation
static string KEY_TYPE = "regtype";
static string KEY_WINDOW = "regwin";
static string KEY_CASE = "case";
static string TYPE_NONE = "none";
static string TYPE_AVERAGE = "average";
static string TYPE_MINIMUM = "min";
static string TRUE = "true";
static string FALSE = "false";
string type = getConfig(KEY_TYPE,TYPE_NONE);
if (type == TYPE_NONE) {
_regularisationStrategy = REG_NONE;
} else if (type == TYPE_AVERAGE) {
_regularisationStrategy = REG_AVERAGE;
} else if (type == TYPE_MINIMUM) {
_regularisationStrategy = REG_MINIMUM;
} else {
throw runtime_error("Unknown scorer regularisation strategy: " + type);
}
// cerr << "Using scorer regularisation strategy: " << type << endl;
string window = getConfig(KEY_WINDOW,"0");
_regularisationWindow = atoi(window.c_str());
// cerr << "Using scorer regularisation window: " << _regularisationWindow << endl;
string preservecase = getConfig(KEY_CASE,TRUE);
if (preservecase == TRUE) {
_preserveCase = true;
} else if (preservecase == FALSE) {
_preserveCase = false;
}
// cerr << "Using case preservation: " << _preserveCase << endl;
}
~StatisticsBasedScorer() {}
StatisticsBasedScorer(const string& name, const string& config);
virtual ~StatisticsBasedScorer() {}
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
statscores_t& scores);

View File

@ -2,10 +2,13 @@
#include "TERsrc/tercalc.h"
#include "TERsrc/terAlignment.h"
const int TerScorer::LENGTH = 2;
using namespace TERCpp;
using namespace std;
// Builds a TER scorer: forwards the name "TER" and the configuration string
// to the StatisticsBasedScorer base class and fixes kLENGTH to 2.
TerScorer::TerScorer(const string& config)
  : StatisticsBasedScorer("TER", config),
    kLENGTH(2)
{
}

// Out-of-line destructor; its body is intentionally empty.
TerScorer::~TerScorer()
{
}
void TerScorer::setReferenceFiles ( const vector<string>& referenceFiles )
{

View File

@ -29,16 +29,18 @@ using namespace TERCpp;
class TerScorer: public StatisticsBasedScorer
{
public:
explicit TerScorer(const string& config = "") : StatisticsBasedScorer("TER",config) {}
explicit TerScorer(const string& config = "");
~TerScorer();
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
static const int LENGTH;
virtual void whoami() {
cerr << "I AM TerScorer" << std::endl;
}
size_t NumberOfScores() {
// cerr << "TerScorer: " << (LENGTH + 1) << endl;
return (LENGTH + 1);
return (kLENGTH + 1);
}
@ -47,20 +49,21 @@ public:
// float calculateScore(const vector<float>& comps);
private:
const int kLENGTH;
string javaEnv;
string tercomEnv;
// no copying allowed
TerScorer(const TerScorer&);
~TerScorer() {}
TerScorer& operator=(const TerScorer&);
// data extracted from reference files
vector<size_t> _reflengths;
vector<multiset<int> > _reftokens;
vector<vector<int> > m_references;
vector<vector<vector<int> > > m_multi_references;
string m_pid;
// no copying allowed
TerScorer(const TerScorer&);
TerScorer& operator=(const TerScorer&);
};
#endif // __TERSCORER_H__