mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
Incremental interface for scorer
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1678 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
69c6164b82
commit
e2921af063
@ -176,25 +176,7 @@ void BleuScorer::prepareStats(int sid, const string& text, ScoreStats& entry) {
|
||||
entry.set(stats_str);
|
||||
}
|
||||
|
||||
|
||||
float BleuScorer::score(const std::vector<unsigned int>& candidates) {
|
||||
if (!_scoreData) {
|
||||
throw std::runtime_error("score data not loaded");
|
||||
}
|
||||
vector<int> comps(LENGTH*2+1);
|
||||
for (size_t i = 0; i < candidates.size(); ++i) {
|
||||
ScoreStats stats = _scoreData->get(i,candidates[i]);
|
||||
if (stats.size() != comps.size()) {
|
||||
stringstream msg;
|
||||
msg << "Bleu statistics for (" << "," << candidates[i] << ") have incorrect "
|
||||
<< "number of fields. Found: " << stats.size() << " Expected: "
|
||||
<< comps.size();
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
for (size_t k = 0; k < comps.size(); ++k) {
|
||||
comps[k] += stats.get(k);
|
||||
}
|
||||
}
|
||||
float BleuScorer::bleu(const vector<int>& comps) {
|
||||
float logbleu = 0.0;
|
||||
for (int i = 0; i < LENGTH; ++i) {
|
||||
if (comps[2*i] == 0) {
|
||||
@ -212,6 +194,49 @@ float BleuScorer::score(const std::vector<unsigned int>& candidates) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
void BleuScorer::score(const candidates_t& candidates, const diffs_t& diffs,
|
||||
scores_t& scores) {
|
||||
if (!_scoreData) {
|
||||
throw runtime_error("score data not loaded");
|
||||
}
|
||||
//calculate the score for the candidates
|
||||
vector<int> comps(LENGTH*2+1);
|
||||
for (size_t i = 0; i < candidates.size(); ++i) {
|
||||
ScoreStats stats = _scoreData->get(i,candidates[i]);
|
||||
if (stats.size() != comps.size()) {
|
||||
stringstream msg;
|
||||
msg << "Bleu statistics for (" << "," << candidates[i] << ") have incorrect "
|
||||
<< "number of fields. Found: " << stats.size() << " Expected: "
|
||||
<< comps.size();
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
for (size_t k = 0; k < comps.size(); ++k) {
|
||||
comps[k] += stats.get(k);
|
||||
}
|
||||
}
|
||||
scores.push_back(bleu(comps));
|
||||
|
||||
candidates_t last_candidates(candidates);
|
||||
//apply each of the diffs, and get new scores
|
||||
for (size_t i = 0; i < diffs.size(); ++i) {
|
||||
for (size_t j = 0; j < diffs[i].size(); ++j) {
|
||||
size_t sid = diffs[i][j].first;
|
||||
size_t nid = diffs[i][j].second;
|
||||
size_t last_nid = last_candidates[sid];
|
||||
for (size_t k = 0; k < comps.size(); ++k) {
|
||||
int diff = _scoreData->get(sid,nid).get(k)
|
||||
- _scoreData->get(sid,last_nid).get(k);
|
||||
comps[k] += diff;
|
||||
}
|
||||
last_candidates[sid] = nid;
|
||||
}
|
||||
scores.push_back(bleu(comps));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
void BleuScorer::prepare(const vector<string>& referencefiles, const string& nbestfile) {
|
||||
//processReferences(referencefiles, refcounts,reflengths,encodings);
|
||||
|
@ -31,7 +31,8 @@ class BleuScorer: public Scorer {
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void prepareStats(int sid, const string& text, ScoreStats& entry);
|
||||
|
||||
virtual float score(const std::vector<unsigned int>& candidates);
|
||||
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
|
||||
scores_t& scores);
|
||||
|
||||
static const int LENGTH;
|
||||
|
||||
@ -83,6 +84,7 @@ class BleuScorer: public Scorer {
|
||||
|
||||
void encode(const string& line, vector<int>& encoded);
|
||||
size_t countNgrams(const string& line, counts_t& counts, unsigned int n);
|
||||
float bleu(const vector<int>& comps);
|
||||
|
||||
void dump_counts(counts_t& counts) {
|
||||
for (counts_it i = counts.begin(); i != counts.end(); ++i) {
|
||||
|
@ -9,24 +9,73 @@
|
||||
|
||||
#include "ScoreData.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
typedef vector<pair<unsigned int, unsigned int> > diff_t;
|
||||
typedef vector<diff_t> diffs_t;
|
||||
typedef vector<unsigned int> candidates_t;
|
||||
typedef vector<float> scores_t;
|
||||
|
||||
class ScoreStats;
|
||||
|
||||
/**
|
||||
* Superclass of all scorers and dummy implementation. In order to add a new
|
||||
* scorer it should be sufficient to override prepareStats(), setReferenceFiles()
|
||||
* and score()
|
||||
**/
|
||||
class Scorer {
|
||||
|
||||
public:
|
||||
|
||||
Scorer(const std::string& name): _name(name), _scoreData(0) {}
|
||||
Scorer(const string& name): _name(name), _scoreData(0) {}
|
||||
|
||||
const std::string& getName() const {return _name;}
|
||||
|
||||
/**
|
||||
* set the reference files. This must be called before prepareStats.
|
||||
**/
|
||||
void setReferenceFiles(const std::vector<std::string>& referenceFiles) {
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles) {
|
||||
//do nothing
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Process the given guessed text, corresponding to the given reference sindex
|
||||
* and add the appropriate statistics to the entry.
|
||||
**/
|
||||
virtual void prepareStats(int sindex, const string& text, ScoreStats& entry) {
|
||||
//cerr << text << std::endl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Score using each of the candidate index, then go through the diffs
|
||||
* applying each in turn, and calculating a new score each time.
|
||||
**/
|
||||
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
|
||||
scores_t& scores) {
|
||||
//dummy impl
|
||||
if (!_scoreData) {
|
||||
throw runtime_error("score data not loaded");
|
||||
}
|
||||
scores.push_back(0);
|
||||
for (size_t i = 0; i < diffs.size(); ++i) {
|
||||
scores.push_back(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate the score of the sentences corresponding to the list of candidate
|
||||
* indices. Each index indicates the 1-best choice from the n-best list.
|
||||
**/
|
||||
float score(const candidates_t& candidates) {
|
||||
diffs_t diffs;
|
||||
scores_t scores;
|
||||
score(candidates, diffs, scores);
|
||||
return scores[0];
|
||||
}
|
||||
|
||||
const string& getName() const {return _name;}
|
||||
|
||||
size_t getReferenceSize() {
|
||||
if (_scoreData) {
|
||||
return _scoreData->size();
|
||||
@ -34,13 +83,6 @@ class Scorer {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the given guessed text, corresponding to the given reference sindex
|
||||
* and add the appropriate statistics to the entry.
|
||||
**/
|
||||
virtual void prepareStats(int sindex, const std::string& text, ScoreStats& entry) {
|
||||
//std::cerr << text << std::endl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the score data, prior to scoring.
|
||||
@ -49,23 +91,11 @@ class Scorer {
|
||||
_scoreData = scoreData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the score of the sentences corresponding to the list of candidate
|
||||
* indices. Each index indicates the 1-best choice from the n-best list.
|
||||
**/
|
||||
virtual float score(const std::vector<unsigned int>& candidates) {
|
||||
if (!_scoreData) {
|
||||
throw std::runtime_error("score data not loaded");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
ScoreData* _scoreData;
|
||||
|
||||
private:
|
||||
std::string _name;
|
||||
string _name;
|
||||
|
||||
};
|
||||
|
||||
|
@ -12,25 +12,27 @@ int main(int argc, char** argv) {
|
||||
vector<string> references;
|
||||
references.push_back("test_scorer_data/reference.txt");
|
||||
//bs.prepare(references, "test-scorer-data/nbest.out");
|
||||
BleuScorer scorer;
|
||||
scorer.setReferenceFiles(references);
|
||||
ScoreData sd(scorer);
|
||||
Scorer* scorer = new BleuScorer();;
|
||||
scorer->setReferenceFiles(references);
|
||||
ScoreData sd(*scorer);
|
||||
sd.loadnbest("test_scorer_data/nbest.out");
|
||||
//sd.savetxt();
|
||||
|
||||
//calculate a bleu scores
|
||||
scorer.setScoreData(&sd);
|
||||
unsigned int index = 0;
|
||||
vector<unsigned int> candidates;
|
||||
//calculate two bleu scores, nbest and a diff
|
||||
scorer->setScoreData(&sd);
|
||||
candidates_t candidates(sd.size());;
|
||||
for (size_t i = 0; i < sd.size(); ++i) {
|
||||
sd.get(i,index).savetxt("/dev/stdout");
|
||||
candidates.push_back(index++);
|
||||
if (index == 10) {
|
||||
index = 0;
|
||||
}
|
||||
sd.get(i,0).savetxt("/dev/stdout");
|
||||
}
|
||||
|
||||
cout << "Bleu ";
|
||||
float bleu = scorer.score(candidates);
|
||||
cout << bleu << endl;
|
||||
diffs_t diffs;
|
||||
diff_t diff;
|
||||
diff.push_back(make_pair(1,2));
|
||||
diff.push_back(make_pair(7,8));
|
||||
diffs.push_back(diff);
|
||||
|
||||
scores_t scores;
|
||||
scorer->score(candidates,diffs,scores);
|
||||
|
||||
cout << "Bleus: " << scores[0] << " " << scores[1] << endl;
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
#/usr/bin/python
|
||||
#!/usr/bin/python
|
||||
|
||||
#
|
||||
# Calculate bleu score for test files using old (python) script
|
||||
@ -38,21 +38,30 @@ def main():
|
||||
tests[-1].append(text)
|
||||
nbest_fh.close()
|
||||
|
||||
# pick sentences to score with
|
||||
index = 0
|
||||
# score with first best
|
||||
cookedtests = []
|
||||
for i in range(len(tests)):
|
||||
sentence = tests[i][index]
|
||||
sentence = tests[i][0]
|
||||
cookedtest = (bleu.cook_test(sentence, cookedrefs[i]))
|
||||
stats = " ".join(["%d %d" % (c,g) for (c,g) in zip(cookedtest['correct'], cookedtest['guess'])])
|
||||
print " %s %d" % (stats ,cookedtest['reflen'])
|
||||
cookedtests.append(cookedtest)
|
||||
index = index + 1
|
||||
if index == 10:
|
||||
index = 0
|
||||
bleu1 = bleu.score_cooked(cookedtests)
|
||||
|
||||
bleu = bleu.score_cooked(cookedtests)
|
||||
print "Bleu: ", bleu
|
||||
# vary, and score again
|
||||
cookedtests = []
|
||||
for i in range(len(tests)):
|
||||
sentence = tests[i][0]
|
||||
if i == 7:
|
||||
sentence = tests[i][8]
|
||||
elif i == 1:
|
||||
sentences = tests[i][2]
|
||||
cookedtest = (bleu.cook_test(sentence, cookedrefs[i]))
|
||||
cookedtests.append(cookedtest)
|
||||
bleu2 = bleu.score_cooked(cookedtests)
|
||||
|
||||
|
||||
print "Bleus: ", bleu1,bleu2
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
Loading…
Reference in New Issue
Block a user