Minor change for calculating BLEU.

Introduce a shared constant, kBleuNgramOrder, so that the n-gram order is no
longer defined separately for the document-level and sentence-level BLEU score calculations.
This commit is contained in:
Tetsuo Kiso 2012-03-10 02:49:31 +09:00
parent 127f958bed
commit ed6e6f00b1
3 changed files with 15 additions and 13 deletions

View File

@ -85,7 +85,6 @@ class BleuScorer::NgramCounts {
BleuScorer::BleuScorer(const string& config)
: StatisticsBasedScorer("BLEU", config),
kLENGTH(4),
m_ref_length_type(CLOSEST) {
const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
if (reflen == REFLEN_AVERAGE) {
@ -150,7 +149,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
throw runtime_error("File " + referenceFiles[i] + " has too many sentences");
}
NgramCounts counts;
size_t length = countNgrams(line, counts, kLENGTH);
size_t length = countNgrams(line, counts, kBleuNgramOrder);
//for any counts larger than those already there, merge them in
for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
@ -184,9 +183,9 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
}
NgramCounts testcounts;
// stats for this line
vector<ScoreStatsType> stats(kLENGTH * 2);
vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
string sentence = this->applyFactors(text);
const size_t length = countNgrams(sentence, testcounts, kLENGTH);
const size_t length = countNgrams(sentence, testcounts, kBleuNgramOrder);
// Calculate effective reference length.
switch (m_ref_length_type) {
@ -222,15 +221,16 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
float BleuScorer::calculateScore(const vector<int>& comps) const
{
float logbleu = 0.0;
for (int i = 0; i < kLENGTH; ++i) {
for (int i = 0; i < kBleuNgramOrder; ++i) {
if (comps[2*i] == 0) {
return 0.0;
}
logbleu += log(comps[2*i]) - log(comps[2*i+1]);
}
logbleu /= kLENGTH;
const float brevity = 1.0 - static_cast<float>(comps[kLENGTH*2]) / comps[1];//reflength divided by test length
logbleu /= kBleuNgramOrder;
// reflength divided by test length
const float brevity = 1.0 - static_cast<float>(comps[kBleuNgramOrder * 2]) / comps[1];
if (brevity < 0.0) {
logbleu += brevity;
}

View File

@ -12,6 +12,8 @@
using namespace std;
const int kBleuNgramOrder = 4;
/**
* Bleu scoring
*/
@ -24,7 +26,7 @@ public:
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
virtual float calculateScore(const vector<int>& comps) const;
virtual size_t NumberOfScores() const { return 2 * kLENGTH + 1; }
virtual size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
private:
enum ReferenceLengthType {
@ -55,7 +57,6 @@ private:
void CalcShortest(size_t sentence_id,
vector<ScoreStatsType>& stats) const;
const int kLENGTH;
ReferenceLengthType m_ref_length_type;
// data extracted from reference files

View File

@ -34,9 +34,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include <boost/program_options.hpp>
#include "BleuScorer.h"
#include "FeatureDataIterator.h"
#include "ScoreDataIterator.h"
@ -70,13 +72,12 @@ public:
static float sentenceLevelBleuPlusOne(const vector<float>& stats) {
float logbleu = 0.0;
const unsigned int bleu_order = 4;
for (unsigned int j=0; j<bleu_order; j++) {
for (unsigned int j=0; j<kBleuNgramOrder; j++) {
//cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
}
logbleu /= bleu_order;
const float brevity = 1.0 - static_cast<float>(stats[(bleu_order*2)]) / stats[1];
logbleu /= kBleuNgramOrder;
const float brevity = 1.0 - static_cast<float>(stats[(kBleuNgramOrder * 2)]) / stats[1];
if (brevity < 0.0) {
logbleu += brevity;
}