mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-10 10:59:21 +03:00
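Minor refactoring of the BLEU calculation.
Introduce a single shared constant, kBleuNgramOrder, so that the n-gram order is no longer defined twice (once as the kLENGTH member used for document-level BLEU and once as the local bleu_order used for sentence-level BLEU).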
This commit is contained in:
parent
127f958bed
commit
ed6e6f00b1
@ -85,7 +85,6 @@ class BleuScorer::NgramCounts {
|
||||
|
||||
BleuScorer::BleuScorer(const string& config)
|
||||
: StatisticsBasedScorer("BLEU", config),
|
||||
kLENGTH(4),
|
||||
m_ref_length_type(CLOSEST) {
|
||||
const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
|
||||
if (reflen == REFLEN_AVERAGE) {
|
||||
@ -150,7 +149,7 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
|
||||
throw runtime_error("File " + referenceFiles[i] + " has too many sentences");
|
||||
}
|
||||
NgramCounts counts;
|
||||
size_t length = countNgrams(line, counts, kLENGTH);
|
||||
size_t length = countNgrams(line, counts, kBleuNgramOrder);
|
||||
|
||||
//for any counts larger than those already there, merge them in
|
||||
for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
|
||||
@ -184,9 +183,9 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
|
||||
}
|
||||
NgramCounts testcounts;
|
||||
// stats for this line
|
||||
vector<ScoreStatsType> stats(kLENGTH * 2);
|
||||
vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
|
||||
string sentence = this->applyFactors(text);
|
||||
const size_t length = countNgrams(sentence, testcounts, kLENGTH);
|
||||
const size_t length = countNgrams(sentence, testcounts, kBleuNgramOrder);
|
||||
|
||||
// Calculate effective reference length.
|
||||
switch (m_ref_length_type) {
|
||||
@ -222,15 +221,16 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
|
||||
float BleuScorer::calculateScore(const vector<int>& comps) const
|
||||
{
|
||||
float logbleu = 0.0;
|
||||
for (int i = 0; i < kLENGTH; ++i) {
|
||||
for (int i = 0; i < kBleuNgramOrder; ++i) {
|
||||
if (comps[2*i] == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
logbleu += log(comps[2*i]) - log(comps[2*i+1]);
|
||||
|
||||
}
|
||||
logbleu /= kLENGTH;
|
||||
const float brevity = 1.0 - static_cast<float>(comps[kLENGTH*2]) / comps[1];//reflength divided by test length
|
||||
logbleu /= kBleuNgramOrder;
|
||||
// reflength divided by test length
|
||||
const float brevity = 1.0 - static_cast<float>(comps[kBleuNgramOrder * 2]) / comps[1];
|
||||
if (brevity < 0.0) {
|
||||
logbleu += brevity;
|
||||
}
|
||||
|
@ -12,6 +12,8 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
const int kBleuNgramOrder = 4;
|
||||
|
||||
/**
|
||||
* Bleu scoring
|
||||
*/
|
||||
@ -24,7 +26,7 @@ public:
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
virtual float calculateScore(const vector<int>& comps) const;
|
||||
virtual size_t NumberOfScores() const { return 2 * kLENGTH + 1; }
|
||||
virtual size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
|
||||
|
||||
private:
|
||||
enum ReferenceLengthType {
|
||||
@ -55,7 +57,6 @@ private:
|
||||
void CalcShortest(size_t sentence_id,
|
||||
vector<ScoreStatsType>& stats) const;
|
||||
|
||||
const int kLENGTH;
|
||||
ReferenceLengthType m_ref_length_type;
|
||||
|
||||
// data extracted from reference files
|
||||
|
@ -34,9 +34,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include "BleuScorer.h"
|
||||
#include "FeatureDataIterator.h"
|
||||
#include "ScoreDataIterator.h"
|
||||
|
||||
@ -70,13 +72,12 @@ public:
|
||||
|
||||
static float sentenceLevelBleuPlusOne(const vector<float>& stats) {
|
||||
float logbleu = 0.0;
|
||||
const unsigned int bleu_order = 4;
|
||||
for (unsigned int j=0; j<bleu_order; j++) {
|
||||
for (unsigned int j=0; j<kBleuNgramOrder; j++) {
|
||||
//cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
|
||||
logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
|
||||
}
|
||||
logbleu /= bleu_order;
|
||||
const float brevity = 1.0 - static_cast<float>(stats[(bleu_order*2)]) / stats[1];
|
||||
logbleu /= kBleuNgramOrder;
|
||||
const float brevity = 1.0 - static_cast<float>(stats[(kBleuNgramOrder * 2)]) / stats[1];
|
||||
if (brevity < 0.0) {
|
||||
logbleu += brevity;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user