mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
Add a unit test for BleuScorer.
The test covers only n-gram counting. More tests for calculating the BLEU score are required.
This commit is contained in:
parent
6c64d94b29
commit
82ae12249c
155
mert/BleuScorerTest.cpp
Normal file
155
mert/BleuScorerTest.cpp
Normal file
@ -0,0 +1,155 @@
|
||||
#include "BleuScorer.h"
|
||||
|
||||
#define BOOST_TEST_MODULE MertBleuScorer
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "Ngram.h"
|
||||
#include "Vocabulary.h"
|
||||
#include "Util.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// File-local pointer to the NgramCounts instance that the Check* helpers in
// this file query. It starts out NULL; SetNgramCounts() assigns it and
// GetNgramCounts() reads it.
NgramCounts* g_counts = NULL;
|
||||
|
||||
// Returns the NgramCounts pointer currently stored in g_counts.
//
// Asserts that the pointer is non-null, i.e. that SetNgramCounts() has been
// given a non-null value before this accessor is used.
NgramCounts* GetNgramCounts() {
  assert(g_counts);
  return g_counts;
}
|
||||
|
||||
// Stores `counts` in g_counts so that GetNgramCounts() returns it.
//
// Only the pointer is stored; nothing is copied. The caller therefore has to
// keep the pointed-to object alive for as long as GetNgramCounts() may be
// called.
void SetNgramCounts(NgramCounts* counts) {
  g_counts = counts;
}
|
||||
|
||||
struct Unigram {
|
||||
Unigram(const std::string& a) {
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
|
||||
}
|
||||
NgramCounts::Key instance;
|
||||
};
|
||||
|
||||
struct Bigram {
|
||||
Bigram(const std::string& a, const std::string& b) {
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
|
||||
}
|
||||
NgramCounts::Key instance;
|
||||
};
|
||||
|
||||
struct Trigram {
|
||||
Trigram(const std::string& a, const std::string& b, const std::string& c) {
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(c));
|
||||
}
|
||||
NgramCounts::Key instance;
|
||||
};
|
||||
|
||||
struct Fourgram {
|
||||
Fourgram(const std::string& a, const std::string& b,
|
||||
const std::string& c, const std::string& d) {
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(b));
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(c));
|
||||
instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(d));
|
||||
}
|
||||
NgramCounts::Key instance;
|
||||
};
|
||||
|
||||
void CheckUnigram(const std::string& str) {
|
||||
Unigram unigram(str);
|
||||
NgramCounts::Value v;
|
||||
BOOST_CHECK(GetNgramCounts()->Lookup(unigram.instance, &v));
|
||||
}
|
||||
|
||||
void CheckBigram(const std::string& a, const std::string& b) {
|
||||
Bigram bigram(a, b);
|
||||
NgramCounts::Value v;
|
||||
BOOST_CHECK(GetNgramCounts()->Lookup(bigram.instance, &v));
|
||||
}
|
||||
|
||||
void CheckTrigram(const std::string& a, const std::string& b,
|
||||
const std::string& c) {
|
||||
Trigram trigram(a, b, c);
|
||||
NgramCounts::Value v;
|
||||
BOOST_CHECK(GetNgramCounts()->Lookup(trigram.instance, &v));
|
||||
}
|
||||
|
||||
void CheckFourgram(const std::string& a, const std::string& b,
|
||||
const std::string& c, const std::string& d) {
|
||||
Fourgram fourgram(a, b, c, d);
|
||||
NgramCounts::Value v;
|
||||
BOOST_CHECK(GetNgramCounts()->Lookup(fourgram.instance, &v));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Exercises the reference-length-type accessors of BleuScorer: the value a
// freshly constructed scorer reports, and the round trip through
// SetReferenceLengthType() / GetReferenceLengthType().
BOOST_AUTO_TEST_CASE(bleu_reference_type) {
  BleuScorer scorer;

  // A newly constructed scorer is expected to report CLOSEST.
  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::CLOSEST);

  // Switching to AVERAGE must be reflected by the getter.
  scorer.SetReferenceLengthType(BleuScorer::AVERAGE);
  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::AVERAGE);

  // Likewise for SHORTEST.
  scorer.SetReferenceLengthType(BleuScorer::SHORTEST);
  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::SHORTEST);
}
|
||||
|
||||
// Verifies BleuScorer::CountNgrams() on one sentence: the returned value, the
// number of distinct n-grams collected, the vocabulary size, and the presence
// of every expected n-gram in the resulting NgramCounts.
BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
  BleuScorer scorer;

  std::string line = "I saw a girl with a telescope .";
  // The sentence above has 8 space-separated tokens and is expected to yield
  // the following 25 distinct n-grams ("a" occurs twice, so there are only
  // 7 distinct unigrams):
  //
  // unigram: "I", "saw", "a", "girl", "with", "telescope", "."
  // bigram:  "I saw", "saw a", "a girl", "girl with", "with a",
  //          "a telescope", "telescope ."
  // trigram: "I saw a", "saw a girl", "a girl with", "girl with a",
  //          "with a telescope", "a telescope ."
  // 4-gram:  "I saw a girl", "saw a girl with", "a girl with a",
  //          "girl with a telescope", "with a telescope ."
  NgramCounts counts;
  // Fatal check: the remaining assertions are meaningless if counting failed.
  BOOST_REQUIRE(scorer.CountNgrams(line, counts, kBleuNgramOrder) == 8);
  BOOST_CHECK_EQUAL(25, counts.size());

  mert::Vocabulary* vocab = scorer.GetVocab();
  BOOST_CHECK_EQUAL(7, vocab->size());

  // Every token of the sentence must be known to the scorer's vocabulary.
  std::vector<std::string> res;
  Tokenize(line.c_str(), ' ', &res);
  for (std::size_t i = 0; i < res.size(); ++i) {
    int id = 0;  // Out-parameter for Lookup(); the id itself is not checked.
    BOOST_CHECK(vocab->Lookup(res[i], &id));
  }

  // Make `counts` visible to the Check* helpers below.
  SetNgramCounts(&counts);

  // unigram
  for (std::size_t i = 0; i < res.size(); ++i) {
    CheckUnigram(res[i]);
  }

  // bigram
  CheckBigram("I", "saw");
  CheckBigram("saw", "a");
  CheckBigram("a", "girl");
  CheckBigram("girl", "with");
  CheckBigram("with", "a");
  CheckBigram("a", "telescope");
  CheckBigram("telescope", ".");

  // trigram
  CheckTrigram("I", "saw", "a");
  CheckTrigram("saw", "a", "girl");
  CheckTrigram("a", "girl", "with");
  CheckTrigram("girl", "with", "a");
  CheckTrigram("with", "a", "telescope");
  CheckTrigram("a", "telescope", ".");

  // 4-gram
  CheckFourgram("I", "saw", "a", "girl");
  CheckFourgram("saw", "a", "girl", "with");
  CheckFourgram("a", "girl", "with", "a");
  CheckFourgram("girl", "with", "a", "telescope");
  CheckFourgram("with", "a", "telescope", ".");

  // `counts` is a local that is destroyed when this test returns; clear the
  // file-scope pointer so it is never left dangling for later test cases.
  SetNgramCounts(NULL);
}
|
@ -50,6 +50,7 @@ exe pro : pro.cpp mert_lib ..//boost_program_options ;
|
||||
|
||||
alias programs : mert extractor evaluator pro ;
|
||||
|
||||
unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test data_test : DataTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test ngram_test : NgramTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
|
Loading…
Reference in New Issue
Block a user