debugging and fixes for BleuScoreFeature

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3479 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bhaddow 2010-09-16 12:49:57 +00:00
parent 7136dcbf6d
commit ba5ccb7bc5
3 changed files with 37 additions and 9 deletions

View File

@ -2,6 +2,8 @@
#include "StaticData.h"
using namespace std;
namespace Moses {
size_t BleuScoreState::bleu_order = 4;
@ -49,9 +51,22 @@ int BleuScoreState::Compare(const FFState& o) const
if (m_ngram_matches[i] > other.m_ngram_matches[i])
return 1;
}
return 0;
}
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state) {
state.print(out);
return out;
}
void BleuScoreState::print(std::ostream& out) const {
out << "nw=" << m_num_new_words << ";ref=" << m_scaled_ref_length <<
"source= " << m_source_length << ";counts=";
for (size_t i = 0; i < bleu_order; ++i) {
out << m_ngram_matches[i] << "/" << m_ngram_counts[i] << ",";
}
out << "ctxt=" << m_words;
}
BleuScoreFeature::BleuScoreFeature() {
const_cast<ScoreIndexManager&>
@ -59,12 +74,11 @@ BleuScoreFeature::BleuScoreFeature() {
}
void BleuScoreFeature::LoadReferences(std::vector<
std::vector< std::string > >refs)
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
{
FactorCollection& fc = FactorCollection::Instance();
for (size_t ref_id = 0; ref_id < refs.size(); ref_id++) {
std::vector< std::string >& ref = refs[ref_id];
const std::vector< std::string >& ref = refs[ref_id];
std::pair< size_t, std::map< Phrase, size_t > > ref_pair;
ref_pair.first = ref.size();
for (size_t order = 1; order < BleuScoreState::bleu_order; order++) {
@ -95,6 +109,7 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
std::map< Phrase, size_t >::const_iterator reference_ngrams_iter;
const BleuScoreState& ps = dynamic_cast<const BleuScoreState&>(*prev_state);
BleuScoreState* new_state = new BleuScoreState(ps);
// cerr << "PS: " << ps << endl;
float old_bleu, new_bleu;
size_t num_new_words, ctx_start_idx, ctx_end_idx, ngram_start_idx,
@ -132,10 +147,15 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
}
// Update state.
ctx_end_idx = new_words.GetSize();
ctx_start_idx = ctx_end_idx - (BleuScoreState::bleu_order - 1);
ctx_end_idx = new_words.GetSize()-1;
size_t bleu_context_length = BleuScoreState::bleu_order -1;
if (ctx_end_idx > bleu_context_length) {
ctx_start_idx = ctx_end_idx - bleu_context_length;
} else {
ctx_start_idx = 0;
}
new_state->m_words = new_words.GetSubString(WordsRange(ctx_start_idx,
ctx_start_idx));
ctx_end_idx));
new_state->m_num_new_words = new_words.GetSize();
new_state->m_source_length = cur_hypo.GetSourcePhrase()->GetSize();
new_state->m_scaled_ref_length = m_cur_ref_length * (
@ -151,11 +171,14 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
}
float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
if (!state->m_ngram_counts[0]) return 0;
float precision = 1.0;
// Calculate ngram precisions.
for (size_t i = 1; i < BleuScoreState::bleu_order; i++)
precision *= state->m_ngram_matches[i] / state->m_ngram_counts[i];
if (state->m_ngram_counts[i]) {
precision *= state->m_ngram_matches[i] / state->m_ngram_counts[i];
}
// Apply brevity penalty if applicable.
if (state->m_num_new_words < state->m_scaled_ref_length)

View File

@ -22,6 +22,7 @@ public:
BleuScoreState();
virtual int Compare(const FFState& other) const;
void print(std::ostream& out) const;
private:
Phrase m_words;
@ -34,6 +35,8 @@ private:
std::vector< size_t > m_ngram_matches;
};
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state);
class BleuScoreFeature : public StatefulFeatureFunction {
public:
BleuScoreFeature();

View File

@ -1222,7 +1222,9 @@ bool StaticData::LoadReferences() {
}
in.close();
}
//TODO: Set the references in the bleu feature
//Set the references in the bleu feature
m_bleuScoreFeature->LoadReferences(references);
m_bleuScoreFeature->SetCurrentReference(0); //TODO: Temporary, for testing
return true;
}