mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
debugging and fixes for BleuScoreFeature
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/mira-mtm5@3479 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
7136dcbf6d
commit
ba5ccb7bc5
@ -2,6 +2,8 @@
|
||||
|
||||
#include "StaticData.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses {
|
||||
|
||||
size_t BleuScoreState::bleu_order = 4;
|
||||
@ -49,9 +51,22 @@ int BleuScoreState::Compare(const FFState& o) const
|
||||
if (m_ngram_matches[i] > other.m_ngram_matches[i])
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state) {
|
||||
state.print(out);
|
||||
return out;
|
||||
}
|
||||
|
||||
void BleuScoreState::print(std::ostream& out) const {
|
||||
out << "nw=" << m_num_new_words << ";ref=" << m_scaled_ref_length <<
|
||||
"source= " << m_source_length << ";counts=";
|
||||
for (size_t i = 0; i < bleu_order; ++i) {
|
||||
out << m_ngram_matches[i] << "/" << m_ngram_counts[i] << ",";
|
||||
}
|
||||
out << "ctxt=" << m_words;
|
||||
|
||||
}
|
||||
|
||||
BleuScoreFeature::BleuScoreFeature() {
|
||||
const_cast<ScoreIndexManager&>
|
||||
@ -59,12 +74,11 @@ BleuScoreFeature::BleuScoreFeature() {
|
||||
|
||||
}
|
||||
|
||||
void BleuScoreFeature::LoadReferences(std::vector<
|
||||
std::vector< std::string > >refs)
|
||||
void BleuScoreFeature::LoadReferences(const std::vector< std::vector< std::string > >& refs)
|
||||
{
|
||||
FactorCollection& fc = FactorCollection::Instance();
|
||||
for (size_t ref_id = 0; ref_id < refs.size(); ref_id++) {
|
||||
std::vector< std::string >& ref = refs[ref_id];
|
||||
const std::vector< std::string >& ref = refs[ref_id];
|
||||
std::pair< size_t, std::map< Phrase, size_t > > ref_pair;
|
||||
ref_pair.first = ref.size();
|
||||
for (size_t order = 1; order < BleuScoreState::bleu_order; order++) {
|
||||
@ -95,6 +109,7 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
|
||||
std::map< Phrase, size_t >::const_iterator reference_ngrams_iter;
|
||||
const BleuScoreState& ps = dynamic_cast<const BleuScoreState&>(*prev_state);
|
||||
BleuScoreState* new_state = new BleuScoreState(ps);
|
||||
// cerr << "PS: " << ps << endl;
|
||||
|
||||
float old_bleu, new_bleu;
|
||||
size_t num_new_words, ctx_start_idx, ctx_end_idx, ngram_start_idx,
|
||||
@ -132,10 +147,15 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
|
||||
}
|
||||
|
||||
// Update state.
|
||||
ctx_end_idx = new_words.GetSize();
|
||||
ctx_start_idx = ctx_end_idx - (BleuScoreState::bleu_order - 1);
|
||||
ctx_end_idx = new_words.GetSize()-1;
|
||||
size_t bleu_context_length = BleuScoreState::bleu_order -1;
|
||||
if (ctx_end_idx > bleu_context_length) {
|
||||
ctx_start_idx = ctx_end_idx - bleu_context_length;
|
||||
} else {
|
||||
ctx_start_idx = 0;
|
||||
}
|
||||
new_state->m_words = new_words.GetSubString(WordsRange(ctx_start_idx,
|
||||
ctx_start_idx));
|
||||
ctx_end_idx));
|
||||
new_state->m_num_new_words = new_words.GetSize();
|
||||
new_state->m_source_length = cur_hypo.GetSourcePhrase()->GetSize();
|
||||
new_state->m_scaled_ref_length = m_cur_ref_length * (
|
||||
@ -151,11 +171,14 @@ FFState* BleuScoreFeature::Evaluate(const Hypothesis& cur_hypo,
|
||||
}
|
||||
|
||||
float BleuScoreFeature::CalculateBleu(BleuScoreState* state) const {
|
||||
if (!state->m_ngram_counts[0]) return 0;
|
||||
float precision = 1.0;
|
||||
|
||||
// Calculate ngram precisions.
|
||||
for (size_t i = 1; i < BleuScoreState::bleu_order; i++)
|
||||
if (state->m_ngram_counts[i]) {
|
||||
precision *= state->m_ngram_matches[i] / state->m_ngram_counts[i];
|
||||
}
|
||||
|
||||
// Apply brevity penalty if applicable.
|
||||
if (state->m_num_new_words < state->m_scaled_ref_length)
|
||||
|
@ -22,6 +22,7 @@ public:
|
||||
|
||||
BleuScoreState();
|
||||
virtual int Compare(const FFState& other) const;
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
private:
|
||||
Phrase m_words;
|
||||
@ -34,6 +35,8 @@ private:
|
||||
std::vector< size_t > m_ngram_matches;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state);
|
||||
|
||||
class BleuScoreFeature : public StatefulFeatureFunction {
|
||||
public:
|
||||
BleuScoreFeature();
|
||||
|
@ -1222,7 +1222,9 @@ bool StaticData::LoadReferences() {
|
||||
}
|
||||
in.close();
|
||||
}
|
||||
//TODO: Set the references in the bleu feature
|
||||
//Set the references in the bleu feature
|
||||
m_bleuScoreFeature->LoadReferences(references);
|
||||
m_bleuScoreFeature->SetCurrentReference(0); //TODO: Temporary, for testing
|
||||
|
||||
return true;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user