// mosesdecoder/moses/FF/BleuScoreFeature.h
// (192 lines, 6.0 KiB)

#ifndef BLUESCOREFEATURE_H
#define BLUESCOREFEATURE_H
#include <utility>
#include <string>
#include <vector>
// revision timestamp: 2011-11-18 14:17:16 +04:00
#include <boost/unordered_map.hpp>
// revision timestamp: 2013-05-24 22:11:15 +04:00
#include "StatefulFeatureFunction.h"
// revision timestamp: 2013-05-24 21:02:49 +04:00
#include "moses/FF/FFState.h"
#include "moses/Phrase.h"
#include "moses/ChartHypothesis.h"
// revision timestamp: 2013-05-29 21:16:15 +04:00
namespace Moses
{
class BleuScoreFeature;
// revision timestamp: 2013-05-29 21:16:15 +04:00
class BleuScoreState : public FFState
{
public:
2013-05-29 21:16:15 +04:00
friend class BleuScoreFeature;
static size_t bleu_order;
2013-05-29 21:16:15 +04:00
BleuScoreState();
virtual int Compare(const FFState& other) const;
void print(std::ostream& out) const;
private:
2013-05-29 21:16:15 +04:00
Phrase m_words;
size_t m_source_length;
size_t m_target_length;
2013-05-29 21:16:15 +04:00
// scaled reference length is needed for scoring incomplete hypotheses against reference translation
float m_scaled_ref_length;
2013-05-29 21:16:15 +04:00
std::vector< size_t > m_ngram_counts;
std::vector< size_t > m_ngram_matches;
2013-05-29 21:16:15 +04:00
void AddNgramCountAndMatches(std::vector< size_t >& counts, std::vector< size_t >& matches);
};
// Stream-insertion operator for BleuScoreState; only declared here, the definition lives elsewhere.
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state);
// Hash map keyed by Phrase with a size_t value; used in this header to hold n-gram counts
// (e.g. the reference n-gram counts passed to the GetNgramMatchCounts* methods).
typedef boost::unordered_map< Phrase, size_t > NGrams;
class RefValue : public std::pair<std::vector<size_t>,NGrams>
{
public:
RefValue& operator=( const RefValue& rhs ) {
first = rhs.first;
second = rhs.second;
return *this;
}
};
// revision timestamp: 2013-05-29 21:16:15 +04:00
class BleuScoreFeature : public StatefulFeatureFunction
{
public:
2011-11-18 14:17:16 +04:00
typedef boost::unordered_map<size_t, RefValue > RefCounts;
2011-11-18 14:17:16 +04:00
typedef boost::unordered_map<size_t, NGrams> Matches;
2013-01-29 14:54:09 +04:00
BleuScoreFeature(const std::string &line);
void SetParameter(const std::string& key, const std::string& value);
std::vector<float> DefaultWeights() const;
2013-05-29 21:16:15 +04:00
void PrintHistory(std::ostream& out) const;
void LoadReferences(const std::vector< std::vector< std::string > > &);
void SetCurrSourceLength(size_t);
void SetCurrNormSourceLength(size_t);
void SetCurrShortestRefLength(size_t);
void SetCurrAvgRefLength(size_t sent_id);
void SetAvgInputLength (float l) {
m_avg_input_length = l;
}
void SetCurrReferenceNgrams(size_t sent_id);
size_t GetShortestRefIndex(size_t ref_id);
size_t GetClosestRefLength(size_t ref_id, int hypoLength);
void UpdateHistory(const std::vector< const Word* >&);
void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
void PrintRefLength(const std::vector<size_t>& ref_ids);
void SetBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu);
void GetNgramMatchCounts(Phrase&,
const NGrams&,
std::vector< size_t >&,
std::vector< size_t >&,
size_t skip = 0) const;
void GetNgramMatchCounts_prefix(Phrase&,
const NGrams&,
std::vector< size_t >&,
std::vector< size_t >&,
size_t new_start_indices,
size_t last_end_index) const;
void GetNgramMatchCounts_overlap(Phrase& phrase,
const NGrams& ref_ngram_counts,
std::vector< size_t >& ret_counts,
std::vector< size_t >& ret_matches,
size_t overlap_index) const;
void GetClippedNgramMatchesAndCounts(Phrase&,
const NGrams&,
std::vector< size_t >&,
std::vector< size_t >&,
size_t skip = 0) const;
FFState* Evaluate( const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
int featureID,
ScoreComponentCollection* accumulator) const;
void Evaluate(const InputType &input
, const InputPath &inputPath
, ScoreComponentCollection &scoreBreakdown) const
{}
void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
2013-05-29 21:16:15 +04:00
bool Enabled() const {
return m_enabled;
}
bool IsUseable(const FactorMask &mask) const;
2013-05-29 21:16:15 +04:00
float CalculateBleu(BleuScoreState*) const;
float CalculateBleu(Phrase translation) const;
const FFState* EmptyHypothesisState(const InputType&) const;
float GetSourceLengthHistory() {
return m_source_length_history;
}
float GetTargetLengthHistory() {
return m_target_length_history;
}
float GetAverageInputLength() {
return m_avg_input_length;
}
private:
2013-05-29 21:16:15 +04:00
bool m_enabled;
bool m_sentence_bleu;
bool m_simple_history_bleu;
2013-05-29 21:16:15 +04:00
// counts for pseudo-document
std::vector< float > m_count_history;
std::vector< float > m_match_history;
float m_source_length_history;
float m_target_length_history;
float m_ref_length_history;
2013-05-29 21:16:15 +04:00
size_t m_cur_source_length;
size_t m_cur_norm_source_length; // length without <s>, </s>
RefCounts m_refs;
NGrams m_cur_ref_ngrams;
float m_cur_ref_length;
2013-05-29 21:16:15 +04:00
// scale BLEU score by history of input length
bool m_scale_by_input_length;
bool m_scale_by_avg_input_length;
2013-05-29 21:16:15 +04:00
// scale by the inverse of the input length * 100
bool m_scale_by_inverse_length;
bool m_scale_by_avg_inverse_length;
2013-05-29 21:16:15 +04:00
float m_avg_input_length;
2012-03-07 21:43:15 +04:00
2013-05-29 21:16:15 +04:00
float m_scale_by_x;
2013-05-29 21:16:15 +04:00
// smoothing factor for history counts
float m_historySmoothing;
2013-05-29 21:16:15 +04:00
enum SmoothingScheme { PLUS_ONE = 1, PLUS_POINT_ONE = 2, PAPINENI = 3 };
SmoothingScheme m_smoothing_scheme;
};
} // Namespace.
#endif //BLUESCOREFEATURE_H