2010-09-14 19:00:22 +04:00
|
|
|
#ifndef BLUESCOREFEATURE_H
|
|
|
|
#define BLUESCOREFEATURE_H
|
|
|
|
|
2010-09-15 20:51:21 +04:00
|
|
|
#include <utility>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
2011-11-18 14:17:16 +04:00
|
|
|
#include <boost/unordered_map.hpp>
|
|
|
|
|
2013-05-24 22:11:15 +04:00
|
|
|
#include "StatefulFeatureFunction.h"
|
2010-09-14 19:00:22 +04:00
|
|
|
|
2013-05-24 21:02:49 +04:00
|
|
|
#include "moses/FF/FFState.h"
|
|
|
|
#include "moses/Phrase.h"
|
|
|
|
#include "moses/ChartHypothesis.h"
|
2010-09-14 19:00:22 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2010-09-14 19:00:22 +04:00
|
|
|
|
|
|
|
// Forward declaration: BleuScoreState (below) names this class as a friend;
// the full definition of BleuScoreFeature appears later in this header.
class BleuScoreFeature;
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
class BleuScoreState : public FFState
|
|
|
|
{
|
2010-09-14 19:00:22 +04:00
|
|
|
public:
|
2013-05-29 21:16:15 +04:00
|
|
|
friend class BleuScoreFeature;
|
|
|
|
static size_t bleu_order;
|
2010-09-14 19:00:22 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
BleuScoreState();
|
|
|
|
virtual int Compare(const FFState& other) const;
|
|
|
|
void print(std::ostream& out) const;
|
2010-09-14 19:00:22 +04:00
|
|
|
|
|
|
|
private:
|
2013-05-29 21:16:15 +04:00
|
|
|
Phrase m_words;
|
|
|
|
size_t m_source_length;
|
|
|
|
size_t m_target_length;
|
2010-10-22 20:33:36 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// scaled reference length is needed for scoring incomplete hypotheses against reference translation
|
|
|
|
float m_scaled_ref_length;
|
2010-09-16 16:38:00 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
std::vector< size_t > m_ngram_counts;
|
|
|
|
std::vector< size_t > m_ngram_matches;
|
2012-04-01 22:59:00 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void AddNgramCountAndMatches(std::vector< size_t >& counts, std::vector< size_t >& matches);
|
2010-09-14 19:00:22 +04:00
|
|
|
};
|
|
|
|
|
2012-04-01 22:59:00 +04:00
|
|
|
|
2010-09-16 16:49:57 +04:00
|
|
|
// Stream-insertion operator for BleuScoreState; declared here, defined elsewhere.
// NOTE(review): presumably forwards to BleuScoreState::print() -- confirm in
// the implementation file.
std::ostream& operator<<(std::ostream& out, const BleuScoreState& state);
|
|
|
|
|
2012-10-16 22:00:41 +04:00
|
|
|
// Hash map from a Phrase to a size_t value.
// NOTE(review): presumably n-gram -> occurrence count -- confirm against the
// code that fills it.
typedef boost::unordered_map< Phrase, size_t > NGrams;
|
|
|
|
|
|
|
|
class RefValue : public std::pair<std::vector<size_t>,NGrams>
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
RefValue& operator=( const RefValue& rhs ) {
|
|
|
|
first = rhs.first;
|
|
|
|
second = rhs.second;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2010-09-16 19:40:13 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
class BleuScoreFeature : public StatefulFeatureFunction
|
|
|
|
{
|
2010-09-14 19:00:22 +04:00
|
|
|
public:
|
2011-11-18 14:17:16 +04:00
|
|
|
|
2012-10-16 22:00:41 +04:00
|
|
|
typedef boost::unordered_map<size_t, RefValue > RefCounts;
|
2011-11-18 14:17:16 +04:00
|
|
|
typedef boost::unordered_map<size_t, NGrams> Matches;
|
|
|
|
|
2013-01-29 14:54:09 +04:00
|
|
|
BleuScoreFeature(const std::string &line);
|
2011-06-11 02:45:17 +04:00
|
|
|
|
2013-08-13 19:39:37 +04:00
|
|
|
void SetParameter(const std::string& key, const std::string& value);
|
|
|
|
|
|
|
|
std::vector<float> DefaultWeights() const;
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void PrintHistory(std::ostream& out) const;
|
|
|
|
void LoadReferences(const std::vector< std::vector< std::string > > &);
|
|
|
|
void SetCurrSourceLength(size_t);
|
|
|
|
void SetCurrNormSourceLength(size_t);
|
|
|
|
void SetCurrShortestRefLength(size_t);
|
|
|
|
void SetCurrAvgRefLength(size_t sent_id);
|
|
|
|
void SetAvgInputLength (float l) {
|
|
|
|
m_avg_input_length = l;
|
|
|
|
}
|
|
|
|
void SetCurrReferenceNgrams(size_t sent_id);
|
|
|
|
size_t GetShortestRefIndex(size_t ref_id);
|
|
|
|
size_t GetClosestRefLength(size_t ref_id, int hypoLength);
|
|
|
|
void UpdateHistory(const std::vector< const Word* >&);
|
|
|
|
void UpdateHistory(const std::vector< std::vector< const Word* > >& hypos, std::vector<size_t>& sourceLengths, std::vector<size_t>& ref_ids, size_t rank, size_t epoch);
|
|
|
|
void PrintRefLength(const std::vector<size_t>& ref_ids);
|
|
|
|
void SetBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
|
|
|
|
bool scaleByInverseLength, bool scaleByAvgInverseLength,
|
|
|
|
float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu);
|
|
|
|
|
|
|
|
void GetNgramMatchCounts(Phrase&,
|
|
|
|
const NGrams&,
|
|
|
|
std::vector< size_t >&,
|
|
|
|
std::vector< size_t >&,
|
|
|
|
size_t skip = 0) const;
|
|
|
|
void GetNgramMatchCounts_prefix(Phrase&,
|
|
|
|
const NGrams&,
|
|
|
|
std::vector< size_t >&,
|
|
|
|
std::vector< size_t >&,
|
|
|
|
size_t new_start_indices,
|
|
|
|
size_t last_end_index) const;
|
|
|
|
void GetNgramMatchCounts_overlap(Phrase& phrase,
|
|
|
|
const NGrams& ref_ngram_counts,
|
|
|
|
std::vector< size_t >& ret_counts,
|
|
|
|
std::vector< size_t >& ret_matches,
|
|
|
|
size_t overlap_index) const;
|
|
|
|
void GetClippedNgramMatchesAndCounts(Phrase&,
|
|
|
|
const NGrams&,
|
|
|
|
std::vector< size_t >&,
|
|
|
|
std::vector< size_t >&,
|
|
|
|
size_t skip = 0) const;
|
|
|
|
|
|
|
|
FFState* Evaluate( const Hypothesis& cur_hypo,
|
|
|
|
const FFState* prev_state,
|
|
|
|
ScoreComponentCollection* accumulator) const;
|
|
|
|
FFState* EvaluateChart(const ChartHypothesis& cur_hypo,
|
|
|
|
int featureID,
|
|
|
|
ScoreComponentCollection* accumulator) const;
|
2013-08-30 18:49:00 +04:00
|
|
|
void Evaluate(const InputType &input
|
|
|
|
, const InputPath &inputPath
|
|
|
|
, ScoreComponentCollection &scoreBreakdown) const
|
|
|
|
{}
|
|
|
|
void Evaluate(const Phrase &source
|
|
|
|
, const TargetPhrase &targetPhrase
|
|
|
|
, ScoreComponentCollection &scoreBreakdown
|
|
|
|
, ScoreComponentCollection &estimatedFutureScore) const
|
|
|
|
{}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
bool Enabled() const {
|
|
|
|
return m_enabled;
|
|
|
|
}
|
2013-05-30 15:41:08 +04:00
|
|
|
|
|
|
|
bool IsUseable(const FactorMask &mask) const;
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
float CalculateBleu(BleuScoreState*) const;
|
|
|
|
float CalculateBleu(Phrase translation) const;
|
|
|
|
const FFState* EmptyHypothesisState(const InputType&) const;
|
|
|
|
|
|
|
|
float GetSourceLengthHistory() {
|
|
|
|
return m_source_length_history;
|
|
|
|
}
|
|
|
|
float GetTargetLengthHistory() {
|
|
|
|
return m_target_length_history;
|
|
|
|
}
|
|
|
|
float GetAverageInputLength() {
|
|
|
|
return m_avg_input_length;
|
|
|
|
}
|
2012-03-15 22:43:08 +04:00
|
|
|
|
2010-09-15 20:51:21 +04:00
|
|
|
private:
|
2013-05-29 21:16:15 +04:00
|
|
|
bool m_enabled;
|
|
|
|
bool m_sentence_bleu;
|
|
|
|
bool m_simple_history_bleu;
|
2012-04-01 22:59:00 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// counts for pseudo-document
|
|
|
|
std::vector< float > m_count_history;
|
|
|
|
std::vector< float > m_match_history;
|
|
|
|
float m_source_length_history;
|
|
|
|
float m_target_length_history;
|
|
|
|
float m_ref_length_history;
|
2011-06-11 02:45:17 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
size_t m_cur_source_length;
|
|
|
|
size_t m_cur_norm_source_length; // length without <s>, </s>
|
|
|
|
RefCounts m_refs;
|
|
|
|
NGrams m_cur_ref_ngrams;
|
|
|
|
float m_cur_ref_length;
|
2012-03-12 23:59:10 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// scale BLEU score by history of input length
|
|
|
|
bool m_scale_by_input_length;
|
|
|
|
bool m_scale_by_avg_input_length;
|
2011-10-26 14:36:00 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// scale by the inverse of the input length * 100
|
|
|
|
bool m_scale_by_inverse_length;
|
|
|
|
bool m_scale_by_avg_inverse_length;
|
2012-03-12 23:59:10 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
float m_avg_input_length;
|
2012-03-07 21:43:15 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
float m_scale_by_x;
|
2011-10-24 21:39:23 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// smoothing factor for history counts
|
|
|
|
float m_historySmoothing;
|
2011-10-23 01:23:58 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
enum SmoothingScheme { PLUS_ONE = 1, PLUS_POINT_ONE = 2, PAPINENI = 3 };
|
|
|
|
SmoothingScheme m_smoothing_scheme;
|
2010-09-14 19:00:22 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
} // Namespace.
|
|
|
|
|
|
|
|
#endif //BLUESCOREFEATURE_H
|
2010-09-15 20:51:21 +04:00
|
|
|
|