#ifndef MERT_BLEU_SCORER_H_ #define MERT_BLEU_SCORER_H_ #include #include #include #include "Types.h" #include "ScoreData.h" #include "Scorer.h" #include "ScopedVector.h" const int kBleuNgramOrder = 4; class NgramCounts; class Reference; /** * Bleu scoring */ class BleuScorer: public StatisticsBasedScorer { public: enum ReferenceLengthType { AVERAGE, CLOSEST, SHORTEST }; explicit BleuScorer(const std::string& config = ""); ~BleuScorer(); virtual void setReferenceFiles(const std::vector& referenceFiles); virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry); virtual float calculateScore(const std::vector& comps) const; virtual std::size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; } int CalcReferenceLength(std::size_t sentence_id, std::size_t length); ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; } void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; } const std::vector& GetReferences() const { return m_references.get(); } /** * Count the ngrams of each type, up to the given length in the input line. */ std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n); void DumpCounts(std::ostream* os, const NgramCounts& counts) const; bool OpenReference(const char* filename, std::size_t file_id); // NOTE: this function is used for unit testing. bool OpenReferenceStream(std::istream* is, std::size_t file_id); private: ReferenceLengthType m_ref_length_type; // reference translations. ScopedVector m_references; // no copying allowed BleuScorer(const BleuScorer&); BleuScorer& operator=(const BleuScorer&); }; /** Computes sentence-level BLEU+1 score. * This function is used in PRO. */ float sentenceLevelBleuPlusOne(const std::vector& stats); #endif // MERT_BLEU_SCORER_H_