#ifndef MERT_REFERENCE_H_ #define MERT_REFERENCE_H_ #include #include #include #include "Ngram.h" /** * Reference class represents reference translations for an output * translation used in calculating BLEU score. */ class Reference { public: // for m_length typedef std::vector::iterator iterator; typedef std::vector::const_iterator const_iterator; Reference() : m_counts(new NgramCounts) { } ~Reference() { delete m_counts; } NgramCounts* get_counts() { return m_counts; } const NgramCounts* get_counts() const { return m_counts; } iterator begin() { return m_length.begin(); } const_iterator begin() const { return m_length.begin(); } iterator end() { return m_length.end(); } const_iterator end() const { return m_length.end(); } void push_back(size_t len) { m_length.push_back(len); } size_t num_references() const { return m_length.size(); } int CalcAverage() const; int CalcClosest(size_t length) const; int CalcShortest() const; private: NgramCounts* m_counts; // multiple reference lengths std::vector m_length; }; inline int Reference::CalcAverage() const { int total = 0; for (size_t i = 0; i < m_length.size(); ++i) { total += m_length[i]; } return static_cast( static_cast(total) / m_length.size()); } inline int Reference::CalcClosest(size_t length) const { int min_diff = INT_MAX; int closest_ref_id = 0; // an index of the closest reference translation for (size_t i = 0; i < m_length.size(); ++i) { const int ref_length = m_length[i]; const int length_diff = abs(ref_length - static_cast(length)); const int abs_min_diff = abs(min_diff); // Look for the closest reference if (length_diff < abs_min_diff) { min_diff = ref_length - length; closest_ref_id = i; // if two references has the same closest length, take the shortest } else if (length_diff == abs_min_diff) { if (ref_length < static_cast(m_length[closest_ref_id])) { closest_ref_id = i; } } } return static_cast(m_length[closest_ref_id]); } inline int Reference::CalcShortest() const { return *std::min_element(m_length.begin(), m_length.end()); } #endif // MERT_REFERENCE_H_