From 96f7b42eb9944cc6fb5173c320562ca2dc5a2229 Mon Sep 17 00:00:00 2001 From: Tetsuo Kiso Date: Mon, 5 Nov 2012 01:24:16 +0900 Subject: [PATCH] Move implementation details from the header to .cpp file. Also add const to variables that we don't want to change. --- mert/SentenceLevelScorer.cpp | 87 +++++++++++++++++++++++++++--------- mert/SentenceLevelScorer.h | 72 +++++++---------------------- 2 files changed, 81 insertions(+), 78 deletions(-) diff --git a/mert/SentenceLevelScorer.cpp b/mert/SentenceLevelScorer.cpp index df3bbb5c1..0b159f0b7 100644 --- a/mert/SentenceLevelScorer.cpp +++ b/mert/SentenceLevelScorer.cpp @@ -6,17 +6,61 @@ // Copyright 2012 __MyCompanyName__. All rights reserved. // -#include #include "SentenceLevelScorer.h" +#include +#include + using namespace std; namespace MosesTuning { - -/** The sentence level scores have already been calculated, just need to average them - and include the differences. Allows scores which are floats **/ +SentenceLevelScorer::SentenceLevelScorer(const string& name, const string& config) + : Scorer(name, config), + m_regularisationStrategy(REG_NONE), + m_regularisationWindow(0) { + Init(); +} + +SentenceLevelScorer::~SentenceLevelScorer() {} + +void SentenceLevelScorer::Init() { + // Configure regularisation. + static string KEY_TYPE = "regtype"; + static string KEY_WINDOW = "regwin"; + static string KEY_CASE = "case"; + static string TYPE_NONE = "none"; + static string TYPE_AVERAGE = "average"; + static string TYPE_MINIMUM = "min"; + static string TRUE = "true"; + static string FALSE = "false"; + + const string type = getConfig(KEY_TYPE, TYPE_NONE); + if (type == TYPE_NONE) { + m_regularisationStrategy = REG_NONE; + } else if (type == TYPE_AVERAGE) { + m_regularisationStrategy = REG_AVERAGE; + } else if (type == TYPE_MINIMUM) { + m_regularisationStrategy = REG_MINIMUM; + } else { + throw boost::lexer::runtime_error("Unknown scorer regularisation strategy: " + type); + } + cerr << "Using scorer regularisation strategy: " << type << endl; + + const string window = getConfig(KEY_WINDOW, "0"); + m_regularisationWindow = atoi(window.c_str()); + cerr << "Using scorer regularisation window: " << m_regularisationWindow << endl; + + const string preservecase = getConfig(KEY_CASE, TRUE); + if (preservecase == TRUE) { + m_enable_preserve_case = true; + } else if (preservecase == FALSE) { + m_enable_preserve_case = false; + } + cerr << "Using case preservation: " << m_enable_preserve_case << endl; +} + void SentenceLevelScorer::score(const candidates_t& candidates, const diffs_t& diffs, statscores_t& scores) { @@ -31,7 +75,7 @@ void SentenceLevelScorer::score(const candidates_t& candidates, const diffs_t& if (candidates.size() == 0) { throw runtime_error("No candidates supplied"); } - int numCounts = m_score_data->get(0,candidates[0]).size(); + const int numCounts = m_score_data->get(0,candidates[0]).size(); vector totals(numCounts); for (size_t i = 0; i < candidates.size(); ++i) { //cout << " i " << i << " candi " << candidates[i] ; @@ -57,21 +101,21 @@ void SentenceLevelScorer::score(const candidates_t& candidates, const diffs_t& totals[k] /= candidates.size(); //cout << "finaltotals = " << totals[k] << endl; } - + scores.push_back(calculateScore(totals)); - + candidates_t last_candidates(candidates); //apply each of the diffs, and get new scores for (size_t i = 0; i < diffs.size(); ++i) { for (size_t j = 0; j < diffs[i].size(); ++j) { - size_t sid = diffs[i][j].first; - size_t nid = diffs[i][j].second; + const size_t sid = diffs[i][j].first; + const size_t nid = diffs[i][j].second; //cout << "sid = " << sid << endl; //cout << "nid = " << nid << endl; - size_t last_nid = last_candidates[sid]; + const size_t last_nid = last_candidates[sid]; for (size_t k = 0; k < totals.size(); ++k) { - float diff = m_score_data->get(sid,nid).get(k) - - m_score_data->get(sid,last_nid).get(k); + const float diff = m_score_data->get(sid,nid).get(k) + - m_score_data->get(sid,last_nid).get(k); //cout << "diff = " << diff << endl; totals[k] += diff/candidates.size(); //cout << "totals = " << totals[k] << endl; @@ -80,29 +124,28 @@ void SentenceLevelScorer::score(const candidates_t& candidates, const diffs_t& } scores.push_back(calculateScore(totals)); } - + //regularisation. This can either be none, or the min or average as described in //Cer, Jurafsky and Manning at WMT08 - if (_regularisationStrategy == REG_NONE || _regularisationWindow <= 0) { + if (m_regularisationStrategy == REG_NONE || m_regularisationWindow <= 0) { //no regularisation return; } - + //window size specifies the +/- in each direction statscores_t raw_scores(scores);//copy scores for (size_t i = 0; i < scores.size(); ++i) { size_t start = 0; - if (i >= _regularisationWindow) { - start = i - _regularisationWindow; + if (i >= m_regularisationWindow) { + start = i - m_regularisationWindow; } - size_t end = min(scores.size(), i + _regularisationWindow+1); - if (_regularisationStrategy == REG_AVERAGE) { - scores[i] = score_average(raw_scores,start,end); + const size_t end = min(scores.size(), i + m_regularisationWindow+1); + if (m_regularisationStrategy == REG_AVERAGE) { + scores[i] = score_average(raw_scores, start, end); } else { - scores[i] = score_min(raw_scores,start,end); + scores[i] = score_min(raw_scores, start, end); } } } } - diff --git a/mert/SentenceLevelScorer.h b/mert/SentenceLevelScorer.h index b875c8af9..a950d623b 100644 --- a/mert/SentenceLevelScorer.h +++ b/mert/SentenceLevelScorer.h @@ -12,77 +12,37 @@ #include "Scorer.h" #include #include -#include -#include namespace MosesTuning { - /** * Abstract base class for scorers that work by using sentence level - * statistics eg. permutation distance metrics **/ + * statistics (e.g., permutation distance metrics). **/ class SentenceLevelScorer : public Scorer { - public: - SentenceLevelScorer(const std::string& name, const std::string& config): Scorer(name,config) { - //configure regularisation - static std::string KEY_TYPE = "regtype"; - static std::string KEY_WINDOW = "regwin"; - static std::string KEY_CASE = "case"; - static std::string TYPE_NONE = "none"; - static std::string TYPE_AVERAGE = "average"; - static std::string TYPE_MINIMUM = "min"; - static std::string TRUE = "true"; - static std::string FALSE = "false"; - - std::string type = getConfig(KEY_TYPE,TYPE_NONE); - if (type == TYPE_NONE) { - _regularisationStrategy = REG_NONE; - } else if (type == TYPE_AVERAGE) { - _regularisationStrategy = REG_AVERAGE; - } else if (type == TYPE_MINIMUM) { - _regularisationStrategy = REG_MINIMUM; - } else { - throw boost::lexer::runtime_error("Unknown scorer regularisation strategy: " + type); - } - std::cerr << "Using scorer regularisation strategy: " << type << std::endl; - - std::string window = getConfig(KEY_WINDOW,"0"); - _regularisationWindow = atoi(window.c_str()); - std::cerr << "Using scorer regularisation window: " << _regularisationWindow << std::endl; - - std::string preservecase = getConfig(KEY_CASE,TRUE); - if (preservecase == TRUE) { - m_enable_preserve_case = true; - } else if (preservecase == FALSE) { - m_enable_preserve_case = false; - } - std::cerr << "Using case preservation: " << m_enable_preserve_case << std::endl; - - - } - ~SentenceLevelScorer() {}; + SentenceLevelScorer(const std::string& name, const std::string& config); + ~SentenceLevelScorer(); + + /** The sentence level scores have already been calculated, just need to average them + and include the differences. Allows scores which are floats. **/ virtual void score(const candidates_t& candidates, const diffs_t& diffs, statscores_t& scores); - - //calculate the actual score - virtual statscore_t calculateScore(const std::vector& totals) { + + // calculate the actual score * + virtual statscore_t calculateScore(const std::vector& totals) const { return 0; - }; - - - + } + protected: - + // Set up regularisation parameters. + void Init(); + //regularisation - ScorerRegularisationStrategy _regularisationStrategy; - size_t _regularisationWindow; - + ScorerRegularisationStrategy m_regularisationStrategy; + size_t m_regularisationWindow; }; - } - #endif