Implement brevity penalty smoothing for PRO

As in Nakov et al. (Coling 2012).
2024-08-17 15:30:26 +03:00 · 2013-02-18 11:11:20 +00:00 · 2013-02-18 11:11:20 +00:00 · 9ca364fb22
commit 9ca364fb22
parent 2e6b5e7a23
6 changed files with 27 additions and 28 deletions
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@ -237,15 +237,19 @@ void BleuScorer::DumpCounts(ostream* os,
  *os << endl;
 }

-float sentenceLevelBleuPlusOne(const vector<float>& stats) {
+float smoothedSentenceBleu
+  (const std::vector<float>& stats, float smoothing, bool smoothBP) {
+
  CHECK(stats.size() == kBleuNgramOrder * 2 + 1);

  float logbleu = 0.0;
  for (int j = 0; j < kBleuNgramOrder; j++) {
-    logbleu += log(stats[2 * j] + 1.0) - log(stats[2 * j + 1] + 1.0);
+    logbleu += log(stats[2 * j] + smoothing) - log(stats[2 * j + 1] + smoothing);
  }
  logbleu /= kBleuNgramOrder;
-  const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
+  const float reflength = stats[(kBleuNgramOrder * 2)]  + 
+    (smoothBP ? smoothing : 0.0f);
+  const float brevity = 1.0 - reflength / stats[1];

  if (brevity < 0.0) {
    logbleu += brevity;
@ -334,26 +338,12 @@ vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string&
 	vector<float> bleuScores;
 	for (size_t i=0; i < hypotheses.size(); ++i) {
 		pair<size_t,size_t> translation = hypotheses[i];
-		float bleu = sentenceLevelBleuPlusOne(scoreDataIters[translation.first]->operator[](translation.second));
+		float bleu = smoothedSentenceBleu(scoreDataIters[translation.first]->operator[](translation.second));
 		bleuScores.push_back(bleu);
 	}
 	return bleuScores;
 }

-float BleuScorer::sentenceLevelBleuPlusOne(const vector<float>& stats) {
-	float logbleu = 0.0;
-	const unsigned int bleu_order = 4;
-	for (unsigned int j=0; j<bleu_order; j++) {
-		//cerr << (stats.get(2*j)+1) << "/" << (stats.get(2*j+1)+1) << " ";
-		logbleu += log(stats[2*j]+1) - log(stats[2*j+1]+1);
-	}
-	logbleu /= bleu_order;
-	float brevity = 1.0 - (float)stats[(bleu_order*2)]/stats[1];
-	if (brevity < 0.0) {
-		logbleu += brevity;
-	}
-	//cerr << brevity << " -> " << exp(logbleu) << endl;
-	return exp(logbleu);
-}
+

 }
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@ -34,7 +34,6 @@ public:
  ~BleuScorer();

  static std::vector<float> ScoreNbestList(const std::string& scoreFile, const std::string& featureFile);
-  static float sentenceLevelBleuPlusOne(const std::vector<float>& stats);

  virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
  virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
@ -74,7 +73,8 @@ private:
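 /** Computes a smoothed sentence-level BLEU score (BLEU+1 with the default smoothing of 1.0).
 * If smoothBP is set, the smoothing is also added to the reference length. Used in PRO.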
 */
-float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
+float smoothedSentenceBleu
+  (const std::vector<float>& stats, float smoothing=1.0, bool smoothBP=false);

 /** Computes sentence-level BLEU score given a background corpus.
 * This function is used in batch MIRA.
--- a/mert/BleuScorerTest.cpp
+++ b/mert/BleuScorerTest.cpp
@ -244,7 +244,7 @@ BOOST_AUTO_TEST_CASE(calculate_actual_score) {
  // reference-length
  stats[8] = 7;

-  BOOST_CHECK(IsAlmostEqual(0.5115f, scorer.calculateScore(stats)));
+  BOOST_CHECK_CLOSE(0.5115f, scorer.calculateScore(stats), 0.01);
 }

 BOOST_AUTO_TEST_CASE(sentence_level_bleu) {
@ -270,5 +270,7 @@ BOOST_AUTO_TEST_CASE(sentence_level_bleu) {
  // reference-length
  stats[8] = 7.0;

-  BOOST_CHECK(IsAlmostEqual(0.5985f, sentenceLevelBleuPlusOne(stats)));
+  BOOST_CHECK_CLOSE(0.5985f, smoothedSentenceBleu(stats), 0.01);
+  BOOST_CHECK_CLOSE(0.5624f, smoothedSentenceBleu(stats, 0.5), 0.01 );
+  BOOST_CHECK_CLOSE(0.5067f, smoothedSentenceBleu(stats, 1.0, true), 0.01);
 }
--- a/mert/pro.cpp
+++ b/mert/pro.cpp
@ -105,6 +105,8 @@ int main(int argc, char** argv)
  const unsigned int n_candidates = 5000; // Gamma, in Hopkins & May
  const unsigned int n_samples = 50; // Xi, in Hopkins & May
  const float min_diff = 0.05;
+  bool smoothBP = false;
+  const float bleuSmoothing = 1.0f;

  po::options_description desc("Allowed options");
  desc.add_options()
@ -113,6 +115,7 @@ int main(int argc, char** argv)
      ("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
      ("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
      ("output-file,o", po::value<string>(&outputFile), "Output file")
+      ("smooth-brevity-penalty,b", po::value(&smoothBP)->zero_tokens()->default_value(false), "Smooth the brevity penalty, as in Nakov et al. (Coling 2012)")
      ;

  po::options_description cmdline_options;
@ -201,11 +204,11 @@ int main(int argc, char** argv)
    for(size_t  i=0; i<n_candidates; i++) {
      size_t rand1 = rand() % n_translations;
      pair<size_t,size_t> translation1 = hypotheses[rand1];
-      float bleu1 = sentenceLevelBleuPlusOne(scoreDataIters[translation1.first]->operator[](translation1.second));
+      float bleu1 = smoothedSentenceBleu(scoreDataIters[translation1.first]->operator[](translation1.second), bleuSmoothing, smoothBP);

      size_t rand2 = rand() % n_translations;
      pair<size_t,size_t> translation2 = hypotheses[rand2];
-      float bleu2 = sentenceLevelBleuPlusOne(scoreDataIters[translation2.first]->operator[](translation2.second));
+      float bleu2 = smoothedSentenceBleu(scoreDataIters[translation2.first]->operator[](translation2.second), bleuSmoothing, smoothBP);

      /*
      cerr << "t(" << translation1.first << "," << translation1.second << ") = " << bleu1 <<
--- a/mert/sentence-bleu.cpp
+++ b/mert/sentence-bleu.cpp
@ -38,7 +38,7 @@ int main(int argc, char **argv)
  vector<ScoreStats>::const_iterator sentIt;
  for (sentIt = entries.begin(); sentIt != entries.end(); sentIt++) {
    vector<float> stats(sentIt->getArray(), sentIt->getArray() + sentIt->size());
-    cout << BleuScorer::sentenceLevelBleuPlusOne(stats) << "\n";
+    cout << smoothedSentenceBleu(stats) << "\n";
  }
  return 0;
 }
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@ -148,6 +148,7 @@ my $mertdir = undef; # path to new mert directory
 my $mertargs = undef; # args to pass through to mert & extractor
 my $mertmertargs = undef; # args to pass through to mert only
 my $extractorargs = undef; # args to pass through to extractor only
+my $proargs = undef; # args to pass through to pro only

 # Args to pass through to batch mira only.  This flag is useful to
 # change MIRA's hyperparameters such as regularization parameter C,
@ -197,6 +198,7 @@ GetOptions(
  "mertdir=s" => \$mertdir,
  "mertargs=s" => \$mertargs,
  "extractorargs=s" => \$extractorargs,
+  "proargs=s" => \$proargs,
  "mertmertargs=s" => \$mertmertargs,
  "rootdir=s" => \$SCRIPTS_ROOTDIR,
  "filtercmd=s" => \$filtercmd, # allow to override the default location
@ -398,6 +400,8 @@ $mert_extract_args .= " $extractorargs";

 $mertmertargs = "" if !defined $mertmertargs;

+$proargs = "" unless $proargs;
+
 my $mert_mert_args = "$mertargs $mertmertargs";
 $mert_mert_args =~ s/\-+(binary|b)\b//;
 $mert_mert_args .= " $scconfig";
@ -788,11 +792,11 @@ while (1) {
  my %sparse_weights; # sparse features
  my $pro_optimizer_cmd = "$pro_optimizer $megam_default_options run$run.pro.data";
  if ($___PAIRWISE_RANKED_OPTIMIZER) {  # pro optimization
-    $cmd = "$mert_pro_cmd $seed_settings $pro_file_settings -o run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer_cmd";
+    $cmd = "$mert_pro_cmd $proargs $seed_settings $pro_file_settings -o run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer_cmd";
    &submit_or_exec($cmd, $mert_outfile, $mert_logfile);
  } elsif ($___PRO_STARTING_POINT) {  # First, run pro, then mert
    # run pro...
-    my $pro_cmd = "$mert_pro_cmd $seed_settings $pro_file_settings -o run$run.pro.data ; $pro_optimizer_cmd";
+    my $pro_cmd = "$mert_pro_cmd $proargs $seed_settings $pro_file_settings -o run$run.pro.data ; $pro_optimizer_cmd";
    &submit_or_exec($pro_cmd, "run$run.pro.out", "run$run.pro.err");
    # ... get results ...
    ($bestpoint,$devbleu) = &get_weights_from_mert("run$run.pro.out","run$run.pro.err",scalar @{$featlist->{"names"}},\%sparse_weights);