From 57bcad0c5fa0d1461e6aab68c81f07d2b7f2b977 Mon Sep 17 00:00:00 2001
From: bojar <bojar@1f5c12ca-751b-0410-a591-d2e778427230>
Date: Mon, 31 Jul 2006 14:17:43 +0000
Subject: [PATCH] the cleanup of mert-moses seems to be finished added first
 simple 'make release' goal

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@405 1f5c12ca-751b-0410-a591-d2e778427230
---
 scripts/Makefile               |  18 ++++++
 scripts/training/mert-moses.pl | 108 ++++++++++++++++++++++-----------
 2 files changed, 91 insertions(+), 35 deletions(-)
 create mode 100644 scripts/Makefile

diff --git a/scripts/Makefile b/scripts/Makefile
new file mode 100644
index 000000000..94c6b9fe9
--- /dev/null
+++ b/scripts/Makefile
@@ -0,0 +1,18 @@
+# This makefile is here to simplify the automatic releases (and tests!!!)
+# of the scripts.  Usage: make release [TS=<timestamp>]
+
+TS?=$(shell date '+%Y%m%d-%H%M')
+# Freeze TS now: '?=' alone is recursive and would re-run date on every use.
+TS:=$(TS)
+RELEASEDIR=/export/ws06osmt/bin/scripts-$(TS)
+
+VALID_TRAINING_SCRIPTS_NAMES=filter-model-given-input.pl  mert-moses.pl train-factored-phrase-model.perl
+# Make trick to add directory name to all of them:
+VALID_TRAINING_SCRIPTS=$(VALID_TRAINING_SCRIPTS_NAMES:%=training/%)
+# the list of all scripts that should be released
+VALID_SCRIPTS= $(VALID_TRAINING_SCRIPTS)
+.PHONY: release
+release:
+	if [ -e $(RELEASEDIR) ]; then echo "Targetdir exists! Not touching it! $(RELEASEDIR)"; exit 1; fi
+	mkdir -p $(RELEASEDIR)
+	cp $(VALID_SCRIPTS) $(RELEASEDIR)
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index 7e312ddb6..29ec8e1e3 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -9,6 +9,7 @@
 
 # Revision history
 
+# 29 Jul 2006 run-filter, score-nbest and mert run on the queue (Nicola; Ondrej had to type it in again)
 # 28 Jul 2006 attempt at foolproof usage, strong checking of input validity, merged the parallel and nonparallel version (Ondrej Bojar)
 # 27 Jul 2006 adding the safesystem() function to handle with process failure
 # 22 Jul 2006 fixed a bug about handling relative path of configuration file (Nicola Bertoldi) 
@@ -84,10 +85,11 @@ my $___START_STEP = undef;  # which iteration step to start with
 my $___AVERAGE = 0;
 
 my $bindir = undef; # path to all tools (overriden by specific options)
-my $CMERTDIR = undef; # path to cmert directory
+my $cmertdir = undef; # path to cmert directory
 my $pythoncmd = undef; # path to python executable
 my $filtercmd = undef; # path to filter-model-given-input.pl
 my $SCORENBESTCMD = undef;
+my $qsubwrapper = undef;
 
 
 use strict;
@@ -107,10 +109,11 @@ GetOptions(
   "average" => \$___AVERAGE,
   "help" => \$usage,
   "bindir=s" => \$bindir,
-  "cmertdir=s" => \$CMERTDIR,
+  "cmertdir=s" => \$cmertdir,
   "pythoncmd=s" => \$pythoncmd,
   "filtercmd=s" => \$filtercmd, # allow to override the default location
   "scorenbestcmd=s" => \$SCORENBESTCMD, # path to score-nbest.py
+  "qsubwrapper=s" => \$qsubwrapper, # allow to override the default location
 );
 
 # the 4 required parameters can be supplied on the command line directly
@@ -154,19 +157,21 @@ $bindir = $ENV{"MOSESBIN"} if !defined $bindir;
 # path of script for filtering phrase tables and running the decoder
 $filtercmd="$bindir/filter-model-given-input.pl" if !defined $filtercmd;
 
+$qsubwrapper="$bindir/qsub-wrapper.pl" if !defined $qsubwrapper;
 
-$CMERTDIR = "$bindir/cmert-0.5" if !defined $CMERTDIR;
-my $CMERT="$CMERTDIR/mert";
 
-$SCORENBESTCMD = "$CMERTDIR/score-nbest.py" if ! defined $SCORENBESTCMD;
+$cmertdir = "$bindir/cmert-0.5" if !defined $cmertdir;
+my $cmertcmd="$cmertdir/mert";
 
-$pythoncmd = "$CMERTDIR/python" if !defined $pythoncmd;
+$SCORENBESTCMD = "$cmertdir/score-nbest.py" if ! defined $SCORENBESTCMD;
+
+$pythoncmd = "$cmertdir/python" if !defined $pythoncmd;
 
 $ENV{PYTHONPATH} = $pythoncmd; # other scripts need to know
 
 
 die "Not executable: $filtercmd" if ! -x $filtercmd;
-die "Not executable: $CMERT" if ! -x $CMERT;
+die "Not executable: $cmertcmd" if ! -x $cmertcmd;
 die "Not executable: $pythoncmd" if ! -x $pythoncmd;
 die "Not executable: $___DECODER" if ! -x $___DECODER;
 
@@ -334,7 +339,8 @@ close(RANGES);
 
 # filter the phrase tables, use --decoder-flags
 print "filtering the phrase tables... ".`date`;
-safesystem("$filtercmd ./filtered $___CONFIG $___DEV_F") or die "Failed to filter the tables";
+my $cmd = "$filtercmd ./filtered $___CONFIG $___DEV_F";
+safesystem("$qsubwrapper -command='$cmd'") or die "Failed to submit filtering of tables to the queue (via $qsubwrapper)";
 
 
 # the decoder should now use the filtered model
@@ -359,30 +365,38 @@ while(1) {
   }
   close(WEIGHTS);
 
+  # In case something dies later, we might wish to have a copy
+  create_config($___CONFIG, "./run$run.moses.ini", \@LAMBDA, \@NAME, $run, (defined$devbleu?$devbleu:"--not-estimated--"));
+
 
   # skip if restarted
   if (!$skip_decoder) {
       print "($run) run decoder to produce n-best lists\n";
       print "LAMBDAS are @LAMBDA\n";
       run_decoder(\@LAMBDA);
+      safesystem("gzip -f run*out") or die "Failed to gzip run*out";
   }
   else {
       print "skipped decoder run\n";
       $skip_decoder = 0;
   }
-  safesystem("gzip -f run*out") or die "Failed to gzip run*out";
 
   my $EFF_REF_LEN = "";
   if ($___AVERAGE) {
      $EFF_REF_LEN = "-a";
   }
 
+  # To be sure that scoring script produces these fresh:
+  safesystem("rm -f cands.opt feats.opt") or die;
+
   # convert n-best list into a numberized format with error scores
-  safesystem("gunzip run*.best*.out.gz") or die "Failed to gunzip run*.best*.out.gz";
-  print STDERR "Scoring the nbestlist or whatever.\n";
-  my $cmd = "sort -mn -t \"|\" -k 1,1 run*.best*.out | $SCORENBESTCMD $EFF_REF_LEN ".join(" ", @references)." ./";
-  safesystem("$cmd") or die "Failed to score-nbest list or whatever.";
-  safesystem("gzip -f run*.best*.out") or die;
+
+  print STDERR "Scoring the nbestlist.\n";
+  my $cmd = "export PYTHONPATH=$pythoncmd ; gunzip -dc run*.best*.out.gz | sort -n -t \"|\" -k 1,1 | $SCORENBESTCMD $EFF_REF_LEN ".join(" ", @references)." ./";
+  safesystem("$qsubwrapper -command='$cmd'") or die "Failed to submit scoring nbestlist to queue (via $qsubwrapper)";
+
+
+  print STDERR "Hoping that scoring succeeded. Don't know how to check for it! XXX.\n";
 
 
   # keep a count of lines in nbests lists (alltogether)
@@ -406,15 +420,16 @@ while(1) {
 
   # run cmert
   safesystem("cat ranges.txt weights.txt > init.opt") or die;
-  safesystem("rm -f weights.txt") or die;
+  safesystem("mv weights.txt run$run.input_weights.txt") or die; # keep a copy of the weights
 
   #store actual values
   safesystem("cp init.opt run$run.init.opt") or die;
 
   my $DIM = scalar(@LAMBDA); # number of lambdas
-
-  print STDERR "Running cmert.\n";
-  safesystem("$CMERT -d $DIM 2> cmert.log") or die;
+  $cmd="$cmertcmd -d $DIM";
+ 
+  print STDERR "Starting cmert.\n";
+  safesystem("$qsubwrapper -command='$cmd' -stderr=cmert.log") or die "Failed to start cmert (via qsubwrapper $qsubwrapper)";
 
   my $bestpoint = undef;
   my $devbleu = undef;
@@ -447,9 +462,9 @@ safesystem ("cp cmert.log run$run.cmert.log") or die;
 # This is fine, because the new attempt did not bring any improvement,
 # so we do not want to use it.
 # @NAME are the names of models the lambdas belong to
-create_config(@LAMBDA, @NAME);
+create_config($___CONFIG, "./moses.ini", \@LAMBDA, \@NAME, $run, $devbleu);
 
-#chdir back to the original directory
+#chdir back to the original directory # useless, just to remind we were not there
 chdir($cwd);
 
 sub run_decoder {
@@ -473,38 +488,58 @@ sub run_decoder {
 }
 
 sub create_config {
+    my $infn = shift; # source config
+    my $outfn = shift; # where to save the config
     my $lambdas = shift; # the lambdas we should write
-    my @lambdas = @$lambdas;
+    my @lambdas = @$lambdas; # my own copy of the array
     my $names = shift; # the names of the lambdas
-    my @names = @$names;
+    my @names = @$names; # my own copy of the array
+    my $run = shift;  # just for verbosity
+    my $devbleu = shift; # just for verbosity
 
-    my %P;
-    # parameters specified at the command line
-    {
-	my $parameter;
-	print "PARAM IS |$___DECODER_FLAGS|\n";
+    my %P; # the hash of all parameters we wish to override
+
+    # first convert the command line parameters to the hash
+    { # ensure local scope of vars
+	my $parameter=undef;
+	print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
+        $___DECODER_FLAGS =~ s/^\s+|\s+$//g; # trim BOTH ends; without /g only the first alternative that matches is removed
+        $___DECODER_FLAGS =~ s/\s+/ /g; # collapse every whitespace run so split(/ /) below yields no empty fields
 	foreach (split(/ /,$___DECODER_FLAGS)) {
-	    print "$_ :::\n";
 	    if (/^\-([^\d].*)$/) {
 		$parameter = $1;
 		$parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
-		print "\tis parameter $parameter\n";
 	    }
 	    else {
+                die "Found value with no -paramname before it: $_"
+                  if !defined $parameter;
 		push @{$P{$parameter}},$_;
 	    }
 	}
     }
 
+    # Convert weights to elements in P
+    # First delete all weights params from the input
+    foreach my $abbr (@names) {
+      my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;
+      delete($P{$name});
+    }
+    while (my $abbr = shift @names) {
+      my $w = shift @lambdas;
+      die "Lambdas and names do not have equal length!" if !defined $w;
+      my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;
+      push @{$P{$name}}, $w;
+    }
 
-    # create new moses.ini decoder config file
-    open(INI,$P{"config"}[0]);
-    delete($P{"config"});
-    print "OUT: > moses.ini\n";
-    open(OUT,"> moses.ini");
+
+    # create new moses.ini decoder config file by cloning and overriding the original one
+    open(INI,$infn) or die "Can't read $infn";
+    delete($P{"config"}); # never output 
+    print "Saving new config to: $outfn";
+    open(OUT,"> $outfn") or die "Can't write $outfn";
     print OUT "# MERT optimized configuration\n";
     print OUT "# decoder $___DECODER\n";
-    print OUT "# $devbleu on dev $___DEV_F\n";
+    print OUT "# BLEU $devbleu on dev $___DEV_F\n";
     print OUT "# $run iterations\n";
     print OUT "# finished ".`date`;
     my $line = <INI>;
@@ -546,6 +581,7 @@ sub create_config {
 	}
     }
 
+    # write all additional parameters
     foreach my $parameter (keys %P) {
 	print OUT "\n[$parameter]\n";
 	foreach (@{$P{$parameter}}) {
@@ -555,6 +591,7 @@ sub create_config {
 
     close(INI);
     close(OUT);
+    print STDERR "Saved: $outfn\n";
 }
 
 sub safesystem {
@@ -632,6 +669,7 @@ sub scan_config {
       next;
     }
     if (defined $section && $section eq "mapping") {
+      # keep track of mapping steps used
       $defined_steps{$1}++ if /^([TG])/;
     }
     if (defined $section && defined $where_is_filename{$section}) {