the cleanup of mert-moses seems to be finished

added first simple 'make release' goal


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@405 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bojar 2006-07-31 14:17:43 +00:00
parent 0c38aeae70
commit 57bcad0c5f
2 changed files with 91 additions and 35 deletions

18
scripts/Makefile Normal file
View File

@ -0,0 +1,18 @@
# This makefile is here to simplify the automatic releases (and tests!!!)
# of the scripts
TS?=$(shell date '+%Y%m%d-%H%M')
RELEASEDIR=/export/ws06osmt/bin/scripts-$(TS)
VALID_TRAINING_SCRIPTS_NAMES=filter-model-given-input.pl mert-moses.pl train-factored-phrase-model.perl
# Make trick to add directory name to all of them:
VALID_TRAINING_SCRIPTS=$(VALID_TRAINING_SCRIPTS_NAMES:%=training/%)
# the list of all scripts that should be released
VALID_SCRIPTS= $(VALID_TRAINING_SCRIPTS)
release:
if [ -e $(RELEASEDIR) ]; then echo "Targetdir exists! Not touching it! $(RELEASEDIR)"; exit 1; fi
mkdir -p $(RELEASEDIR)
cp $(VALID_SCRIPTS) $(RELEASEDIR)

View File

@ -9,6 +9,7 @@
# Revision history
# 29 Jul 2006 run-filter, score-nbest and mert run on the queue (Nicola; Ondrej had to type it in again)
# 28 Jul 2006 attempt at foolproof usage, strong checking of input validity, merged the parallel and nonparallel version (Ondrej Bojar)
# 27 Jul 2006 adding the safesystem() function to handle with process failure
# 22 Jul 2006 fixed a bug about handling relative path of configuration file (Nicola Bertoldi)
@ -84,10 +85,11 @@ my $___START_STEP = undef; # which iteration step to start with
my $___AVERAGE = 0;
my $bindir = undef; # path to all tools (overriden by specific options)
my $CMERTDIR = undef; # path to cmert directory
my $cmertdir = undef; # path to cmert directory
my $pythoncmd = undef; # path to python executable
my $filtercmd = undef; # path to filter-model-given-input.pl
my $SCORENBESTCMD = undef;
my $qsubwrapper = undef;
use strict;
@ -107,10 +109,11 @@ GetOptions(
"average" => \$___AVERAGE,
"help" => \$usage,
"bindir=s" => \$bindir,
"cmertdir=s" => \$CMERTDIR,
"cmertdir=s" => \$cmertdir,
"pythoncmd=s" => \$pythoncmd,
"filtercmd=s" => \$filtercmd, # allow to override the default location
"scorenbestcmd=s" => \$SCORENBESTCMD, # path to score-nbest.py
"qsubwrapper=s" => \$qsubwrapper, # allow to override the default location
);
# the 4 required parameters can be supplied on the command line directly
@ -154,19 +157,21 @@ $bindir = $ENV{"MOSESBIN"} if !defined $bindir;
# path of script for filtering phrase tables and running the decoder
$filtercmd="$bindir/filter-model-given-input.pl" if !defined $filtercmd;
$qsubwrapper="$bindir/qsub-wrapper.pl" if !defined $qsubwrapper;
$CMERTDIR = "$bindir/cmert-0.5" if !defined $CMERTDIR;
my $CMERT="$CMERTDIR/mert";
$SCORENBESTCMD = "$CMERTDIR/score-nbest.py" if ! defined $SCORENBESTCMD;
$cmertdir = "$bindir/cmert-0.5" if !defined $cmertdir;
my $cmertcmd="$cmertdir/mert";
$pythoncmd = "$CMERTDIR/python" if !defined $pythoncmd;
$SCORENBESTCMD = "$cmertdir/score-nbest.py" if ! defined $SCORENBESTCMD;
$pythoncmd = "$cmertdir/python" if !defined $pythoncmd;
$ENV{PYTHONPATH} = $pythoncmd; # other scripts need to know
die "Not executable: $filtercmd" if ! -x $filtercmd;
die "Not executable: $CMERT" if ! -x $CMERT;
die "Not executable: $cmertcmd" if ! -x $cmertcmd;
die "Not executable: $pythoncmd" if ! -x $pythoncmd;
die "Not executable: $___DECODER" if ! -x $___DECODER;
@ -334,7 +339,8 @@ close(RANGES);
# filter the phrase tables, use --decoder-flags
print "filtering the phrase tables... ".`date`;
safesystem("$filtercmd ./filtered $___CONFIG $___DEV_F") or die "Failed to filter the tables";
my $cmd = "$filtercmd ./filtered $___CONFIG $___DEV_F";
safesystem("$qsubwrapper -command='$cmd'") or die "Failed to submit filtering of tables to the queue (via $qsubwrapper)";
# the decoder should now use the filtered model
@ -359,30 +365,38 @@ while(1) {
}
close(WEIGHTS);
# In case something dies later, we might wish to have a copy
create_config($___CONFIG, "./run$run.moses.ini", \@LAMBDA, \@NAME, $run, (defined$devbleu?$devbleu:"--not-estimated--"));
# skip if restarted
if (!$skip_decoder) {
print "($run) run decoder to produce n-best lists\n";
print "LAMBDAS are @LAMBDA\n";
run_decoder(\@LAMBDA);
safesystem("gzip -f run*out") or die "Failed to gzip run*out";
}
else {
print "skipped decoder run\n";
$skip_decoder = 0;
}
safesystem("gzip -f run*out") or die "Failed to gzip run*out";
my $EFF_REF_LEN = "";
if ($___AVERAGE) {
$EFF_REF_LEN = "-a";
}
# To be sure that scoring script produses these fresh:
safesystem("rm -f cands.opt feats.opt") or die;
# convert n-best list into a numberized format with error scores
safesystem("gunzip run*.best*.out.gz") or die "Failed to gunzip run*.best*.out.gz";
print STDERR "Scoring the nbestlist or whatever.\n";
my $cmd = "sort -mn -t \"|\" -k 1,1 run*.best*.out | $SCORENBESTCMD $EFF_REF_LEN ".join(" ", @references)." ./";
safesystem("$cmd") or die "Failed to score-nbest list or whatever.";
safesystem("gzip -f run*.best*.out") or die;
print STDERR "Scoring the nbestlist.\n";
my $cmd = "export PYTHONPATH=$pythoncmd ; gunzip -dc run*.best*.out.gz | sort -n -t \"|\" -k 1,1 | $SCORENBESTCMD $EFF_REF_LEN ".join(" ", @references)." ./";
safesystem("$qsubwrapper -command='$cmd'") or die "Failed to submit scoring nbestlist to queue (via $qsubwrapper)";
print STDERR "Hoping that scoring succeeded. Don't know how to check for it! XXX.\n";
# keep a count of lines in nbests lists (alltogether)
@ -406,15 +420,16 @@ while(1) {
# run cmert
safesystem("cat ranges.txt weights.txt > init.opt") or die;
safesystem("rm -f weights.txt") or die;
safesystem("mv weights.txt run$run.input_weights.txt") or die; # keep a copy of the weights
#store actual values
safesystem("cp init.opt run$run.init.opt") or die;
my $DIM = scalar(@LAMBDA); # number of lambdas
$cmd="$cmertcmd -d $DIM";
print STDERR "Running cmert.\n";
safesystem("$CMERT -d $DIM 2> cmert.log") or die;
print STDERR "Starting cmert.\n";
safesystem("$qsubwrapper -command='$cmd' -stderr=cmert.log") or die "Failed to start cmert (via qsubwrapper $qsubwrapper)";
my $bestpoint = undef;
my $devbleu = undef;
@ -447,9 +462,9 @@ safesystem ("cp cmert.log run$run.cmert.log") or die;
# This is fine, because the new attempt did not bring any improvement,
# so we do not want to use it.
# @NAME are the names of models the lambdas belong to
create_config(@LAMBDA, @NAME);
create_config($___CONFIG, "./moses.ini", \@LAMBDA, \@NAME, $run, $devbleu);
#chdir back to the original directory
#chdir back to the original directory # useless, just to remind we were not there
chdir($cwd);
sub run_decoder {
@ -473,38 +488,58 @@ sub run_decoder {
}
sub create_config {
my $infn = shift; # source config
my $outfn = shift; # where to save the config
my $lambdas = shift; # the lambdas we should write
my @lambdas = @$lambdas;
my @lambdas = @$lambdas; # my own copy of the array
my $names = shift; # the names of the lambdas
my @names = @$names;
my @names = @$names; # my own copy of the array
my $run = shift; # just for verbosity
my $devbleu = shift; # just for verbosity
my %P;
# parameters specified at the command line
{
my $parameter;
print "PARAM IS |$___DECODER_FLAGS|\n";
my %P; # the hash of all parameters we wish to override
# first convert the command line parameters to the hash
{ # ensure local scope of vars
my $parameter=undef;
print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
$___DECODER_FLAGS =~ s/^\s*|\s*$//;
$___DECODER_FLAGS =~ s/\s+/ /;
foreach (split(/ /,$___DECODER_FLAGS)) {
print "$_ :::\n";
if (/^\-([^\d].*)$/) {
$parameter = $1;
$parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
print "\tis parameter $parameter\n";
}
else {
die "Found value with no -paramname before it: $_"
if !defined $parameter;
push @{$P{$parameter}},$_;
}
}
}
# Convert weights to elements in P
# First delete all weights params from the input
foreach my $abbr (@names) {
my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;
delete($P{$name});
}
while (my $abbr = shift @names) {
my $w = shift @lambdas;
die "Lambdas and names do not have equal length!" if !defined $w;
my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;
push @{$P{$name}}, $w;
}
# create new moses.ini decoder config file
open(INI,$P{"config"}[0]);
delete($P{"config"});
print "OUT: > moses.ini\n";
open(OUT,"> moses.ini");
# create new moses.ini decoder config file by cloning and overriding the original one
open(INI,$infn) or die "Can't read $infn";
delete($P{"config"}); # never output
print "Saving new config to: $outfn";
open(OUT,"> $outfn") or die "Can't write $outfn";
print OUT "# MERT optimized configuration\n";
print OUT "# decoder $___DECODER\n";
print OUT "# $devbleu on dev $___DEV_F\n";
print OUT "# BLEU $devbleu on dev $___DEV_F\n";
print OUT "# $run iterations\n";
print OUT "# finished ".`date`;
my $line = <INI>;
@ -546,6 +581,7 @@ sub create_config {
}
}
# write all additional parameters
foreach my $parameter (keys %P) {
print OUT "\n[$parameter]\n";
foreach (@{$P{$parameter}}) {
@ -555,6 +591,7 @@ sub create_config {
close(INI);
close(OUT);
print STDERR "Saved: $outfn\n";
}
sub safesystem {
@ -632,6 +669,7 @@ sub scan_config {
next;
}
if (defined $section && $section eq "mapping") {
# keep track of mapping steps used
$defined_steps{$1}++ if /^([TG])/;
}
if (defined $section && defined $where_is_filename{$section}) {