adding regression tests for mert

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2551 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
nicolabertoldi 2009-10-01 12:53:44 +00:00
parent 3484a1fd93
commit 820e3dfe9c
33 changed files with 32795 additions and 0 deletions

View File

@ -0,0 +1,46 @@
package MertRegressionTesting;

use strict;

# Version of the regression-test data expected by the tests.  Increment it
# whenever the tests need new data, and make sure the matching
# mert-regression-tests-vX.Y archive is available for download.
use constant TESTING_DATA_VERSION => '0.1';

# find_data_directory($test_script_root, $data_dir)
#
# Returns the first existing directory among, in this order: the
# caller-supplied $data_dir (only when defined), then the directory
# "mert-reg-test-data-<version>" under $test_script_root, /tmp and /var/tmp.
# When none of them exists, installation instructions are printed on STDERR
# and the process exits with status 1.
sub find_data_directory
{
    my ($test_script_root, $data_dir) = @_;

    my $data_version = TESTING_DATA_VERSION;
    my $dir_name     = "mert-reg-test-data-$data_version";

    # An undefined $data_dir is simply left out of the search list.
    my @candidates = grep { defined } (
        $data_dir,
        "$test_script_root/$dir_name",
        "/tmp/$dir_name",
        "/var/tmp/$dir_name",
    );

    for my $candidate (@candidates) {
        return $candidate if -d $candidate;
    }

    print STDERR <<EOT;
You do not appear to have the regression testing data installed.
You may either specify a non-standard location when running
the test suite with the --data-dir option,
or, you may install it in any one of the following
standard locations: $test_script_root, /tmp, or /var/tmp with these
commands:
cd <DESIRED_INSTALLATION_DIRECTORY>
MODIFY ACCORDING TO IRSTLM
wget http://www.statmt.org/moses/reg-testing/mert-regression-tests-v$data_version.tar
tar xf mert-regression-tests-v$data_version.tar
rm mert-regression-tests-v$data_version.tar
EOT
    exit 1;
}

1;

View File

@ -0,0 +1,88 @@
#!/usr/bin/perl -w
use strict;
# Usage: compare-results.pl <results-dir> <truth-dir>
#
# Compares <results-dir>/results.dat with <truth-dir>/results.dat, prints the
# report on STDOUT, stores a copy in <results-dir>/Summary and exits with
# status 1 when at least one test failed (0 otherwise).
my ($results, $truth) = @ARGV;
die "Usage: $0 <results-dir> <truth-dir>\n"
    unless defined $results && defined $truth;

my ($report, $pass, $fail) = compare_results("$results/results.dat", "$truth/results.dat");

# Print first, so the report is still visible if the Summary file cannot be
# written.
print $report;

open my $summary_fh, '>', "$results/Summary"
    or die "Could not write $results/Summary: $!\n";
print {$summary_fh} $report;
# Buffered write errors only surface when the handle is closed.
close $summary_fh
    or die "Could not close $results/Summary: $!\n";

if ($fail > 0) {
    print <<EOT;
There were failures in this test run. Please analyze the results carefully.
EOT
    exit 1;
}
exit 0;
# compare_results($testf, $truthf)
#
# Loads both files with read_results() and checks every key of the truth
# file against the test file:
#   * a key missing from the test file counts as a failure;
#   * keys whose truth line used '=' are compared as strings and counted as
#     pass or fail;
#   * all other keys are compared numerically; the difference (and the
#     percentage change when the baseline is non-zero) is only reported and
#     counts neither as pass nor as fail.
# Keys present only in the test file count as failures.
#
# Returns the list ($report, $pass, $fail).
sub compare_results {
    my ($testf, $truthf) = @_;
    my $test  = read_results($testf);
    my $truth = read_results($truthf);

    # The comparison operators of the truth file drive the comparison; the
    # entry of the test file is removed so it is not mistaken for a result.
    my $comparison_type = delete $truth->{'COMPARISON_TYPE'};
    delete $test->{'COMPARISON_TYPE'};

    my $pass   = 0;
    my $fail   = 0;
    my $report = '';
    foreach my $k (sort keys %$truth) {
        $report .= "test-name=$k\tresult=";
        if (!exists $test->{$k}) {
            $report .= "missing from test results\n";
            $fail++;
            next;
        }

        # Only an undefined value falls back to the empty string: "0" is a
        # legitimate result and must show up as such in the report.
        my $truthv = $truth->{$k};
        my $testv  = delete $test->{$k};
        $truthv = '' unless defined $truthv;
        $testv  = '' unless defined $testv;

        if ($comparison_type->{$k} eq '=') {
            if ($truthv eq $testv) {
                $report .= "pass\n";
                $pass++;
            } else {
                $report .= "fail\n\tTRUTH=$truthv\n\t TEST=$testv\n";
                $fail++;
            }
        } else { # numeric difference
            $testv  = 0 unless $testv;
            $truthv = 0 unless $truthv;
            my $diff = $testv - $truthv;
            if ($diff == 0) { $report .= "identical\n"; next; }
            $report .= "BASELINE=$truthv, TEST=$testv\t DELTA=$diff";
            if ($truthv != 0) {
                my $pct = $diff/$truthv;
                $report .= sprintf "\t PCT CHANGE=%4.2f", $pct*100;
            }
            $report .= "\n";
        }
    }

    # Whatever is left in %$test has no counterpart in the truth file.
    foreach my $k (sort keys %$test) {
        $fail++;
        $report .= "test-name=$k\tfound in TEST but not in TRUTH.\n";
    }
    $report .= "\nTESTS PASSED=$pass\nTESTS FAILED=$fail\n";
    return $report, $pass, $fail;
}
# read_results($file)
#
# Parses a results file whose lines look like "<key> = <value>" or
# "<key> ~ <value>" (keys consist of letters, digits and underscores; lines
# that do not match are ignored).
#
# Returns a hash reference mapping every key to its value.  The extra entry
# 'COMPARISON_TYPE' holds a hash reference mapping every key to the operator
# ('=' or '~') found on its line.  Dies when the file cannot be opened.
sub read_results {
    my ($file) = @_;

    # Three-argument open with a lexical handle: the file name is taken
    # literally (no mode parsing, no whitespace stripping) and the handle
    # cannot clash with a global one.
    open my $in, '<', $file or die "Could not open $file: $!";

    my %res;
    while (my $line = <$in>) {
        if ($line =~ /^([A-Za-z0-9_]+)\s*([=~])\s*(.+)$/) {
            my ($key, $comparison_type, $value) = ($1, $2, $3);
            $res{$key} = $value;
            $res{'COMPARISON_TYPE'}->{$key} = $comparison_type;
        }
    }
    close $in;
    return \%res;
}

View File

@ -0,0 +1,125 @@
#!/usr/bin/perl -w
use strict;
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
use MertRegressionTesting;
use Getopt::Long;
use File::Temp qw ( tempfile );
use POSIX qw ( strftime );
# Signal names looked up by index in sig_name().
my @SIGS = qw ( SIGHUP SIGINT SIGQUIT SIGILL SIGTRAP SIGABRT SIGIOT SIGBUS SIGFPE SIGKILL SIGUSR1 SIGSEGV SIGUSR2 SIGPIPE SIGALRM SIGTERM SIGSTKFLT SIGCHLD SIGCONT SIGSTOP SIGTSTP SIGTTIN SIGTTOU SIGURG SIGXCPU SIGXFSZ SIGVTALRM SIGPROF SIGWINCH SIGIO SIGPWR SIGSYS SIGUNUSED SIGRTMIN );

# Command-line options.  --test, --data-dir and --mert-scripts-dir are
# mandatory; --test-dir defaults to "tests" next to this script and
# --results-dir to "<data-dir>/results".
my $test_name;
my $test_dir = "$script_dir/tests";
my $mert_scripts_dir;
my $data_dir;
my $BIN_TEST = $script_dir;
my $results_dir;
GetOptions("test=s" => \$test_name,
           "data-dir=s"=> \$data_dir,
           "mert-scripts-dir=s"=> \$mert_scripts_dir,
           "test-dir=s"=> \$test_dir,
           "results-dir=s"=> \$results_dir,
          ) or exit 1;
die "Please specify a test to run with --test\n" unless $test_name;
die "Please specify the location of the data directory with --data-dir\n" unless $data_dir;
die "Please specify the location of the mert directory with --mert-scripts-dir\n" unless $mert_scripts_dir;
die "Cannot locate test dir at $test_dir" unless (-d $test_dir);
$test_dir .= "/$test_name";
die "Cannot locate test dir at $test_dir" unless (-d $test_dir);

#### get place to put results
unless (defined $results_dir) { $results_dir = "$data_dir/results"; }
if (!-d $results_dir) {
    print STDERR "[WARNING] Results directory not found.\n";
    mkdir ($results_dir) || die "Failed to create $results_dir: $!";
}
$results_dir .= "/$test_name";
if (!-d $results_dir) {
    print STDERR "[WARNING] Results directory for test=$test_name could not be found.\n";
    mkdir ($results_dir) || die "Failed to create $results_dir: $!";
}
##########

# Every run gets its own directory named after the timestamp string.
my $ts = get_timestamp("$test_dir/command");
my $results = "$results_dir/$ts";
mkdir($results) || die "Failed to create results directory: $results\n";
my $truth = "$test_dir/truth";
if (!-d $truth) {
    die "Could not find truth/ in $test_dir!\n";
}
print "RESULTS AVAILABLE IN: $results\n\n";

my ($o, $elapsed, $ec, $sig) = exec_test($test_dir, $results);
my $error = ($sig || $ec > 0);
if ($error) {
    my $message = "$test_name CRASHED.\n\texit_code=$ec\n\tsignal=$sig\n";
    print STDERR $message;
    # The Summary copy is best effort: the crash has already been reported
    # on STDERR and is signalled through the exit status below.
    if (open my $summary_fh, '>', "$results/Summary") {
        print {$summary_fh} $message;
        close $summary_fh;
    } else {
        warn "Could not write $results/Summary: $!\n";
    }
    exit 2 if $sig;
    exit 3;
}

# Turn the captured output into "KEY=VALUE" records for compare-results.pl.
($o, $ec, $sig) = run_command("$test_dir/filter-stdout $results/run.stdout > $results/results.dat");
warn "filter-stdout failed!" if ($ec > 0 || $sig);
($o, $ec, $sig) = run_command("$test_dir/filter-stderr $results/run.stderr >> $results/results.dat");
warn "filter-stderr failed!" if ($ec > 0 || $sig);

open my $results_fh, '>>', "$results/results.dat"
    or die "Could not append to $results/results.dat: $!\n";
print {$results_fh} "TOTAL_WALLTIME ~ $elapsed\n";
close $results_fh
    or die "Could not close $results/results.dat: $!\n";

run_command("gzip $results/run.stdout");
run_command("gzip $results/run.stderr");

($o, $ec, $sig) = run_command("$BIN_TEST/compare-results.pl $results $truth");
print $o if defined $o;
# A comparison killed by a signal has exit code 0 but is still a failure.
if ($ec || $sig) {
    print STDERR "FAILURE, for debugging see $test_dir\n";
    exit 1;
}
exit 0;
# exec_test($test_dir, $results)
#
# Runs the "command" script of the test through sh, passing it the mert
# scripts directory and the test directory.  Its standard output and standard
# error are redirected to run.stdout and run.stderr inside $results.
#
# Returns (output, elapsed wall-clock seconds, exit code, signal) where
# output, exit code and signal are those reported by run_command().
sub exec_test {
    my ($test_dir, $results) = @_;

    my $began = time;
    my ($output, $exit_code, $signal) = run_command(
        "sh $test_dir/command $mert_scripts_dir $test_dir 1> $results/run.stdout 2> $results/run.stderr"
    );
    my $elapsed = time - $began;

    return ($output, $elapsed, $exit_code, $signal);
}
# run_command($cmd)
#
# Executes $cmd with backticks and decodes the wait status left in $?.
#
# Returns (captured standard output, exit code, signal).  The signal is the
# number 0 when the command was not terminated by a signal, otherwise
# whatever sig_name() returns for the signal number.
sub run_command {
    my ($cmd) = @_;

    my $output = `$cmd`;
    my $status = $?;

    # High byte: exit code.  Low seven bits: terminating signal number.
    my ($exit_code, $signal) = ($status >> 8, $status & 127);
    $signal = sig_name($signal) if $signal;

    return $output, $exit_code, $signal;
}
# sig_name($sig)
#
# Returns the name of signal number $sig prefixed with "SIG" (for example
# "SIGINT" for 2).  The names come from perl's own table for the current
# platform, which is indexed by the real signal number, so low-numbered
# signals are not shifted by one.  Unknown numbers yield "SIG#<number>", so
# the result is always a true value and callers that test it for truth still
# notice that a signal was delivered.
sub sig_name {
    my $sig = shift;

    use Config;
    my $table = defined $Config{sig_name} ? $Config{sig_name} : '';
    my @names = split ' ', $table;    # element 0 is the placeholder "ZERO"

    my $name = $names[$sig];
    return defined $name ? "SIG$name" : "SIG#$sig";
}
# get_timestamp($file)
#
# Builds the name of a results directory:
#   command.v<mtime of $file>-<user>-at-<current time>
# Both times are formatted in UTC as YYYYMMDD-HHMMSS; the user name is the
# output of `whoami`.
sub get_timestamp {
    my ($file) = @_;

    my $layout   = "%Y%m%d-%H%M%S";
    my $mtime    = (stat($file))[9];    # field 9 of stat() is the mtime
    my $modified = strftime($layout, gmtime $mtime);
    my $now      = strftime($layout, gmtime);

    chomp(my $username = `whoami`);

    return "command.v$modified-$username-at-$now";
}

View File

@ -0,0 +1,105 @@
#!/usr/bin/perl -w
use strict;
my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
use Getopt::Long;
############################################################
# Tests that are always run.
my @tests = qw (
    mert-basic
    extractor-txt
    extractor-bin
);

# Tests that are only run when a qsub executable is found.
my @qsubtests = qw (
);

if (@qsubtests) {
    my $cmd = getQsubCmd();
    if (!defined($cmd)) {
        # HOST is not set in every environment; avoid interpolating undef.
        my $host = defined $ENV{HOST} ? $ENV{HOST} : 'this host';
        print STDERR "Regression tests (@qsubtests) can not run on $host\nbecause SGE is not installed\n\n";
    } else {
        push @tests, @qsubtests;
    }
}
###########################################################
use MertRegressionTesting;
use File::Temp qw ( tempfile );
use POSIX qw ( strftime );
# Command-line options: both --data-dir and --mert-scripts-dir are optional
# at this level.
my $test_dir;
my $BIN_TEST = $script_dir;
my $data_dir;
my $mert_scripts_dir;
GetOptions(
    "data-dir=s"         => \$data_dir,
    "mert-scripts-dir=s" => \$mert_scripts_dir,
) or exit 1;

# Resolve the data directory; this call exits when no candidate exists.
$data_dir = MertRegressionTesting::find_data_directory($BIN_TEST, $data_dir);
$test_dir = $script_dir . "/tests";

# Command prefix shared by all run-single-test.pl invocations.
my $test_run = "$BIN_TEST/run-single-test.pl --data-dir=$data_dir";
if ($test_dir)         { $test_run .= " --test-dir=$test_dir"; }
if ($mert_scripts_dir) { $test_run .= " --mert-scripts-dir=$mert_scripts_dir"; }

print "Data directory: $data_dir\n";
print "Running tests: @tests\n\n";
print "TEST NAME STATUS PATH TO RESULTS\n";
my $lb = "---------------------------------------------------------------------------------------------------------\n";
print $lb;
# Run every test and print one row per test: the test name in a 23-character
# column, the status in a 10-character column, then the path to the results.
# A plain sprintf is used instead of a format declared inside the loop, so
# the row does not depend on how a format picks up the loop's lexical
# variables and the results path is never cut short.
my $fail = 0;
my @failed;
foreach my $test (@tests) {
    my $cmd = "$test_run --test=$test";
    my ($res, $output, $results_path) = do_test($cmd);

    my $row = sprintf "%-23.23s %-10.10s %s", $test, $res, $results_path;
    $row =~ s/\s+$//;    # no trailing blanks when there is no results path
    print "$row\n";

    if ($res eq 'FAIL') {
        # Show the complete output of a failed test between two rules.
        print "$lb$output$lb";
        $fail++;
        push @failed, $test;
    } else {
        # TOTAL_WALLTIME result=BASELINE=11, TEST=12 DELTA=1 PCT CHANGE=9.09
        if ($output =~ /TOTAL_WALLTIME\s+result\s*=\s*([^\n]+)/) {
            print "\t\tTiming statistics: $1\n";
        }
    }
}
# Summary.  The percentages are truncated to integers, so the notice about
# failed tests is driven by the failure count itself: a small share of
# failures would otherwise round down to 0% and go unreported.  An empty
# test list reports 0% instead of dividing by zero.
my $total = scalar @tests;
my $fail_percentage = $total ? int(100 * $fail / $total) : 0;
my $pass_percentage = $total ? int(100 * ($total-$fail) / $total) : 0;
print "\n$pass_percentage% of the tests passed.\n";
print "$fail_percentage% of the tests failed.\n";
if ($fail > 0) { print "\nPLEASE INVESTIGATE THESE FAILED TESTS: @failed\n"; }
# do_test($test)
#
# Runs the command line $test with standard error merged into standard
# output.
#
# Returns ($res, $o, $od):
#   $res  'PASS' when the wait status is 0, 'FAIL' otherwise;
#   $o    the captured output with the "RESULTS AVAILABLE IN: ..." line
#         blanked out;
#   $od   the path announced on that line, or '' when there is none.
sub do_test {
    my ($test) = @_;
    my $o = `$test 2>&1`;

    # $? is -1 when the command could not be started at all, so any
    # non-zero status is a failure, not only the positive ones.
    my $res = ($? != 0) ? 'FAIL' : 'PASS';
    $o = '' unless defined $o;

    my $od = '';
    if ($o =~ /RESULTS AVAILABLE IN: (.*)$/m) {
        $od = $1;
        $o =~ s/^RESULTS AVAIL.*$//m;
    }
    return ($res, $o, $od);
}
# getQsubCmd()
#
# Returns the first path printed by `which qsub` when that path exists, and
# undef (an empty list in list context) otherwise.
sub getQsubCmd {
    # Not named $a: a lexical $a would hide the variable used by sort blocks.
    my $qsub = `which qsub | head -1 | awk '{print \$1}'`;
    chomp($qsub);
    return $qsub if $qsub && -e $qsub;
    return;
}

View File

@ -0,0 +1,48 @@
#!/bin/sh
# Regression test driver for the extractor, text and binary output.
# Usage: sh command <bin-dir> <test-dir>
#
# For each of the five n-best lists the extractor is run three times:
#   1. plain output                       -> FEATSTAT.runN,   SCORESTAT.runN
#   2. with --binary                      -> FEATSTAT.runN.b, SCORESTAT.runN.b
#   3. from the .b files via --prev-*     -> FEATSTAT.runN.2, SCORESTAT.runN.2
# From the second run on, the files of the previous run are passed with
# --prev-scfile/--prev-ffile.  The .2 files are compared with the plain
# files using cmp, the sizes of all files are printed with wc, and the files
# are removed again.
bin=$1; shift
testdir=$1; shift
# Never create or remove files in the wrong directory.
cd "$testdir" || exit 1

extractor=$bin/extractor
ref=data/reference

prevscfile=""
prevffile=""
for run in 1 2 3 4 5 ; do
    nbest=data/nbest.$run.gz
    scfile=SCORESTAT.run$run
    ffile=FEATSTAT.run$run

    if [ "$run" -le 1 ] ; then
        "$extractor" --nbest $nbest --reference $ref --ffile $ffile --scfile $scfile
        "$extractor" --binary --nbest $nbest --reference $ref --ffile $ffile.b --scfile $scfile.b
    else
        "$extractor" --nbest $nbest --reference $ref --ffile $ffile --scfile $scfile --prev-scfile $prevscfile --prev-ffile $prevffile
        "$extractor" --binary --nbest $nbest --reference $ref --ffile $ffile.b --scfile $scfile.b --prev-scfile $prevscfile.b --prev-ffile $prevffile.b
    fi
    # Same command for every run: rewrite the binary files of this run.
    "$extractor" --ffile $ffile.2 --scfile $scfile.2 --prev-scfile $scfile.b --prev-ffile $ffile.b

    cmp $scfile $scfile.2
    cmp $ffile $ffile.2

    prevscfile=$scfile
    prevffile=$ffile
done

wc *.run* | grep run

for run in 1 2 3 4 5 ; do
    scfile=SCORESTAT.run$run
    ffile=FEATSTAT.run$run
    rm $ffile $scfile
    rm $ffile.2 $scfile.2
    rm $ffile.b $scfile.b
done

View File

@ -0,0 +1,10 @@
what i would also call for , however , is to look beyond immediate concerns in biarritz .
we , as elected representatives , are at least as responsible for encouraging it to make progress in the face of adversity as we are for relaying the messages that we receive from public opinion in each of our countries .
with an eye to recent events , the issue of petrol prices also seems to me to be particularly noteworthy .
at present , the council is talking about incorporating such mechanisms in article 7 .
secondly , it lies in its transparency for the public , who now know what rights they have in respect of those who create and apply european law , and in its transparency for those who do precisely that , draft or apply european law .
i agree with him that the commission must continue to play a pivotal role as guardian of the common interests of the community .
that is why i believe it is very important for the chairman of the eurogroup - which we chose to set up - to continue to play a full role in this area .
i for my part believe that it is precisely for this reason that the convention method is a good one and that ample use should again be made of it in the future .
mr president , mr president-in-office of the council , ladies and gentlemen , thank you for your kind accolade and thank you also to those who have gently reproached me for not making this speech earlier .
we must work on closer cooperation and make it simpler and more effective in order to bring this about : closer cooperation is the immediate , simplest way forward if we are to take that additional step towards integration , the need for which has been confirmed by the many powerful speeches which i have heard today .

View File

@ -0,0 +1,2 @@
#!/usr/bin/perl

View File

@ -0,0 +1,8 @@
#!/usr/bin/perl
# Rewrites every input line (from the files named on the command line, or
# from standard input) as "STDOUT_<n>=<line>", where <n> counts the lines
# starting at 1.
use strict;
use warnings;

my $x = 0;
while (my $line = <>) {
    chomp $line;
    $x++;
    print "STDOUT_$x=$line\n";
}

View File

@ -0,0 +1,31 @@
STDOUT_1= 1020 14190 86273 FEATSTAT.run1
STDOUT_2= 1020 14190 86273 FEATSTAT.run1.2
STDOUT_3= 123 882 57120 FEATSTAT.run1.b
STDOUT_4= 2020 28190 172503 FEATSTAT.run2
STDOUT_5= 2020 28190 172503 FEATSTAT.run2.2
STDOUT_6= 232 1638 113120 FEATSTAT.run2.b
STDOUT_7= 3020 42190 264672 FEATSTAT.run3
STDOUT_8= 3020 42190 264672 FEATSTAT.run3.2
STDOUT_9= 350 2292 169120 FEATSTAT.run3.b
STDOUT_10= 4020 56190 360150 FEATSTAT.run4
STDOUT_11= 4020 56190 360150 FEATSTAT.run4.2
STDOUT_12= 454 3220 225120 FEATSTAT.run4.b
STDOUT_13= 5020 70190 462892 FEATSTAT.run5
STDOUT_14= 5020 70190 462892 FEATSTAT.run5.2
STDOUT_15= 574 4299 281120 FEATSTAT.run5.b
STDOUT_16= 1020 9060 26328 SCORESTAT.run1
STDOUT_17= 1020 9060 26328 SCORESTAT.run1.2
STDOUT_18= 203 1486 36490 SCORESTAT.run1.b
STDOUT_19= 2020 18060 52341 SCORESTAT.run2
STDOUT_20= 2020 18060 52341 SCORESTAT.run2.2
STDOUT_21= 463 2908 72490 SCORESTAT.run2.b
STDOUT_22= 3020 27060 77299 SCORESTAT.run3
STDOUT_23= 3020 27060 77299 SCORESTAT.run3.2
STDOUT_24= 600 3634 108490 SCORESTAT.run3.b
STDOUT_25= 4020 36060 103698 SCORESTAT.run4
STDOUT_26= 4020 36060 103698 SCORESTAT.run4.2
STDOUT_27= 994 4763 144490 SCORESTAT.run4.b
STDOUT_28= 5020 45060 129840 SCORESTAT.run5
STDOUT_29= 5020 45060 129840 SCORESTAT.run5.2
STDOUT_30= 1254 5837 180490 SCORESTAT.run5.b
TOTAL_WALLTIME ~ 4

View File

@ -0,0 +1,40 @@
#!/bin/sh
# Regression test driver for the extractor, text output.
# Usage: sh command <bin-dir> <test-dir>
#
# Runs the extractor on each of the five n-best lists; from the second run
# on, the files of the previous run are passed with
# --prev-scfile/--prev-ffile.  The feature and score files of every run are
# written to standard output and removed at the end.
bin=$1; shift
testdir=$1; shift
# Never create or remove files in the wrong directory.
cd "$testdir" || exit 1

extractor=$bin/extractor
ref=data/reference

prevscfile=""
prevffile=""
for run in 1 2 3 4 5 ; do
    nbest=data/nbest.$run.gz
    scfile=SCORESTAT.run$run
    ffile=FEATSTAT.run$run

    if [ "$run" -le 1 ] ; then
        "$extractor" --nbest $nbest --reference $ref --ffile $ffile --scfile $scfile
    else
        "$extractor" --nbest $nbest --reference $ref --ffile $ffile --scfile $scfile --prev-scfile $prevscfile --prev-ffile $prevffile
    fi

    prevscfile=$scfile
    prevffile=$ffile
    cat $ffile $scfile
done

for run in 1 2 3 4 5 ; do
    scfile=SCORESTAT.run$run
    ffile=FEATSTAT.run$run
    rm $ffile $scfile
done

View File

@ -0,0 +1,10 @@
what i would also call for , however , is to look beyond immediate concerns in biarritz .
we , as elected representatives , are at least as responsible for encouraging it to make progress in the face of adversity as we are for relaying the messages that we receive from public opinion in each of our countries .
with an eye to recent events , the issue of petrol prices also seems to me to be particularly noteworthy .
at present , the council is talking about incorporating such mechanisms in article 7 .
secondly , it lies in its transparency for the public , who now know what rights they have in respect of those who create and apply european law , and in its transparency for those who do precisely that , draft or apply european law .
i agree with him that the commission must continue to play a pivotal role as guardian of the common interests of the community .
that is why i believe it is very important for the chairman of the eurogroup - which we chose to set up - to continue to play a full role in this area .
i for my part believe that it is precisely for this reason that the convention method is a good one and that ample use should again be made of it in the future .
mr president , mr president-in-office of the council , ladies and gentlemen , thank you for your kind accolade and thank you also to those who have gently reproached me for not making this speech earlier .
we must work on closer cooperation and make it simpler and more effective in order to bring this about : closer cooperation is the immediate , simplest way forward if we are to take that additional step towards integration , the need for which has been confirmed by the many powerful speeches which i have heard today .

View File

@ -0,0 +1,2 @@
#!/usr/bin/perl

View File

@ -0,0 +1,8 @@
#!/usr/bin/perl
# Rewrites every input line (from the files named on the command line, or
# from standard input) as "STDOUT_<n>=<line>", where <n> counts the lines
# starting at 1.
use strict;
use warnings;

my $x = 0;
while (my $line = <>) {
    chomp $line;
    $x++;
    print "STDOUT_$x=$line\n";
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,9 @@
#!/bin/sh
# Regression test driver for mert.
# Usage: sh command <bin-dir> <test-dir>
#
# Runs mert once on the text statistics and once on the binary statistics in
# data/, and keeps only the output lines starting with "Best" (case
# insensitive).  The exit status is the one of the last grep.
bin=$1; shift
testdir=$1; shift
cd "$testdir" || exit 1

for fmt in txt bin ; do
    "$bin/mert" --scfile data/SCORESTAT.$fmt --ffile data/FEATSTAT.$fmt --ifile data/INIT -d 14 -n 20 -r 1000 2>&1 | grep -i "^Best"
done

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
0.4 0.15 0.15 0.15 0.15 0.15 0.15 0.5 -1 0.2 0.2 0.2 0.2 0.2

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
#!/usr/bin/perl
# Keeps only the input lines containing "savetxt", drops everything in front
# of the first "ngram" (matched case-insensitively) on those lines, and
# prints them as "STDERR_<n>=<line>", where <n> counts the kept lines
# starting at 1.
use strict;
use warnings;

my $x = 0;
while (my $line = <>) {
    chomp $line;
    next if $line !~ /savetxt/;
    $line =~ s/.+?(ngram)/$1/i;
    $x++;
    print "STDERR_$x=$line\n";
}

View File

@ -0,0 +1,8 @@
#!/usr/bin/perl
# Rewrites every input line (from the files named on the command line, or
# from standard input) as "STDOUT_<n>=<line>", where <n> counts the lines
# starting at 1.
use strict;
use warnings;

my $x = 0;
while (my $line = <>) {
    chomp $line;
    $x++;
    print "STDOUT_$x=$line\n";
}
}

View File

@ -0,0 +1,3 @@
STDOUT_1=Best point: 0.000649822 0.000350135 0.000510268 0.000674896 0.000624551 0.000695801 0.000698594 0.000404815 -0.00044227 0.000720043 0.000762028 0.000892314 0.000260735 -0.992314 => 0.304339
STDOUT_2=Best point: 0.000649822 0.000350135 0.000510268 0.000674896 0.000624551 0.000695801 0.000698594 0.000404815 -0.00044227 0.000720043 0.000762028 0.000892314 0.000260735 -0.992314 => 0.304339
TOTAL_WALLTIME ~ 4