From d53350dd95f22812d98ce4e6afc2f40a16aaac1c Mon Sep 17 00:00:00 2001
From: bhaddow
Date: Mon, 12 Sep 2011 20:53:22 +0000
Subject: [PATCH] Regression test for lm oov feature

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4218 1f5c12ca-751b-0410-a591-d2e778427230
---
 .../phrase.basic-lm-oov/filter-stderr.pl      | 39 ++++++++++++++
 .../phrase.basic-lm-oov/filter-stdout.pl      | 13 +++++
 .../tests/phrase.basic-lm-oov/moses.ini       | 53 +++++++++++++++++++
 .../phrase.basic-lm-oov/to-translate.txt      |  5 ++
 .../phrase.basic-lm-oov/truth/results.txt     | 13 +++++
 5 files changed, 123 insertions(+)
 create mode 100755 regression-testing/tests/phrase.basic-lm-oov/filter-stderr.pl
 create mode 100755 regression-testing/tests/phrase.basic-lm-oov/filter-stdout.pl
 create mode 100644 regression-testing/tests/phrase.basic-lm-oov/moses.ini
 create mode 100644 regression-testing/tests/phrase.basic-lm-oov/to-translate.txt
 create mode 100644 regression-testing/tests/phrase.basic-lm-oov/truth/results.txt

diff --git a/regression-testing/tests/phrase.basic-lm-oov/filter-stderr.pl b/regression-testing/tests/phrase.basic-lm-oov/filter-stderr.pl
new file mode 100755
index 000000000..2f6e17607
--- /dev/null
+++ b/regression-testing/tests/phrase.basic-lm-oov/filter-stderr.pl
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+# Condenses the text read from STDIN (or the files named on the command
+# line) into the "KEY ~ value" / "KEY = value" lines printed below.
+
+use strict;
+use warnings;
+
+# Append <script dir>/../perllib to @INC at compile time, before the
+# "use RegTestUtils" below is processed (presumably that is where it lives).
+BEGIN {
+    use Cwd qw/ abs_path /;
+    use File::Basename;
+    my $script_dir = dirname(abs_path($0));
+    push @INC, "$script_dir/../perllib";
+}
+use RegTestUtils;
+
+# Number of "BEST TRANSLATION:" lines seen so far; used to label the scores.
+my $hypo_count = 0;
+while (<>) {
+    chomp;
+
+    # Load-time reports: hand the line to RegTestUtils::readTime and print
+    # whatever it returns.
+    if (/^Finished loading LanguageModels/) {
+        my $time = RegTestUtils::readTime($_);
+        print "LMLOAD_TIME ~ $time\n";
+    }
+    if (/^Finished loading phrase tables/) {
+        my $time = RegTestUtils::readTime($_);
+        print "PTLOAD_TIME ~ $time\n";
+    }
+
+    # Every other line is skipped unless it reports a best translation.
+    next unless /^BEST TRANSLATION:/;
+    my $pscore = RegTestUtils::readHypoScore($_);
+    $hypo_count++;
+    print "SCORE_$hypo_count = $pscore\n";
+}
diff --git a/regression-testing/tests/phrase.basic-lm-oov/filter-stdout.pl b/regression-testing/tests/phrase.basic-lm-oov/filter-stdout.pl
new file mode 100755
index 000000000..476ddf6e9
--- /dev/null
+++ b/regression-testing/tests/phrase.basic-lm-oov/filter-stdout.pl
@@ -0,0 +1,13 @@
+#!/usr/bin/perl
+# Prefixes every input line (STDIN or files named on the command line) with
+# "TRANSLATION_<n>=", where <n> is the running line number starting at 1.
+
+use strict;
+use warnings;
+
+my $line_no = 0;
+while (<>) {
+    chomp;
+    $line_no++;
+    print "TRANSLATION_$line_no=$_\n";
+}
diff --git a/regression-testing/tests/phrase.basic-lm-oov/moses.ini b/regression-testing/tests/phrase.basic-lm-oov/moses.ini
new file mode 100644
index 000000000..88bbc1ee6
--- /dev/null
+++ b/regression-testing/tests/phrase.basic-lm-oov/moses.ini
@@ -0,0 +1,53 @@
+# moses.ini for regression test
+
+[ttable-file]
+0 0 0 5 ${MODEL_PATH}/basic-surface-only/phrase-table.gz
+
+# language model
+[lmodel-file]
+0 0 3 ${LM_PATH}/europarl.en.srilm.gz
+# limit on how many phrase translations e for each phrase f are loaded
+[ttable-limit]
+#ttable element load limit 0 = all elements loaded
+20
+
+# distortion (reordering) weight
+[weight-d]
+0.141806519223522
+
+# language model weight
+[weight-l]
+0.142658800199951
+0.1
+
+# translation model weight (phrase translation, lexical weighting)
+[weight-t]
+0.00402447059454402
+0.0685647475075862
+0.294089113124688
+0.0328320356515851
+-0.0426081987467227
+
+# word penalty
+[weight-w]
+-0.273416114951401
+
+[distortion-limit]
+4
+
+[beam-threshold]
+0.03
+
+[input-factors]
+0
+
+[mapping]
+T 0
+
+[lmodel-oov-feature]
+1
+
+
+[verbose]
+2
+
diff --git a/regression-testing/tests/phrase.basic-lm-oov/to-translate.txt b/regression-testing/tests/phrase.basic-lm-oov/to-translate.txt
new file mode 100644
index 000000000..a505702d1
--- /dev/null
+++ b/regression-testing/tests/phrase.basic-lm-oov/to-translate.txt
@@ -0,0 +1,5 @@
+ich frage sie also , herr präsident : stellen die unterschiedlichen arbeitskosten somit nicht auch eine beschränkung des freien wettbewerbs in der europäischen union dar ?
+schaut man sich die fälligkeitspläne der ausführung des haushalts für die rubriken 2 , 3 , 4 und 7 an , stellt man fest , dass nur durchschnittlich 8 % aller verpflichtungen durch zahlungen gedeckt sind .
+vor drei jahren haben wir mit unserer beschäftigungsinitiative begonnen , indem wir kleinen und mittleren unternehmen halfen , chancenkapital zu bekommen .
+das parlament will das auf zweierlei weise tun .
+nur dann werden die europäischen institutionen auch ihrem auftrag gerecht .
diff --git a/regression-testing/tests/phrase.basic-lm-oov/truth/results.txt b/regression-testing/tests/phrase.basic-lm-oov/truth/results.txt
new file mode 100644
index 000000000..faca9f17b
--- /dev/null
+++ b/regression-testing/tests/phrase.basic-lm-oov/truth/results.txt
@@ -0,0 +1,13 @@
+TRANSLATION_1=i ask you , therefore , mr president , the different labour costs are therefore not a restriction of free competition in the european union ?
+TRANSLATION_2=if we look at the fälligkeitspläne the implementation of the budget for the categories 2 , 3 , 4 and 7 to , we see that only an average of 8 % of commitments by payments are met .
+TRANSLATION_3=three years ago our employment strategy , we started by small and medium-sized enterprises halfen , chancenkapital to obtain .
+TRANSLATION_4=parliament wants the in two ways .
+TRANSLATION_5=only then will the european institutions to its mandate .
+LMLOAD_TIME ~ 8.00
+PTLOAD_TIME ~ 9.00
+SCORE_1 = -14.843
+SCORE_2 = -133.657
+SCORE_3 = -240.041
+SCORE_4 = -5.995
+SCORE_5 = -7.015
+TOTAL_WALLTIME ~ 28