mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-02 17:09:36 +03:00
Merge branch 'miramerge' of thor.inf.ed.ac.uk:/fs/saxnot3/ehasler/mosesdecoder_github_mira into miramerge
This commit is contained in:
commit
2b9c250d36
@ -388,6 +388,7 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
// initialise Moses
|
// initialise Moses
|
||||||
// add initial Bleu weight and references to initialize Bleu feature
|
// add initial Bleu weight and references to initialize Bleu feature
|
||||||
|
boost::trim(decoder_settings);
|
||||||
decoder_settings += " -weight-bl 1 -references";
|
decoder_settings += " -weight-bl 1 -references";
|
||||||
if (trainWithMultipleFolds) {
|
if (trainWithMultipleFolds) {
|
||||||
decoder_settings += " ";
|
decoder_settings += " ";
|
||||||
|
@ -393,26 +393,22 @@ sub createTestScriptAndSubmit {
|
|||||||
my $skip_submit = $_[8];
|
my $skip_submit = $_[8];
|
||||||
|
|
||||||
#file names
|
#file names
|
||||||
my $job_name = $name."_".$testtype."_".$train_iteration.$suffix;
|
|
||||||
|
|
||||||
my $test_script = "$name-$testtype";
|
|
||||||
my $test_script_file = $working_dir."/".$test_script.".$train_iteration".$suffix.".sh";
|
|
||||||
my $test_out = $test_script . ".$train_iteration" . $suffix . ".out";
|
|
||||||
my $test_err = $test_script . ".$train_iteration" . $suffix . ".err";
|
|
||||||
|
|
||||||
my $output_file;
|
my $output_file;
|
||||||
my $output_error_file;
|
my $output_error_file;
|
||||||
my $bleu_file;
|
my $bleu_file;
|
||||||
|
my $file_id = "";
|
||||||
if ($weight_dump_frequency == 1) {
|
if ($weight_dump_frequency == 1) {
|
||||||
if ($train_iteration < 10) {
|
if ($train_iteration < 10) {
|
||||||
$output_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".out";
|
$output_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".out";
|
||||||
$output_error_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".err";
|
$output_error_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".err";
|
||||||
$bleu_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".bleu";
|
$bleu_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".bleu";
|
||||||
|
$file_id = "0".$train_iteration.$suffix;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$output_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".out";
|
$output_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".out";
|
||||||
$output_error_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".err";
|
$output_error_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".err";
|
||||||
$bleu_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".bleu";
|
$bleu_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".bleu";
|
||||||
|
$file_id = $train_iteration.$suffix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -420,14 +416,23 @@ sub createTestScriptAndSubmit {
|
|||||||
$output_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".out";
|
$output_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".out";
|
||||||
$output_error_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".err";
|
$output_error_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".err";
|
||||||
$bleu_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu";
|
$bleu_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu";
|
||||||
|
$file_id = "0".$epoch."_".$epoch_slice.$suffix;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$output_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".out";
|
$output_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".out";
|
||||||
$output_error_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".err";
|
$output_error_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".err";
|
||||||
$bleu_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu";
|
$bleu_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu";
|
||||||
|
$file_id = $epoch."_".$epoch_slice.$suffix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
my $job_name = $name."_".$testtype."_".$file_id;
|
||||||
|
|
||||||
|
my $test_script = "$name-$testtype";
|
||||||
|
my $test_script_file = "$working_dir/$test_script.$file_id.sh";
|
||||||
|
my $test_out = "$test_script.$file_id.out";
|
||||||
|
my $test_err = "$test_script.$file_id.err";
|
||||||
|
|
||||||
if (! (open TEST, ">$test_script_file" )) {
|
if (! (open TEST, ">$test_script_file" )) {
|
||||||
die "Unable to create test script $test_script_file\n";
|
die "Unable to create test script $test_script_file\n";
|
||||||
}
|
}
|
||||||
@ -445,6 +450,7 @@ sub createTestScriptAndSubmit {
|
|||||||
my $readCoreWeights = 0;
|
my $readCoreWeights = 0;
|
||||||
my $readExtraWeights = 0;
|
my $readExtraWeights = 0;
|
||||||
my %extra_weights;
|
my %extra_weights;
|
||||||
|
my $abs_weights = 0;
|
||||||
while(<WEIGHTS>) {
|
while(<WEIGHTS>) {
|
||||||
chomp;
|
chomp;
|
||||||
my ($name,$value) = split;
|
my ($name,$value) = split;
|
||||||
@ -455,22 +461,28 @@ sub createTestScriptAndSubmit {
|
|||||||
} else {
|
} else {
|
||||||
if ($name eq "WordPenalty") {
|
if ($name eq "WordPenalty") {
|
||||||
$wordpenalty_weight = $value;
|
$wordpenalty_weight = $value;
|
||||||
|
$abs_weights += abs($value);
|
||||||
$readCoreWeights += 1;
|
$readCoreWeights += 1;
|
||||||
} elsif ($name =~ /^PhraseModel/) {
|
} elsif ($name =~ /^PhraseModel/) {
|
||||||
push @phrasemodel_weights,$value;
|
push @phrasemodel_weights,$value;
|
||||||
|
$abs_weights += abs($value);
|
||||||
$readCoreWeights += 1;
|
$readCoreWeights += 1;
|
||||||
} elsif ($name =~ /^LM\:2/) {
|
} elsif ($name =~ /^LM\:2/) {
|
||||||
$lm2_weight = $value;
|
$lm2_weight = $value;
|
||||||
|
$abs_weights += abs($value);
|
||||||
$readCoreWeights += 1;
|
$readCoreWeights += 1;
|
||||||
}
|
}
|
||||||
elsif ($name =~ /^LM/) {
|
elsif ($name =~ /^LM/) {
|
||||||
$lm_weight = $value;
|
$lm_weight = $value;
|
||||||
|
$abs_weights += abs($value);
|
||||||
$readCoreWeights += 1;
|
$readCoreWeights += 1;
|
||||||
} elsif ($name eq "Distortion") {
|
} elsif ($name eq "Distortion") {
|
||||||
$distortion_weight = $value;
|
$distortion_weight = $value;
|
||||||
|
$abs_weights += abs($value);
|
||||||
$readCoreWeights += 1;
|
$readCoreWeights += 1;
|
||||||
} elsif ($name =~ /^LexicalReordering/) {
|
} elsif ($name =~ /^LexicalReordering/) {
|
||||||
push @lexicalreordering_weights,$value;
|
push @lexicalreordering_weights,$value;
|
||||||
|
$abs_weights += abs($value);
|
||||||
$readCoreWeights += 1;
|
$readCoreWeights += 1;
|
||||||
} else {
|
} else {
|
||||||
$extra_weights{$name} = $value;
|
$extra_weights{$name} = $value;
|
||||||
@ -480,60 +492,65 @@ sub createTestScriptAndSubmit {
|
|||||||
}
|
}
|
||||||
close WEIGHTS;
|
close WEIGHTS;
|
||||||
|
|
||||||
if (!defined $core_weight_file) {
|
|
||||||
print "Number of core weights read: ".$readCoreWeights."\n";
|
print "Number of core weights read: ".$readCoreWeights."\n";
|
||||||
}
|
|
||||||
print "Number of extra weights read: ".$readExtraWeights."\n";
|
print "Number of extra weights read: ".$readExtraWeights."\n";
|
||||||
|
|
||||||
# If there is a core weight file, we have to load the core weights from that file (NOTE: this is not necessary if the core weights are also printed to the weights file)
|
# If there is a core weight file, we have to load the core weights from that file (NOTE: this is not necessary if the core weights are also printed to the weights file)
|
||||||
if (defined $core_weight_file) {
|
# if (defined $core_weight_file) {
|
||||||
@phrasemodel_weights = ();
|
# @phrasemodel_weights = ();
|
||||||
@lexicalreordering_weights = ();
|
# @lexicalreordering_weights = ();
|
||||||
$readCoreWeights = 0;
|
# $readCoreWeights = 0;
|
||||||
if (! (open CORE_WEIGHTS, "$core_weight_file")) {
|
# if (! (open CORE_WEIGHTS, "$core_weight_file")) {
|
||||||
die "Unable to open core weights file $core_weight_file\n";
|
# die "Unable to open core weights file $core_weight_file\n";
|
||||||
}
|
# }
|
||||||
print "Reading core weights from file..\n";
|
# print "Reading core weights from file..\n";
|
||||||
while(<CORE_WEIGHTS>) {
|
# while(<CORE_WEIGHTS>) {
|
||||||
chomp;
|
# chomp;
|
||||||
my ($name,$value) = split;
|
# my ($name,$value) = split;
|
||||||
next if ($name =~ /^!Unknown/);
|
# next if ($name =~ /^!Unknown/);
|
||||||
next if ($name =~ /^BleuScore/);
|
# next if ($name =~ /^BleuScore/);
|
||||||
if ($name eq "DEFAULT_") {
|
# if ($name eq "DEFAULT_") {
|
||||||
$default_weight = $value;
|
# $default_weight = $value;
|
||||||
}
|
# }
|
||||||
else {
|
# else {
|
||||||
if ($name eq "WordPenalty") {
|
# if ($name eq "WordPenalty") {
|
||||||
$wordpenalty_weight = $value;
|
# $wordpenalty_weight = $value;
|
||||||
$readCoreWeights += 1;
|
# $abs_weights += abs($value);
|
||||||
} elsif ($name =~ /^PhraseModel/) {
|
# $readCoreWeights += 1;
|
||||||
push @phrasemodel_weights,$value;
|
# } elsif ($name =~ /^PhraseModel/) {
|
||||||
$readCoreWeights += 1;
|
# push @phrasemodel_weights,$value;
|
||||||
} elsif ($name =~ /^LM\:2/) {
|
# $abs_weights += abs($value);
|
||||||
$lm2_weight = $value;
|
# $readCoreWeights += 1;
|
||||||
$readCoreWeights += 1;
|
# } elsif ($name =~ /^LM\:2/) {
|
||||||
}
|
# $lm2_weight = $value;
|
||||||
elsif ($name =~ /^LM/) {
|
# $abs_weights += abs($value);
|
||||||
$lm_weight = $value;
|
# $readCoreWeights += 1;
|
||||||
$readCoreWeights += 1;
|
# }
|
||||||
} elsif ($name eq "Distortion") {
|
# elsif ($name =~ /^LM/) {
|
||||||
$distortion_weight = $value;
|
# $lm_weight = $value;
|
||||||
$readCoreWeights += 1;
|
# $abs_weights += abs($value);
|
||||||
} elsif ($name =~ /^LexicalReordering/) {
|
# $readCoreWeights += 1;
|
||||||
push @lexicalreordering_weights,$value;
|
# } elsif ($name eq "Distortion") {
|
||||||
$readCoreWeights += 1;
|
# $distortion_weight = $value;
|
||||||
} else {
|
# $abs_weights += abs($value);
|
||||||
# there should be no extra weights in the core weights file
|
# $readCoreWeights += 1;
|
||||||
print "weight not matched: $name:$value\n";
|
# } elsif ($name =~ /^LexicalReordering/) {
|
||||||
}
|
# push @lexicalreordering_weights,$value;
|
||||||
}
|
# $abs_weights += abs($value);
|
||||||
}
|
# $readCoreWeights += 1;
|
||||||
close CORE_WEIGHTS;
|
# } else {
|
||||||
print "Number of core weights read: ".$readCoreWeights."\n";
|
# # there should be no extra weights in the core weights file
|
||||||
}
|
# print "weight not matched: $name:$value\n";
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
# }
|
||||||
|
# close CORE_WEIGHTS;
|
||||||
|
# print "Number of core weights read: ".$readCoreWeights."\n";
|
||||||
|
# }
|
||||||
|
|
||||||
# Create new ini file
|
# Create new ini file (changing format: expt1-devtest.00_2.ini instead of expt1-devtest.3.ini)
|
||||||
my $new_ini_file = $working_dir."/".$test_script.".".$train_iteration.$suffix.".ini";
|
# my $new_ini_file = $working_dir."/".$test_script.".".$train_iteration.$suffix.".ini";
|
||||||
|
my $new_ini_file = "$working_dir/$test_script.$file_id.ini";
|
||||||
if (! (open NEWINI, ">$new_ini_file" )) {
|
if (! (open NEWINI, ">$new_ini_file" )) {
|
||||||
die "Unable to create ini file $new_ini_file\n";
|
die "Unable to create ini file $new_ini_file\n";
|
||||||
}
|
}
|
||||||
@ -541,15 +558,16 @@ sub createTestScriptAndSubmit {
|
|||||||
die "Unable to read ini file $old_ini_file\n";
|
die "Unable to read ini file $old_ini_file\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# write normalized weights to ini file
|
||||||
while(<OLDINI>) {
|
while(<OLDINI>) {
|
||||||
if (/weight-l/) {
|
if (/weight-l/) {
|
||||||
print NEWINI "[weight-l]\n";
|
print NEWINI "[weight-l]\n";
|
||||||
print NEWINI $lm_weight;
|
print NEWINI ($lm_weight/$abs_weights);
|
||||||
print NEWINI "\n";
|
print NEWINI "\n";
|
||||||
|
|
||||||
if (defined $lm2_weight) {
|
if (defined $lm2_weight) {
|
||||||
readline(OLDINI);
|
readline(OLDINI);
|
||||||
print NEWINI $lm2_weight;
|
print NEWINI ($lm2_weight/$abs_weights);
|
||||||
print NEWINI "\n";
|
print NEWINI "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -557,38 +575,37 @@ sub createTestScriptAndSubmit {
|
|||||||
} elsif (/weight-t/) {
|
} elsif (/weight-t/) {
|
||||||
print NEWINI "[weight-t]\n";
|
print NEWINI "[weight-t]\n";
|
||||||
foreach my $phrasemodel_weight (@phrasemodel_weights) {
|
foreach my $phrasemodel_weight (@phrasemodel_weights) {
|
||||||
print NEWINI $phrasemodel_weight;
|
print NEWINI ($phrasemodel_weight/$abs_weights);
|
||||||
print NEWINI "\n";
|
print NEWINI "\n";
|
||||||
readline(OLDINI);
|
readline(OLDINI);
|
||||||
}
|
}
|
||||||
} elsif (/weight-d/) {
|
} elsif (/weight-d/) {
|
||||||
print NEWINI "[weight-d]\n";
|
print NEWINI "[weight-d]\n";
|
||||||
print NEWINI $distortion_weight;
|
print NEWINI ($distortion_weight/$abs_weights);
|
||||||
print NEWINI "\n";
|
print NEWINI "\n";
|
||||||
readline(OLDINI);
|
readline(OLDINI);
|
||||||
foreach my $lexicalreordering_weight (@lexicalreordering_weights) {
|
foreach my $lexicalreordering_weight (@lexicalreordering_weights) {
|
||||||
print NEWINI $lexicalreordering_weight;
|
print NEWINI ($lexicalreordering_weight/$abs_weights);
|
||||||
print NEWINI "\n";
|
print NEWINI "\n";
|
||||||
readline(OLDINI);
|
readline(OLDINI);
|
||||||
}
|
}
|
||||||
} elsif (/weight-w/) {
|
} elsif (/weight-w/) {
|
||||||
print NEWINI "[weight-w]\n";
|
print NEWINI "[weight-w]\n";
|
||||||
print NEWINI $wordpenalty_weight;
|
print NEWINI ($wordpenalty_weight/$abs_weights);
|
||||||
print NEWINI "\n";
|
print NEWINI "\n";
|
||||||
readline(OLDINI);
|
readline(OLDINI);
|
||||||
} else {
|
} else {
|
||||||
print NEWINI;
|
print NEWINI;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
close NEWINI;
|
|
||||||
close OLDINI;
|
close OLDINI;
|
||||||
|
|
||||||
my $writtenExtraWeights = 0;
|
my $writtenExtraWeights = 0;
|
||||||
|
|
||||||
# if there are any non-core weights, write them to a weights file
|
# if there are any non-core weights, write them to a weights file (normalized)
|
||||||
my $extra_weight_file = undef;
|
my $extra_weight_file = undef;
|
||||||
if (%extra_weights) {
|
if (%extra_weights) {
|
||||||
$extra_weight_file = "$new_weight_file.scaled";
|
$extra_weight_file = "$new_weight_file.sparse.scaled";
|
||||||
if (! (open EXTRAWEIGHT,">$extra_weight_file")) {
|
if (! (open EXTRAWEIGHT,">$extra_weight_file")) {
|
||||||
print "Warning: unable to create extra weights file $extra_weight_file";
|
print "Warning: unable to create extra weights file $extra_weight_file";
|
||||||
next;
|
next;
|
||||||
@ -601,7 +618,7 @@ sub createTestScriptAndSubmit {
|
|||||||
foreach my $name (sort keys %extra_weights) {
|
foreach my $name (sort keys %extra_weights) {
|
||||||
next if ($name eq "core");
|
next if ($name eq "core");
|
||||||
next if ($name eq "DEFAULT_");
|
next if ($name eq "DEFAULT_");
|
||||||
my $value = $extra_weights{$name};
|
my $value = $extra_weights{$name}/$abs_weights;
|
||||||
|
|
||||||
# write only non-zero feature weights to file
|
# write only non-zero feature weights to file
|
||||||
if ($value) {
|
if ($value) {
|
||||||
@ -612,6 +629,11 @@ sub createTestScriptAndSubmit {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# add specification of sparse weight file to ini
|
||||||
|
print NEWINI "\n[weight-file] \n";
|
||||||
|
print NEWINI "$extra_weight_file \n";
|
||||||
|
close NEWINI;
|
||||||
|
|
||||||
print TEST "#!/bin/sh\n";
|
print TEST "#!/bin/sh\n";
|
||||||
print TEST "#\$ -N $job_name\n";
|
print TEST "#\$ -N $job_name\n";
|
||||||
print TEST "#\$ -wd $working_dir\n";
|
print TEST "#\$ -wd $working_dir\n";
|
||||||
@ -626,9 +648,10 @@ sub createTestScriptAndSubmit {
|
|||||||
print TEST "export LD_LIBRARY_PATH=/exports/informatics/inf_iccs_smt/shared/boost/lib:\$LD_LIBRARY_PATH\n";
|
print TEST "export LD_LIBRARY_PATH=/exports/informatics/inf_iccs_smt/shared/boost/lib:\$LD_LIBRARY_PATH\n";
|
||||||
}
|
}
|
||||||
print TEST "$test_exe $decoder_settings -i $input_file -f $new_ini_file ";
|
print TEST "$test_exe $decoder_settings -i $input_file -f $new_ini_file ";
|
||||||
if ($extra_weight_file) {
|
# now written to ini file
|
||||||
print TEST "-weight-file $extra_weight_file ";
|
# if ($extra_weight_file) {
|
||||||
}
|
# print TEST "-weight-file $extra_weight_file ";
|
||||||
|
# }
|
||||||
print TEST $extra_args;
|
print TEST $extra_args;
|
||||||
print TEST " 1> $output_file 2> $output_error_file\n";
|
print TEST " 1> $output_file 2> $output_error_file\n";
|
||||||
print TEST "echo \"Decoding of ".$testtype." set finished.\"\n";
|
print TEST "echo \"Decoding of ".$testtype." set finished.\"\n";
|
||||||
|
@ -89,6 +89,7 @@ public:
|
|||||||
|
|
||||||
VERBOSE(2,"\nTRANSLATING(" << lineNumber << "): " << *m_source);
|
VERBOSE(2,"\nTRANSLATING(" << lineNumber << "): " << *m_source);
|
||||||
|
|
||||||
|
if ((*m_source).GetSize() == 0) return;
|
||||||
ChartManager manager(*m_source, &system);
|
ChartManager manager(*m_source, &system);
|
||||||
manager.ProcessSentence();
|
manager.ProcessSentence();
|
||||||
|
|
||||||
|
@ -102,6 +102,7 @@ public:
|
|||||||
// execute the translation
|
// execute the translation
|
||||||
// note: this executes the search, resulting in a search graph
|
// note: this executes the search, resulting in a search graph
|
||||||
// we still need to apply the decision rule (MAP, MBR, ...)
|
// we still need to apply the decision rule (MAP, MBR, ...)
|
||||||
|
if ((*m_source).GetSize() == 0) return;
|
||||||
Manager manager(*m_source,staticData.GetSearchAlgorithm(), &system);
|
Manager manager(*m_source,staticData.GetSearchAlgorithm(), &system);
|
||||||
manager.ProcessSentence();
|
manager.ProcessSentence();
|
||||||
|
|
||||||
|
@ -526,8 +526,15 @@ filter
|
|||||||
rerun-on-change: filter-settings
|
rerun-on-change: filter-settings
|
||||||
pass-if: TRAINING:binarize-all
|
pass-if: TRAINING:binarize-all
|
||||||
ignore-if: use-hiero
|
ignore-if: use-hiero
|
||||||
|
filter-devtest
|
||||||
|
in: TRAINING:config input-devtest
|
||||||
|
out: filtered-config-devtest
|
||||||
|
default-name: tuning/moses.filtered.devtest.ini
|
||||||
|
rerun-on-change: filter-settings
|
||||||
|
pass-if: TRAINING:binarize-all
|
||||||
|
ignore-if: use-hiero
|
||||||
tune
|
tune
|
||||||
in: filtered-config input reference
|
in: filtered-config input reference filtered-config-devtest input-devtest reference-devtest
|
||||||
out: weight-config
|
out: weight-config
|
||||||
ignore-if: use-hiero
|
ignore-if: use-hiero
|
||||||
qsub-script: yes
|
qsub-script: yes
|
||||||
|
@ -947,6 +947,9 @@ sub define_step {
|
|||||||
elsif ($DO_STEP[$i] eq 'TUNING:filter') {
|
elsif ($DO_STEP[$i] eq 'TUNING:filter') {
|
||||||
&define_tuningevaluation_filter(undef,$i);
|
&define_tuningevaluation_filter(undef,$i);
|
||||||
}
|
}
|
||||||
|
elsif ($DO_STEP[$i] eq 'TUNING:filter-devtest') {
|
||||||
|
&define_tuningevaluation_filter(undef,$i,"devtest");
|
||||||
|
}
|
||||||
elsif ($DO_STEP[$i] eq 'TUNING:tune') {
|
elsif ($DO_STEP[$i] eq 'TUNING:tune') {
|
||||||
&define_tuning_tune($i);
|
&define_tuning_tune($i);
|
||||||
}
|
}
|
||||||
@ -1544,8 +1547,9 @@ sub define_tuning_tune {
|
|||||||
my ($step_id) = @_;
|
my ($step_id) = @_;
|
||||||
my $dir = &check_and_get("GENERAL:working-dir");
|
my $dir = &check_and_get("GENERAL:working-dir");
|
||||||
|
|
||||||
my ($tuned_config,
|
# the last variable only applies to mira tuning (devtest input and reference are read out later)
|
||||||
$config,$input,$reference) = &get_output_and_input($step_id);
|
my ($tuned_config,$config,$input,$reference,$config_devtest) = &get_output_and_input($step_id);
|
||||||
|
|
||||||
my $tuning_script = &check_and_get("TUNING:tuning-script");
|
my $tuning_script = &check_and_get("TUNING:tuning-script");
|
||||||
my $scripts = &check_backoff_and_get("TUNING:moses-script-dir");
|
my $scripts = &check_backoff_and_get("TUNING:moses-script-dir");
|
||||||
my $nbest_size = &check_and_get("TUNING:nbest");
|
my $nbest_size = &check_and_get("TUNING:nbest");
|
||||||
@ -1562,7 +1566,30 @@ sub define_tuning_tune {
|
|||||||
my $tuning_settings = &backoff_and_get("TUNING:tuning-settings");
|
my $tuning_settings = &backoff_and_get("TUNING:tuning-settings");
|
||||||
$tuning_settings = "" unless $tuning_settings;
|
$tuning_settings = "" unless $tuning_settings;
|
||||||
|
|
||||||
my $cmd = "$tuning_script $input $reference $decoder $config --nbest $nbest_size --working-dir $dir/tuning/tmp.$VERSION --decoder-flags \"$decoder_settings\" --rootdir $scripts $tuning_settings --no-filter-phrase-table";
|
my $use_mira = &check_and_get("TUNING:use-mira");
|
||||||
|
my $cmd = "";
|
||||||
|
if ($use_mira eq "true") {
|
||||||
|
my $experiment_dir = "$dir/tuning/tmp.$VERSION";
|
||||||
|
system("mkdir -p $experiment_dir");
|
||||||
|
|
||||||
|
my $mira_config = "$experiment_dir/mira-config.$VERSION.";
|
||||||
|
my $mira_config_log = $mira_config."log";
|
||||||
|
$mira_config .= "cfg";
|
||||||
|
|
||||||
|
write_mira_config($mira_config, $experiment_dir, $config, $config_devtest);
|
||||||
|
$cmd = "$tuning_script -config $mira_config -exec >& $mira_config_log";
|
||||||
|
|
||||||
|
# write script to select the best set of weights after training for the specified number of epochs -->
|
||||||
|
# cp to tuning/tmp.?/moses.ini
|
||||||
|
my $script_filename = "$experiment_dir/selectBestWeights.";
|
||||||
|
my $script_filename_log = $script_filename."log";
|
||||||
|
$script_filename .= "perl";
|
||||||
|
my $weight_output_file = "$experiment_dir/moses.ini";
|
||||||
|
write_selectBestMiraWeights($experiment_dir, $script_filename, $weight_output_file);
|
||||||
|
$cmd .= "\n$script_filename >& $script_filename_log";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$cmd = "$tuning_script $input $reference $decoder $config --nbest $nbest_size --working-dir $dir/tuning/tmp.$VERSION --decoder-flags \"$decoder_settings\" --rootdir $scripts $tuning_settings --no-filter-phrase-table";
|
||||||
$cmd .= " --lambdas \"$lambda\"" if $lambda;
|
$cmd .= " --lambdas \"$lambda\"" if $lambda;
|
||||||
$cmd .= " --continue" if $tune_continue;
|
$cmd .= " --continue" if $tune_continue;
|
||||||
$cmd .= " --inputtype $tune_inputtype" if $tune_inputtype;
|
$cmd .= " --inputtype $tune_inputtype" if $tune_inputtype;
|
||||||
@ -1570,15 +1597,157 @@ sub define_tuning_tune {
|
|||||||
my $qsub_args = &get_qsub_args("TUNING");
|
my $qsub_args = &get_qsub_args("TUNING");
|
||||||
$cmd .= " --queue-flags=\"$qsub_args\"" if ($CLUSTER && $qsub_args);
|
$cmd .= " --queue-flags=\"$qsub_args\"" if ($CLUSTER && $qsub_args);
|
||||||
$cmd .= " --jobs $jobs" if $CLUSTER && $jobs;
|
$cmd .= " --jobs $jobs" if $CLUSTER && $jobs;
|
||||||
|
|
||||||
my $tuning_dir = $tuned_config;
|
my $tuning_dir = $tuned_config;
|
||||||
$tuning_dir =~ s/\/[^\/]+$//;
|
$tuning_dir =~ s/\/[^\/]+$//;
|
||||||
$cmd .= "\nmkdir -p $tuning_dir";
|
$cmd .= "\nmkdir -p $tuning_dir";
|
||||||
|
}
|
||||||
|
|
||||||
$cmd .= "\ncp $dir/tuning/tmp.$VERSION/moses.ini $tuned_config";
|
$cmd .= "\ncp $dir/tuning/tmp.$VERSION/moses.ini $tuned_config";
|
||||||
|
|
||||||
&create_step($step_id,$cmd);
|
&create_step($step_id,$cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Read lines from an already opened moses.ini handle up to (not including)
# the next empty line, or up to end of file. The returned lines keep their
# trailing newline so they can be printed back verbatim.
sub _read_ini_weight_lines {
    my ($fh) = @_;

    my @lines;
    while (my $line = <$fh>) {
        last if $line eq "\n";
        push @lines, $line;
    }
    return @lines;
}

# Write the configuration file consumed by the mira training script.
#
# Parameters:
#   $config_filename      - path of the mira config file to create
#   $expt_dir             - working directory of the tuning run; the converted
#                           core weights are written to "$expt_dir/core_weights"
#   $tune_filtered_ini    - moses ini file used for the [train] section
#   $devtest_filtered_ini - moses ini file used for the [devtest] section
#
# All remaining values are looked up in the experiment configuration via
# check_and_get()/get(). If TUNING:core-weight-config is set, the weights of
# that ini file ([weight-l], [weight-t], [weight-d], [weight-w]) are rewritten
# as "name value" lines, one weight per line.
#
# Dies if any of the files involved cannot be opened or written.
sub write_mira_config {
    my ($config_filename, $expt_dir, $tune_filtered_ini, $devtest_filtered_ini) = @_;

    my $moses_src_dir = &check_and_get("GENERAL:moses-src-dir");
    my $tuning_decoder_settings = &check_and_get("TUNING:decoder-settings");
    my $core_weights = &check_and_get("TUNING:core-weight-config");
    my $input = &check_and_get("TUNING:input");
    my $reference = &check_and_get("TUNING:reference");
    my $tuning_settings = &check_and_get("TUNING:tuning-settings");
    my @settings = split(/ /, $tuning_settings);
    my $mira_tuning_settings = &check_and_get("TUNING:mira-tuning-settings");
    my $input_devtest = &check_and_get("TUNING:input-devtest");
    my $reference_devtest = &check_and_get("TUNING:reference-devtest");

    # convert core weights into format expected by mira
    my $core_file = "$expt_dir/core_weights";
    if ($core_weights) {
        open(my $ini, '<', $core_weights)
            or die "Unable to open core weights file $core_weights: $!\n";
        open(my $core, '>', $core_file)
            or die "Unable to create core weights file $core_file: $!\n";

        while (my $line = <$ini>) {
            if ($line =~ /weight-l/) {
                # first language model is plain "LM", further ones "LM:2", "LM:3", ...
                my @lm_weights = _read_ini_weight_lines($ini);
                for my $i (0 .. $#lm_weights) {
                    my $name = ($i == 0) ? "LM" : "LM:".($i+1);
                    print {$core} $name." ".$lm_weights[$i];
                }
            } elsif ($line =~ /weight-t/) {
                my @pm_weights = _read_ini_weight_lines($ini);
                for my $i (0 .. $#pm_weights) {
                    print {$core} "PhraseModel_".($i+1)." ".$pm_weights[$i];
                }
            } elsif ($line =~ /weight-d/) {
                # the first value is the distance-based distortion weight, the
                # remaining ones are written as lexical reordering weights
                # NOTE(review): numbering of the reordering weights starts at 2
                # and the model name is hard-coded - confirm this matches the
                # feature names mira expects
                my @d_weights = _read_ini_weight_lines($ini);
                for my $i (0 .. $#d_weights) {
                    if ($i == 0) {
                        print {$core} "Distortion ".$d_weights[0];
                    }
                    else {
                        print {$core} "LexicalReordering_wbe-msd-bidirectional-fe-allff_".($i+1)." ".$d_weights[$i];
                    }
                }
            } elsif ($line =~ /weight-w/) {
                # single value on the line following the section header
                my $w = <$ini>;
                print {$core} "WordPenalty ".$w if defined $w;
            }
        }

        close($ini);
        close($core)
            or die "Unable to write core weights file $core_file: $!\n";
    }

    # mira config file
    open(my $cfg, '>', $config_filename)
        or die "Unable to create mira config file $config_filename: $!\n";

    print {$cfg} "[general] \n";
    print {$cfg} "name=expt \n";
    print {$cfg} "fold=0 \n";
    print {$cfg} "mpienv=openmpi_fillup_mark2 \n";
    print {$cfg} "moses-home=".$moses_src_dir."\n";
    print {$cfg} "working-dir=".$expt_dir."\n";
    print {$cfg} "decoder-settings=".$tuning_decoder_settings."\n\n";

    if ($core_weights) {
        print {$cfg} "[core] \n";
        print {$cfg} "weightfile=".$core_file."\n\n";
    }

    print {$cfg} "[train] \n";
    print {$cfg} "trainer=\${moses-home}/dist/bin/mira \n";
    print {$cfg} "input-file=".$input."\n";
    print {$cfg} "reference-files=".$reference."\n";
    print {$cfg} "moses-ini-file=".$tune_filtered_ini."\n";
    print {$cfg} "hours=48 \n";
    # every space-separated entry of TUNING:tuning-settings becomes its own line
    foreach my $setting (@settings) {
        print {$cfg} $setting."\n";
    }
    print {$cfg} "extra-args=".$mira_tuning_settings."\n\n";

    print {$cfg} "[devtest] \n";
    if (&get("TRAINING:hierarchical-rule-set")) {
        print {$cfg} "moses=\${moses-home}/moses-chart-cmd/src/moses_chart \n";
    }
    else {
        print {$cfg} "moses=\${moses-home}/moses-cmd/src/moses \n";
    }
    # use multi-bleu to select the best set of weights
    print {$cfg} "bleu=\${moses-home}/scripts/generic/multi-bleu.perl \n";
    print {$cfg} "input-file=".$input_devtest."\n";
    print {$cfg} "reference-file=".$reference_devtest."\n";
    print {$cfg} "moses-ini-file=".$devtest_filtered_ini."\n";
    print {$cfg} "hours=12 \nextra-args= \nskip-dev=1 \nskip-devtest=0 \nskip-submit=0 \n";

    close($cfg)
        or die "Unable to write mira config file $config_filename: $!\n";
}
|
||||||
|
|
||||||
|
# Generate an executable Perl script that selects the best set of weights
# after mira training.
#
# Parameters:
#   $expt_dir        - directory the generated script searches for
#                      "*_devtest.bleu" files and "*devtest.<id>.ini" files
#   $script_filename - path of the script to create (made user-executable)
#   $weight_out_file - file the generated script copies the best ini file to
#
# The generated script reads the first line of every BLEU file, picks the one
# with the highest BLEU score (ties are broken by the length ratio closest
# to 1) and copies the ini file with the matching id to $weight_out_file.
# BLEU files that cannot be read or parsed are skipped, and the script exits
# with status 1 if no BLEU file, no matching ini file, or a failing copy is
# encountered.
#
# Dies if the script cannot be created or made executable.
sub write_selectBestMiraWeights {
    my ($expt_dir, $script_filename, $weight_out_file) = @_;

    open(my $scr, '>', $script_filename)
        or die "Unable to create script $script_filename: $!\n";

    # Only $expt_dir and $weight_out_file are interpolated here; every other
    # variable is escaped and belongs to the generated script.
    print {$scr} <<"END_OF_SCRIPT";
#!/usr/bin/perl -w
use strict;

my \@devtest_bleu = glob("$expt_dir/*_devtest.bleu"); # expt_00_0_devtest.bleu
if (scalar(\@devtest_bleu) == 0) {
\tprint STDERR "ERROR: no bleu files globbed, cannot find best weights.\\n";
\texit(1);
}

my \$best_weights;
my \$best_bleu = -1;
my \$best_ratio = 0;
foreach my \$bleu_file (\@devtest_bleu) {
\tnext unless \$bleu_file =~ /_([\\d_]+)_devtest\\.bleu\$/;
\tmy \$id = \$1;
\topen(my \$bleu_fh, '<', \$bleu_file) or next;
\tmy \$bleu = <\$bleu_fh>;
\tclose(\$bleu_fh);
\tnext unless defined(\$bleu) && \$bleu =~ /BLEU = ([\\d\\.]+), .*ratio=([\\d\\.]+), /;
\tmy (\$score, \$ratio) = (\$1, \$2);
\tif (\$score > \$best_bleu || (\$score == \$best_bleu && (abs(1-\$ratio) < abs(1-\$best_ratio)))) {
\t\t\$best_bleu = \$score;
\t\t\$best_ratio = \$ratio;
\t\t# expt1-devtest.00_0.ini (incl. path to sparse weights)
\t\t(\$best_weights) = glob("$expt_dir/*devtest.\$id.ini");
\t}
}

if (!defined(\$best_weights)) {
\tprint STDERR "ERROR: no weight file found for the best devtest BLEU score.\\n";
\texit(1);
}
print STDERR "Best weights according to BLEU on devtest set: \$best_weights \\n";
system("cp \$best_weights $weight_out_file") == 0 or exit(1);
END_OF_SCRIPT

    close($scr)
        or die "Unable to write script $script_filename: $!\n";

    # equivalent of "chmod u+x" without going through a shell
    my $mode = (stat($script_filename))[2] & 07777;
    chmod($mode | 0100, $script_filename)
        or die "Unable to make script $script_filename executable: $!\n";
}
|
||||||
|
|
||||||
sub define_training_prepare_data {
|
sub define_training_prepare_data {
|
||||||
my ($step_id) = @_;
|
my ($step_id) = @_;
|
||||||
|
|
||||||
@ -2128,7 +2297,7 @@ sub encode_factor_list {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sub define_tuningevaluation_filter {
|
sub define_tuningevaluation_filter {
|
||||||
my ($set,$step_id) = @_;
|
my ($set,$step_id, $type) = @_;
|
||||||
my $scripts = &check_and_get("GENERAL:moses-script-dir");
|
my $scripts = &check_and_get("GENERAL:moses-script-dir");
|
||||||
my $dir = &check_and_get("GENERAL:working-dir");
|
my $dir = &check_and_get("GENERAL:working-dir");
|
||||||
my $tuning_flag = !defined($set);
|
my $tuning_flag = !defined($set);
|
||||||
@ -2148,7 +2317,14 @@ sub define_tuningevaluation_filter {
|
|||||||
$input_filter = &get("TUNING:input-filter") if $tuning_flag;
|
$input_filter = &get("TUNING:input-filter") if $tuning_flag;
|
||||||
$input_filter = $input unless $input_filter;
|
$input_filter = $input unless $input_filter;
|
||||||
|
|
||||||
my $filter_dir = "$dir/tuning/filtered.$VERSION";
|
my $filter_dir;
|
||||||
|
if ($type) {
|
||||||
|
$filter_dir = "$dir/tuning/filtered.$type.$VERSION";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
$filter_dir = "$dir/tuning/filtered.$VERSION";
|
||||||
|
}
|
||||||
|
|
||||||
$filter_dir = "$dir/evaluation/filtered.$set.$VERSION" unless $tuning_flag;
|
$filter_dir = "$dir/evaluation/filtered.$set.$VERSION" unless $tuning_flag;
|
||||||
|
|
||||||
my $settings = &backoff_and_get("EVALUATION:$set:filter-settings") unless $tuning_flag;
|
my $settings = &backoff_and_get("EVALUATION:$set:filter-settings") unless $tuning_flag;
|
||||||
|
Loading…
Reference in New Issue
Block a user