2010-10-28 20:20:57 +04:00
#!/usr/bin/env perl
use strict;
#eddie specific
use lib "/exports/informatics/inf_iccs_smt/perl/lib/perl5/site_perl";
use Config::Simple;
use File::Basename;
use Getopt::Long "GetOptions";
my ($config_file,$execute,$continue);
die("training-expt.perl -config config-file [-exec]")
unless &GetOptions('config=s' => \$config_file,
'cont=s' => \$continue,
'exec' => \$execute);
my $config = new Config::Simple($config_file) ||
die "Error: unable to read config file \"$config_file\"";
#substitution
foreach my $key ($config->param) {
my $value = $config->param($key);
while ($value =~ m/(.*?)\$\{(.*?)\}(.*)/) {
my $sub = $config->param($2);
if (! $sub) {
#try in this scope
my $scope = (split /\./, $key)[0];
$sub = $config->param($scope . "." . $2);
}
if (! $sub) {
#then general
$sub = $config->param("general." . $2);
}
$value = $1 . $sub . $3;
}
print STDERR "$key => "; print STDERR $value; print STDERR "\n";
$config->param($key,$value);
}
#check if we're using sge
my $have_sge = 0;
if (`which qsub 2>/dev/null`) {
print "Using sge for job control.\n";
$have_sge = 1;
} else {
print "No sge detected.\n";
}
#required global parameters
my $name = ¶m_required("general.name");
#optional globals
my $queue = ¶m("general.queue", "inf_iccs_smt");
2010-12-01 21:51:19 +03:00
my $mpienv = ¶m("general.mpienv", "openmpi_smp8_mark2");
2010-10-28 20:20:57 +04:00
my $vmem = ¶m("general.vmem", "6");
2011-02-24 18:19:18 +03:00
my $decoder_settings = ¶m("general.decoder-settings", "");
2010-10-28 20:20:57 +04:00
# Create the training script
#
#job control
my $working_dir = ¶m("general.working-dir");
system("mkdir -p $working_dir") == 0 or die "Error: unable to create directory \"$working_dir\"";
my $train_script = "$name-train";
my $job_name = "$name-t";
my $hours = ¶m("train.hours",48);
2011-06-08 02:23:38 +04:00
# Check if a weight file with core weights was given
my $core_weight_file = ¶m("core.weightfile");
2011-02-28 22:49:15 +03:00
2011-06-08 02:23:38 +04:00
#required training parameters
2010-10-28 20:20:57 +04:00
my $moses_ini_file = ¶m_required("train.moses-ini-file");
&check_exists ("moses ini file", $moses_ini_file);
my $input_file = ¶m_required("train.input-file");
&check_exists ("train input file", $input_file);
my $reference_files = ¶m_required("train.reference-files");
my $trainer_exe = ¶m_required("train.trainer");
&check_exists("Training executable", $trainer_exe);
#my $weights_file = ¶m_required("train.weights-file");
#&check_exists("weights file ", $weights_file);
#optional training parameters
my $epochs = ¶m("train.epochs", 2);
my $learner = ¶m("train.learner", "mira");
2011-01-07 21:19:11 +03:00
my $batch = ¶m("train.batch", 1);
2010-10-28 20:20:57 +04:00
my $extra_args = ¶m("train.extra-args");
2010-12-01 21:51:19 +03:00
my $continue_from_epoch = ¶m("train.continue-from-epoch", 0);
2011-02-24 18:19:18 +03:00
my $by_node = ¶m("train.by-node",0);
2011-02-28 22:49:15 +03:00
my $slots = ¶m("train.slots",8);
my $jobs = ¶m("train.jobs",8);
2011-04-30 20:17:37 +04:00
my $mixing_frequency = ¶m("train.mixing-frequency",0);
my $weight_dump_frequency = ¶m("train.weight-dump-frequency",0);
my $burn_in = ¶m("train.burn-in",0);
my $burn_in_input_file = ¶m("train.burn-in-input-file");
my $burn_in_reference_files = ¶m("train.burn-in-reference-files");
2011-06-08 02:23:38 +04:00
my $skipTrain = ¶m("train.skip", 0);
2010-10-28 20:20:57 +04:00
#test configuration
my ($test_input_file, $test_reference_file,$test_ini_file,$bleu_script,$use_moses);
my $test_exe = ¶m("test.moses");
&check_exists("test executable", $test_exe);
$bleu_script = ¶m_required("test.bleu");
&check_exists("multi-bleu script", $bleu_script);
$test_input_file = ¶m_required("test.input-file");
$test_reference_file = ¶m_required("test.reference-file");
&check_exists ("test input file", $test_input_file);
for my $ref (glob $test_reference_file . "*") {
&check_exists ("test ref file", $ref);
}
$test_ini_file = ¶m_required("test.moses-ini-file");
&check_exists ("test ini file", $test_ini_file);
my $weight_file_stem = "$name-weights";
2011-02-24 18:19:18 +03:00
my $extra_memory_test = ¶m("test.extra-memory",0);
2011-03-22 20:17:43 +03:00
my $skip_test = ¶m("test.skip-test",0);
2011-03-29 04:40:21 +04:00
my $skip_dev = ¶m("test.skip-dev",0);
2011-06-17 13:52:42 +04:00
my $skip_submit_test = ¶m("test.skip-submit",0);
2010-10-28 20:20:57 +04:00
2010-12-01 21:51:19 +03:00
# check that number of jobs, dump frequency and number of input sentences are compatible
# shard size = number of input sentences / number of jobs, ensure shard size >= dump frequency
my $result = `wc -l $input_file`;
my @result = split(/\s/, $result);
my $inputSize = $result[0];
my $shardSize = $inputSize / $jobs;
2011-04-30 20:17:37 +04:00
if ($mixing_frequency != 0) {
if ($shardSize < $mixing_frequency) {
$mixing_frequency = int($shardSize);
if ($mixing_frequency == 0) {
$mixing_frequency = 1;
}
print "Warning: mixing frequency must not be larger than shard size, setting mixing frequency to $mixing_frequency\n";
2011-03-22 20:17:43 +03:00
}
2011-04-30 20:17:37 +04:00
}
2011-03-22 20:17:43 +03:00
2011-04-30 20:17:37 +04:00
if ($weight_dump_frequency != 0) {
if ($shardSize < $weight_dump_frequency) {
$weight_dump_frequency = int($shardSize);
if ($weight_dump_frequency == 0) {
$weight_dump_frequency = 1;
}
print "Warning: weight dump frequency must not be larger than shard size, setting weight dump frequency to $weight_dump_frequency\n";
}
2010-12-01 21:51:19 +03:00
}
2010-10-28 20:20:57 +04:00
2011-04-30 20:17:37 +04:00
if ($mixing_frequency != 0) {
if ($mixing_frequency > ($shardSize/$batch)) {
$mixing_frequency = int($shardSize/$batch);
if ($mixing_frequency == 0) {
$mixing_frequency = 1;
}
print "Warning: mixing frequency must not be larger than (shard size/batch size), setting mixing frequency to $mixing_frequency\n";
2011-03-29 04:40:21 +04:00
}
2011-04-30 20:17:37 +04:00
}
if ($weight_dump_frequency != 0) {
if ($weight_dump_frequency > ($shardSize/$batch)) {
$weight_dump_frequency = int($shardSize/$batch);
if ($weight_dump_frequency == 0) {
$weight_dump_frequency = 1;
}
2011-03-29 04:40:21 +04:00
2011-04-30 20:17:37 +04:00
print "Warning: weight dump frequency must not be larger than (shard size/batch size), setting weight dump frequency to $weight_dump_frequency\n";
}
2011-03-29 04:40:21 +04:00
}
2010-10-28 20:20:57 +04:00
#file names
my $train_script_file = $working_dir . "/" . $train_script . ".sh";
my $train_out = $train_script . ".out";
my $train_err = $train_script . ".err";
2011-06-08 02:23:38 +04:00
my $train_job_id = 0;
my @refs;
if (ref($reference_files) eq 'ARRAY') {
@refs = @$reference_files;
} else {
@refs = glob $reference_files;
}
2010-10-28 20:20:57 +04:00
2011-06-08 02:23:38 +04:00
if (!$skipTrain) {
2010-10-28 20:20:57 +04:00
#write the script
open TRAIN, ">$train_script_file" or die "Unable to open \"$train_script_file\" for writing";
2011-02-28 22:49:15 +03:00
&header(*TRAIN,$job_name,$working_dir,$slots,$jobs,$hours,$vmem,$train_out,$train_err);
2011-06-08 02:23:38 +04:00
if ($jobs == 1) {
print TRAIN "$trainer_exe ";
2011-02-24 18:19:18 +03:00
}
else {
2011-06-08 02:23:38 +04:00
if ($by_node) {
print TRAIN "mpirun -np $jobs --bynode $trainer_exe \\\n";
}
else {
print TRAIN "mpirun -np \$NSLOTS $trainer_exe \\\n";
}
2011-02-24 18:19:18 +03:00
}
2011-06-08 02:23:38 +04:00
2010-10-28 20:20:57 +04:00
print TRAIN "-f $moses_ini_file \\\n";
print TRAIN "-i $input_file \\\n";
2011-06-08 02:23:38 +04:00
2010-10-28 20:20:57 +04:00
for my $ref (@refs) {
&check_exists("train ref file", $ref);
print TRAIN "-r $ref ";
}
print TRAIN "\\\n";
2011-06-08 02:23:38 +04:00
2011-04-30 20:17:37 +04:00
if ($burn_in) {
print TRAIN "--burn-in 1 \\\n";
print TRAIN "--burn-in-input-file $burn_in_input_file \\\n";
my @refs;
if (ref($burn_in_reference_files) eq 'ARRAY') {
@refs = @$burn_in_reference_files;
} else {
@refs = glob $burn_in_reference_files;
}
for my $ref (@refs) {
&check_exists("burn-in ref file", $ref);
print TRAIN "--burn-in-reference-files $ref ";
}
print TRAIN "\\\n";
}
2010-10-28 20:20:57 +04:00
#if ($weights_file) {
# print TRAIN "-w $weights_file \\\n";
#}
2011-06-08 02:23:38 +04:00
if (defined $core_weight_file) {
print TRAIN "--core-weights $core_weight_file \\\n";
}
2010-12-01 21:51:19 +03:00
print TRAIN "-l $learner \\\n";
2010-10-28 20:20:57 +04:00
print TRAIN "--weight-dump-stem $weight_file_stem \\\n";
2011-03-22 20:17:43 +03:00
print TRAIN "--mixing-frequency $mixing_frequency \\\n";
2011-04-30 20:17:37 +04:00
if ($weight_dump_frequency != -1) {
print TRAIN "--weight-dump-frequency $weight_dump_frequency \\\n";
}
2010-10-28 20:20:57 +04:00
print TRAIN "--epochs $epochs \\\n";
2011-01-07 21:19:11 +03:00
print TRAIN "-b $batch \\\n";
2011-02-24 18:19:18 +03:00
print TRAIN "--decoder-settings \"$decoder_settings\" \\\n";
2010-10-28 20:20:57 +04:00
print TRAIN $extra_args;
print TRAIN "\n";
2011-06-08 02:23:38 +04:00
if ($jobs == 1) {
print TRAIN "echo \"mira finished.\"\n";
}
else {
print TRAIN "echo \"mpirun finished.\"\n";
}
2010-10-28 20:20:57 +04:00
close TRAIN;
if (! $execute) {
print "Written train file: $train_script_file\n";
exit 0;
}
#submit the training job
if ($have_sge) {
2011-02-28 22:49:15 +03:00
$train_job_id = &submit_job_sge($train_script_file);
2010-10-28 20:20:57 +04:00
} else {
$train_job_id = &submit_job_no_sge($train_script_file, $train_out,$train_err);
}
die "Failed to submit training job" unless $train_job_id;
2011-06-08 02:23:38 +04:00
}
2010-10-28 20:20:57 +04:00
#wait for the next weights file to appear, or the training job to end
my $train_iteration = -1;
2010-12-01 21:51:19 +03:00
# optionally continue from a later epoch (if $continue_from_epoch > 0)
if ($continue_from_epoch > 0) {
2011-06-08 02:23:38 +04:00
$train_iteration = $continue_from_epoch - 1;
2010-12-01 21:51:19 +03:00
print "Continuing training from epoch $continue_from_epoch, with weights from ini file $moses_ini_file.\n";
}
2010-10-28 20:20:57 +04:00
while(1) {
2010-12-01 21:51:19 +03:00
my($epoch, $epoch_slice);
2011-06-08 02:23:38 +04:00
$train_iteration += 1; # starts at 0
2010-10-28 20:20:57 +04:00
my $new_weight_file = "$working_dir/$weight_file_stem" . "_";
2011-04-30 20:17:37 +04:00
if ($weight_dump_frequency == 0) {
print "No weights, no testing..\n";
exit(0);
}
if ($weight_dump_frequency == 1) {
2011-02-28 22:49:15 +03:00
if ($train_iteration < 10) {
2011-03-22 20:17:43 +03:00
$new_weight_file .= "0".$train_iteration;
2011-02-28 22:49:15 +03:00
}
else {
2011-03-22 20:17:43 +03:00
$new_weight_file .= $train_iteration;
2011-02-28 22:49:15 +03:00
}
2010-10-28 20:20:57 +04:00
} else {
2011-04-30 20:17:37 +04:00
#my $epoch = 1 + int $train_iteration / $weight_dump_frequency;
$epoch = int $train_iteration / $weight_dump_frequency;
$epoch_slice = $train_iteration % $weight_dump_frequency;
2011-01-07 21:19:11 +03:00
if ($epoch < 10) {
2011-03-22 20:17:43 +03:00
$new_weight_file .= "0".$epoch."_".$epoch_slice;
2011-01-07 21:19:11 +03:00
}
else {
2011-03-22 20:17:43 +03:00
$new_weight_file .= $epoch."_".$epoch_slice;
2011-01-07 21:19:11 +03:00
}
2010-10-28 20:20:57 +04:00
}
2011-01-07 21:19:11 +03:00
2011-01-26 20:51:45 +03:00
if (-e "$working_dir/stopping") {
2011-01-07 21:19:11 +03:00
print "Training finished at " . scalar(localtime()) . " because stopping criterion was reached.\n";
2010-10-28 20:20:57 +04:00
exit 0;
}
2011-01-07 21:19:11 +03:00
else {
print "Waiting for $new_weight_file\n";
2011-06-08 02:23:38 +04:00
if (!$skipTrain) {
while ((! -e $new_weight_file) && &check_running($train_job_id)) {
sleep 10;
}
2011-01-07 21:19:11 +03:00
}
if (! -e $new_weight_file ) {
print "Training finished at " . scalar(localtime()) . "\n";
exit 0;
}
}
2011-03-22 20:17:43 +03:00
#new weight file written. create test script and submit
my $suffix = "";
2011-06-08 02:23:38 +04:00
print "weight file exists? ".(-e $new_weight_file)."\n";
2011-03-22 20:17:43 +03:00
if (!$skip_test) {
2011-06-13 17:44:01 +04:00
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "test", $test_ini_file, $test_input_file, $test_reference_file, $skip_submit_test);
2011-03-29 04:40:21 +04:00
}
if (!$skip_dev) {
2011-06-13 17:44:01 +04:00
createTestScriptAndSubmit($epoch, $epoch_slice, $new_weight_file, $suffix, "dev", $moses_ini_file, $input_file, $refs[0], $skip_submit_test);
2011-03-22 20:17:43 +03:00
}
}
sub createTestScriptAndSubmit {
my $epoch = $_[0];
my $epoch_slice = $_[1];
my $new_weight_file = $_[2];
my $suffix = $_[3];
2011-03-29 04:40:21 +04:00
my $testtype = $_[4];
my $old_ini_file = $_[5];
my $input_file = $_[6];
my $reference_file = $_[7];
2011-06-13 17:44:01 +04:00
my $skip_submit = $_[8];
2011-03-22 20:17:43 +03:00
2010-10-28 20:20:57 +04:00
#file names
2011-03-29 04:40:21 +04:00
my $job_name = $name."_".$testtype."_".$train_iteration.$suffix;
my $test_script = "$name-$testtype";
2011-03-22 20:17:43 +03:00
my $test_script_file = $working_dir."/".$test_script.".$train_iteration".$suffix.".sh";
my $test_out = $test_script . ".$train_iteration" . $suffix . ".out";
my $test_err = $test_script . ".$train_iteration" . $suffix . ".err";
2011-03-29 04:40:21 +04:00
2010-12-01 21:51:19 +03:00
my $output_file;
my $output_error_file;
my $bleu_file;
2011-04-30 20:17:37 +04:00
if ($weight_dump_frequency == 1) {
2011-01-07 21:19:11 +03:00
if ($train_iteration < 10) {
2011-03-29 04:40:21 +04:00
$output_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".out";
$output_error_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".err";
$bleu_file = $working_dir."/".$name."_0".$train_iteration.$suffix."_$testtype".".bleu";
2011-01-07 21:19:11 +03:00
}
else {
2011-03-29 04:40:21 +04:00
$output_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".out";
$output_error_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".err";
$bleu_file = $working_dir."/".$name."_".$train_iteration.$suffix."_$testtype".".bleu";
2011-01-07 21:19:11 +03:00
}
2010-12-01 21:51:19 +03:00
}
else {
2011-01-07 21:19:11 +03:00
if ($epoch < 10) {
2011-03-29 04:40:21 +04:00
$output_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".out";
$output_error_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".err";
$bleu_file = $working_dir."/".$name."_0".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu";
2011-01-07 21:19:11 +03:00
}
else {
2011-03-29 04:40:21 +04:00
$output_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".out";
$output_error_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".err";
$bleu_file = $working_dir."/".$name."_".$epoch."_".$epoch_slice.$suffix."_$testtype".".bleu";
2011-01-07 21:19:11 +03:00
}
2010-12-01 21:51:19 +03:00
}
2010-10-28 20:20:57 +04:00
if (! (open TEST, ">$test_script_file" )) {
2011-03-22 20:17:43 +03:00
die "Unable to create test script $test_script_file\n";
}
2011-03-29 04:40:21 +04:00
2010-10-28 20:20:57 +04:00
my $hours = ¶m("test.hours",12);
my $extra_args = ¶m("test.extra-args");
2011-03-03 17:03:25 +03:00
# Splice the weights into the moses ini file.
my ($default_weight,$wordpenalty_weight,@phrasemodel_weights,$lm_weight,$lm2_weight,$distortion_weight,@lexicalreordering_weights);
2011-06-08 02:23:38 +04:00
if (! (open WEIGHTS, "$new_weight_file")) {
die "Unable to open weights file $new_weight_file\n";
2010-10-28 20:20:57 +04:00
}
2011-04-30 20:17:37 +04:00
my $readCoreWeights = 0;
2011-01-26 20:51:45 +03:00
my $readExtraWeights = 0;
2010-10-28 20:20:57 +04:00
my %extra_weights;
while(<WEIGHTS>) {
chomp;
2011-01-07 21:19:11 +03:00
my ($name,$value) = split;
2011-03-03 17:03:25 +03:00
next if ($name =~ /^!Unknown/);
2011-01-26 20:51:45 +03:00
next if ($name =~ /^BleuScore/);
2010-10-28 20:20:57 +04:00
if ($name eq "DEFAULT_") {
$default_weight = $value;
} else {
if ($name eq "WordPenalty") {
$wordpenalty_weight = $value;
2011-04-30 20:17:37 +04:00
$readCoreWeights += 1;
2010-10-28 20:20:57 +04:00
} elsif ($name =~ /^PhraseModel/) {
push @phrasemodel_weights,$value;
2011-06-08 02:23:38 +04:00
$readCoreWeights += 1;
2011-02-12 21:57:51 +03:00
} elsif ($name =~ /^LM\:2/) {
$lm2_weight = $value;
2011-04-30 20:17:37 +04:00
$readCoreWeights += 1;
2011-02-12 21:57:51 +03:00
}
elsif ($name =~ /^LM/) {
2010-10-28 20:20:57 +04:00
$lm_weight = $value;
2011-04-30 20:17:37 +04:00
$readCoreWeights += 1;
2010-10-28 20:20:57 +04:00
} elsif ($name eq "Distortion") {
$distortion_weight = $value;
2011-04-30 20:17:37 +04:00
$readCoreWeights += 1;
2010-12-01 21:51:19 +03:00
} elsif ($name =~ /^LexicalReordering/) {
push @lexicalreordering_weights,$value;
2011-06-08 02:23:38 +04:00
$readCoreWeights += 1;
2011-01-26 20:51:45 +03:00
} else {
2010-10-28 20:20:57 +04:00
$extra_weights{$name} = $value;
2011-01-26 20:51:45 +03:00
$readExtraWeights += 1;
2010-10-28 20:20:57 +04:00
}
}
}
close WEIGHTS;
2011-06-08 02:23:38 +04:00
if (!defined $core_weight_file) {
print "Number of core weights read: ".$readCoreWeights."\n";
2011-04-30 20:17:37 +04:00
}
2011-06-08 02:23:38 +04:00
print "Number of extra weights read: ".$readExtraWeights."\n";
2011-04-30 20:17:37 +04:00
2011-06-08 02:23:38 +04:00
# If there is a core weight file, we have to load the core weights from that file (NOTE: this is not necessary if the core weights are also printed to the weights file)
if (defined $core_weight_file) {
@phrasemodel_weights = ();
@lexicalreordering_weights = ();
$readCoreWeights = 0;
if (! (open CORE_WEIGHTS, "$core_weight_file")) {
die "Unable to open core weights file $core_weight_file\n";
}
print "Reading core weights from file..\n";
while(<CORE_WEIGHTS>) {
chomp;
my ($name,$value) = split;
next if ($name =~ /^!Unknown/);
next if ($name =~ /^BleuScore/);
if ($name eq "DEFAULT_") {
$default_weight = $value;
}
else {
if ($name eq "WordPenalty") {
$wordpenalty_weight = $value;
$readCoreWeights += 1;
} elsif ($name =~ /^PhraseModel/) {
push @phrasemodel_weights,$value;
$readCoreWeights += 1;
} elsif ($name =~ /^LM\:2/) {
$lm2_weight = $value;
$readCoreWeights += 1;
}
elsif ($name =~ /^LM/) {
$lm_weight = $value;
$readCoreWeights += 1;
} elsif ($name eq "Distortion") {
$distortion_weight = $value;
$readCoreWeights += 1;
} elsif ($name =~ /^LexicalReordering/) {
push @lexicalreordering_weights,$value;
$readCoreWeights += 1;
} else {
# there should be no extra weights in the core weights file
print "weight not matched: $name:$value\n";
}
}
}
close CORE_WEIGHTS;
print "Number of core weights read: ".$readCoreWeights."\n";
2010-10-28 20:20:57 +04:00
}
2011-06-08 02:23:38 +04:00
2010-12-01 21:51:19 +03:00
# Create new ini file
2011-03-29 04:40:21 +04:00
my $new_ini_file = $working_dir."/".$test_script.".".$train_iteration.$suffix.".ini";
if (! (open NEWINI, ">$new_ini_file" )) {
die "Unable to create ini file $new_ini_file\n";
2010-10-28 20:20:57 +04:00
}
2011-03-29 04:40:21 +04:00
if (! (open OLDINI, "$old_ini_file" )) {
die "Unable to read ini file $old_ini_file\n";
2010-10-28 20:20:57 +04:00
}
2010-12-01 21:51:19 +03:00
2010-10-28 20:20:57 +04:00
while(<OLDINI>) {
if (/weight-l/) {
print NEWINI "[weight-l]\n";
2011-03-03 17:03:25 +03:00
print NEWINI $lm_weight;
2010-10-28 20:20:57 +04:00
print NEWINI "\n";
2011-02-12 21:57:51 +03:00
if (defined $lm2_weight) {
readline(OLDINI);
2011-03-03 17:03:25 +03:00
print NEWINI $lm2_weight;
2011-02-12 21:57:51 +03:00
print NEWINI "\n";
}
2010-10-28 20:20:57 +04:00
readline(OLDINI);
} elsif (/weight-t/) {
print NEWINI "[weight-t]\n";
foreach my $phrasemodel_weight (@phrasemodel_weights) {
2011-03-03 17:03:25 +03:00
print NEWINI $phrasemodel_weight;
2010-10-28 20:20:57 +04:00
print NEWINI "\n";
readline(OLDINI);
}
} elsif (/weight-d/) {
print NEWINI "[weight-d]\n";
2011-03-03 17:03:25 +03:00
print NEWINI $distortion_weight;
2010-10-28 20:20:57 +04:00
print NEWINI "\n";
readline(OLDINI);
2010-12-01 21:51:19 +03:00
foreach my $lexicalreordering_weight (@lexicalreordering_weights) {
2011-03-03 17:03:25 +03:00
print NEWINI $lexicalreordering_weight;
2010-12-01 21:51:19 +03:00
print NEWINI "\n";
readline(OLDINI);
}
2010-10-28 20:20:57 +04:00
} elsif (/weight-w/) {
print NEWINI "[weight-w]\n";
2011-03-03 17:03:25 +03:00
print NEWINI $wordpenalty_weight;
2010-12-01 21:51:19 +03:00
print NEWINI "\n";
readline(OLDINI);
2010-10-28 20:20:57 +04:00
} else {
print NEWINI;
}
}
close NEWINI;
close OLDINI;
2011-01-26 20:51:45 +03:00
my $writtenExtraWeights = 0;
2010-10-28 20:20:57 +04:00
# if there are any non-core weights, write them to a weights file
my $extra_weight_file = undef;
if (%extra_weights) {
$extra_weight_file = "$new_weight_file.scaled";
if (! (open EXTRAWEIGHT,">$extra_weight_file")) {
print "Warning: unable to create extra weights file $extra_weight_file";
next;
}
2011-06-08 02:23:38 +04:00
# my $core_weight = 1;
# if ($have_core) {
# $default_weight = $extra_weights{"DEFAULT_"};
# $core_weight = $extra_weights{"core"};
# }
2010-10-28 20:20:57 +04:00
foreach my $name (sort keys %extra_weights) {
next if ($name eq "core");
next if ($name eq "DEFAULT_");
2011-03-03 17:03:25 +03:00
my $value = $extra_weights{$name};
2011-01-26 20:51:45 +03:00
2011-06-08 02:23:38 +04:00
# write only non-zero feature weights to file
2011-01-26 20:51:45 +03:00
if ($value) {
2011-06-08 02:23:38 +04:00
# $value /= $core_weight;
2011-01-26 20:51:45 +03:00
print EXTRAWEIGHT "$name $value\n";
$writtenExtraWeights += 1;
}
2010-10-28 20:20:57 +04:00
}
}
2011-03-03 17:03:25 +03:00
2010-10-28 20:20:57 +04:00
print TEST "#!/bin/sh\n";
print TEST "#\$ -N $job_name\n";
print TEST "#\$ -wd $working_dir\n";
print TEST "#\$ -l h_rt=$hours:00:00\n";
print TEST "#\$ -o $test_out\n";
print TEST "#\$ -e $test_err\n";
print TEST "\n";
2011-04-30 20:17:37 +04:00
if ($have_sge) {
# some eddie specific stuff
print TEST ". /etc/profile.d/modules.sh\n";
print TEST "module load openmpi/ethernet/gcc/latest\n";
print TEST "export LD_LIBRARY_PATH=/exports/informatics/inf_iccs_smt/shared/boost/lib:\$LD_LIBRARY_PATH\n";
}
2011-03-29 04:40:21 +04:00
print TEST "$test_exe $decoder_settings -i $input_file -f $new_ini_file ";
2010-10-28 20:20:57 +04:00
if ($extra_weight_file) {
print TEST "-weight-file $extra_weight_file ";
}
print TEST $extra_args;
2010-12-01 21:51:19 +03:00
print TEST " 1> $output_file 2> $output_error_file\n";
2011-03-29 04:40:21 +04:00
print TEST "echo \"Decoding of ".$testtype." set finished.\"\n";
print TEST "$bleu_script $reference_file < $output_file > $bleu_file\n";
print TEST "echo \"Computed BLEU score of ".$testtype." set.\"\n";
2010-10-28 20:20:57 +04:00
close TEST;
#launch testing
2011-06-13 17:44:01 +04:00
if(!$skip_submit) {
if ($have_sge) {
if ($extra_memory_test) {
print "Extra memory for test job: $extra_memory_test \n";
&submit_job_sge_extra_memory($test_script_file,$extra_memory_test);
}
else {
&submit_job_sge($test_script_file);
}
} else {
&submit_job_no_sge($test_script_file, $test_out,$test_err);
2011-01-26 20:51:45 +03:00
}
2011-06-13 17:44:01 +04:00
}
2011-03-22 20:17:43 +03:00
}
2010-10-28 20:20:57 +04:00
sub param {
my ($key,$default) = @_;
my $value = $config->param($key);
$value = $default if !$value;
# Empty arguments get interpreted as arrays
$value = "" if (ref($value) eq 'ARRAY' && scalar(@$value) == 0);
return $value;
}
sub param_required {
my ($key) = @_;
my $value = ¶m($key);
die "Error: required parameter \"$key\" was missing" if (!defined($value));
#$value = join $value if (ref($value) eq 'ARRAY');
return $value;
}
sub header {
2011-02-28 22:49:15 +03:00
my ($OUT,$name,$working_dir,$slots,$jobs,$hours,$vmem,$out,$err) = @_;
2010-10-28 20:20:57 +04:00
print $OUT "#!/bin/sh\n";
if ($have_sge) {
print $OUT "#\$ -N $name\n";
print $OUT "#\$ -wd $working_dir\n";
2011-06-08 02:23:38 +04:00
if ($jobs != 1) {
print $OUT "#\$ -pe $mpienv $slots\n";
}
2010-10-28 20:20:57 +04:00
print $OUT "#\$ -l h_rt=$hours:00:00\n";
print $OUT "#\$ -l h_vmem=$vmem" . "G" . "\n";
print $OUT "#\$ -o $out\n";
print $OUT "#\$ -e $err\n";
} else {
print $OUT "\nNSLOTS=$jobs\n";
}
print $OUT "\n";
if ($have_sge) {
# some eddie specific stuff
print $OUT ". /etc/profile.d/modules.sh\n";
print $OUT "module load openmpi/ethernet/gcc/latest\n";
print $OUT "export LD_LIBRARY_PATH=/exports/informatics/inf_iccs_smt/shared/boost/lib:\$LD_LIBRARY_PATH\n";
}
}
sub check_exists {
my ($name,$filename) = @_;
die "Error: unable to read $name: \"$filename\"" if ! -r $filename;
}
#
# Used to submit train/test jobs. Return the job id, or 0 on failure
#
sub submit_job_sge {
my($script_file) = @_;
2011-03-29 04:40:21 +04:00
my $qsub_result = `qsub -P $queue $script_file`;
print "SUBMIT CMD: qsub -P $queue $script_file\n";
2010-10-28 20:20:57 +04:00
if ($qsub_result !~ /Your job (\d+)/) {
print "Failed to qsub job: $qsub_result\n";
return 0;
}
my $job_name = basename($script_file);
print "Submitted job: $job_name id: $1 " .
scalar(localtime()) . "\n";
return $1;
}
2011-01-26 20:51:45 +03:00
sub submit_job_sge_extra_memory {
2011-02-24 18:19:18 +03:00
my($script_file,$extra_memory) = @_;
2011-03-29 04:40:21 +04:00
my $qsub_result = `qsub -pe $extra_memory -P $queue $script_file`;
print "SUBMIT CMD: qsub -pe $extra_memory -P $queue $script_file \n";
2011-01-26 20:51:45 +03:00
if ($qsub_result !~ /Your job (\d+)/) {
print "Failed to qsub job: $qsub_result\n";
return 0;
}
my $job_name = basename($script_file);
print "Submitted job: $job_name id: $1 " .
scalar(localtime()) . "\n";
return $1;
}
2010-10-28 20:20:57 +04:00
#
# As above, but no sge version. Returns the pid.
#
sub submit_job_no_sge {
my($script_file,$out,$err) = @_;
my $pid = undef;
if ($pid = fork) {
my $job_name = basename($script_file);
print "Launched : $job_name pid: $pid " . scalar(localtime()) . "\n";
return $pid;
2010-12-01 21:51:19 +03:00
} elsif (defined $pid) {
print "Executing script $script_file, writing to $out and $err.\n";
`cd $working_dir; sh $script_file 1>$out 2> $err`;
2010-10-28 20:20:57 +04:00
exit;
} else {
# Fork failed
return 0;
}
}
sub check_running {
my ($job_id) = @_;
if ($have_sge) {
return `qstat | grep $job_id`;
} else {
return `ps -e | grep $job_id | grep -v defunct`;
}
}