Merge branch 'master' of git://github.com/moses-smt/mosesdecoder

This commit is contained in:
phikoehn 2012-05-02 03:48:13 +01:00
commit f487eaa644
2 changed files with 209 additions and 215 deletions

View File

@ -43,6 +43,8 @@ class Reference {
std::vector<size_t> m_length;
};
// TODO(tetsuok): fix this function and related stuff.
// "average" reference length should not be calculated at sentence-level unlike "closest".
inline int Reference::CalcAverage() const {
int total = 0;
for (size_t i = 0; i < m_length.size(); ++i) {

View File

@ -1,4 +1,4 @@
#!/usr/bin/perl -w
#!/usr/bin/perl -w
# $Id$
# Usage:
# mert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
@ -13,13 +13,13 @@
# Sept 2011 multi-threaded mert (Barry Haddow)
# 3 Aug 2011 Added random directions, historic best, pairwise ranked (PK)
# Jul 2011 simplifications (Ondrej Bojar)
# -- rely on moses' -show-weights instead of parsing moses.ini
# -- rely on moses' -show-weights instead of parsing moses.ini
# ... so moses is also run once *before* mert starts, checking
# the model to some extent
# -- got rid of the 'triples' mess;
# use --range to supply bounds for random starting values:
# --range tm:-3..3 --range lm:-3..3
# 5 Aug 2009 Handling with different reference length policies (shortest, average, closest) for BLEU
# 5 Aug 2009 Handling with different reference length policies (shortest, average, closest) for BLEU
# and case-sensitive/insensitive evaluation (Nicola Bertoldi)
# 5 Jun 2008 Forked previous version to support new mert implementation.
# 13 Feb 2007 Better handling of default values for lambda, now works with multiple
@ -36,8 +36,8 @@
# 29 Jul 2006 run-filter, score-nbest and mert run on the queue (Nicola; Ondrej had to type it in again)
# 28 Jul 2006 attempt at foolproof usage, strong checking of input validity, merged the parallel and nonparallel version (Ondrej Bojar)
# 27 Jul 2006 added the safesystem() function to handle process failures
# 22 Jul 2006 fixed a bug about handling relative path of configuration file (Nicola Bertoldi)
# 21 Jul 2006 adapted for Moses-in-parallel (Nicola Bertoldi)
# 22 Jul 2006 fixed a bug about handling relative path of configuration file (Nicola Bertoldi)
# 21 Jul 2006 adapted for Moses-in-parallel (Nicola Bertoldi)
# 18 Jul 2006 adapted for Moses and cleaned up (PK)
# 21 Jan 2005 unified various versions, thorough cleanup (DWC)
# now indexing accumulated n-best list solely by feature vectors
@ -131,7 +131,7 @@ my $___NOCASE = 0;
my $___NONORM = 0;
# set 0 if input type is text, set 1 if input type is confusion network
my $___INPUTTYPE = 0;
my $___INPUTTYPE = 0;
my $mertdir = undef; # path to new mert directory
@ -144,7 +144,7 @@ my $qsubwrapper = undef;
my $moses_parallel_cmd = undef;
my $old_sge = 0; # assume sge<6.0
my $___CONFIG_ORIG = undef; # pathname to startup ini file before filtering
my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on
my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on
# if undef work on all features
# (others are fixed to the starting values)
my $___RANGES = undef;
@ -223,7 +223,7 @@ Options:
--mosesparallelcmd=STR ... use a different script instead of moses-parallel
--queue-flags=STRING ... anything you with to pass to qsub, eg.
'-l ws06osssmt=true'. The default is: '-hard'
To reset the parameters, please use
To reset the parameters, please use
--queue-flags=' '
(i.e. a space between the quotes).
--decoder-flags=STRING ... extra parameters for the decoder
@ -245,7 +245,7 @@ Options:
--mertdir=STRING ... path to new mert implementation
--mertargs=STRING ... extra args for both extractor and mert
--extractorargs=STRING ... extra args for extractor only
--mertmertargs=STRING ... extra args for mert only
--mertmertargs=STRING ... extra args for mert only
--scorenbestcmd=STRING ... path to score-nbest.py
--old-sge ... passed to parallelizers, assume Grid Engine < 6.0
--inputtype=[0|1|2] ... Handle different input types: (0 for text,
@ -265,7 +265,7 @@ Options:
is identical to:
--range=tm:0..1,-1..1,0..2
but not to:
--range=tm:0..2 --range=tm:0..1,-1..1
--range=tm:0..2 --range=tm:0..1,-1..1
--activate-features=STRING ... comma-separated list of features to optimize,
others are fixed to the starting values
default: optimize all features
@ -315,12 +315,12 @@ if (!defined $mertdir) {
}
my $mert_extract_cmd = "$mertdir/extractor";
my $mert_mert_cmd = "$mertdir/mert";
my $mert_pro_cmd = "$mertdir/pro";
my $mert_mert_cmd = "$mertdir/mert";
my $mert_pro_cmd = "$mertdir/pro";
die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd;
die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd;
die "Not executable: $mert_pro_cmd" if ! -x $mert_pro_cmd;
die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd;
die "Not executable: $mert_pro_cmd" if ! -x $mert_pro_cmd;
my $pro_optimizer = "$mertdir/megam_i686.opt"; # or set to your installation
if (($___PAIRWISE_RANKED_OPTIMIZER || $___PRO_STARTING_POINT) && ! -x $pro_optimizer) {
@ -334,29 +334,29 @@ if (($___PAIRWISE_RANKED_OPTIMIZER || $___PRO_STARTING_POINT) && ! -x $pro_optim
$mertargs = "" if !defined $mertargs;
my $scconfig = undef;
if ($mertargs =~ /\-\-scconfig\s+(.+?)(\s|$)/){
if ($mertargs =~ /\-\-scconfig\s+(.+?)(\s|$)/) {
$scconfig=$1;
$scconfig =~ s/\,/ /g;
$mertargs =~ s/\-\-scconfig\s+(.+?)(\s|$)//;
}
# handling reference length strategy
if (($___CLOSEST + $___AVERAGE + $___SHORTEST) > 1){
if (($___CLOSEST + $___AVERAGE + $___SHORTEST) > 1) {
die "You can specify just ONE reference length strategy (closest or shortest or average) not both\n";
}
if ($___SHORTEST){
if ($___SHORTEST) {
$scconfig .= " reflen:shortest";
}elsif ($___AVERAGE){
} elsif ($___AVERAGE) {
$scconfig .= " reflen:average";
}elsif ($___CLOSEST){
} elsif ($___CLOSEST) {
$scconfig .= " reflen:closest";
}
# handling case-insensitive flag
if ($___NOCASE) {
$scconfig .= " case:false";
}else{
} else {
$scconfig .= " case:true";
}
$scconfig =~ s/^\s+//;
@ -367,6 +367,8 @@ $scconfig = "--scconfig $scconfig" if ($scconfig);
my $mert_extract_args=$mertargs;
$mert_extract_args .=" $scconfig";
$extractorargs = "" unless $extractorargs;
$mert_extract_args .=" $extractorargs";
$mertmertargs = "" if !defined $mertmertargs;
@ -379,8 +381,8 @@ if ($___ACTIVATE_FEATURES){ $mert_mert_args .=" -o \"$___ACTIVATE_FEATURES\""; }
my ($just_cmd_filtercmd,$x) = split(/ /,$filtercmd);
die "Not executable: $just_cmd_filtercmd" if ! -x $just_cmd_filtercmd;
die "Not executable: $moses_parallel_cmd" if defined $___JOBS && ! -x $moses_parallel_cmd;
die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
die "Not executable: $___DECODER" if ! -x $___DECODER;
die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
die "Not executable: $___DECODER" if ! -x $___DECODER;
my $input_abs = ensure_full_path($___DEV_F);
die "File not found: $___DEV_F (interpreted as $input_abs)."
@ -400,8 +402,7 @@ my $ref_abs = ensure_full_path($___DEV_E);
my @references;
if (-e $ref_abs) {
push @references, $ref_abs;
}
else {
} else {
# if there are multiple reference files, get a full list of them
my $part = 0;
if (! -e $ref_abs."0" && -e $ref_abs.".ref0") {
@ -415,18 +416,17 @@ else {
}
my $config_abs = ensure_full_path($___CONFIG);
die "File not found: $___CONFIG (interpreted as $config_abs)."
if ! -e $config_abs;
die "File not found: $___CONFIG (interpreted as $config_abs)." if ! -e $config_abs;
$___CONFIG = $config_abs;
# moses should use our config
if ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /
|| $___DECODER_FLAGS =~ /(^|\s)-(ttable-file|t) /
|| $___DECODER_FLAGS =~ /(^|\s)-(distortion-file) /
|| $___DECODER_FLAGS =~ /(^|\s)-(generation-file) /
|| $___DECODER_FLAGS =~ /(^|\s)-(lmodel-file) /
|| $___DECODER_FLAGS =~ /(^|\s)-(global-lexical-file) /
) {
|| $___DECODER_FLAGS =~ /(^|\s)-(ttable-file|t) /
|| $___DECODER_FLAGS =~ /(^|\s)-(distortion-file) /
|| $___DECODER_FLAGS =~ /(^|\s)-(generation-file) /
|| $___DECODER_FLAGS =~ /(^|\s)-(lmodel-file) /
|| $___DECODER_FLAGS =~ /(^|\s)-(global-lexical-file) /
) {
die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file or -lmodel-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
}
@ -435,14 +435,12 @@ if ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /
my $need_to_normalize = 1;
#store current directory and create the working directory (if needed)
my $cwd = `pawd 2>/dev/null`;
if(!$cwd){$cwd = `pwd`;}
chomp($cwd);
my $cwd = Cwd::getcwd();
mkpath($___WORKING_DIR);
{
# open local scope
{
#chdir to the working directory
chdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";
@ -452,6 +450,7 @@ my $mert_outfile = "mert.out";
my $mert_logfile = "mert.log";
my $weights_in_file = "init.opt";
my $weights_out_file = "weights.txt";
my $finished_step_file = "finished_step.txt";
# set start run
my $start_run = 1;
@ -467,8 +466,7 @@ if ($___FILTER_PHRASE_TABLE) {
my $outdir = "filtered";
if (-e "$outdir/moses.ini") {
print STDERR "Assuming the tables are already filtered, reusing $outdir/moses.ini\n";
}
else {
} else {
# filter the phrase tables with respect to input, use --decoder-flags
print STDERR "filtering the phrase tables... ".`date`;
my $___FILTER_F = $___DEV_F;
@ -481,8 +479,7 @@ if ($___FILTER_PHRASE_TABLE) {
$___CONFIG_ORIG = $___CONFIG;
# the decoder should now use the filtered model
$___CONFIG = "$outdir/moses.ini";
}
else{
} else{
# do not filter phrase tables (useful if binary phrase tables are available)
# use the original configuration file
$___CONFIG_ORIG = $___CONFIG;
@ -528,10 +525,10 @@ for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
if ($continue) {
# getting the last finished step
print STDERR "Trying to continue an interrupted optimization.\n";
open IN, "finished_step.txt" or die "Failed to find the step number, failed to read finished_step.txt";
my $step = <IN>;
open my $fh, '<', $finished_step_file or die "$finished_step_file: $!";
my $step = <$fh>;
chomp $step;
close IN;
close $fh;
print STDERR "Last finished step is $step\n";
@ -539,42 +536,39 @@ if ($continue) {
my $firststep;
if ($prev_aggregate_nbl_size==-1){
$firststep=1;
}
else{
} else {
$firststep=$step-$prev_aggregate_nbl_size+1;
$firststep=($firststep>0)?$firststep:1;
}
#checking if all needed data are available
#checking if all needed data are available
if ($firststep<=$step){
print STDERR "First previous needed data index is $firststep\n";
print STDERR "Checking whether all needed data (from step $firststep to step $step) are available\n";
for (my $prevstep=$firststep; $prevstep<=$step;$prevstep++){
print STDERR "Checking whether data of step $prevstep are available\n";
for (my $prevstep=$firststep; $prevstep<=$step;$prevstep++) {
print STDERR "Checking whether data of step $prevstep are available\n";
if (! -e "run$prevstep.features.dat"){
die "Can't start from step $step, because run$prevstep.features.dat was not found!";
}else{
if (defined $prev_feature_file){
$prev_feature_file = "${prev_feature_file},run$prevstep.features.dat";
}
else{
$prev_feature_file = "run$prevstep.features.dat";
}
die "Can't start from step $step, because run$prevstep.features.dat was not found!";
} else {
if (defined $prev_feature_file){
$prev_feature_file = "${prev_feature_file},run$prevstep.features.dat";
} else {
$prev_feature_file = "run$prevstep.features.dat";
}
}
if (! -e "run$prevstep.scores.dat"){
die "Can't start from step $step, because run$prevstep.scores.dat was not found!";
}else{
if (defined $prev_score_file){
$prev_score_file = "${prev_score_file},run$prevstep.scores.dat";
}
else{
$prev_score_file = "run$prevstep.scores.dat";
}
die "Can't start from step $step, because run$prevstep.scores.dat was not found!";
} else {
if (defined $prev_score_file){
$prev_score_file = "${prev_score_file},run$prevstep.scores.dat";
} else {
$prev_score_file = "run$prevstep.scores.dat";
}
}
if (! -e "run$prevstep.${weights_in_file}"){
die "Can't start from step $step, because run$prevstep.${weights_in_file} was not found!";
}else{
die "Can't start from step $step, because run$prevstep.${weights_in_file} was not found!";
} else{
if (defined $prev_init_file){
$prev_init_file = "${prev_init_file},run$prevstep.${weights_in_file}";
}
@ -601,7 +595,7 @@ if ($continue) {
if !defined $bestpoint || !defined $devbleu;
print "($step) BEST at $step $bestpoint => $devbleu at ".`date`;
my @newweights = split /\s+/, $bestpoint;
# Sanity check: order of lambdas must match
sanity_check_order_of_lambdas($featlist,
"gunzip -c < run$step.best$___N_BEST_LIST_SIZE.out.gz |");
@ -618,7 +612,7 @@ if ($continue) {
###### MERT MAIN LOOP
my $run=$start_run-1;
my $run = $start_run - 1;
my $oldallsorted = undef;
my $allsorted = undef;
@ -627,7 +621,7 @@ my $nbest_file=undef;
my $lsamp_file=undef; #Lattice samples
my $orig_nbest_file=undef; # replaced if lattice sampling
while(1) {
while (1) {
$run++;
if ($maximum_iterations && $run > $maximum_iterations) {
print "Maximum number of iterations exceeded - stopping\n";
@ -641,7 +635,6 @@ while(1) {
# In case something dies later, we might wish to have a copy
create_config($___CONFIG, "./run$run.moses.ini", $featlist, $run, (defined$devbleu?$devbleu:"--not-estimated--"),$sparse_weights_file);
# skip running the decoder if the user wanted
if (!$skip_decoder) {
print "($run) run decoder to produce n-best lists\n";
@ -651,7 +644,7 @@ while(1) {
my $combined_file = "$nbest_file.comb";
safesystem("sort -k1,1n $nbest_file $lsamp_file > $combined_file") or
die("failed to merge nbest and lattice samples");
safesystem("gzip -f $nbest_file; gzip -f $lsamp_file") or
safesystem("gzip -f $nbest_file; gzip -f $lsamp_file") or
die "Failed to gzip nbests and lattice samples";
$orig_nbest_file = "$nbest_file.gz";
$orig_nbest_file = "$nbest_file.gz";
@ -661,8 +654,7 @@ while(1) {
}
safesystem("gzip -f $nbest_file") or die "Failed to gzip run*out";
$nbest_file = $nbest_file.".gz";
}
else {
} else {
$nbest_file="run$run.best$___N_BEST_LIST_SIZE.out.gz";
print "skipped decoder run $run\n";
$skip_decoder = 0;
@ -678,7 +670,7 @@ while(1) {
my $score_file = "run$run.${base_score_file}";
my $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file --ffile $feature_file -r ".join(",", @references)." -n $nbest_file";
$cmd = create_extractor_script($cmd, $___WORKING_DIR);
$cmd = &create_extractor_script($cmd, $___WORKING_DIR);
&submit_or_exec($cmd,"extract.out","extract.err");
@ -688,15 +680,14 @@ while(1) {
my @MAX = @{$featlist->{"maxs"}};
my @CURR = @{$featlist->{"values"}};
my @NAME = @{$featlist->{"names"}};
open(OUT,"> $weights_in_file")
or die "Can't write $weights_in_file (WD now $___WORKING_DIR)";
print OUT join(" ", @CURR)."\n";
print OUT join(" ", @MIN)."\n"; # this is where we could pass MINS
print OUT join(" ", @MAX)."\n"; # this is where we could pass MAXS
close(OUT);
open my $out, '>', $weights_in_file or die "Can't write $weights_in_file (WD now $___WORKING_DIR)";
print $out join(" ", @CURR)."\n";
print $out join(" ", @MIN)."\n"; # this is where we could pass MINS
print $out join(" ", @MAX)."\n"; # this is where we could pass MAXS
close $out;
# print join(" ", @NAME)."\n";
# make a backup copy labelled with this run number
safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
@ -704,7 +695,7 @@ while(1) {
# run mert
$cmd = "$mert_mert_cmd -d $DIM $mert_mert_args";
my $mert_settings = " -n $___RANDOM_RESTARTS";
my $seed_settings = "";
if ($___PREDICTABLE_SEEDS) {
@ -727,23 +718,23 @@ while(1) {
my $ffiles = "";
my $scfiles = "";
if (defined $prev_feature_file) {
$ffiles = "$prev_feature_file,$feature_file";
}
else{
} else{
$ffiles = "$feature_file";
}
if (defined $prev_score_file) {
$scfiles = "$prev_score_file,$score_file";
}
else{
} else{
$scfiles = "$score_file";
}
my $file_settings = " --ffile $ffiles --scfile $scfiles";
my $pro_file_settings = "--ffile " . join( " --ffile ", split(/,/, $ffiles)) .
" --scfile " . join( " --scfile ", split(/,/, $scfiles));
" --scfile " . join( " --scfile ", split(/,/, $scfiles));
if ($___START_WITH_HISTORIC_BESTS && defined $prev_init_file) {
$file_settings .= " --ifile $prev_init_file,run$run.$weights_in_file";
}
@ -766,9 +757,10 @@ while(1) {
# ... get results ...
my %dummy;
($bestpoint,$devbleu) = &get_weights_from_mert("run$run.pro.out","run$run.pro.err",scalar @{$featlist->{"names"}},\%dummy);
open(PRO_START,">run$run.init.pro");
print PRO_START $bestpoint."\n";
close(PRO_START);
open my $pro_fh, '>', "run$run.init.pro" or die "run$run.init.pro: $!";
print $pro_fh $bestpoint."\n";
close $pro_fh;
# ... and run mert
$cmd =~ s/(--ifile \S+)/$1,run$run.init.pro/;
&submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
@ -806,12 +798,13 @@ while(1) {
if ($___HISTORIC_INTERPOLATION>0 && $run>3) {
my %historic_sparse_weights;
if (-e "run$run.sparse-weights") {
open(SPARSE,"run$run.sparse-weights");
while(<SPARSE>) {
open my $sparse_fh, '<', "run$run.sparse-weights" or die "run$run.sparse-weights: $!";
while (<$sparse_fh>) {
chop;
my ($feature,$weight) = split;
my ($feature, $weight) = split;
$historic_sparse_weights{$feature} = $weight;
}
close $sparse_fh;
}
my $prev = $run-1;
my @historic_weights = split /\s+/, `cat run$prev.$weights_out_file`;
@ -828,21 +821,21 @@ while(1) {
#print STDERR "sparse_weights{$_} += (1-$___HISTORIC_INTERPOLATION) * $historic_sparse_weights{$_} -> $sparse_weights{$_}\n";
}
}
if ($___HISTORIC_INTERPOLATION>0) {
open(WEIGHTS,">run$run.$weights_out_file");
print WEIGHTS join(" ",@newweights);
close(WEIGHTS);
if ($___HISTORIC_INTERPOLATION > 0) {
open my $weights_fh, '>', "run$run.$weights_out_file" or die "run$run.$weights_out_file: $!";
print $weights_fh join(" ", @newweights);
close $weights_fh;
}
$featlist->{"values"} = \@newweights;
if (scalar keys %sparse_weights) {
$sparse_weights_file = "run".($run+1).".sparse-weights";
open(SPARSE,">".$sparse_weights_file);
open my $sparse_fh, '>', $sparse_weights_file or die "$sparse_weights_file: $!";
foreach my $feature (keys %sparse_weights) {
print SPARSE "$feature $sparse_weights{$feature}\n";
print $sparse_fh "$feature $sparse_weights{$feature}\n";
}
close(SPARSE);
close $sparse_fh;
}
## additional stopping criterion: weights have not changed
@ -856,9 +849,7 @@ while(1) {
}
}
open F, "> finished_step.txt" or die "Can't mark finished step";
print F $run."\n";
close F;
&save_finished_step($finished_step_file, $run);
if ($shouldstop) {
print STDERR "None of the weights changed more than $minimum_required_change_in_weights. Stopping.\n";
@ -877,7 +868,7 @@ while(1) {
$prev_feature_file = undef;
$prev_score_file = undef;
$prev_init_file = undef;
for (my $i=$firstrun;$i<=$run;$i++){
for (my $i=$firstrun;$i<=$run;$i++){
if (defined $prev_feature_file){
$prev_feature_file = "${prev_feature_file},run${i}.${base_feature_file}";
}
@ -903,7 +894,9 @@ while(1) {
}
print "Training finished at ".`date`;
if (defined $allsorted){ safesystem ("\\rm -f $allsorted") or die; };
if (defined $allsorted) {
safesystem ("\\rm -f $allsorted") or die;
}
safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
safesystem("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
@ -911,10 +904,7 @@ safesystem("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
create_config($___CONFIG_ORIG, "./moses.ini", $featlist, $run, $devbleu, $sparse_weights_file);
# just to be sure that we have the really last finished step marked
open F, "> finished_step.txt" or die "Can't mark finished step";
print F $run."\n";
close F;
&save_finished_step($finished_step_file, $run);
#chdir back to the original directory # useless, just to remind we were not there
chdir($cwd);
@ -922,41 +912,37 @@ chdir($cwd);
} # end of local scope
# get_weights_from_mert($outfile, $logfile, $weight_count, $sparse_weights)
#
# Extracts the optimizer's result and returns it as ($bestpoint, $devbleu),
# where $bestpoint is a space-separated string of weights.
#
#   $outfile        - optimizer output file; read only in the PRO branch
#   $logfile        - optimizer log file; read only in the non-PRO branch, and
#                     also matched against /pro/ to select the branch
#   $weight_count   - number of dense weights; @WEIGHT is pre-filled with this
#                     many zeros
#   $sparse_weights - hash reference that is filled in place with the sparse
#                     feature weights (PRO branch only)
#
# Dies if the file needed by the selected branch cannot be opened.
#
# NOTE(review): this span comes from a rendered diff, so the pre-change and
# post-change form of each edited line appear back to back (e.g. the two
# 'my (...) = @_;' lines, and the IN / $fh file handles).
sub get_weights_from_mert {
my ($outfile,$logfile,$weight_count,$sparse_weights) = @_;
my ($bestpoint,$devbleu);
my ($outfile, $logfile, $weight_count, $sparse_weights) = @_;
my ($bestpoint, $devbleu);
# PRO branch: taken when the pairwise ranked optimizer is enabled, or when a
# PRO starting point was requested and the log file name contains "pro".
if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/)) {
open(IN,$outfile) or die "Can't open $outfile";
my (@WEIGHT,$sum);
for(my $i=0;$i<$weight_count;$i++) { push @WEIGHT, 0; }
while(<IN>) {
# regular features
if (/^F(\d+) ([\-\.\de]+)/) {
open my $fh, '<', $outfile or die "Can't open $outfile: $!";
my (@WEIGHT, $sum);
for(my $i = 0; $i < $weight_count; $i++) { push @WEIGHT, 0; }
while (<$fh>) {
if (/^F(\d+) ([\-\.\de]+)/) { # regular features
# "F<index> <value>": store the value at that index and accumulate its
# absolute value, which is later used as the normalization constant.
$WEIGHT[$1] = $2;
$sum += abs($2);
}
# sparse features
elsif(/^(.+_.+) ([\-\.\de]+)/) {
} elsif (/^(.+_.+) ([\-\.\de]+)/) { # sparse features
# A name containing an underscore, followed by a value; stored in the
# caller's hash. Sparse values are not added to $sum.
$$sparse_weights{$1} = $2;
}
}
# No score is parsed in this branch, so a placeholder string is returned.
$devbleu = "unknown";
# Normalize dense and sparse weights by the sum of absolute dense weights.
# $sum stays undefined if no "F<index>" line was seen.
foreach (@WEIGHT) { $_ /= $sum; }
foreach (keys %{$sparse_weights}) { $$sparse_weights{$_} /= $sum; }
$bestpoint = join(" ",@WEIGHT);
close IN;
}
else {
open(IN,$logfile) or die "Can't open $logfile";
while (<IN>) {
$bestpoint = join(" ", @WEIGHT);
close $fh;
} else {
# Non-PRO branch: scan the log for the first "Best point: <weights> => <score>"
# line and stop there. If no line matches, both return values stay undefined.
open my $fh, '<', $logfile or die "Can't open $logfile: $!";
while (<$fh>) {
if (/Best point:\s*([\s\d\.\-e]+?)\s*=> ([\-\d\.]+)/) {
$bestpoint = $1;
$devbleu = $2;
last;
}
}
close IN;
close $fh;
}
return ($bestpoint,$devbleu);
return ($bestpoint, $devbleu);
}
sub run_decoder {
@ -968,7 +954,7 @@ sub run_decoder {
my $lsamp_filename_template = "run%d.lsamp$___LATTICE_SAMPLES.out";
$lsamp_filename = sprintf($lsamp_filename_template, $run);
}
# user-supplied parameters
print "params = $___DECODER_FLAGS\n";
@ -1064,7 +1050,7 @@ sub sanity_check_order_of_lambdas {
die "Mismatched lambdas. Decoder returned @got, we expected @expected_lambdas"
if "@got" ne "@expected_lambdas";
}
sub get_featlist_from_moses {
# run moses with the given config file and return the list of features and
@ -1082,14 +1068,14 @@ sub get_featlist_from_moses {
# read feature list
my @names = ();
my @startvalues = ();
open(INI,$featlistfn) or die "Can't read $featlistfn";
open my $fh, '<', $featlistfn or die "Can't read $featlistfn : $!";
my $nr = 0;
my @errs = ();
while (<INI>) {
while (<$fh>) {
$nr++;
chomp;
/^(.+) (\S+) (\S+)$/ || die("invalid feature: $_");
my ($longname, $feature, $value) = ($1,$2,$3);
/^(.+) (\S+) (\S+)$/ || die "invalid feature: $_";
my ($longname, $feature, $value) = ($1, $2, $3);
next if $value eq "sparse";
push @errs, "$featlistfn:$nr:Bad initial value of $feature: $value\n"
if $value !~ /^[+-]?[0-9.e]+$/;
@ -1098,7 +1084,8 @@ sub get_featlist_from_moses {
push @names, $feature;
push @startvalues, $value;
}
close INI;
close $fh;
if (scalar @errs) {
print STDERR join("", @errs);
exit 1;
@ -1112,9 +1099,9 @@ sub get_order_of_scores_from_nbestlist {
# return the score labels in order
my $fname_or_source = shift;
# print STDERR "Peeking at the beginning of nbestlist to get order of scores: $fname_or_source\n";
open IN, $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source'";
my $line = <IN>;
close IN;
open my $fh, '<', $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source': $!";
my $line = <$fh>;
close $fh;
die "Line empty in nbestlist '$fname_or_source'" if !defined $line;
my ($sent, $hypo, $scores, $total) = split /\|\|\|/, $line;
$scores =~ s/^\s*|\s*$//g;
@ -1155,22 +1142,22 @@ sub create_config {
my %P; # the hash of all parameters we wish to override
# first convert the command line parameters to the hash
{ # ensure local scope of vars
my $parameter=undef;
print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
# ensure local scope of vars
{
my $parameter = undef;
print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
$___DECODER_FLAGS =~ s/^\s*|\s*$//;
$___DECODER_FLAGS =~ s/\s+/ /;
foreach (split(/ /,$___DECODER_FLAGS)) {
if (/^\-([^\d].*)$/) {
$parameter = $1;
$parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
}
else {
foreach (split(/ /,$___DECODER_FLAGS)) {
if (/^\-([^\d].*)$/) {
$parameter = $1;
$parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
} else {
die "Found value with no -paramname before it: $_"
if !defined $parameter;
push @{$P{$parameter}},$_;
}
}
push @{$P{$parameter}},$_;
}
}
}
# First delete all weights params from the input, we're overwriting them.
@ -1195,64 +1182,66 @@ sub create_config {
}
# create new moses.ini decoder config file by cloning and overriding the original one
open(INI,$infn) or die "Can't read $infn";
delete($P{"config"}); # never output
open my $ini_fh, '<', $infn or die "Can't read $infn: $!";
delete($P{"config"}); # never output
print "Saving new config to: $outfn\n";
open(OUT,"> $outfn") or die "Can't write $outfn";
print OUT "# MERT optimized configuration\n";
print OUT "# decoder $___DECODER\n";
print OUT "# BLEU $bleu_achieved on dev $___DEV_F\n";
print OUT "# We were before running iteration $iteration\n";
print OUT "# finished ".`date`;
my $line = <INI>;
open my $out, '>', $outfn or die "Can't write $outfn: $!";
print $out "# MERT optimized configuration\n";
print $out "# decoder $___DECODER\n";
print $out "# BLEU $bleu_achieved on dev $___DEV_F\n";
print $out "# We were before running iteration $iteration\n";
print $out "# finished ".`date`;
my $line = <$ini_fh>;
while(1) {
last unless $line;
last unless $line;
# skip until hit [parameter]
if ($line !~ /^\[(.+)\]\s*$/) {
$line = <INI>;
print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
next;
}
# skip until hit [parameter]
if ($line !~ /^\[(.+)\]\s*$/) {
$line = <$ini_fh>;
print $out $line if $line =~ /^\#/ || $line =~ /^\s+$/;
next;
}
# parameter name
my $parameter = $1;
$parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
print OUT "[$parameter]\n";
# parameter name
my $parameter = $1;
$parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
print $out "[$parameter]\n";
# change parameter, if new values
if (defined($P{$parameter})) {
# write new values
foreach (@{$P{$parameter}}) {
print OUT $_."\n";
}
delete($P{$parameter});
# skip until new parameter, only write comments
while($line = <INI>) {
print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
last if $line =~ /^\[/;
last unless $line;
}
next;
}
# unchanged parameter, write old
while($line = <INI>) {
last if $line =~ /^\[/;
print OUT $line;
}
# change parameter, if new values
if (defined($P{$parameter})) {
# write new values
foreach (@{$P{$parameter}}) {
print $out $_."\n";
}
delete($P{$parameter});
# skip until new parameter, only write comments
while ($line = <$ini_fh>) {
print $out $line if $line =~ /^\#/ || $line =~ /^\s+$/;
last if $line =~ /^\[/;
last unless $line;
}
next;
}
# unchanged parameter, write old
while ($line = <$ini_fh>) {
last if $line =~ /^\[/;
print $out $line;
}
}
# write all additional parameters
foreach my $parameter (keys %P) {
print OUT "\n[$parameter]\n";
foreach (@{$P{$parameter}}) {
print OUT $_."\n";
}
print $out "\n[$parameter]\n";
foreach (@{$P{$parameter}}) {
print $out $_."\n";
}
}
close(INI);
close(OUT);
close $ini_fh;
close $out;
print STDERR "Saved: $outfn\n";
}
@ -1262,25 +1251,23 @@ sub safesystem {
if ($? == -1) {
print STDERR "Failed to execute: @_\n $!\n";
exit(1);
}
elsif ($? & 127) {
} elsif ($? & 127) {
printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
($? & 127), ($? & 128) ? 'with' : 'without';
exit(1);
}
else {
} else {
my $exitcode = $? >> 8;
print STDERR "Exit code: $exitcode\n" if $exitcode;
return ! $exitcode;
}
}
sub ensure_full_path {
my $PATH = shift;
$PATH =~ s/\/nfsmnt//;
$PATH =~ s/\/nfsmnt//;
return $PATH if $PATH =~ /^\//;
my $dir = `pawd 2>/dev/null`;
if(!$dir){$dir = `pwd`;}
chomp($dir);
my $dir = Cwd::getcwd();
$PATH = $dir."/".$PATH;
$PATH =~ s/[\r\n]//g;
$PATH =~ s/\/\.\//\//g;
@ -1292,24 +1279,22 @@ $PATH =~ s/\/nfsmnt//;
}
$PATH =~ s/\/[^\/]+\/\.\.$//;
$PATH =~ s/\/+$//;
$PATH =~ s/\/nfsmnt//;
$PATH =~ s/\/nfsmnt//;
return $PATH;
}
# submit_or_exec($cmd, $stdout, $stderr)
#
# Runs the shell command $cmd, sending its standard output and standard error
# to the files named by $stdout and $stderr. The command is echoed to STDERR
# first. Dies if safesystem() reports failure.
#
# NOTE(review): this span comes from a rendered diff, so the pre-change and
# post-change form of each edited line appear back to back.
sub submit_or_exec {
my ($cmd,$stdout,$stderr) = @_;
my ($cmd, $stdout, $stderr) = @_;
print STDERR "exec: $cmd\n";
# With a positive job count the command is handed to $qsubwrapper, which
# receives the command, queue flags and output file names as options.
if (defined $___JOBS && $___JOBS > 0) {
safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=$stdout -stderr=$stderr" )
or die "ERROR: Failed to submit '$cmd' (via $qsubwrapper)";
}
else {
} else {
# Otherwise the command is run directly, with shell redirection.
safesystem("$cmd > $stdout 2> $stderr") or die "ERROR: Failed to run '$cmd'.";
}
}
sub create_extractor_script()
{
sub create_extractor_script() {
my ($cmd, $outdir) = @_;
my $script_path = File::Spec->catfile($outdir, "extractor.sh");
@ -1318,9 +1303,16 @@ sub create_extractor_script()
print $out "#!/bin/bash\n";
print $out "cd $outdir\n";
print $out "$cmd\n";
close($out);
close $out;
`chmod +x $script_path`;
return $script_path;
}
# save_finished_step($filename, $step)
#
# Records $step (the number of the last completed iteration) in $filename,
# replacing any previous contents with the single line "<step>\n".
#
# Dies if the file cannot be opened, written or closed. The marker is read
# back when an interrupted optimization is continued, so a failed write must
# not pass silently and leave a stale or empty file behind.
sub save_finished_step {
  my ($filename, $step) = @_;
  open my $fh, '>', $filename or die "$filename: $!";
  print {$fh} $step . "\n" or die "Can't write to $filename: $!";
  # Output is buffered, so errors such as a full disk usually only show up
  # when the handle is flushed by close.
  close $fh or die "Can't close $filename: $!";
}