- change from pawd to pwd, because pawd is not available on some Linux distributions

- moses-parallel.pl: new way of passing parameters to the decoder, via the -decoder-parameters parameter
- moses-parallel.pl: possibility of saving decoder logs (parameter -logfile)



git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1099 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
nicolabertoldi 2006-12-29 10:48:41 +00:00
parent ddd2fdeb20
commit 92eadd7c0c
2 changed files with 60 additions and 23 deletions

View File

@ -23,9 +23,7 @@ use strict;
#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's') #NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G -hard"; my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G -hard";
my $workingdir=`pawd`; my $workingdir = `pwd`; chomp $workingdir;
chomp($workingdir);
my $tmpdir="$workingdir/tmp$$"; my $tmpdir="$workingdir/tmp$$";
my $splitpfx="split$$"; my $splitpfx="split$$";
@ -52,6 +50,8 @@ my $nbestfile=undef;
my $orinbestfile=undef; my $orinbestfile=undef;
my $nbest=undef; my $nbest=undef;
my $nbestflag=0; my $nbestflag=0;
my $orilogfile="";
my $logflag="";
my $qsubname="MOSES"; my $qsubname="MOSES";
my $inputtype=0; my $inputtype=0;
my $old_sge = 0; # assume old Sun Grid Engine (<6.0) where qsub does not my $old_sge = 0; # assume old Sun Grid Engine (<6.0) where qsub does not
@ -66,21 +66,24 @@ sub init(){
'debug'=>\$dbg, 'debug'=>\$dbg,
'jobs=i'=>\$jobs, 'jobs=i'=>\$jobs,
'decoder=s'=> \$mosescmd, 'decoder=s'=> \$mosescmd,
'decoder-parameters=s'=> \$mosesparameters,
'logfile=s'=> \$orilogfile,
'i|inputfile|input-file=s'=> \$orifile, 'i|inputfile|input-file=s'=> \$orifile,
'n-best-file=s'=> \$orinbestfile, 'n-best-file=s'=> \$orinbestfile,
'n-best-size=i'=> \$nbest, 'n-best-size=i'=> \$nbest,
'qsub-prefix=s'=> \$qsubname, 'qsub-prefix=s'=> \$qsubname,
'queue-parameters=s'=> \$queueparameters, 'queue-parameters=s'=> \$queueparameters,
'inputtype=i'=> \$inputtype, 'inputtype=i'=> \$inputtype,
'config=s'=>\$cfgfile, 'config=s'=>\$cfgfile,
'old-sge' => \$old_sge, 'old-sge' => \$old_sge,
) or exit(1); ) or exit(1);
chomp($nbestfile=`basename $orinbestfile`) if defined $orinbestfile; chomp($nbestfile=`basename $orinbestfile`) if defined $orinbestfile;
chomp($testfile=`basename $orifile`) if defined $orifile; chomp($testfile=`basename $orifile`) if defined $orifile;
$mosesparameters="@ARGV -config $cfgfile -inputtype $inputtype"; $mosesparameters.="@ARGV -config $cfgfile -inputtype $inputtype";
getNbestParameters(); getNbestParameters();
getLogParameters();
} }
@ -98,7 +101,9 @@ sub version(){
# print STDERR "version 1.8 (31-07-2006)\n"; # print STDERR "version 1.8 (31-07-2006)\n";
# print STDERR "version 1.9 (01-08-2006)\n"; # print STDERR "version 1.9 (01-08-2006)\n";
# print STDERR "version 1.10 (02-08-2006)\n"; # print STDERR "version 1.10 (02-08-2006)\n";
print STDERR "version 1.11 (10-10-2006)\n"; # print STDERR "version 1.11 (10-10-2006)\n";
# print STDERR "version 1.12 (27-12-2006)\n";
print STDERR "version 1.13 (29-12-2006)\n";
exit(1); exit(1);
} }
@ -111,21 +116,25 @@ sub usage(){
print STDERR "* -i|inputfile|input-file <file> the input text to translate\n"; print STDERR "* -i|inputfile|input-file <file> the input text to translate\n";
print STDERR "* -jobs <N> number of required jobs\n"; print STDERR "* -jobs <N> number of required jobs\n";
print STDERR " -qsub-prefix <string> name for sumbitte jobs\n"; print STDERR " -qsub-prefix <string> name for sumbitte jobs\n";
print STDERR " -queue-parameters <string> specific requirements for queue\n"; print STDERR " -queue-parameters <string> specific requirements for queue\n";
print STDERR " -old-sge Assume Sun Grid Engine < 6.0\n"; print STDERR " -old-sge Assume Sun Grid Engine < 6.0\n";
print STDERR " -debug debug\n"; print STDERR " -debug debug\n";
print STDERR " -version print version of the script\n"; print STDERR " -version print version of the script\n";
print STDERR " -help this help\n"; print STDERR " -help this help\n";
print STDERR "Moses options:\n"; print STDERR "Moses options:\n";
print STDERR " -inputtype <0|1> 0 for text, 1 for confusion networks\n"; print STDERR " -inputtype <0|1> 0 for text, 1 for confusion networks\n";
print STDERR "* -config <cfgfile> configuration file\n"; print STDERR "* -config <cfgfile> configuration file\n";
print STDERR " -decoder-parameters <string> specific parameters for the decoder\n";
print STDERR "All other options are passed to Moses\n"; print STDERR "All other options are passed to Moses\n";
exit(1); print STDERR " (This way to pass parameters is maintained for back compatibility\n";
print STDERR " but preferably use -decoder-parameters)\n";
exit(1);
} }
#printparameters #printparameters
sub print_parameters(){ sub print_parameters(){
print STDERR "Inputfile: $orifile\n"; print STDERR "Inputfile: $orifile\n";
print STDERR "Logfile: $orilogfile\n";
print STDERR "Configuration file: $cfgfile\n"; print STDERR "Configuration file: $cfgfile\n";
print STDERR "Decoder in use: $mosescmd\n"; print STDERR "Decoder in use: $mosescmd\n";
if ($nbestflag) { if ($nbestflag) {
@ -134,13 +143,18 @@ sub print_parameters(){
} }
print STDERR "Number of jobs:$jobs\n"; print STDERR "Number of jobs:$jobs\n";
print STDERR "Qsub name: $qsubname\n"; print STDERR "Qsub name: $qsubname\n";
print STDERR "Queue parameters: $queueparameters\n"; print STDERR "Queue parameters: $queueparameters\n";
print STDERR "Inputtype: text\n" if $inputtype == 0; print STDERR "Inputtype: text\n" if $inputtype == 0;
print STDERR "Inputtype: confusion network\n" if $inputtype == 1; print STDERR "Inputtype: confusion network\n" if $inputtype == 1;
print STDERR "parameters directly passed to Moses: $mosesparameters\n"; print STDERR "parameters directly passed to Moses: $mosesparameters\n";
} }
# Enable log handling when a log file was requested:
# sets $logflag to 1 if $orilogfile (filled from the -logfile option)
# holds a true value, and leaves $logflag untouched otherwise.
sub getLogParameters(){
  if ($orilogfile) {
    $logflag = 1;
  }
}
#get parameters for nbest computation from configuration file #get parameters for nbest computation from configuration file
sub getNbestParameters(){ sub getNbestParameters(){
if ($orinbestfile) { $nbestflag=1; } if ($orinbestfile) { $nbestflag=1; }
@ -169,25 +183,25 @@ usage() if $help;
if (!defined $orifile || !defined $mosescmd || ! defined $cfgfile) { if (!defined $orifile || !defined $mosescmd || ! defined $cfgfile) {
print STDERR "Please specify -input-file, -decoder and -config\n"; print STDERR "Please specify -input-file, -decoder and -config\n";
exit 1; usage();
} }
#checking if inputfile exists #checking if inputfile exists
if (! -e ${orifile} ){ if (! -e ${orifile} ){
print STDERR "Inputfile ($orifile) does not exists\n"; print STDERR "Inputfile ($orifile) does not exists\n";
exit 1; usage();
} }
#checking if decoder exists #checking if decoder exists
if (! -e $mosescmd) { if (! -e $mosescmd) {
print STDERR "Decoder ($mosescmd) does not exists\n"; print STDERR "Decoder ($mosescmd) does not exists\n";
exit 1; usage();
} }
#checking if configfile exists #checking if configfile exists
if (! -e $cfgfile) { if (! -e $cfgfile) {
print STDERR "Configuration file ($cfgfile) does not exists\n"; print STDERR "Configuration file ($cfgfile) does not exists\n";
exit 1; usage();
} }
@ -269,7 +283,10 @@ my @sgepids =();
my $failure=0; my $failure=0;
foreach my $idx (@idxlist){ foreach my $idx (@idxlist){
$cmd="qsub $queueparameters -j y -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash >& ${jobscript}${idx}.log"; print STDERR "qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash\n" if $dbg;
$cmd="qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash >& ${jobscript}${idx}.log";
safesystem($cmd) or die; safesystem($cmd) or die;
my ($res,$id); my ($res,$id);
@ -291,7 +308,7 @@ if ($old_sge) {
# we need to implement our own waiting script # we need to implement our own waiting script
safesystem("echo 'date' > sync_workaround_script.sh") or kill_all_and_quit(); safesystem("echo 'date' > sync_workaround_script.sh") or kill_all_and_quit();
my $pwd = `pawd`; chomp $pwd; my $pwd = `pwd`; chomp $pwd;
my $checkpointfile = "sync_workaround_checkpoint"; my $checkpointfile = "sync_workaround_checkpoint";
# delete previous checkpoint, if left from previous runs # delete previous checkpoint, if left from previous runs
@ -338,7 +355,9 @@ check_translation();
#concatenating translations and removing temporary files #concatenating translations and removing temporary files
concatenate_1best(); concatenate_1best();
if ($nbestflag){ concatenate_nbest(); } concatenate_logs() if $logflag;
concatenate_nbest() if $nbestflag;
remove_temporary_files(); remove_temporary_files();
@ -355,6 +374,13 @@ sub preparing_script(){
if ($nbestflag){ if ($nbestflag){
print OUT "$mosescmd $mosesparameters -n-best-list $tmpdir/${nbestfile}.$splitpfx$idx $nbest -input-file ${testfile}.$splitpfx$idx > $tmpdir/${testfile}.$splitpfx$idx.trans\n\n"; print OUT "$mosescmd $mosesparameters -n-best-list $tmpdir/${nbestfile}.$splitpfx$idx $nbest -input-file ${testfile}.$splitpfx$idx > $tmpdir/${testfile}.$splitpfx$idx.trans\n\n";
print OUT "echo exit status \$\?\n\n"; print OUT "echo exit status \$\?\n\n";
if ($inputtype==1){ #confusion network
print OUT "cat $tmpdir/${nbestfile}.$splitpfx$idx | perl $ENV{SCRIPTS_ROOTDIR}/generic/filterscores.pl > $tmpdir/${nbestfile}.$splitpfx$idx.tmp\n";
print OUT "echo exit status \$\?\n\n";
print OUT "mv $tmpdir/${nbestfile}.$splitpfx$idx.tmp $tmpdir/${nbestfile}.$splitpfx$idx; ";
print OUT "echo exit status \$\?\n\n";
}
print OUT "mv $tmpdir/${nbestfile}.$splitpfx$idx .\n\n"; print OUT "mv $tmpdir/${nbestfile}.$splitpfx$idx .\n\n";
print OUT "echo exit status \$\?\n\n"; print OUT "echo exit status \$\?\n\n";
}else{ }else{
@ -439,6 +465,19 @@ sub concatenate_1best(){
} }
} }
# Merge the per-job qsub output files into the single log file $orilogfile.
#
# Reads  : @idxlist    - job indices, processed in list order
#          $qsubout    - prefix of the per-job output files ("$qsubout$idx")
#          $orilogfile - destination path (truncated, then rewritten)
# Returns: nothing useful; problems are reported on STDERR.
#
# Failures are reported with warn rather than die so that the caller can
# still go on to its remaining steps (n-best concatenation, cleanup).
sub concatenate_logs(){
  my $out;
  unless (open ($out, '>', $orilogfile)) {
    warn "Cannot open log file ($orilogfile) for writing: $!\n";
    return;
  }

  foreach my $idx (@idxlist){
    my $joblog = "$qsubout$idx";
    my $in;
    unless (open ($in, '<', $joblog)) {
      warn "Cannot open job log ($joblog) for reading: $!\n";
      next;
    }
    # Copy line by line. Printing "@in" (array interpolated in a string)
    # joins the lines with $" -- a single space -- which put a stray blank
    # in front of every line after the first; streaming also avoids
    # holding a whole job log in memory.
    while (my $line = <$in>){
      print $out $line;
    }
    close($in);
  }

  # Buffered write errors only surface at close time.
  close($out) or warn "Cannot close log file ($orilogfile): $!\n";
  return;
}
sub check_exit_status(){ sub check_exit_status(){
print STDERR "check_exit_status\n"; print STDERR "check_exit_status\n";
my $failure=0; my $failure=0;

View File

@ -8,9 +8,7 @@ use strict;
#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's') #NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G"; my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G";
my $workingdir=`pawd`; my $workingdir = `pwd`; chomp $workingdir;
chomp($workingdir);
my $tmpdir="$workingdir/tmp$$"; my $tmpdir="$workingdir/tmp$$";
my $jobscript="$workingdir/job$$"; my $jobscript="$workingdir/job$$";
my $qsubout="$workingdir/out.job$$"; my $qsubout="$workingdir/out.job$$";