mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-04 01:45:52 +03:00
- change from pawd to pwd, because pawd is not available on some Linux distributions
- moses-parallel.pl: new way of passing parameters to the decoder (parameter -decoder-parameters); moses-parallel.pl: decoder logs can now be saved (parameter -logfile). git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1099 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
ddd2fdeb20
commit
92eadd7c0c
@ -23,9 +23,7 @@ use strict;
|
|||||||
#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
|
#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
|
||||||
my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G -hard";
|
my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G -hard";
|
||||||
|
|
||||||
my $workingdir=`pawd`;
|
my $workingdir = `pwd`; chomp $workingdir;
|
||||||
chomp($workingdir);
|
|
||||||
|
|
||||||
my $tmpdir="$workingdir/tmp$$";
|
my $tmpdir="$workingdir/tmp$$";
|
||||||
my $splitpfx="split$$";
|
my $splitpfx="split$$";
|
||||||
|
|
||||||
@ -52,6 +50,8 @@ my $nbestfile=undef;
|
|||||||
my $orinbestfile=undef;
|
my $orinbestfile=undef;
|
||||||
my $nbest=undef;
|
my $nbest=undef;
|
||||||
my $nbestflag=0;
|
my $nbestflag=0;
|
||||||
|
my $orilogfile="";
|
||||||
|
my $logflag="";
|
||||||
my $qsubname="MOSES";
|
my $qsubname="MOSES";
|
||||||
my $inputtype=0;
|
my $inputtype=0;
|
||||||
my $old_sge = 0; # assume old Sun Grid Engine (<6.0) where qsub does not
|
my $old_sge = 0; # assume old Sun Grid Engine (<6.0) where qsub does not
|
||||||
@ -66,21 +66,24 @@ sub init(){
|
|||||||
'debug'=>\$dbg,
|
'debug'=>\$dbg,
|
||||||
'jobs=i'=>\$jobs,
|
'jobs=i'=>\$jobs,
|
||||||
'decoder=s'=> \$mosescmd,
|
'decoder=s'=> \$mosescmd,
|
||||||
|
'decoder-parameters=s'=> \$mosesparameters,
|
||||||
|
'logfile=s'=> \$orilogfile,
|
||||||
'i|inputfile|input-file=s'=> \$orifile,
|
'i|inputfile|input-file=s'=> \$orifile,
|
||||||
'n-best-file=s'=> \$orinbestfile,
|
'n-best-file=s'=> \$orinbestfile,
|
||||||
'n-best-size=i'=> \$nbest,
|
'n-best-size=i'=> \$nbest,
|
||||||
'qsub-prefix=s'=> \$qsubname,
|
'qsub-prefix=s'=> \$qsubname,
|
||||||
'queue-parameters=s'=> \$queueparameters,
|
'queue-parameters=s'=> \$queueparameters,
|
||||||
'inputtype=i'=> \$inputtype,
|
'inputtype=i'=> \$inputtype,
|
||||||
'config=s'=>\$cfgfile,
|
'config=s'=>\$cfgfile,
|
||||||
'old-sge' => \$old_sge,
|
'old-sge' => \$old_sge,
|
||||||
) or exit(1);
|
) or exit(1);
|
||||||
|
|
||||||
chomp($nbestfile=`basename $orinbestfile`) if defined $orinbestfile;
|
chomp($nbestfile=`basename $orinbestfile`) if defined $orinbestfile;
|
||||||
chomp($testfile=`basename $orifile`) if defined $orifile;
|
chomp($testfile=`basename $orifile`) if defined $orifile;
|
||||||
|
|
||||||
$mosesparameters="@ARGV -config $cfgfile -inputtype $inputtype";
|
$mosesparameters.="@ARGV -config $cfgfile -inputtype $inputtype";
|
||||||
getNbestParameters();
|
getNbestParameters();
|
||||||
|
getLogParameters();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -98,7 +101,9 @@ sub version(){
|
|||||||
# print STDERR "version 1.8 (31-07-2006)\n";
|
# print STDERR "version 1.8 (31-07-2006)\n";
|
||||||
# print STDERR "version 1.9 (01-08-2006)\n";
|
# print STDERR "version 1.9 (01-08-2006)\n";
|
||||||
# print STDERR "version 1.10 (02-08-2006)\n";
|
# print STDERR "version 1.10 (02-08-2006)\n";
|
||||||
print STDERR "version 1.11 (10-10-2006)\n";
|
# print STDERR "version 1.11 (10-10-2006)\n";
|
||||||
|
# print STDERR "version 1.12 (27-12-2006)\n";
|
||||||
|
print STDERR "version 1.13 (29-12-2006)\n";
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,21 +116,25 @@ sub usage(){
|
|||||||
print STDERR "* -i|inputfile|input-file <file> the input text to translate\n";
|
print STDERR "* -i|inputfile|input-file <file> the input text to translate\n";
|
||||||
print STDERR "* -jobs <N> number of required jobs\n";
|
print STDERR "* -jobs <N> number of required jobs\n";
|
||||||
print STDERR " -qsub-prefix <string> name for sumbitte jobs\n";
|
print STDERR " -qsub-prefix <string> name for sumbitte jobs\n";
|
||||||
print STDERR " -queue-parameters <string> specific requirements for queue\n";
|
print STDERR " -queue-parameters <string> specific requirements for queue\n";
|
||||||
print STDERR " -old-sge Assume Sun Grid Engine < 6.0\n";
|
print STDERR " -old-sge Assume Sun Grid Engine < 6.0\n";
|
||||||
print STDERR " -debug debug\n";
|
print STDERR " -debug debug\n";
|
||||||
print STDERR " -version print version of the script\n";
|
print STDERR " -version print version of the script\n";
|
||||||
print STDERR " -help this help\n";
|
print STDERR " -help this help\n";
|
||||||
print STDERR "Moses options:\n";
|
print STDERR "Moses options:\n";
|
||||||
print STDERR " -inputtype <0|1> 0 for text, 1 for confusion networks\n";
|
print STDERR " -inputtype <0|1> 0 for text, 1 for confusion networks\n";
|
||||||
print STDERR "* -config <cfgfile> configuration file\n";
|
print STDERR "* -config <cfgfile> configuration file\n";
|
||||||
|
print STDERR " -decoder-parameters <string> specific parameters for the decoder\n";
|
||||||
print STDERR "All other options are passed to Moses\n";
|
print STDERR "All other options are passed to Moses\n";
|
||||||
exit(1);
|
print STDERR " (This way to pass parameters is maintained for back compatibility\n";
|
||||||
|
print STDERR " but preferably use -decoder-parameters)\n";
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#printparameters
|
#printparameters
|
||||||
sub print_parameters(){
|
sub print_parameters(){
|
||||||
print STDERR "Inputfile: $orifile\n";
|
print STDERR "Inputfile: $orifile\n";
|
||||||
|
print STDERR "Logfile: $orilogfile\n";
|
||||||
print STDERR "Configuration file: $cfgfile\n";
|
print STDERR "Configuration file: $cfgfile\n";
|
||||||
print STDERR "Decoder in use: $mosescmd\n";
|
print STDERR "Decoder in use: $mosescmd\n";
|
||||||
if ($nbestflag) {
|
if ($nbestflag) {
|
||||||
@ -134,13 +143,18 @@ sub print_parameters(){
|
|||||||
}
|
}
|
||||||
print STDERR "Number of jobs:$jobs\n";
|
print STDERR "Number of jobs:$jobs\n";
|
||||||
print STDERR "Qsub name: $qsubname\n";
|
print STDERR "Qsub name: $qsubname\n";
|
||||||
print STDERR "Queue parameters: $queueparameters\n";
|
print STDERR "Queue parameters: $queueparameters\n";
|
||||||
print STDERR "Inputtype: text\n" if $inputtype == 0;
|
print STDERR "Inputtype: text\n" if $inputtype == 0;
|
||||||
print STDERR "Inputtype: confusion network\n" if $inputtype == 1;
|
print STDERR "Inputtype: confusion network\n" if $inputtype == 1;
|
||||||
|
|
||||||
print STDERR "parameters directly passed to Moses: $mosesparameters\n";
|
print STDERR "parameters directly passed to Moses: $mosesparameters\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#derive the logging flag from the -logfile option:
#$logflag is switched on only when a log file name was supplied
sub getLogParameters(){
  if ($orilogfile) { $logflag=1; }
}
|
||||||
|
|
||||||
#get parameters for nbest computation from configuration file
|
#get parameters for nbest computation from configuration file
|
||||||
sub getNbestParameters(){
|
sub getNbestParameters(){
|
||||||
if ($orinbestfile) { $nbestflag=1; }
|
if ($orinbestfile) { $nbestflag=1; }
|
||||||
@ -169,25 +183,25 @@ usage() if $help;
|
|||||||
|
|
||||||
if (!defined $orifile || !defined $mosescmd || ! defined $cfgfile) {
|
if (!defined $orifile || !defined $mosescmd || ! defined $cfgfile) {
|
||||||
print STDERR "Please specify -input-file, -decoder and -config\n";
|
print STDERR "Please specify -input-file, -decoder and -config\n";
|
||||||
exit 1;
|
usage();
|
||||||
}
|
}
|
||||||
|
|
||||||
#checking if inputfile exists
|
#checking if inputfile exists
|
||||||
if (! -e ${orifile} ){
|
if (! -e ${orifile} ){
|
||||||
print STDERR "Inputfile ($orifile) does not exists\n";
|
print STDERR "Inputfile ($orifile) does not exists\n";
|
||||||
exit 1;
|
usage();
|
||||||
}
|
}
|
||||||
|
|
||||||
#checking if decoder exists
|
#checking if decoder exists
|
||||||
if (! -e $mosescmd) {
|
if (! -e $mosescmd) {
|
||||||
print STDERR "Decoder ($mosescmd) does not exists\n";
|
print STDERR "Decoder ($mosescmd) does not exists\n";
|
||||||
exit 1;
|
usage();
|
||||||
}
|
}
|
||||||
|
|
||||||
#checking if configfile exists
|
#checking if configfile exists
|
||||||
if (! -e $cfgfile) {
|
if (! -e $cfgfile) {
|
||||||
print STDERR "Configuration file ($cfgfile) does not exists\n";
|
print STDERR "Configuration file ($cfgfile) does not exists\n";
|
||||||
exit 1;
|
usage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -269,7 +283,10 @@ my @sgepids =();
|
|||||||
|
|
||||||
my $failure=0;
|
my $failure=0;
|
||||||
foreach my $idx (@idxlist){
|
foreach my $idx (@idxlist){
|
||||||
$cmd="qsub $queueparameters -j y -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash >& ${jobscript}${idx}.log";
|
print STDERR "qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash\n" if $dbg;
|
||||||
|
|
||||||
|
$cmd="qsub $queueparameters -b no -j yes -o $qsubout$idx -e $qsuberr$idx -N $qsubname$idx ${jobscript}${idx}.bash >& ${jobscript}${idx}.log";
|
||||||
|
|
||||||
safesystem($cmd) or die;
|
safesystem($cmd) or die;
|
||||||
|
|
||||||
my ($res,$id);
|
my ($res,$id);
|
||||||
@ -291,7 +308,7 @@ if ($old_sge) {
|
|||||||
# we need to implement our own waiting script
|
# we need to implement our own waiting script
|
||||||
safesystem("echo 'date' > sync_workaround_script.sh") or kill_all_and_quit();
|
safesystem("echo 'date' > sync_workaround_script.sh") or kill_all_and_quit();
|
||||||
|
|
||||||
my $pwd = `pawd`; chomp $pwd;
|
my $pwd = `pwd`; chomp $pwd;
|
||||||
my $checkpointfile = "sync_workaround_checkpoint";
|
my $checkpointfile = "sync_workaround_checkpoint";
|
||||||
|
|
||||||
# delete previous checkpoint, if left from previous runs
|
# delete previous checkpoint, if left from previous runs
|
||||||
@ -338,7 +355,9 @@ check_translation();
|
|||||||
|
|
||||||
#concatenating translations and removing temporary files
|
#concatenating translations and removing temporary files
|
||||||
concatenate_1best();
|
concatenate_1best();
|
||||||
if ($nbestflag){ concatenate_nbest(); }
|
concatenate_logs() if $logflag;
|
||||||
|
concatenate_nbest() if $nbestflag;
|
||||||
|
|
||||||
|
|
||||||
remove_temporary_files();
|
remove_temporary_files();
|
||||||
|
|
||||||
@ -355,6 +374,13 @@ sub preparing_script(){
|
|||||||
if ($nbestflag){
|
if ($nbestflag){
|
||||||
print OUT "$mosescmd $mosesparameters -n-best-list $tmpdir/${nbestfile}.$splitpfx$idx $nbest -input-file ${testfile}.$splitpfx$idx > $tmpdir/${testfile}.$splitpfx$idx.trans\n\n";
|
print OUT "$mosescmd $mosesparameters -n-best-list $tmpdir/${nbestfile}.$splitpfx$idx $nbest -input-file ${testfile}.$splitpfx$idx > $tmpdir/${testfile}.$splitpfx$idx.trans\n\n";
|
||||||
print OUT "echo exit status \$\?\n\n";
|
print OUT "echo exit status \$\?\n\n";
|
||||||
|
|
||||||
|
if ($inputtype==1){ #confusion network
|
||||||
|
print OUT "cat $tmpdir/${nbestfile}.$splitpfx$idx | perl $ENV{SCRIPTS_ROOTDIR}/generic/filterscores.pl > $tmpdir/${nbestfile}.$splitpfx$idx.tmp\n";
|
||||||
|
print OUT "echo exit status \$\?\n\n";
|
||||||
|
print OUT "mv $tmpdir/${nbestfile}.$splitpfx$idx.tmp $tmpdir/${nbestfile}.$splitpfx$idx; ";
|
||||||
|
print OUT "echo exit status \$\?\n\n";
|
||||||
|
}
|
||||||
print OUT "mv $tmpdir/${nbestfile}.$splitpfx$idx .\n\n";
|
print OUT "mv $tmpdir/${nbestfile}.$splitpfx$idx .\n\n";
|
||||||
print OUT "echo exit status \$\?\n\n";
|
print OUT "echo exit status \$\?\n\n";
|
||||||
}else{
|
}else{
|
||||||
@ -439,6 +465,19 @@ sub concatenate_1best(){
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#concatenating the per-job qsub output files into the log file given with -logfile
#reads:  $orilogfile (destination), @idxlist (job indexes), $qsubout (per-job output prefix)
#writes: the contents of every "$qsubout$idx" file, in @idxlist order, to $orilogfile
#problems opening a file are reported on STDERR; they do not abort the script,
#so the remaining post-processing steps still run
sub concatenate_logs(){
  # three-arg open with a lexical handle: the file name can never be taken as a mode
  my $out;
  if (!open($out, '>', $orilogfile)){
    print STDERR "Cannot write log file ($orilogfile): $!\n";
    return;
  }

  foreach my $idx (@idxlist){
    my $joblog="$qsubout$idx";
    my $in;
    if (!open($in, '<', $joblog)){
      print STDERR "Cannot read job log ($joblog): $!\n";
      next;
    }
    # copy line by line: printing the slurped array inside double quotes ("@in")
    # would join the lines with $" (a space) and so prepend a blank to every
    # line after the first one
    while (my $line=<$in>){
      print $out $line;
    }
    close($in);
  }

  # buffered write errors only surface when the handle is closed
  close($out) or print STDERR "Cannot close log file ($orilogfile): $!\n";
}
|
||||||
|
|
||||||
|
|
||||||
sub check_exit_status(){
|
sub check_exit_status(){
|
||||||
print STDERR "check_exit_status\n";
|
print STDERR "check_exit_status\n";
|
||||||
my $failure=0;
|
my $failure=0;
|
||||||
|
@ -8,9 +8,7 @@ use strict;
|
|||||||
#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
|
#NOTE: group name is ws06ossmt (with 2 's') and not ws06osmt (with 1 's')
|
||||||
my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G";
|
my $queueparameters="-l ws06ossmt=true -l mem_free=0.5G";
|
||||||
|
|
||||||
my $workingdir=`pawd`;
|
my $workingdir = `pwd`; chomp $workingdir;
|
||||||
chomp($workingdir);
|
|
||||||
|
|
||||||
my $tmpdir="$workingdir/tmp$$";
|
my $tmpdir="$workingdir/tmp$$";
|
||||||
my $jobscript="$workingdir/job$$";
|
my $jobscript="$workingdir/job$$";
|
||||||
my $qsubout="$workingdir/out.job$$";
|
my $qsubout="$workingdir/out.job$$";
|
||||||
|
Loading…
Reference in New Issue
Block a user