mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
Tighten up extract-parallel on OS X: it can now use either gsplit (GNU split) or BSD split.
This commit is contained in:
parent
b83803203e
commit
ca54852641
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env perl
|
#!/usr/bin/env perl
|
||||||
#
|
#
|
||||||
# This file is part of moses. Its use is licensed under the GNU Lesser General
|
# This file is part of moses. Its use is licensed under the GNU Lesser General
|
||||||
# Public License version 2.1 or, at your option, any later version.
|
# Public License version 2.1 or, at your option, any later version.
|
||||||
@ -15,8 +15,7 @@ sub systemCheck($);
|
|||||||
sub NumStr($);
|
sub NumStr($);
|
||||||
sub DigitStr($);
|
sub DigitStr($);
|
||||||
sub CharStr($);
|
sub CharStr($);
|
||||||
|
sub GetSplitVersion($);
|
||||||
my $is_osx = ($^O eq "darwin");
|
|
||||||
|
|
||||||
my $alph = "abcdefghijklmnopqrstuvwxyz";
|
my $alph = "abcdefghijklmnopqrstuvwxyz";
|
||||||
my @alph = (split(//,$alph));
|
my @alph = (split(//,$alph));
|
||||||
@ -42,7 +41,7 @@ my $baselineExtract;
|
|||||||
my $glueFile;
|
my $glueFile;
|
||||||
my $phraseOrientation = 0;
|
my $phraseOrientation = 0;
|
||||||
my $phraseOrientationPriorsFile;
|
my $phraseOrientationPriorsFile;
|
||||||
my $splitCmdOption="-d";
|
my $splitCmdOption = "";
|
||||||
|
|
||||||
my $GZIP_EXEC;
|
my $GZIP_EXEC;
|
||||||
if(`which pigz`) {
|
if(`which pigz`) {
|
||||||
@ -53,6 +52,15 @@ else {
|
|||||||
}
|
}
|
||||||
print STDERR "using $GZIP_EXEC \n";
|
print STDERR "using $GZIP_EXEC \n";
|
||||||
|
|
||||||
|
my $isBSDSplit = GetSplitVersion($splitCmd);
|
||||||
|
print STDERR "isBSDSplit=$isBSDSplit \n";
|
||||||
|
|
||||||
|
if ($isBSDSplit == 0) {
|
||||||
|
$splitCmdOption .= "-d";
|
||||||
|
}
|
||||||
|
|
||||||
|
my $gzOut = 0;
|
||||||
|
|
||||||
for (my $i = 8; $i < $#ARGV + 1; ++$i)
|
for (my $i = 8; $i < $#ARGV + 1; ++$i)
|
||||||
{
|
{
|
||||||
$makeTTable = 0 if $ARGV[$i] eq "--NoTTable";
|
$makeTTable = 0 if $ARGV[$i] eq "--NoTTable";
|
||||||
@ -73,11 +81,15 @@ for (my $i = 8; $i < $#ARGV + 1; ++$i)
|
|||||||
$phraseOrientationPriorsFile = $ARGV[++$i];
|
$phraseOrientationPriorsFile = $ARGV[++$i];
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
$splitCmdOption="",next if $ARGV[$i] eq "--NoNumericSuffix";
|
if ($ARGV[$i] eq '--GZOutput') {
|
||||||
|
$gzOut = 1;
|
||||||
|
}
|
||||||
|
|
||||||
$otherExtractArgs .= $ARGV[$i] ." ";
|
$otherExtractArgs .= $ARGV[$i] ." ";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
die("Need to specify --GZOutput for parallel extract") if ($gzOut == 0);
|
||||||
|
|
||||||
my $cmd;
|
my $cmd;
|
||||||
my $TMPDIR=dirname($extract) ."/tmp.$$";
|
my $TMPDIR=dirname($extract) ."/tmp.$$";
|
||||||
$cmd = "mkdir -p $TMPDIR; ls -l $TMPDIR";
|
$cmd = "mkdir -p $TMPDIR; ls -l $TMPDIR";
|
||||||
@ -272,7 +284,7 @@ if ($phraseOrientation && defined($phraseOrientationPriorsFile)) {
|
|||||||
|
|
||||||
# delete temporary files
|
# delete temporary files
|
||||||
$cmd = "rm -rf $TMPDIR \n";
|
$cmd = "rm -rf $TMPDIR \n";
|
||||||
`$cmd`;
|
systemCheck($cmd);
|
||||||
|
|
||||||
print STDERR "Finished ".localtime() ."\n";
|
print STDERR "Finished ".localtime() ."\n";
|
||||||
|
|
||||||
@ -352,10 +364,22 @@ sub CharStr($)
|
|||||||
sub NumStr($)
|
sub NumStr($)
|
||||||
{
|
{
|
||||||
my $i = shift;
|
my $i = shift;
|
||||||
if ($is_osx){
|
if ($isBSDSplit){
|
||||||
return CharStr($i);
|
return CharStr($i);
|
||||||
}else{
|
}else{
|
||||||
return DigitStr($i);
|
return DigitStr($i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Detect which flavour of split(1) the given command is.
#
# GNU split accepts the long option --help and exits with status 0, whereas
# BSD split rejects it as an illegal option and exits non-zero.  The probe
# must use --help rather than -h: -h is not a GNU split option either, so
# probing with it makes both flavours fail and look like BSD split.
#
# Parameter: $splitCmd - name or path of the split executable to probe.
# Returns:   0 if the probe succeeds (treated as GNU split),
#            1 if it fails (treated as BSD split; this is also the result
#            when the command cannot be started at all).
sub GetSplitVersion($)
{
    my $splitCmd = shift;

    # Only the exit status matters, so discard the help/usage text instead
    # of letting it clutter the script's own output.
    my $retVal = system("$splitCmd --help > /dev/null 2>&1");

    return ($retVal != 0) ? 1 : 0;
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user