mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
use pigz rather than gzip if it exists
This commit is contained in:
parent
542a65a16e
commit
90d4b2d713
@ -32,6 +32,15 @@ my $glueFile;
|
||||
my $phraseOrientation = 0;
|
||||
my $phraseOrientationPriorsFile;
|
||||
|
||||
# Choose the compressor used for every ".gz" file this script writes.
# pigz is preferred when available; otherwise fall back to plain gzip.
# The chosen program is later invoked by bare name inside shell pipelines,
# so it is looked up on PATH -- the same place the shell will search --
# instead of trusting the single hard-coded location /usr/bin/pigz.
my $GZIP_EXEC = 'gzip';
my @gzipSearchDirs = grep { length } split(/:/, defined($ENV{PATH}) ? $ENV{PATH} : '');
foreach my $gzipDir (@gzipSearchDirs) {
  my $candidate = "$gzipDir/pigz";
  # -f rejects a directory that happens to be called "pigz";
  # -x makes sure we are actually allowed to run the file.
  if (-f $candidate && -x $candidate) {
    $GZIP_EXEC = 'pigz';
    last;
  }
}
# Report the choice so that log files show which compressor was used.
print STDERR "using $GZIP_EXEC \n";
|
||||
|
||||
for (my $i = 8; $i < $#ARGV + 1; ++$i)
|
||||
{
|
||||
$makeTTable = 0 if $ARGV[$i] eq "--NoTTable";
|
||||
@ -178,11 +187,11 @@ if (defined($baselineExtract)) {
|
||||
$catOCmd .= "$baselineExtract.o$sorted.gz ";
|
||||
}
|
||||
|
||||
$catCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | gzip -c > $extract.sorted.gz 2>> /dev/stderr \n";
|
||||
$catInvCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | gzip -c > $extract.inv.sorted.gz 2>> /dev/stderr \n";
|
||||
$catOCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | gzip -c > $extract.o.sorted.gz 2>> /dev/stderr \n";
|
||||
$catContextCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | uniq | gzip -c > $extract.context.sorted.gz 2>> /dev/stderr \n";
|
||||
$catContextInvCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | uniq | gzip -c > $extract.context.inv.sorted.gz 2>> /dev/stderr \n";
|
||||
$catCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | $GZIP_EXEC -c > $extract.sorted.gz 2>> /dev/stderr \n";
|
||||
$catInvCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | $GZIP_EXEC -c > $extract.inv.sorted.gz 2>> /dev/stderr \n";
|
||||
$catOCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | $GZIP_EXEC -c > $extract.o.sorted.gz 2>> /dev/stderr \n";
|
||||
$catContextCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | uniq | $GZIP_EXEC -c > $extract.context.sorted.gz 2>> /dev/stderr \n";
|
||||
$catContextInvCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR 2>> /dev/stderr | uniq | $GZIP_EXEC -c > $extract.context.inv.sorted.gz 2>> /dev/stderr \n";
|
||||
|
||||
|
||||
@children = ();
|
||||
|
@ -13,6 +13,15 @@ sub GetSourcePhrase($);
|
||||
sub NumStr($);
|
||||
sub CutContextFile($$$);
|
||||
|
||||
# Choose the compressor used for every ".gz" file this script writes.
# pigz is preferred when available; otherwise fall back to plain gzip.
# The chosen program is later invoked by bare name inside shell pipelines,
# so it is looked up on PATH -- the same place the shell will search --
# instead of trusting the single hard-coded location /usr/bin/pigz.
my $GZIP_EXEC = 'gzip';
my @gzipSearchDirs = grep { length } split(/:/, defined($ENV{PATH}) ? $ENV{PATH} : '');
foreach my $gzipDir (@gzipSearchDirs) {
  my $candidate = "$gzipDir/pigz";
  # -f rejects a directory that happens to be called "pigz";
  # -x makes sure we are actually allowed to run the file.
  if (-f $candidate && -x $candidate) {
    $GZIP_EXEC = 'pigz';
    last;
  }
}
# Report the choice so that log files show which compressor was used.
print STDERR "using $GZIP_EXEC \n";
|
||||
|
||||
#my $EXTRACT_SPLIT_LINES = 5000000;
|
||||
my $EXTRACT_SPLIT_LINES = 50000000;
|
||||
|
||||
@ -100,7 +109,7 @@ else
|
||||
}
|
||||
|
||||
my $filePath = "$TMPDIR/extract.$fileCount.gz";
|
||||
open (OUT, "| gzip -c > $filePath") or die "error starting gzip $!";
|
||||
open (OUT, "| $GZIP_EXEC -c > $filePath") or die "error starting $GZIP_EXEC $!";
|
||||
|
||||
my $lineCount = 0;
|
||||
my $line;
|
||||
@ -133,7 +142,7 @@ else
|
||||
++$fileCount;
|
||||
my $filePath = $fileCount;
|
||||
$filePath = "$TMPDIR/extract.$filePath.gz";
|
||||
open (OUT, "| gzip -c > $filePath") or die "error starting gzip $!";
|
||||
open (OUT, "| $GZIP_EXEC -c > $filePath") or die "error starting $GZIP_EXEC $!";
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -175,7 +184,7 @@ for (my $i = 0; $i < $fileCount; ++$i)
|
||||
$cmd .= "zcat $TMPDIR/phrase-table.half.$numStr.gz | $FlexibilityCmd $TMPDIR/extract.context.$i.gz";
|
||||
$cmd .= " --Inverse" if ($otherExtractArgs =~ /--Inverse/);
|
||||
$cmd .= " --Hierarchical" if ($otherExtractArgs =~ /--Hierarchical/);
|
||||
$cmd .= " | gzip -c > $TMPDIR/phrase-table.half.$numStr.flex.gz\n";
|
||||
$cmd .= " | $GZIP_EXEC -c > $TMPDIR/phrase-table.half.$numStr.flex.gz\n";
|
||||
$cmd .= "mv $TMPDIR/phrase-table.half.$numStr.flex.gz $TMPDIR/phrase-table.half.$numStr.gz\n";
|
||||
}
|
||||
|
||||
@ -219,7 +228,7 @@ else
|
||||
$cmd .= "| LC_ALL=C $sortCmd -T $TMPDIR ";
|
||||
}
|
||||
|
||||
$cmd .= " | gzip -c > $ptHalf 2>> /dev/stderr ";
|
||||
$cmd .= " | $GZIP_EXEC -c > $ptHalf 2>> /dev/stderr ";
|
||||
}
|
||||
print STDERR $cmd;
|
||||
systemCheck($cmd);
|
||||
@ -356,7 +365,7 @@ sub CutContextFile($$$)
|
||||
my $sourcePhrase;
|
||||
|
||||
my $filePath = "$TMPDIR/extract.context.$fileCount.gz";
|
||||
open (OUT_CONTEXT, "| gzip -c > $filePath") or die "error starting gzip $!";
|
||||
open (OUT_CONTEXT, "| $GZIP_EXEC -c > $filePath") or die "error starting $GZIP_EXEC $!";
|
||||
|
||||
if ($lastline ne "") {
|
||||
print OUT_CONTEXT "$lastline\n";
|
||||
|
@ -306,6 +306,15 @@ else {
|
||||
$SORT_EXEC = 'sort';
|
||||
}
|
||||
|
||||
# Choose the compressor used for the ".gz" files this script reads and
# writes.  pigz is preferred when available; otherwise fall back to plain
# gzip.  The chosen program is later invoked by bare name through the
# shell, so it is looked up on PATH -- the same place the shell will
# search -- instead of trusting the single hard-coded location
# /usr/bin/pigz.
my $GZIP_EXEC = 'gzip';
my @gzipSearchDirs = grep { length } split(/:/, defined($ENV{PATH}) ? $ENV{PATH} : '');
foreach my $gzipDir (@gzipSearchDirs) {
  my $candidate = "$gzipDir/pigz";
  # -f rejects a directory that happens to be called "pigz";
  # -x makes sure we are actually allowed to run the file.
  if (-f $candidate && -x $candidate) {
    $GZIP_EXEC = 'pigz';
    last;
  }
}
# Report the choice so that log files show which compressor was used.
print STDERR "using $GZIP_EXEC \n";
|
||||
|
||||
my $__SORT_BUFFER_SIZE = "";
|
||||
$__SORT_BUFFER_SIZE = "-S $_SORT_BUFFER_SIZE" if $_SORT_BUFFER_SIZE;
|
||||
|
||||
@ -353,7 +362,7 @@ my $PHRASE_CONSOLIDATE = "$SCRIPTS_ROOTDIR/../bin/consolidate";
|
||||
my $FLEX_SCORER = "$SCRIPTS_ROOTDIR/training/flexibility_score.py";
|
||||
|
||||
# utilities
|
||||
my $ZCAT = "gzip -cd";
|
||||
my $ZCAT = "$GZIP_EXEC -cd";
|
||||
my $BZCAT = "bzcat";
|
||||
|
||||
# do a sanity check to make sure we can find the necessary binaries since
|
||||
@ -1209,7 +1218,7 @@ sub run_single_giza {
|
||||
die "ERROR: Giza did not produce the output file $dir/$f-$e.$___GIZA_EXTENSION. Is your corpus clean (reasonably-sized sentences)?"
|
||||
if ! -e "$dir/$f-$e.$___GIZA_EXTENSION";
|
||||
safesystem("rm -f $dir/$f-$e.$___GIZA_EXTENSION.gz") or die;
|
||||
safesystem("gzip $dir/$f-$e.$___GIZA_EXTENSION") or die;
|
||||
safesystem("$GZIP_EXEC $dir/$f-$e.$___GIZA_EXTENSION") or die;
|
||||
}
|
||||
|
||||
sub run_single_snt2cooc {
|
||||
@ -1486,9 +1495,9 @@ sub extract_phrase {
|
||||
|
||||
if (defined($_BASELINE_EXTRACT) && $PHRASE_EXTRACT !~ /extract-parallel.perl/) {
|
||||
print STDERR "merging with baseline extract from $_BASELINE_EXTRACT\n";
|
||||
safesystem("$ZCAT $_BASELINE_EXTRACT.gz $extract_file$suffix.gz | gzip > $extract_file.gz");
|
||||
safesystem("$ZCAT $_BASELINE_EXTRACT.inv.gz $extract_file$suffix.inv.gz | gzip > $extract_file.inv.gz");
|
||||
safesystem("$ZCAT $_BASELINE_EXTRACT.o.gz $extract_file$suffix.o.gz | gzip > $extract_file.o.gz")
|
||||
safesystem("$ZCAT $_BASELINE_EXTRACT.gz $extract_file$suffix.gz | $GZIP_EXEC > $extract_file.gz");
|
||||
safesystem("$ZCAT $_BASELINE_EXTRACT.inv.gz $extract_file$suffix.inv.gz | $GZIP_EXEC > $extract_file.inv.gz");
|
||||
safesystem("$ZCAT $_BASELINE_EXTRACT.o.gz $extract_file$suffix.o.gz | $GZIP_EXEC > $extract_file.o.gz")
|
||||
if -e "$extract_file$suffix.o.gz";
|
||||
safesystem("rm $extract_file$suffix.gz");
|
||||
safesystem("rm $extract_file$suffix.inv.gz");
|
||||
@ -1681,7 +1690,7 @@ sub score_phrase_phrase_extract {
|
||||
$cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
|
||||
$cmd .= " --SourceLabels $_GHKM_SOURCE_LABELS_FILE" if $_GHKM_SOURCE_LABELS && defined($_GHKM_SOURCE_LABELS_FILE);
|
||||
|
||||
$cmd .= " | gzip -c > $ttable_file.gz";
|
||||
$cmd .= " | $GZIP_EXEC -c > $ttable_file.gz";
|
||||
|
||||
safesystem($cmd) or die "ERROR: Consolidating the two phrase table halves failed";
|
||||
if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
|
||||
@ -1698,7 +1707,7 @@ sub score_phrase_memscore {
|
||||
|
||||
# The output is sorted to avoid breaking scripts that rely on the
|
||||
# sorting behaviour of the previous scoring algorithm.
|
||||
my $cmd = "$MEMSCORE $options | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip >$ttable_file.gz";
|
||||
my $cmd = "$MEMSCORE $options | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | $GZIP_EXEC >$ttable_file.gz";
|
||||
if (-e "$extract_file.gz") {
|
||||
$cmd = "$ZCAT $extract_file.gz | ".$cmd;
|
||||
} else {
|
||||
@ -1863,7 +1872,7 @@ sub get_generation {
|
||||
}
|
||||
close(GEN);
|
||||
safesystem("rm -f $file.gz") or die("ERROR");
|
||||
safesystem("gzip $file") or die("ERROR");
|
||||
safesystem("$GZIP_EXEC $file") or die("ERROR");
|
||||
}
|
||||
|
||||
### (9) CREATE CONFIGURATION FILE
|
||||
|
Loading…
Reference in New Issue
Block a user