mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-02 17:09:36 +03:00
corpus compression correctly used even for generation step
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1568 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
7f3e34207a
commit
f7a1fb5b9c
@ -375,14 +375,7 @@ sub reduce_factors {
|
||||
# }
|
||||
my @INCLUDE = sort {$a <=> $b} split(/,/,$factors);
|
||||
|
||||
my $read = $full;
|
||||
if ($full =~ /\.bz2$/) {
|
||||
$read = "$BZCAT $full|";
|
||||
} elsif ($full =~ /\.gz$/) {
|
||||
$read = "$ZCAT $full|";
|
||||
}
|
||||
open(IN,$read) or die "Can't read $full ($read)";
|
||||
|
||||
*IN = open_or_zcat($full);
|
||||
open(OUT,">".$reduced) or die "Can't write $reduced";
|
||||
my $nr = 0;
|
||||
while(<IN>) {
|
||||
@ -1335,7 +1328,7 @@ sub get_generation {
|
||||
}
|
||||
|
||||
my (%GENERATION,%GENERATION_TOTAL_SOURCE,%GENERATION_TOTAL_TARGET);
|
||||
open(E,$___CORPUS.".".$___E) or die "Can't read ".$___CORPUS.".".$___E;
|
||||
*E = open_or_zcat($___CORPUS.".".$___E.$___CORPUS_COMPRESSION);
|
||||
$alignment_id=0;
|
||||
while(<E>) {
|
||||
chomp;
|
||||
@ -1603,3 +1596,16 @@ sub safesystem {
|
||||
return ! $exitcode;
|
||||
}
|
||||
}
|
||||
|
||||
# Open a file for reading, transparently decompressing it when its name
# indicates a compressed file.
#
# Parameters:
#   $fn - path of the file to read. A name ending in ".bz2" is piped through
#         $BZCAT, a name ending in ".gz" through $ZCAT; any other name is
#         opened directly as a plain file.
# Returns:
#   a lexical filehandle opened for reading.
# Dies:
#   if a plain file cannot be opened, if a compressed file does not exist,
#   or if the decompression command cannot be started.
sub open_or_zcat {
  my $fn = shift;

  # $BZCAT and $ZCAT are command strings defined elsewhere in this script.
  my $decompressor;
  if ($fn =~ /\.bz2$/) {
    $decompressor = $BZCAT;
  } elsif ($fn =~ /\.gz$/) {
    $decompressor = $ZCAT;
  }

  my $hdl;

  if (!defined $decompressor) {
    # Three-argument open: the name is taken literally, so leading/trailing
    # whitespace or characters such as '>' and '|' are never read as a mode.
    open($hdl, '<', $fn) or die "Can't read $fn ($fn): $!";
    return $hdl;
  }

  # A pipe open succeeds as soon as the child process is started, even when
  # the input file is missing, so the existence check has to be explicit.
  -e $fn or die "Can't read $fn: file does not exist";

  # The decompressor is kept as a shell command string because it may carry
  # its own options; only the file name is quoted for the shell so that
  # paths containing spaces or metacharacters are passed through intact.
  (my $quoted = $fn) =~ s/'/'\\''/g;
  my $command = "$decompressor '$quoted'";

  open($hdl, '-|', $command) or die "Can't read $fn ($command|): $!";
  return $hdl;
}
|
||||
|
Loading…
Reference in New Issue
Block a user