corpus compression correctly used even for generation step

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1568 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bojar 2008-02-22 16:14:30 +00:00
parent 7f3e34207a
commit f7a1fb5b9c

View File

@ -375,14 +375,7 @@ sub reduce_factors {
# }
my @INCLUDE = sort {$a <=> $b} split(/,/,$factors);
my $read = $full;
if ($full =~ /\.bz2$/) {
$read = "$BZCAT $full|";
} elsif ($full =~ /\.gz$/) {
$read = "$ZCAT $full|";
}
open(IN,$read) or die "Can't read $full ($read)";
*IN = open_or_zcat($full);
open(OUT,">".$reduced) or die "Can't write $reduced";
my $nr = 0;
while(<IN>) {
@ -1335,7 +1328,7 @@ sub get_generation {
}
my (%GENERATION,%GENERATION_TOTAL_SOURCE,%GENERATION_TOTAL_TARGET);
open(E,$___CORPUS.".".$___E) or die "Can't read ".$___CORPUS.".".$___E;
*E = open_or_zcat($___CORPUS.".".$___E.$___CORPUS_COMPRESSION);
$alignment_id=0;
while(<E>) {
chomp;
@ -1603,3 +1596,16 @@ sub safesystem {
return ! $exitcode;
}
}
# Open a file for reading, transparently decompressing it when the name
# ends in .bz2 or .gz, and return the resulting filehandle.
#
#   $fn      - name of the file to read
#   returns  - a lexical filehandle positioned at the start of the
#              (decompressed) data
#   dies     - when the file cannot be opened or the decompressor cannot
#              be started
#
# $BZCAT and $ZCAT are not defined in this sub; they are presumably
# file-level settings holding the decompression commands -- verify against
# the top of the script.
sub open_or_zcat {
    my $fn = shift;

    # Defaults: read the file directly.  $read is kept only for the error
    # message, so that it still shows what we tried to open.
    my $read   = $fn;
    my $mode   = '<';
    my $target = $fn;

    if ($fn =~ /\.bz2$/) {
        $read   = "$BZCAT $fn|";
        $mode   = '-|';
        $target = "$BZCAT $fn";
    } elsif ($fn =~ /\.gz$/) {
        $read   = "$ZCAT $fn|";
        $mode   = '-|';
        $target = "$ZCAT $fn";
    }

    # Three-argument open: the mode is given explicitly, so characters such
    # as '>', '|' or surrounding whitespace in $fn are never interpreted as
    # an open mode.  For the pipe case the command is still a single string
    # (the decompressor setting may carry its own options), so $fn reaches
    # the command line unquoted.
    my $hdl;
    open($hdl, $mode, $target) or die "Can't read $fn ($read): $!";
    return $hdl;
}