mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
commit
68dbe85545
@ -13,10 +13,10 @@ chomp(@OUT);
|
|||||||
while(<SRC>) {
|
while(<SRC>) {
|
||||||
chomp;
|
chomp;
|
||||||
if (/^<srcset/) {
|
if (/^<srcset/) {
|
||||||
s/<srcset/<tstset trglang="$language"/;
|
s/<srcset/<tstset trglang="$language"/i;
|
||||||
}
|
}
|
||||||
elsif (/^<\/srcset/) {
|
elsif (/^<\/srcset/) {
|
||||||
s/<\/srcset/<\/tstset/;
|
s/<\/srcset/<\/tstset/i;
|
||||||
}
|
}
|
||||||
elsif (/^<doc/i) {
|
elsif (/^<doc/i) {
|
||||||
s/ *sysid="[^\"]+"//;
|
s/ *sysid="[^\"]+"//;
|
||||||
@ -26,10 +26,10 @@ while(<SRC>) {
|
|||||||
my $line = shift(@OUT);
|
my $line = shift(@OUT);
|
||||||
$line = "" if $line =~ /NO BEST TRANSLATION/;
|
$line = "" if $line =~ /NO BEST TRANSLATION/;
|
||||||
if (/<\/seg>/) {
|
if (/<\/seg>/) {
|
||||||
s/(<seg[^>]+> *).*(<\/seg>)/$1$line$2/;
|
s/(<seg[^>]+> *).*(<\/seg>)/$1$line$2/i;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
s/(<seg[^>]+> *)[^<]*/$1$line/;
|
s/(<seg[^>]+> *)[^<]*/$1$line/i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
print $_."\n";
|
print $_."\n";
|
||||||
|
@ -171,7 +171,7 @@ if ($TIMING)
|
|||||||
|
|
||||||
# tokenize a batch of texts saved in an array
|
# tokenize a batch of texts saved in an array
|
||||||
# input: an array containing a batch of texts
|
# input: an array containing a batch of texts
|
||||||
# return: another array cotaining a batch of tokenized texts for the input array
|
# return: another array containing a batch of tokenized texts for the input array
|
||||||
sub tokenize_batch
|
sub tokenize_batch
|
||||||
{
|
{
|
||||||
my(@text_list) = @_;
|
my(@text_list) = @_;
|
||||||
|
@ -47,7 +47,7 @@ my $l1input = "$corpus.$l1";
|
|||||||
if (-e $l1input) {
|
if (-e $l1input) {
|
||||||
$opn = $l1input;
|
$opn = $l1input;
|
||||||
} elsif (-e $l1input.".gz") {
|
} elsif (-e $l1input.".gz") {
|
||||||
$opn = "zcat $l1input.gz |";
|
$opn = "gunzip -c $l1input.gz |";
|
||||||
} else {
|
} else {
|
||||||
die "Error: $l1input does not exist";
|
die "Error: $l1input does not exist";
|
||||||
}
|
}
|
||||||
@ -57,7 +57,7 @@ my $l2input = "$corpus.$l2";
|
|||||||
if (-e $l2input) {
|
if (-e $l2input) {
|
||||||
$opn = $l2input;
|
$opn = $l2input;
|
||||||
} elsif (-e $l2input.".gz") {
|
} elsif (-e $l2input.".gz") {
|
||||||
$opn = "zcat $l2input.gz |";
|
$opn = "gunzip -c $l2input.gz |";
|
||||||
} else {
|
} else {
|
||||||
die "Error: $l2input does not exist";
|
die "Error: $l2input does not exist";
|
||||||
}
|
}
|
||||||
@ -160,3 +160,4 @@ sub word_count {
|
|||||||
my @w = split(/ /,$line);
|
my @w = split(/ /,$line);
|
||||||
return scalar @w;
|
return scalar @w;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user