Merge pull request #28 from amittai/amittai

three small things
This commit is contained in:
Hieu Hoang 2013-02-27 07:33:52 -08:00
commit 68dbe85545
3 changed files with 8 additions and 7 deletions

View File

@ -13,10 +13,10 @@ chomp(@OUT);
while(<SRC>) { while(<SRC>) {
chomp; chomp;
if (/^<srcset/) { if (/^<srcset/) {
s/<srcset/<tstset trglang="$language"/; s/<srcset/<tstset trglang="$language"/i;
} }
elsif (/^<\/srcset/) { elsif (/^<\/srcset/) {
s/<\/srcset/<\/tstset/; s/<\/srcset/<\/tstset/i;
} }
elsif (/^<doc/i) { elsif (/^<doc/i) {
s/ *sysid="[^\"]+"//; s/ *sysid="[^\"]+"//;
@ -26,10 +26,10 @@ while(<SRC>) {
my $line = shift(@OUT); my $line = shift(@OUT);
$line = "" if $line =~ /NO BEST TRANSLATION/; $line = "" if $line =~ /NO BEST TRANSLATION/;
if (/<\/seg>/) { if (/<\/seg>/) {
s/(<seg[^>]+> *).*(<\/seg>)/$1$line$2/; s/(<seg[^>]+> *).*(<\/seg>)/$1$line$2/i;
} }
else { else {
s/(<seg[^>]+> *)[^<]*/$1$line/; s/(<seg[^>]+> *)[^<]*/$1$line/i;
} }
} }
print $_."\n"; print $_."\n";

View File

@ -171,7 +171,7 @@ if ($TIMING)
# tokenize a batch of texts saved in an array # tokenize a batch of texts saved in an array
# input: an array containing a batch of texts # input: an array containing a batch of texts
# return: another array cotaining a batch of tokenized texts for the input array # return: another array containing a batch of tokenized texts for the input array
sub tokenize_batch sub tokenize_batch
{ {
my(@text_list) = @_; my(@text_list) = @_;

View File

@ -47,7 +47,7 @@ my $l1input = "$corpus.$l1";
if (-e $l1input) { if (-e $l1input) {
$opn = $l1input; $opn = $l1input;
} elsif (-e $l1input.".gz") { } elsif (-e $l1input.".gz") {
$opn = "zcat $l1input.gz |"; $opn = "gunzip -c $l1input.gz |";
} else { } else {
die "Error: $l1input does not exist"; die "Error: $l1input does not exist";
} }
@ -57,7 +57,7 @@ my $l2input = "$corpus.$l2";
if (-e $l2input) { if (-e $l2input) {
$opn = $l2input; $opn = $l2input;
} elsif (-e $l2input.".gz") { } elsif (-e $l2input.".gz") {
$opn = "zcat $l2input.gz |"; $opn = "gunzip -c $l2input.gz |";
} else { } else {
die "Error: $l2input does not exist"; die "Error: $l2input does not exist";
} }
@ -160,3 +160,4 @@ sub word_count {
my @w = split(/ /,$line); my @w = split(/ /,$line);
return scalar @w; return scalar @w;
} }