Remove duplicate spaces left behind when XML tags are stripped

This commit is contained in:
Hieu Hoang 2014-01-24 18:09:55 +00:00
parent dc3d5b8d38
commit b6d47733da
2 changed files with 15 additions and 2 deletions

View File

@ -8,6 +8,7 @@ while (my $line = <STDIN>) {
my $len = length($line);
my $inXML = 0;
my $prevSpace = 1;
for (my $i = 0; $i < $len; ++$i) {
my $c = substr($line, $i, 1);
@ -17,10 +18,20 @@ while (my $line = <STDIN>) {
elsif ($c eq ">") {
--$inXML;
}
elsif ($prevSpace == 1 && $c eq " ")
{ # duplicate space. Do nothing
}
elsif ($inXML == 0) {
if ($c eq " ") {
$prevSpace = 1;
}
else {
$prevSpace = 0;
}
print $c;
}
}
print "\n";
}

View File

@ -1,4 +1,4 @@
#!/usr/bin/perl -w
#!/usr/bin/perl -w
# $Id$
# Given a moses.ini file and an input text prepare minimized translation
@ -255,7 +255,9 @@ if ($opt_hierarchical) {
open(FILEHANDLE,">$filename") or die "Can't open $filename for writing";
$TMP_INPUT_FILENAME{$key} = $filename;
my @FACTOR = split(/,/, $key);
open(PIPE,"$SCRIPTS_ROOTDIR/training/reduce_combine.pl $input @FACTOR |");
my $cmd = "$SCRIPTS_ROOTDIR/training/reduce_combine.pl $input @FACTOR |";
print STDERR "Executing: $cmd\n";
open(PIPE,$cmd);
while (my $line = <PIPE>) {
print FILEHANDLE $line
}