mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
don't normalise quotes if tokenizing like Penn /Phil Williams
This commit is contained in:
parent
19d7c44aad
commit
30e31d4a95
@@ -3,11 +3,14 @@
|
||||
use strict;
|
||||
|
||||
my $language = "en";
|
||||
my $PENN = 0;
|
||||
|
||||
while (@ARGV) {
|
||||
$_ = shift;
|
||||
/^-b$/ && ($| = 1, next); # not buffered (flush each line)
|
||||
/^-l$/ && ($language = shift, next);
|
||||
/^[^\-]/ && ($language = $_, next);
|
||||
/^-penn$/ && ($PENN = 1, next);
|
||||
}
|
||||
|
||||
while(<STDIN>) {
|
||||
@@ -22,8 +25,11 @@ while(<STDIN>) {
|
||||
s/ :/:/g;
|
||||
s/ ;/;/g;
|
||||
# normalize unicode punctuation
|
||||
s/\`/\'/g;
|
||||
s/\'\'/ \" /g;
|
||||
if ($PENN == 0) {
|
||||
s/\`/\'/g;
|
||||
s/\'\'/ \" /g;
|
||||
}
|
||||
|
||||
s/„/\"/g;
|
||||
s/“/\"/g;
|
||||
s/”/\"/g;
|
||||
|
Loading…
Reference in New Issue
Block a user