don't normalise quotes if tokenizing like Penn /Phil Williams

This commit is contained in:
Hieu Hoang 2015-01-16 12:34:22 +00:00
parent 19d7c44aad
commit 30e31d4a95

View File

@@ -3,11 +3,14 @@
use strict;

# Defaults: input language is English; the -penn switch is off.
my ($language, $PENN) = ("en", 0);

# Walk the command line one argument at a time. At most one rule applies
# to each argument, tried in the order written below; an argument that
# matches none of the rules is silently ignored.
while (@ARGV) {
    $_ = shift @ARGV;
    if (/^-b$/) {
        # Turn on autoflush for the currently selected output handle.
        $| = 1;
    }
    elsif (/^-l$/) {
        # The language code is the argument following -l
        # (undef when -l is the last argument).
        $language = shift @ARGV;
    }
    elsif (/^[^\-]/) {
        # Any argument that does not start with a dash is taken as the
        # language code.
        $language = $_;
    }
    elsif (/^-penn$/) {
        # Flag tested later in the script to skip the backtick /
        # double-apostrophe quote rewriting.
        $PENN = 1;
    }
}
while(<STDIN>) {
@@ -22,8 +25,11 @@ while(<STDIN>) {
s/ :/:/g;
s/ ;/;/g;
# normalize unicode punctuation
s/\`/\'/g;
s/\'\'/ \" /g;
if ($PENN == 0) {
s/\`/\'/g;
s/\'\'/ \" /g;
}
s/„/\"/g;
s/“/\"/g;
s/”/\"/g;