Merge pull request #212 from moses-smt/alvations-patch-regexes

The dot before an acronym should be optional.
This commit is contained in:
Hieu Hoang 2019-09-04 08:07:06 +01:00 committed by GitHub
commit fd06cdf026
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -191,7 +191,7 @@ sub preprocess {
my $starting_punct = $2;
if ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 1 && !$starting_punct) {
# Not breaking;
} elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) {
} elsif ($words[$i] =~ /(\.?)[\p{IsUpper}\-]+(\.+)$/) {
# Not breaking - upper case acronym
} elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) {
# The next word has a bunch of initial quotes, maybe a