mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 19:37:58 +03:00
Merge pull request #212 from moses-smt/alvations-patch-regexes
The dot before an acronym should be optional.
This commit is contained in:
commit
fd06cdf026
@@ -191,7 +191,7 @@ sub preprocess {
|
||||
my $starting_punct = $2;
|
||||
if ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 1 && !$starting_punct) {
|
||||
# Not breaking;
|
||||
- } elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) {
|
||||
+ } elsif ($words[$i] =~ /(\.?)[\p{IsUpper}\-]+(\.+)$/) {
|
||||
# Not breaking - upper case acronym
|
||||
} elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) {
|
||||
# The next word has a bunch of initial quotes, maybe a
|
||||
|
Loading…
Reference in New Issue
Block a user