From 555829a771cd897bb807f495a95737953a7ca9a3 Mon Sep 17 00:00:00 2001 From: alvations Date: Tue, 1 Oct 2019 05:27:06 +0800 Subject: [PATCH] Undoing 05788925812f0d3265e355565cbb1701a0ad7510 That commit causes sentences not to be split when they end with an upper-case abbreviation followed by a full stop. E.g. > The restructuring of IBM was essential to enable it organisationally to take up the responsibilities entrusted in the role with the recent changes in the policy and legislations, revised charter of function of IBM and the new activities and initiatives undertaken by IBM. IBM is also engaged in handholding the States for auction of mineral blocks for greater transparency in allocation of mineral concessions. --- scripts/ems/support/split-sentences.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl index 2c2319a12..f3494bc88 100755 --- a/scripts/ems/support/split-sentences.perl +++ b/scripts/ems/support/split-sentences.perl @@ -193,7 +193,7 @@ sub preprocess { my $starting_punct = $2; if ($prefix && $NONBREAKING_PREFIX{$prefix} && $NONBREAKING_PREFIX{$prefix} == 1 && !$starting_punct) { # Not breaking; - } elsif ($words[$i] =~ /(\.?)[\p{IsUpper}\-]+(\.+)$/) { + } elsif ($words[$i] =~ /(\.)[\p{IsUpper}\-]+(\.+)$/) { # Not breaking - upper case acronym } elsif($words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/) { # The next word has a bunch of initial quotes, maybe a