ignore words where there is nothing to case

This commit is contained in:
Ondrej Bojar 2017-04-07 17:28:13 +02:00
parent 1d65006190
commit d9faf8f901

View File

@@ -44,6 +44,12 @@ while(<CORPUS>) {
$firstWordOfSentence = 1;
}
if ($currentWord !~ /[\p{Ll}\p{Lu}\p{Lt}]/) {
# skip words with nothing to case
$firstWordOfSentence = 0;
next;
}
my $currentWordWeight = 0;
if (! $firstWordOfSentence) {
$currentWordWeight = 1;