mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 19:37:58 +03:00
Changed \p{Hyphen} to \p{Line_Break}
Under Perl v5.18.2, the script emits this warning: **Use of 'Hyphen' in \p{} or \P{} is deprecated because: Supplanted by Line_Break property values; see www.unicode.org/reports/tr14**
This commit is contained in:
parent
25b87e14de
commit
c6c3bc84b7
@ -19,6 +19,8 @@ binmode STDERR, ":utf8";
|
|||||||
# version 13a
|
# version 13a
|
||||||
# * modified the scoring functions to prevent division-by-zero errors when a system segment is empty
|
# * modified the scoring functions to prevent division-by-zero errors when a system segment is empty
|
||||||
# * affected methods: 'bleu_score' and 'bleu_score_smoothing'
|
# * affected methods: 'bleu_score' and 'bleu_score_smoothing'
|
||||||
|
# * use \p{Line_Break} instead of \p{Hyphen} when stripping end-of-line hyphenation and joining lines
|
||||||
|
# * because \p{Hyphen} has been deprecated since 2016-06-01, see http://www.unicode.org/reports/tr14/#Hyphen
|
||||||
#
|
#
|
||||||
# version 13
|
# version 13
|
||||||
# * Uses a XML parser to read data (only when extension is .xml)
|
# * Uses a XML parser to read data (only when extension is .xml)
|
||||||
@ -948,7 +950,7 @@ sub tokenization_international
|
|||||||
my ($norm_text) = @_;
|
my ($norm_text) = @_;
|
||||||
|
|
||||||
$norm_text =~ s/<skipped>//g; # strip "skipped" tags
|
$norm_text =~ s/<skipped>//g; # strip "skipped" tags
|
||||||
$norm_text =~ s/\p{Hyphen}\p{Zl}//g; # strip end-of-line hyphenation and join lines
|
$norm_text =~ s/\p{Line_Break}\p{Zl}//g; # strip end-of-line hyphenation and join lines
|
||||||
$norm_text =~ s/\p{Zl}/ /g; # join lines
|
$norm_text =~ s/\p{Zl}/ /g; # join lines
|
||||||
|
|
||||||
# replace entities
|
# replace entities
|
||||||
|
Loading…
Reference in New Issue
Block a user