mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
Fix a detokenization bug that prevented removal of the whitespace following a contracted French or Italian article/pronoun (e.g. "l' immigration") when the contraction was the second-last word in the segment.
Remove the expectation of failure on the corresponding unit test.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4133 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
79142d18e6
commit
14587cdafc
@ -82,9 +82,7 @@ Moi, j'ai une apostrophe.
|
||||
EXP
|
||||
);
|
||||
|
||||
# A (failing) French test involving an apostrophe on the second-last word
|
||||
{
|
||||
my $testCase =
|
||||
# A French test involving an apostrophe on the second-last word
|
||||
&addDetokenizerTest("TEST_FRENCH_APOSTROPHE_PENULTIMATE", "fr",
|
||||
<<'TOK'
|
||||
de musique rap issus de l' immigration
|
||||
@ -95,9 +93,6 @@ de musique rap issus de l'immigration
|
||||
EXP
|
||||
);
|
||||
|
||||
$testCase->setExpectedToFail("A bug is causing this to be detokenized wrong.");
|
||||
}
|
||||
|
||||
# A German test involving non-ASCII characters
|
||||
# Note: We don't specify a language because the detokenizer errors if you pass in a language for which it has no special rules, of which German is an example.
|
||||
&addDetokenizerTest("TEST_GERMAN_NONASCII", undef,
|
||||
|
@ -92,7 +92,7 @@ sub detokenize {
|
||||
#left-shift floats in Czech
|
||||
$text=$text.$words[$i];
|
||||
$prependSpace = " ";
|
||||
} elsif ((($language eq "fr") ||($language eq "it")) && ($i<(scalar(@words)-2)) && ($words[$i] =~ /[\p{IsAlpha}][\']$/) && ($words[$i+1] =~ /^[\p{IsAlpha}]/)) {
|
||||
} elsif ((($language eq "fr") ||($language eq "it")) && ($i<=(scalar(@words)-2)) && ($words[$i] =~ /[\p{IsAlpha}][\']$/) && ($words[$i+1] =~ /^[\p{IsAlpha}]/)) {
|
||||
#right-shift the contraction for French and Italian
|
||||
$text = $text.$prependSpace.$words[$i];
|
||||
$prependSpace = "";
|
||||
|
Loading…
Reference in New Issue
Block a user