From 14587cdafc42cdbff9221c07b5545551ecec475b Mon Sep 17 00:00:00 2001
From: bgottesman <bgottesman@1f5c12ca-751b-0410-a591-d2e778427230>
Date: Mon, 8 Aug 2011 15:02:56 +0000
Subject: [PATCH] fix a detokenization bug that was preventing the removal of
 the whitespace following a contracted French or Italian article/pronoun (e.g.
 "l' immigration") when the contraction was the second-last word in the
 segment

Remove the expectation of failure from the corresponding unit test (TEST_FRENCH_APOSTROPHE_PENULTIMATE).


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4133 1f5c12ca-751b-0410-a591-d2e778427230
---
 regression-testing/run-test-detokenizer.t | 7 +------
 scripts/tokenizer/detokenizer.perl        | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/regression-testing/run-test-detokenizer.t b/regression-testing/run-test-detokenizer.t
index f9cc3423a..9d677b43e 100644
--- a/regression-testing/run-test-detokenizer.t
+++ b/regression-testing/run-test-detokenizer.t
@@ -82,9 +82,7 @@ Moi, j'ai une apostrophe.
 EXP
 );
 
-# A (failing) French test involving an apostrophe on the second-last word
-{
-my $testCase =
+# A French test involving an apostrophe on the second-last word
 &addDetokenizerTest("TEST_FRENCH_APOSTROPHE_PENULTIMATE", "fr",
 <<'TOK'
 de musique rap issus de l' immigration
@@ -95,9 +93,6 @@ de musique rap issus de l'immigration
 EXP
 );
 
-$testCase->setExpectedToFail("A bug is causing this to be detokenized wrong.");
-}
-
 # A German test involving non-ASCII characters
 # Note: We don't specify a language because the detokenizer errors if you pass in a language for which it has no special rules, of which German is an example.
 &addDetokenizerTest("TEST_GERMAN_NONASCII", undef,
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index 0b8e5af73..f049b8080 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -92,7 +92,7 @@ sub detokenize {
 			#left-shift floats in Czech
 			$text=$text.$words[$i];
 			$prependSpace = " ";
-		}  elsif ((($language eq "fr") ||($language eq "it")) && ($i<(scalar(@words)-2)) && ($words[$i] =~ /[\p{IsAlpha}][\']$/) && ($words[$i+1] =~ /^[\p{IsAlpha}]/)) {
+		}  elsif ((($language eq "fr") ||($language eq "it")) && ($i<=(scalar(@words)-2)) && ($words[$i] =~ /[\p{IsAlpha}][\']$/) && ($words[$i+1] =~ /^[\p{IsAlpha}]/)) {
 			#right-shift the contraction for French and Italian
 			$text = $text.$prependSpace.$words[$i];
 			$prependSpace = "";