From cdc4179ce130fceba77422f774554cb1073c906b Mon Sep 17 00:00:00 2001 From: rafpayen Date: Thu, 16 Jun 2011 17:24:25 +0000 Subject: [PATCH] Add a space before double punctuation signs in French git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4016 1f5c12ca-751b-0410-a591-d2e778427230 --- scripts/tokenizer/detokenizer.perl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl index 3538fd6db..0b8e5af73 100755 --- a/scripts/tokenizer/detokenizer.perl +++ b/scripts/tokenizer/detokenizer.perl @@ -78,6 +78,9 @@ sub detokenize { $text = $text.$prependSpace.$words[$i]; $prependSpace = ""; } elsif ($words[$i] =~ /^[\,\.\?\!\:\;\\\%\}\]\)]+$/){ + if (($language eq "fr") && ($words[$i] =~ /^[\?\!\:\;\\\%]$/)) { + #these punctuations are prefixed with a non-breakable space in french + $text .= " "; } #perform left shift on punctuation items $text=$text.$words[$i]; $prependSpace = " ";