Merge pull request #190 from moses-smt/patch-detokenizer-ko

Korean sentences have spaces =)
This commit is contained in:
Hieu Hoang 2018-01-19 10:54:50 +00:00 committed by GitHub
commit 5be48ce9db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -106,7 +106,7 @@ sub detokenize {
my $prependSpace = " ";
for ($i=0;$i<(scalar(@words));$i++) {
if (&startsWithCJKChar($words[$i])) {
-if ($i > 0 && &endsWithCJKChar($words[$i-1])) {
+if (($i > 0 && &endsWithCJKChar($words[$i-1])) && ($language ne "ko")) {
# perform left shift if this is a second consecutive CJK (Chinese/Japanese/Korean) word
$text=$text.$words[$i];
} else {