Add a few more detokenization tests, including one marked TODO that exposes a bug

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4124 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bgottesman 2011-08-05 16:23:47 +00:00
parent f8a99e5d6d
commit 76c3ef4dba

View File

@@ -49,16 +49,51 @@ This one is no more difficult, but, hey, it is on a new line.
EXP
);
# An English test involving double-quotes.
# The call passes, in order: a test label, the language code ("en"), the
# tokenized input (TOK heredoc) and the expected text (EXP heredoc).
# runDetokenizerTest is defined outside this excerpt; presumably it
# detokenizes the input and compares the result with EXP -- confirm there.
# The expected text drops the spaces just inside the double quotes and the
# space before the final period.
# The quoted heredoc tags ('TOK', 'EXP') keep the text literal: no variable
# interpolation is applied to it.
&runDetokenizerTest("TEST_ENGLISH_DOUBLEQUOTES", "en",
<<'TOK',
This is a somewhat " less simple " test .
TOK
<<'EXP'
This is a somewhat "less simple" test.
EXP
);
# A simple French test.
# The call passes, in order: a test label, the language code ("fr"), the
# tokenized input (TOK heredoc) and the expected text (EXP heredoc).
# Both heredocs hold two sentences, one per line; the expected text differs
# from the input only by the removed space before each final period.
# NOTE(review): runDetokenizerTest is not visible in this excerpt, so how it
# handles multi-line input should be confirmed against its definition.
&runDetokenizerTest("TEST_FRENCH_EASY", "fr",
<<'TOK',
Ici une phrase simple .
Voici une phrase simple .
TOK
<<'EXP'
Ici une phrase simple.
Voici une phrase simple.
EXP
);
# A French test involving an apostrophe.
# The call passes, in order: a test label, the language code ("fr"), the
# tokenized input (TOK heredoc) and the expected text (EXP heredoc).
# In the expected text the elided pronoun is rejoined with the following
# word ("j' ai" becomes "j'ai"), and the spaces before the comma and before
# the final period are removed.
# The quoted heredoc tags keep the apostrophe and the rest of the text
# literal (no interpolation).
&runDetokenizerTest("TEST_FRENCH_APOSTROPHE", "fr",
<<'TOK',
Moi , j' ai une apostrophe .
TOK
<<'EXP'
Moi, j'ai une apostrophe.
EXP
);
# Known-failing case, wrapped in a labelled TODO block.
# `local $TODO` holds the reason string only for the duration of this block;
# the previous value is restored when the block exits.
# NOTE(review): this is the Test::More TODO idiom, under which a failure of
# the enclosed test is reported as "TODO" rather than as a real failure --
# this assumes runDetokenizerTest reports through Test::More, which is not
# visible in this excerpt.
TODO: {
local $TODO = "A bug is causing this to be detokenized wrong.";
# A French test involving an apostrophe on the second-last word.
# The input has no final punctuation, so the elided article "l'" is the
# penultimate token; the expected text joins it to the last word
# ("l' immigration" becomes "l'immigration").
&runDetokenizerTest("TEST_FRENCH_APOSTROPHE_PENULTIMATE", "fr",
<<'TOK',
de musique rap issus de l' immigration
TOK
<<'EXP'
de musique rap issus de l'immigration
EXP
);
}
######################################
# end of individual test cases
######################################