mosesdecoder/mert/UtilTest.cpp
Tetsuo Kiso 8c7dfe04e7 Bug fix: tokenizer used in mert; add unit tests for that.
When tokenizing a space-delimited string that ends with a delimiter (say, "9 9 8 7 ")
with Tokenize(), the resulting sequence of strings is
{"9", "9", "8", "7", "" }, which is different
from what we expected. We are not interested in empty strings.

This commit fixes this issue and adds unit tests for
the tokenize functions.
2012-02-20 07:39:24 +09:00

66 lines
1.5 KiB
C++

#include "Util.h"
#define BOOST_TEST_MODULE UtilTest
#include <boost/test/unit_test.hpp>
// Checks that getNextPound() peels tokens off the front of a string one call
// at a time, both when called without a delimiter argument and when called
// with an explicit "," delimiter.
BOOST_AUTO_TEST_CASE(util_get_next_pound_test) {
  // Case 1: no delimiter argument is passed. The input ends with a trailing
  // space and must still yield exactly three tokens.
  {
    std::vector<std::string> tokens;
    std::string token;
    std::string remaining = "9 9 7 ";
    // The loop ends only once `remaining` is empty, so it relies on
    // getNextPound() consuming input on every call.
    for (; !remaining.empty(); tokens.push_back(token)) {
      getNextPound(remaining, token);
    }
    BOOST_REQUIRE(tokens.size() == 3);
    BOOST_CHECK_EQUAL("9", tokens[0]);
    BOOST_CHECK_EQUAL("9", tokens[1]);
    BOOST_CHECK_EQUAL("7", tokens[2]);
  }

  // Case 2: explicit comma delimiter, no trailing delimiter in the input.
  {
    const std::string comma(",");
    std::vector<std::string> tokens;
    std::string token;
    std::string remaining = "ref.0,ref.1,ref.2";
    for (; !remaining.empty(); tokens.push_back(token)) {
      getNextPound(remaining, token, comma);
    }
    BOOST_REQUIRE(tokens.size() == 3);
    BOOST_CHECK_EQUAL("ref.0", tokens[0]);
    BOOST_CHECK_EQUAL("ref.1", tokens[1]);
    BOOST_CHECK_EQUAL("ref.2", tokens[2]);
  }
}
// Checks that Tokenize() splits on a single-character delimiter and that a
// trailing delimiter does not add an extra (empty) token to the output.
BOOST_AUTO_TEST_CASE(util_tokenize_test) {
  // Space-delimited input without a trailing delimiter.
  {
    std::vector<std::string> tokens;
    Tokenize("9 9 7", ' ', &tokens);
    BOOST_REQUIRE(tokens.size() == 3);
    BOOST_CHECK_EQUAL("9", tokens[0]);
    BOOST_CHECK_EQUAL("9", tokens[1]);
    BOOST_CHECK_EQUAL("7", tokens[2]);
  }

  // Space-delimited input with a trailing space: still three tokens.
  {
    std::vector<std::string> tokens;
    Tokenize("9 8 7 ", ' ', &tokens);
    BOOST_REQUIRE(tokens.size() == 3);
    BOOST_CHECK_EQUAL("9", tokens[0]);
    BOOST_CHECK_EQUAL("8", tokens[1]);
    BOOST_CHECK_EQUAL("7", tokens[2]);
  }

  // Comma-delimited input with a trailing comma: only the two real fields.
  {
    std::vector<std::string> tokens;
    Tokenize("ref.0,ref.1,", ',', &tokens);
    BOOST_REQUIRE(tokens.size() == 2);
    BOOST_CHECK_EQUAL("ref.0", tokens[0]);
    BOOST_CHECK_EQUAL("ref.1", tokens[1]);
  }
}