mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
8c7dfe04e7
When tokenizing a string delimited by spaces (say, "9 9 8 7 ") with Tokenize(), the resulting sequence of strings is {"9", "9", "8", "7", ""}, which is different from what we expected: we are not interested in empty strings. This commit fixes this issue and adds unit tests for the tokenize functions.
66 lines
1.5 KiB
C++
66 lines
1.5 KiB
C++
#include "Util.h"
|
|
|
|
#define BOOST_TEST_MODULE UtilTest
|
|
#include <boost/test/unit_test.hpp>
|
|
|
|
// Drives getNextPound() in a loop: each call is handed the remaining input and
// an output string, and the loop runs until the remaining input is empty.
// Every extracted piece is collected and compared against the expected list.
BOOST_AUTO_TEST_CASE(util_get_next_pound_test) {
  // Case 1: space-separated input with a trailing space; no delimiter
  // argument is passed. Exactly three pieces are expected.
  {
    std::vector<std::string> res;
    std::string remaining = "9 9 7 ";
    std::string piece;

    while (!remaining.empty()) {
      getNextPound(remaining, piece);
      res.push_back(piece);
    }

    // REQUIRE aborts the test case on a size mismatch so the indexed checks
    // below never read past the end of the vector.
    BOOST_REQUIRE(res.size() == 3);
    BOOST_CHECK_EQUAL("9", res[0]);
    BOOST_CHECK_EQUAL("9", res[1]);
    BOOST_CHECK_EQUAL("7", res[2]);
  }

  // Case 2: comma-separated input with an explicit "," delimiter and no
  // trailing delimiter. Exactly three pieces are expected.
  {
    std::vector<std::string> res;
    const std::string delim = ",";
    std::string remaining = "ref.0,ref.1,ref.2";
    std::string piece;

    while (!remaining.empty()) {
      getNextPound(remaining, piece, delim);
      res.push_back(piece);
    }

    BOOST_REQUIRE(res.size() == 3);
    BOOST_CHECK_EQUAL("ref.0", res[0]);
    BOOST_CHECK_EQUAL("ref.1", res[1]);
    BOOST_CHECK_EQUAL("ref.2", res[2]);
  }
}
|
|
|
|
// Verifies Tokenize() with a single-character delimiter. Each case passes the
// input text, the delimiter, and a pointer to the output vector, then checks
// the number and content of the produced tokens.
BOOST_AUTO_TEST_CASE(util_tokenize_test) {
  // Case 1: space-delimited input without a trailing delimiter.
  {
    std::vector<std::string> res;
    Tokenize("9 9 7", ' ', &res);

    // REQUIRE stops the test case on a size mismatch, keeping the indexed
    // checks below in bounds.
    BOOST_REQUIRE(res.size() == 3);
    BOOST_CHECK_EQUAL("9", res[0]);
    BOOST_CHECK_EQUAL("9", res[1]);
    BOOST_CHECK_EQUAL("7", res[2]);
  }

  // Case 2: space-delimited input WITH a trailing delimiter; the expected
  // result still has exactly three tokens (no trailing empty string).
  {
    std::vector<std::string> res;
    Tokenize("9 8 7 ", ' ', &res);

    BOOST_REQUIRE(res.size() == 3);
    BOOST_CHECK_EQUAL("9", res[0]);
    BOOST_CHECK_EQUAL("8", res[1]);
    BOOST_CHECK_EQUAL("7", res[2]);
  }

  // Case 3: comma-delimited input with a trailing comma; exactly two tokens
  // are expected.
  {
    std::vector<std::string> res;
    Tokenize("ref.0,ref.1,", ',', &res);

    BOOST_REQUIRE(res.size() == 2);
    BOOST_CHECK_EQUAL("ref.0", res[0]);
    BOOST_CHECK_EQUAL("ref.1", res[1]);
  }
}
|