mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-08 20:46:59 +03:00
10a0a7b05a
Oops. Forgot these in my previous commit. Sorry!
70 lines
1.8 KiB
C++
70 lines
1.8 KiB
C++
#include "util/tokenize.hh"
|
|
|
|
#define BOOST_TEST_MODULE TokenizeTest
|
|
#include <boost/test/unit_test.hpp>
|
|
|
|
namespace util
|
|
{
|
|
namespace
|
|
{
|
|
|
|
BOOST_AUTO_TEST_CASE(empty_text_yields_empty_vector)
|
|
{
|
|
const std::vector<std::string> tokens = util::tokenize("");
|
|
BOOST_CHECK_EQUAL(tokens.size(), 0);
|
|
}
|
|
|
|
BOOST_AUTO_TEST_CASE(whitespace_only_yields_empty_vector)
|
|
{
|
|
const std::vector<std::string> tokens = util::tokenize(" ");
|
|
BOOST_CHECK_EQUAL(tokens.size(), 0);
|
|
}
|
|
|
|
BOOST_AUTO_TEST_CASE(parses_single_token)
|
|
{
|
|
const std::vector<std::string> tokens = util::tokenize("mytoken");
|
|
BOOST_CHECK_EQUAL(tokens.size(), 1);
|
|
BOOST_CHECK_EQUAL(tokens[0], "mytoken");
|
|
}
|
|
|
|
BOOST_AUTO_TEST_CASE(ignores_leading_whitespace)
|
|
{
|
|
const std::vector<std::string> tokens = util::tokenize(" \t mytoken");
|
|
BOOST_CHECK_EQUAL(tokens.size(), 1);
|
|
BOOST_CHECK_EQUAL(tokens[0], "mytoken");
|
|
}
|
|
|
|
BOOST_AUTO_TEST_CASE(ignores_trailing_whitespace)
|
|
{
|
|
const std::vector<std::string> tokens = util::tokenize("mytoken \t ");
|
|
BOOST_CHECK_EQUAL(tokens.size(), 1);
|
|
BOOST_CHECK_EQUAL(tokens[0], "mytoken");
|
|
}
|
|
|
|
BOOST_AUTO_TEST_CASE(splits_tokens_on_tabs)
|
|
{
|
|
const std::vector<std::string> tokens = util::tokenize("one\ttwo");
|
|
BOOST_CHECK_EQUAL(tokens.size(), 2);
|
|
BOOST_CHECK_EQUAL(tokens[0], "one");
|
|
BOOST_CHECK_EQUAL(tokens[1], "two");
|
|
}
|
|
|
|
BOOST_AUTO_TEST_CASE(splits_tokens_on_spaces)
|
|
{
|
|
const std::vector<std::string> tokens = util::tokenize("one two");
|
|
BOOST_CHECK_EQUAL(tokens.size(), 2);
|
|
BOOST_CHECK_EQUAL(tokens[0], "one");
|
|
BOOST_CHECK_EQUAL(tokens[1], "two");
|
|
}
|
|
|
|
BOOST_AUTO_TEST_CASE(treats_sequence_of_space_as_one_space)
|
|
{
|
|
const std::vector<std::string> tokens = util::tokenize("one\t \ttwo");
|
|
BOOST_CHECK_EQUAL(tokens.size(), 2);
|
|
BOOST_CHECK_EQUAL(tokens[0], "one");
|
|
BOOST_CHECK_EQUAL(tokens[1], "two");
|
|
}
|
|
|
|
} // namespace
|
|
} // namespace util
|