Add a test and a multi-token breaker

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@4357 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
heafield 2011-10-14 11:51:15 +00:00
parent 897fe0f88b
commit 7b129fa461
3 changed files with 128 additions and 1 deletions

View File

@ -2,7 +2,7 @@
#Run tests. Requires Boost.
set -e
./compile.sh
# Build each listed unit test against the lm/ and util/ object files produced by
# ./compile.sh, then run the binary from inside its own directory.
# A test that exits non-zero is reported as "<path> failed"; because the failure
# is consumed by the `||` branch, `set -e` does not abort the remaining tests.
for i in util/{bit_packing,file_piece,joint_sort,key_value_packing,probing_hash_table,sorted_uniform}_test lm/{model,left}_test; do
for i in util/{bit_packing,file_piece,joint_sort,key_value_packing,probing_hash_table,sorted_uniform,tokenize_piece}_test lm/{model,left}_test; do
g++ -I. -O3 $CXXFLAGS $i.cc {lm,util}/*.o -lboost_test_exec_monitor -lz -o $i
pushd $(dirname $i) >/dev/null && ./$(basename $i) || echo "$i failed"; popd >/dev/null
done

View File

@ -5,6 +5,8 @@
#include <boost/iterator/iterator_facade.hpp>
#include <algorithm>
/* Usage:
*
* for (PieceIterator<' '> i(" foo \r\n bar "); i; ++i) {
@ -64,6 +66,52 @@ template <char d> class PieceIterator : public boost::iterator_facade<PieceItera
StringPiece after_;
};
/* Forward iterator that splits a StringPiece on a multi-character delimiter.
 *
 * Unlike a whitespace-style splitter, empty tokens are reported: every
 * occurrence of the delimiter separates two tokens, so the text "|||" split
 * on "|||" yields two empty tokens.
 *
 * Usage:
 *   for (MultiTokenIterator i(text, "|||"); i; ++i) { use(*i); }
 * or, equivalently, compare against MultiTokenIterator::end().
 *
 * Preconditions / caveats:
 *  - delim must be non-empty.  std::search matches an empty needle at the
 *    start of the range, so the iterator would never advance.
 *  - The iterator keeps StringPiece members for the text and the delimiter;
 *    presumably StringPiece does not own its bytes, so both must outlive the
 *    iterator (confirm against util/string_piece.hh).
 *  - Exhaustion is signalled by current_.data() being null, which assumes a
 *    default-constructed StringPiece has a null data() pointer.
 */
class MultiTokenIterator : public boost::iterator_facade<MultiTokenIterator, const StringPiece, boost::forward_traversal_tag> {
  public:
    // Builds the past-the-end iterator (no current token).
    MultiTokenIterator() {}

    // Starts tokenizing str on delim and positions the iterator on the first token.
    MultiTokenIterator(const StringPiece &str, const StringPiece &delim) : after_(str), delimiter_(delim) {
      increment();
    }

    // True once every token has been consumed.
    bool operator!() const {
      return current_.data() == 0;
    }
    // True while the iterator refers to a token.
    operator bool() const {
      return current_.data() != 0;
    }

    // Past-the-end sentinel for use with standard algorithms.
    static MultiTokenIterator end() {
      return MultiTokenIterator();
    }

  private:
    friend class boost::iterator_core_access;

    // Moves current_ to the next token and shrinks after_ to the text that follows it.
    void increment() {
      const char *const begin = after_.data();
      const char *const finish = begin + after_.size();
      const char *const found = std::search(begin, finish, delimiter_.data(), delimiter_.data() + delimiter_.size());
      current_ = StringPiece(begin, found - begin);
      if (found == finish) {
        // No further delimiter: current_ is the last token.  Nulling after_
        // makes the next increment() produce a null current_, i.e. end().
        after_ = StringPiece(NULL, 0);
      } else {
        const char *const rest = found + delimiter_.size();
        after_ = StringPiece(rest, finish - rest);
      }
    }

    // Compare on the current token, not on after_: after_ is already null
    // while the iterator still points at the final token, so comparing it
    // would make that last token look equal to end() and be skipped.
    bool equal(const MultiTokenIterator &other) const {
      return current_.data() == other.current_.data();
    }

    const StringPiece &dereference() const {
      return current_;
    }

    StringPiece current_;    // Token the iterator currently refers to; null data at end.
    StringPiece after_;      // Text remaining after current_ and its trailing delimiter.
    StringPiece delimiter_;  // Separator searched for between tokens.
};
} // namespace util
#endif // UTIL_TOKENIZE_PIECE__

View File

@ -0,0 +1,79 @@
#include "util/tokenize_piece.hh"
#include "util/string_piece.hh"
#define BOOST_TEST_MODULE TokenIteratorTest
#include <boost/test/unit_test.hpp>
#include <iostream>
namespace util {
namespace {
// Splitting on single spaces yields each word in order, then the iterator is exhausted.
BOOST_AUTO_TEST_CASE(simple) {
  const char *const expected[] = {"single", "spaced", "words."};
  const char *const *const expected_end = expected + sizeof(expected) / sizeof(expected[0]);

  PieceIterator<' '> it("single spaced words.");
  for (const char *const *want = expected; want != expected_end; ++want, ++it) {
    // REQUIRE: dereferencing an exhausted iterator would be meaningless.
    BOOST_REQUIRE(it);
    BOOST_CHECK_EQUAL(StringPiece(*want), *it);
  }
  BOOST_CHECK(!it);
}
// With '\0' as the delimiter, leading, trailing and repeated NULs produce no
// empty tokens: only the four words come back.
BOOST_AUTO_TEST_CASE(null_delimiter) {
  const char input[] = "\0first\0\0second\0\0\0third\0fourth\0\0\0";
  const char *const expected[] = {"first", "second", "third", "fourth"};
  const char *const *const expected_end = expected + sizeof(expected) / sizeof(expected[0]);

  // sizeof - 1 keeps the embedded NULs but drops the literal's terminator.
  PieceIterator<'\0'> it(StringPiece(input, sizeof(input) - 1));
  for (const char *const *want = expected; want != expected_end; ++want, ++it) {
    BOOST_REQUIRE(it);
    BOOST_CHECK_EQUAL(StringPiece(*want), *it);
  }
  BOOST_CHECK(!it);
}
// NUL bytes are ordinary payload when splitting on spaces: they stay inside the tokens.
BOOST_AUTO_TEST_CASE(null_entries) {
  const char input[] = "\0split\0\0 \0me\0 ";
  const char first[] = "\0split\0\0";
  const char second[] = "\0me\0";
  // Explicit lengths (sizeof - 1) so the embedded NULs are part of each piece.
  const StringPiece expected[] = {
    StringPiece(first, sizeof(first) - 1),
    StringPiece(second, sizeof(second) - 1)
  };
  const StringPiece *const expected_end = expected + sizeof(expected) / sizeof(expected[0]);

  PieceIterator<' '> it(StringPiece(input, sizeof(input) - 1));
  for (const StringPiece *want = expected; want != expected_end; ++want, ++it) {
    BOOST_REQUIRE(it);
    BOOST_CHECK_EQUAL(*want, *it);
  }
  BOOST_CHECK(!it);
}
// Text that never contains the delimiter comes back as one token spanning the whole input.
BOOST_AUTO_TEST_CASE(pipe_pipe_none) {
  const StringPiece whole("nodelimit at all");
  MultiTokenIterator it(whole, "|||");

  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(whole, *it);

  ++it;
  BOOST_CHECK(!it);
}
// Input consisting solely of the delimiter yields two empty tokens: one on
// each side of it.
BOOST_AUTO_TEST_CASE(pipe_pipe_two) {
  MultiTokenIterator it("|||", "|||");
  for (int seen = 0; seen < 2; ++seen, ++it) {
    BOOST_REQUIRE(it);
    BOOST_CHECK_EQUAL(StringPiece(), *it);
  }
  BOOST_CHECK(!it);
}
} // namespace
} // namespace util