KenLM e06ec4dc59f765482d7545b3cb797b8fc128ab9f

This commit is contained in:
Kenneth Heafield 2016-01-12 23:39:27 +00:00
parent 3d051db850
commit 2a74f3a521
9 changed files with 79 additions and 211 deletions

View File

@ -13,7 +13,7 @@ cmake_minimum_required(VERSION 2.8.8)
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
set(KENLM_MAX_ORDER 6)
set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
@ -64,76 +64,27 @@ set(EXE_LIST
build_binary
)
# Iterate through the executable list
foreach(exe ${EXE_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
# Link the executable against boost
target_link_libraries(${exe} ${Boost_LIBRARIES} pthread)
# Group executables together
set_target_properties(${exe} PROPERTIES FOLDER executables)
# End for loop
endforeach(exe)
# Install the executable files
install(TARGETS ${EXE_LIST} DESTINATION bin)
AddExes(EXES ${EXE_LIST}
DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
LIBRARIES ${Boost_LIBRARIES} pthread)
# Conditionally build the interpolation code
if(BUILD_INTERPOLATE)
add_subdirectory(interpolate)
endif()
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
left_test
model_test
partial_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
# model_test requires an extra command line parameter
if ("${test}" STREQUAL "model_test")
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa
)
else()
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
)
endif()
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}> ${test_params})
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
set(KENLM_BOOST_TESTS_LIST left_test partial_test)
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
LIBRARIES ${Boost_LIBRARIES} pthread
TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa)
# model_test requires an extra command line parameter
KenLMAddTest(TEST model_test
DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
LIBRARIES ${Boost_LIBRARIES} pthread
TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa)
endif()

View File

@ -52,36 +52,16 @@ set_target_properties(lmplz PROPERTIES FOLDER executables)
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
adjust_counts_test
corpus_count_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK")
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}>)
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
adjust_counts_test
corpus_count_test
)
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
DEPENDS $<TARGET_OBJECTS:kenlm>
$<TARGET_OBJECTS:kenlm_common>
$<TARGET_OBJECTS:kenlm_util>
$<TARGET_OBJECTS:kenlm_builder>
LIBRARIES ${Boost_LIBRARIES} pthread)
endif()

View File

@ -269,7 +269,7 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
std::size_t same = full->end() - 1 - different;
// STEP 1: Output all the n-grams that changed.
for (; lower_valid >= &streams[same]; --lower_valid) {
for (; lower_valid >= streams.begin() + same; --lower_valid) {
uint64_t order_minus_1 = lower_valid - streams_begin;
if(actual_counts[order_minus_1] <= prune_thresholds_[order_minus_1])
(*lower_valid)->Value().Mark();

View File

@ -58,52 +58,24 @@ add_library(kenlm_util OBJECT ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL
# Only compile and run unit tests if tests should be run
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
bit_packing_test
file_piece_test
joint_sort_test
multi_intersection_test
probing_hash_table_test
read_compressed_test
sorted_uniform_test
tokenize_piece_test
)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
bit_packing_test
joint_sort_test
multi_intersection_test
probing_hash_table_test
read_compressed_test
sorted_uniform_test
tokenize_piece_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
# file_piece_test requires an extra command line parameter
if ("${test}" STREQUAL "file_piece_test")
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc
)
else()
set(test_params
)
endif()
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}> ${test_params})
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
DEPENDS $<TARGET_OBJECTS:kenlm_util>
LIBRARIES ${Boost_LIBRARIES} pthread)
# file_piece_test requires an extra command line parameter
KenLMAddTest(TEST file_piece_test
DEPENDS $<TARGET_OBJECTS:kenlm_util>
LIBRARIES ${Boost_LIBRARIES} pthread
TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc)
endif()

View File

@ -35,7 +35,7 @@ import testing ;
run file_piece_test.o kenutil /top//boost_unit_test_framework : : file_piece.cc ;
for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] {
local name = [ MATCH "(.*)\.cc" : $(t) ] ;
unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_filesystem /top//boost_system ;
unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_system ;
}
build-project stream ;

View File

@ -24,25 +24,23 @@ void Exception::SetLocation(const char *file, unsigned int line, const char *fun
* them down.
*/
std::string old_text;
std::swap(old_text, what_);
StringStream stream;
stream << what_;
stream << file << ':' << line;
if (func) stream << " in " << func << " threw ";
what_.swap(old_text);
what_ << file << ':' << line;
if (func) what_ << " in " << func << " threw ";
if (child_name) {
stream << child_name;
what_ << child_name;
} else {
#ifdef __GXX_RTTI
stream << typeid(this).name();
what_ << typeid(this).name();
#else
stream << "an exception";
what_ << "an exception";
#endif
}
if (condition) {
stream << " because `" << condition << '\'';
what_ << " because `" << condition << '\'';
}
stream << ".\n";
stream << old_text;
what_ << ".\n";
what_ << old_text;
}
namespace {

View File

@ -8,7 +8,7 @@
#include <string>
#include <stdint.h>
// TODO(hieu) delete this
// TODO(hieu): delete this
#include <sstream>
namespace util {
@ -20,7 +20,7 @@ class Exception : public std::exception {
Exception() throw();
virtual ~Exception() throw();
const char *what() const throw() { return what_.c_str(); }
const char *what() const throw() { return what_.str().c_str(); }
// For use by the UTIL_THROW macros.
void SetLocation(
@ -38,7 +38,7 @@ class Exception : public std::exception {
typedef T Identity;
};
std::string what_;
StringStream what_;
};
/* This implements the normal operator<< for Exception and all its children.
@ -46,12 +46,10 @@ class Exception : public std::exception {
* boost::enable_if.
*/
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
// TODO(hieu): change this to
// StringStream(e.what_) << data;
// TODO(hieu): delete this.
std::stringstream moses_hack;
moses_hack << data;
e.what_ += moses_hack.str();
e.what_ << moses_hack.str();
return e;
}

View File

@ -37,38 +37,14 @@ set(KENLM_UTIL_STREAM_SOURCE
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
io_test
sort_test
stream_test
)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
io_test
sort_test
stream_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}>)
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
DEPENDS $<TARGET_OBJECTS:kenlm_util>
LIBRARIES ${Boost_LIBRARIES} pthread)
endif()

View File

@ -10,14 +10,8 @@ namespace util {
class StringStream : public FakeOStream<StringStream> {
public:
// Semantics: appends to string. Remember to clear first!
StringStream() {}
explicit StringStream()
{}
/*
explicit StringStream(std::string &out)
: out_(out) {}
*/
StringStream &flush() { return *this; }
StringStream &write(const void *data, std::size_t length) {
@ -25,12 +19,11 @@ class StringStream : public FakeOStream<StringStream> {
return *this;
}
const std::string &str() const
{ return out_; }
void str(const std::string &val)
{
out_ = val;
}
const std::string &str() const { return out_; }
void str(const std::string &val) { out_ = val; }
void swap(std::string &str) { std::swap(out_, str); }
protected:
friend class FakeOStream<StringStream>;