mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-05 15:58:03 +03:00
KenLM e06ec4dc59f765482d7545b3cb797b8fc128ab9f
This commit is contained in:
parent
3d051db850
commit
2a74f3a521
@ -13,7 +13,7 @@ cmake_minimum_required(VERSION 2.8.8)
|
||||
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
|
||||
|
||||
|
||||
set(KENLM_MAX_ORDER 6)
|
||||
set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
|
||||
|
||||
add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
|
||||
|
||||
@ -64,76 +64,27 @@ set(EXE_LIST
|
||||
build_binary
|
||||
)
|
||||
|
||||
# Iterate through the executable list
|
||||
foreach(exe ${EXE_LIST})
|
||||
|
||||
# Compile the executable, linking against the requisite dependent object files
|
||||
add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
|
||||
|
||||
# Link the executable against boost
|
||||
target_link_libraries(${exe} ${Boost_LIBRARIES} pthread)
|
||||
|
||||
# Group executables together
|
||||
set_target_properties(${exe} PROPERTIES FOLDER executables)
|
||||
|
||||
# End for loop
|
||||
endforeach(exe)
|
||||
|
||||
|
||||
# Install the executable files
|
||||
install(TARGETS ${EXE_LIST} DESTINATION bin)
|
||||
|
||||
AddExes(EXES ${EXE_LIST}
|
||||
DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
|
||||
LIBRARIES ${Boost_LIBRARIES} pthread)
|
||||
|
||||
# Conditionally build the interpolation code
|
||||
if(BUILD_INTERPOLATE)
|
||||
add_subdirectory(interpolate)
|
||||
endif()
|
||||
|
||||
if(BUILD_TESTING)
|
||||
|
||||
# Explicitly list the Boost test files to be compiled
|
||||
set(KENLM_BOOST_TESTS_LIST
|
||||
left_test
|
||||
model_test
|
||||
partial_test
|
||||
)
|
||||
|
||||
# Iterate through the Boost tests list
|
||||
foreach(test ${KENLM_BOOST_TESTS_LIST})
|
||||
|
||||
# Compile the executable, linking against the requisite dependent object files
|
||||
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
|
||||
|
||||
# Require the following compile flag
|
||||
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
|
||||
|
||||
# Link the executable against boost
|
||||
target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
|
||||
|
||||
# model_test requires an extra command line parameter
|
||||
if ("${test}" STREQUAL "model_test")
|
||||
set(test_params
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa
|
||||
)
|
||||
else()
|
||||
set(test_params
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
|
||||
)
|
||||
endif()
|
||||
|
||||
# Specify command arguments for how to run each unit test
|
||||
#
|
||||
# Assuming that foo was defined via add_executable(foo ...),
|
||||
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
|
||||
#
|
||||
add_test(NAME ${test}_test
|
||||
COMMAND $<TARGET_FILE:${test}> ${test_params})
|
||||
|
||||
# Group unit tests together
|
||||
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
|
||||
|
||||
# End for loop
|
||||
endforeach(test)
|
||||
set(KENLM_BOOST_TESTS_LIST left_test partial_test)
|
||||
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
|
||||
DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
|
||||
LIBRARIES ${Boost_LIBRARIES} pthread
|
||||
TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa)
|
||||
|
||||
# model_test requires an extra command line parameter
|
||||
KenLMAddTest(TEST model_test
|
||||
DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
|
||||
LIBRARIES ${Boost_LIBRARIES} pthread
|
||||
TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa)
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -52,36 +52,16 @@ set_target_properties(lmplz PROPERTIES FOLDER executables)
|
||||
|
||||
if(BUILD_TESTING)
|
||||
|
||||
# Explicitly list the Boost test files to be compiled
|
||||
set(KENLM_BOOST_TESTS_LIST
|
||||
adjust_counts_test
|
||||
corpus_count_test
|
||||
)
|
||||
|
||||
# Iterate through the Boost tests list
|
||||
foreach(test ${KENLM_BOOST_TESTS_LIST})
|
||||
|
||||
# Compile the executable, linking against the requisite dependent object files
|
||||
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
|
||||
|
||||
# Require the following compile flag
|
||||
set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK")
|
||||
|
||||
# Link the executable against boost
|
||||
target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
|
||||
|
||||
# Specify command arguments for how to run each unit test
|
||||
#
|
||||
# Assuming that foo was defined via add_executable(foo ...),
|
||||
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
|
||||
#
|
||||
add_test(NAME ${test}_test
|
||||
COMMAND $<TARGET_FILE:${test}>)
|
||||
|
||||
# Group unit tests together
|
||||
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
|
||||
|
||||
# End for loop
|
||||
endforeach(test)
|
||||
# Explicitly list the Boost test files to be compiled
|
||||
set(KENLM_BOOST_TESTS_LIST
|
||||
adjust_counts_test
|
||||
corpus_count_test
|
||||
)
|
||||
|
||||
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
|
||||
DEPENDS $<TARGET_OBJECTS:kenlm>
|
||||
$<TARGET_OBJECTS:kenlm_common>
|
||||
$<TARGET_OBJECTS:kenlm_util>
|
||||
$<TARGET_OBJECTS:kenlm_builder>
|
||||
LIBRARIES ${Boost_LIBRARIES} pthread)
|
||||
endif()
|
||||
|
@ -269,7 +269,7 @@ void AdjustCounts::Run(const util::stream::ChainPositions &positions) {
|
||||
std::size_t same = full->end() - 1 - different;
|
||||
|
||||
// STEP 1: Output all the n-grams that changed.
|
||||
for (; lower_valid >= &streams[same]; --lower_valid) {
|
||||
for (; lower_valid >= streams.begin() + same; --lower_valid) {
|
||||
uint64_t order_minus_1 = lower_valid - streams_begin;
|
||||
if(actual_counts[order_minus_1] <= prune_thresholds_[order_minus_1])
|
||||
(*lower_valid)->Value().Mark();
|
||||
|
@ -58,52 +58,24 @@ add_library(kenlm_util OBJECT ${KENLM_UTIL_DOUBLECONVERSION_SOURCE} ${KENLM_UTIL
|
||||
# Only compile and run unit tests if tests should be run
|
||||
if(BUILD_TESTING)
|
||||
|
||||
# Explicitly list the Boost test files to be compiled
|
||||
set(KENLM_BOOST_TESTS_LIST
|
||||
bit_packing_test
|
||||
file_piece_test
|
||||
joint_sort_test
|
||||
multi_intersection_test
|
||||
probing_hash_table_test
|
||||
read_compressed_test
|
||||
sorted_uniform_test
|
||||
tokenize_piece_test
|
||||
)
|
||||
# Explicitly list the Boost test files to be compiled
|
||||
set(KENLM_BOOST_TESTS_LIST
|
||||
bit_packing_test
|
||||
joint_sort_test
|
||||
multi_intersection_test
|
||||
probing_hash_table_test
|
||||
read_compressed_test
|
||||
sorted_uniform_test
|
||||
tokenize_piece_test
|
||||
)
|
||||
|
||||
# Iterate through the Boost tests list
|
||||
foreach(test ${KENLM_BOOST_TESTS_LIST})
|
||||
|
||||
# Compile the executable, linking against the requisite dependent object files
|
||||
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm_util>)
|
||||
|
||||
# Require the following compile flag
|
||||
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
|
||||
|
||||
# Link the executable against boost
|
||||
target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
|
||||
|
||||
# file_piece_test requires an extra command line parameter
|
||||
if ("${test}" STREQUAL "file_piece_test")
|
||||
set(test_params
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc
|
||||
)
|
||||
else()
|
||||
set(test_params
|
||||
)
|
||||
endif()
|
||||
|
||||
# Specify command arguments for how to run each unit test
|
||||
#
|
||||
# Assuming that foo was defined via add_executable(foo ...),
|
||||
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
|
||||
#
|
||||
add_test(NAME ${test}_test
|
||||
COMMAND $<TARGET_FILE:${test}> ${test_params})
|
||||
|
||||
# Group unit tests together
|
||||
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
|
||||
|
||||
# End for loop
|
||||
endforeach(test)
|
||||
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
|
||||
DEPENDS $<TARGET_OBJECTS:kenlm_util>
|
||||
LIBRARIES ${Boost_LIBRARIES} pthread)
|
||||
|
||||
# file_piece_test requires an extra command line parameter
|
||||
KenLMAddTest(TEST file_piece_test
|
||||
DEPENDS $<TARGET_OBJECTS:kenlm_util>
|
||||
LIBRARIES ${Boost_LIBRARIES} pthread
|
||||
TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/file_piece.cc)
|
||||
endif()
|
||||
|
@ -35,7 +35,7 @@ import testing ;
|
||||
run file_piece_test.o kenutil /top//boost_unit_test_framework : : file_piece.cc ;
|
||||
for local t in [ glob *_test.cc : file_piece_test.cc read_compressed_test.cc ] {
|
||||
local name = [ MATCH "(.*)\.cc" : $(t) ] ;
|
||||
unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_filesystem /top//boost_system ;
|
||||
unit-test $(name) : $(t) kenutil /top//boost_unit_test_framework /top//boost_system ;
|
||||
}
|
||||
|
||||
build-project stream ;
|
||||
|
@ -24,25 +24,23 @@ void Exception::SetLocation(const char *file, unsigned int line, const char *fun
|
||||
* them down.
|
||||
*/
|
||||
std::string old_text;
|
||||
std::swap(old_text, what_);
|
||||
StringStream stream;
|
||||
stream << what_;
|
||||
stream << file << ':' << line;
|
||||
if (func) stream << " in " << func << " threw ";
|
||||
what_.swap(old_text);
|
||||
what_ << file << ':' << line;
|
||||
if (func) what_ << " in " << func << " threw ";
|
||||
if (child_name) {
|
||||
stream << child_name;
|
||||
what_ << child_name;
|
||||
} else {
|
||||
#ifdef __GXX_RTTI
|
||||
stream << typeid(this).name();
|
||||
what_ << typeid(this).name();
|
||||
#else
|
||||
stream << "an exception";
|
||||
what_ << "an exception";
|
||||
#endif
|
||||
}
|
||||
if (condition) {
|
||||
stream << " because `" << condition << '\'';
|
||||
what_ << " because `" << condition << '\'';
|
||||
}
|
||||
stream << ".\n";
|
||||
stream << old_text;
|
||||
what_ << ".\n";
|
||||
what_ << old_text;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
|
||||
// TODO(hieu) delete this
|
||||
// TODO(hieu): delete this
|
||||
#include <sstream>
|
||||
|
||||
namespace util {
|
||||
@ -20,7 +20,7 @@ class Exception : public std::exception {
|
||||
Exception() throw();
|
||||
virtual ~Exception() throw();
|
||||
|
||||
const char *what() const throw() { return what_.c_str(); }
|
||||
const char *what() const throw() { return what_.str().c_str(); }
|
||||
|
||||
// For use by the UTIL_THROW macros.
|
||||
void SetLocation(
|
||||
@ -38,7 +38,7 @@ class Exception : public std::exception {
|
||||
typedef T Identity;
|
||||
};
|
||||
|
||||
std::string what_;
|
||||
StringStream what_;
|
||||
};
|
||||
|
||||
/* This implements the normal operator<< for Exception and all its children.
|
||||
@ -46,12 +46,10 @@ class Exception : public std::exception {
|
||||
* boost::enable_if.
|
||||
*/
|
||||
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
|
||||
// TODO(hieu): change this to
|
||||
// StringStream(e.what_) << data;
|
||||
|
||||
// TODO(hieu): delete this.
|
||||
std::stringstream moses_hack;
|
||||
moses_hack << data;
|
||||
e.what_ += moses_hack.str();
|
||||
e.what_ << moses_hack.str();
|
||||
return e;
|
||||
}
|
||||
|
||||
|
@ -37,38 +37,14 @@ set(KENLM_UTIL_STREAM_SOURCE
|
||||
|
||||
|
||||
if(BUILD_TESTING)
|
||||
# Explicitly list the Boost test files to be compiled
|
||||
set(KENLM_BOOST_TESTS_LIST
|
||||
io_test
|
||||
sort_test
|
||||
stream_test
|
||||
)
|
||||
|
||||
# Explicitly list the Boost test files to be compiled
|
||||
set(KENLM_BOOST_TESTS_LIST
|
||||
io_test
|
||||
sort_test
|
||||
stream_test
|
||||
)
|
||||
|
||||
# Iterate through the Boost tests list
|
||||
foreach(test ${KENLM_BOOST_TESTS_LIST})
|
||||
|
||||
# Compile the executable, linking against the requisite dependent object files
|
||||
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm_util>)
|
||||
|
||||
# Require the following compile flag
|
||||
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
|
||||
|
||||
# Link the executable against boost
|
||||
target_link_libraries(${test} ${Boost_LIBRARIES} pthread)
|
||||
|
||||
# Specify command arguments for how to run each unit test
|
||||
#
|
||||
# Assuming that foo was defined via add_executable(foo ...),
|
||||
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
|
||||
#
|
||||
add_test(NAME ${test}_test
|
||||
COMMAND $<TARGET_FILE:${test}>)
|
||||
|
||||
# Group unit tests together
|
||||
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
|
||||
|
||||
# End for loop
|
||||
endforeach(test)
|
||||
|
||||
AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
|
||||
DEPENDS $<TARGET_OBJECTS:kenlm_util>
|
||||
LIBRARIES ${Boost_LIBRARIES} pthread)
|
||||
endif()
|
||||
|
@ -10,14 +10,8 @@ namespace util {
|
||||
|
||||
class StringStream : public FakeOStream<StringStream> {
|
||||
public:
|
||||
// Semantics: appends to string. Remember to clear first!
|
||||
StringStream() {}
|
||||
|
||||
explicit StringStream()
|
||||
{}
|
||||
/*
|
||||
explicit StringStream(std::string &out)
|
||||
: out_(out) {}
|
||||
*/
|
||||
StringStream &flush() { return *this; }
|
||||
|
||||
StringStream &write(const void *data, std::size_t length) {
|
||||
@ -25,12 +19,11 @@ class StringStream : public FakeOStream<StringStream> {
|
||||
return *this;
|
||||
}
|
||||
|
||||
const std::string &str() const
|
||||
{ return out_; }
|
||||
void str(const std::string &val)
|
||||
{
|
||||
out_ = val;
|
||||
}
|
||||
const std::string &str() const { return out_; }
|
||||
|
||||
void str(const std::string &val) { out_ = val; }
|
||||
|
||||
void swap(std::string &str) { std::swap(out_, str); }
|
||||
|
||||
protected:
|
||||
friend class FakeOStream<StringStream>;
|
||||
|
Loading…
Reference in New Issue
Block a user