From e9e5ac6782a57b9ac8eb98afd9c988870fb4c798 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 14 Jun 2021 15:02:42 +0100 Subject: [PATCH] Partial test-apps and tolerance in evaluations (#184) * Partial test applications Previously service-cli was used to generate output and accomplish regression testing for all of: (1) translated-text (2) alignment tokens + scores (3) quality scores (4) indirectly, annotation and tokenization. The --mode native now only outputs the translated text of the input read from stdin, faithful to the structure of the source. Test apps are separated so that each tests only an individual functionality. This can help in independently testing ssplit-cpp, quality-scores for the quality estimation implementation etc. Separating numbers and text has the advantage that each can be compared with tolerance, using BLEU (text) and some allowed error-rates (numbers). * Removing #mac tag * Moving test apps to src/tests * Tests are always on for CI Unit tests are turned off by checking for WASM_COMPATIBLE_SOURCES. * Fixing WASM_COMPATIBLE_SOURCE -> USE_WASM_COMPATIBLE_SOURCE * Workaround for now; CMakeLists.txt horrors are starting to bite * BRT: use bergamot-test instead of bergamot now * This should fix issues: CMakeLists.txt has so many paths * Casing to camelCase and removing legacyServiceCli * removing leftover service-cli declaration, some doc updates * #pragma once is starting to look easier * All the more reasons to do #pragma once * Updating marian-dev with intgemm::kCPU print, resolved from INTGEMM_CPUID * BRT: Use --gemm-highest-arch instead of python script * Adding intgemm resolve here, where we always(?) have intgemm on? 
* intgemm-resolve in default binary directory * BRT: Update to use intgemm-resolve * marian-dev: Reset to without --gemm-highest-precision Co-authored-by: Kenneth Heafield --- .github/workflows/native.yml | 8 +- 3rd_party/CMakeLists.txt | 2 + CMakeLists.txt | 3 + app/cli.h | 51 +-------- bergamot-translator-tests | 2 +- src/CMakeLists.txt | 5 +- src/tests/CMakeLists.txt | 38 ++++--- src/tests/apps.cpp | 116 +++++++++++++++++++++ src/tests/apps.h | 49 +++++++++ src/tests/cli.cpp | 27 +++++ src/tests/intgemm_resolve.cpp | 8 ++ src/tests/units/CMakeLists.txt | 22 ++++ src/tests/{ => units}/annotation_tests.cpp | 0 src/tests/{ => units}/run_tests.cpp | 0 14 files changed, 261 insertions(+), 70 deletions(-) create mode 100644 src/tests/apps.cpp create mode 100644 src/tests/apps.h create mode 100644 src/tests/cli.cpp create mode 100644 src/tests/intgemm_resolve.cpp create mode 100644 src/tests/units/CMakeLists.txt rename src/tests/{ => units}/annotation_tests.cpp (100%) rename src/tests/{ => units}/run_tests.cpp (100%) diff --git a/.github/workflows/native.yml b/.github/workflows/native.yml index eb3f4d4..c572cb5 100644 --- a/.github/workflows/native.yml +++ b/.github/workflows/native.yml @@ -30,7 +30,7 @@ jobs: - name: Ubuntu 18.04 minimal os: ubuntu-18.04 identifier: ubuntu_1804_minimal - cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on + cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on brt_tags: "'#wasm'" unittests: 'false' - name: Ubuntu 20.04 full @@ -42,7 +42,7 @@ jobs: - name: Ubuntu 20.04 minimal os: ubuntu-20.04 identifier: ubuntu_2004_minimal - cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on + cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on brt_tags: "'#wasm'" unittests: 'false' name: ${{ matrix.name }} @@ -140,12 +140,12 @@ jobs: os: macos-10.15 identifier: mac_1015_full cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=off - brt_tags: "'#mac'" + brt_tags: "" unittests: 'true' - 
name: MacOS 10.15 minimal os: macos-10.15 identifier: mac_1015_minimal - cmake: -DCOMPILE_TESTS=off -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on + cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on brt_tags: "'#wasm'" unittests: 'false' name: ${{ matrix.name }} diff --git a/3rd_party/CMakeLists.txt b/3rd_party/CMakeLists.txt index 74ce906..70e50d6 100644 --- a/3rd_party/CMakeLists.txt +++ b/3rd_party/CMakeLists.txt @@ -1,3 +1,5 @@ +# marian-dev is tested elsewhere in both paths, turning off here. +set(COMPILE_TESTS OFF) add_subdirectory(marian-dev) if(COMPILE_WASM) diff --git a/CMakeLists.txt b/CMakeLists.txt index e561ed9..c58ddd4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,9 @@ include(CMakeDependentOption) # Project specific cmake options option(COMPILE_WASM "Compile for WASM" OFF) cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" OFF "NOT COMPILE_WASM" ON) + +# WASM disables a million libraries, which also includes the unit test-library. +cmake_dependent_option(COMPILE_UNIT_TESTS "Compile unit tests" OFF "USE_WASM_COMPATIBLE_SOURCE" ON) option(COMPILE_TESTS "Compile bergamot-tests" OFF) # Set 3rd party submodule specific cmake options for this project diff --git a/app/cli.h b/app/cli.h index 292d21c..d6e930f 100644 --- a/app/cli.h +++ b/app/cli.h @@ -1,5 +1,6 @@ #ifndef BERGAMOT_APP_CLI_H #define BERGAMOT_APP_CLI_H +#include #include #include #include @@ -103,8 +104,7 @@ void decoder(Ptr options) { /// [brt/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh) /// /// * Input: reads from stdin, blob of text, read as a whole ; sentence-splitting etc handled internally. 
-/// * Output: to stdout, translation of the source text and additional information like sentences, alignments between -/// source and target tokens and quality scores. +/// * Output: to stdout, translation of the source text faithful to source structure. /// /// @param [in] options: options to build translator void native(Ptr options) { @@ -124,58 +124,13 @@ void native(Ptr options) { std::string input = std_input.str(); ResponseOptions responseOptions; - responseOptions.qualityScores = true; - responseOptions.alignment = true; - responseOptions.alignmentThreshold = 0.2f; // Wait on future until Response is complete std::future responseFuture = service.translate(std::move(input), responseOptions); responseFuture.wait(); Response response = responseFuture.get(); - std::cout << "[original]: " << response.source.text << '\n'; - std::cout << "[translated]: " << response.target.text << '\n'; - for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) { - std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx) << '\n'; - std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx) << '\n'; - std::cout << "Alignments" << '\n'; - typedef std::pair Point; - - // Initialize a point vector. - std::vector> aggregate(response.source.numWords(sentenceIdx)); - - // Handle alignments - auto &alignments = response.alignments[sentenceIdx]; - for (auto &p : alignments) { - aggregate[p.src].emplace_back(p.tgt, p.prob); - } - - for (size_t src = 0; src < aggregate.size(); src++) { - std::cout << response.source.word(sentenceIdx, src) << ": "; - for (auto &p : aggregate[src]) { - std::cout << response.target.word(sentenceIdx, p.first) << "(" << p.second << ") "; - } - std::cout << '\n'; - } - - // Handle quality. 
- auto &quality = response.qualityScores[sentenceIdx]; - std::cout << "Quality: whole(" << quality.sequence << "), tokens below:" << '\n'; - size_t wordIdx = 0; - bool first = true; - for (auto &p : quality.word) { - if (first) { - first = false; - } else { - std::cout << " "; - } - std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p << ")"; - wordIdx++; - } - std::cout << '\n'; - } - std::cout << "--------------------------\n"; - std::cout << '\n'; + std::cout << response.target.text; } } // namespace app diff --git a/bergamot-translator-tests b/bergamot-translator-tests index 020135a..b0ba62e 160000 --- a/bergamot-translator-tests +++ b/bergamot-translator-tests @@ -1 +1 @@ -Subproject commit 020135af1b620caa27929c1403c50ec3299e5bff +Subproject commit b0ba62eade4af7752c65c76cb17eab421ea02445 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c2d62ef..856831b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,7 +1,6 @@ add_subdirectory(translator) -if(COMPILE_TESTS) - # Catch currently comes from marian sources. - add_subdirectory(tests) +if (COMPILE_TESTS) + add_subdirectory(tests) endif(COMPILE_TESTS) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 5c1bc00..483bd07 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,22 +1,32 @@ # Unit tests -set(UNIT_TESTS - annotation_tests -) -foreach(test ${UNIT_TESTS}) - add_executable("run_${test}" run_tests.cpp "${test}.cpp") - target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src") +# Include Catch explicitly from marian. 
+set(CATCH_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rd_party/marian-dev/3rd-party) +add_library(Catch INTERFACE) +target_include_directories(Catch INTERFACE ${CATCH_INCLUDE_DIR}) +if (COMPILE_UNIT_TESTS) + add_subdirectory(units) +endif (COMPILE_UNIT_TESTS) + + + +if(NOT MSVC) + # Testing apps + set(APP_TESTS) + add_executable("bergamot-test" "cli.cpp" "apps.cpp") + if(CUDA_FOUND) - target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator) + target_link_libraries("bergamot-test" bergamot-translator) else(CUDA_FOUND) - target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator) + target_link_libraries("bergamot-test" bergamot-translator) endif(CUDA_FOUND) + + set_target_properties("bergamot-test" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") - if(msvc) - # disable c4305: truncation from 'double' to '_ty' - target_compile_options("run_${test}" public /wd4305) - endif(msvc) + # Adding an intgemm_resolve cmdline + add_executable(intgemm-resolve intgemm_resolve.cpp) + target_link_libraries(intgemm-resolve PRIVATE bergamot-translator) + set_target_properties(intgemm-resolve PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") +endif(NOT MSVC) - add_test(NAME ${test} COMMAND "run_${test}") -endforeach(test) diff --git a/src/tests/apps.cpp b/src/tests/apps.cpp new file mode 100644 index 0000000..9c00bff --- /dev/null +++ b/src/tests/apps.cpp @@ -0,0 +1,116 @@ +#include "apps.h" + +namespace marian { +namespace bergamot { +namespace testapp { + +// Utility function, common for all testapps. +Response translateFromStdin(Ptr options, ResponseOptions responseOptions) { + // Prepare memories for bytearrays (including model, shortlist and vocabs) + MemoryBundle memoryBundle; + + if (options->get("bytearray")) { + // Load legit values into bytearrays. 
+ memoryBundle = getMemoryBundleFromConfig(options); + } + + Service service(options, std::move(memoryBundle)); + + // Read a large input text blob from stdin + std::ostringstream inputStream; + inputStream << std::cin.rdbuf(); + std::string input = inputStream.str(); + + // Wait on future until Response is complete + std::future responseFuture = service.translate(std::move(input), responseOptions); + responseFuture.wait(); + Response response = responseFuture.get(); + return response; +} + +void qualityScores(Ptr options) { + ResponseOptions responseOptions; + responseOptions.qualityScores = true; + + Response response = translateFromStdin(options, responseOptions); + for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) { + auto &quality = response.qualityScores[sentenceIdx]; + std::cout << ((sentenceIdx == 0) ? "" : "\n") << quality.sequence << '\n'; + for (int wordIdx = 0; wordIdx < quality.word.size(); wordIdx++) { + std::cout << ((wordIdx == 0) ? "" : " "); + std::cout << quality.word[wordIdx]; + } + std::cout << '\n'; + } +} + +void alignmentAggregatedToSource(Ptr options, bool numeric) { + ResponseOptions responseOptions; + responseOptions.alignment = true; + responseOptions.alignmentThreshold = 0.2f; + Response response = translateFromStdin(options, responseOptions); + + for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) { + std::cout << (sentenceIdx == 0 ? "" : "\n"); + + // We are aggregating at source, which does not depend on matrix-multiplications and printing only target so we can + // do BLEU based stuff on the text. + // + typedef std::pair Point; + + std::vector> aggregate(response.source.numWords(sentenceIdx)); + auto &alignments = response.alignments[sentenceIdx]; + for (auto &p : alignments) { + aggregate[p.src].emplace_back(p.tgt, p.prob); + } + + for (size_t sourceIdx = 0; sourceIdx < aggregate.size(); sourceIdx++) { + // Sort in order of target tokens. 
+ auto cmp = [](const Point &p, const Point &q) { return p.first < q.first; }; + std::sort(aggregate[sourceIdx].begin(), aggregate[sourceIdx].end(), cmp); + + if (!numeric) { + std::cout << response.source.word(sentenceIdx, sourceIdx) << ": "; + } + + for (size_t j = 0; j < aggregate[sourceIdx].size(); j++) { + if (numeric) { + float alignmentScore = aggregate[sourceIdx][j].second; + std::cout << (j == 0 ? "" : " "); + std::cout << alignmentScore; + } else { + std::cout << " "; + size_t targetIdx = aggregate[sourceIdx][j].first; + std::cout << response.target.word(sentenceIdx, targetIdx); + } + } + std::cout << '\n'; + } + } +} + +void annotatedTextWords(Ptr options, bool source) { + ResponseOptions responseOptions; + Response response = translateFromStdin(options, responseOptions); + AnnotatedText &annotatedText = source ? response.source : response.target; + for (size_t s = 0; s < annotatedText.numSentences(); s++) { + for (size_t w = 0; w < annotatedText.numWords(s); w++) { + std::cout << (w == 0 ? "" : "\t"); + std::cout << annotatedText.word(s, w); + } + std::cout << "\n"; + } +} + +void annotatedTextSentences(Ptr options, bool source) { + ResponseOptions responseOptions; + Response response = translateFromStdin(options, responseOptions); + AnnotatedText &annotatedText = source ? 
response.source : response.target; + for (size_t s = 0; s < annotatedText.numSentences(); s++) { + std::cout << annotatedText.sentence(s) << "\n"; + } +} + +} // namespace testapp +} // namespace bergamot +} // namespace marian diff --git a/src/tests/apps.h b/src/tests/apps.h new file mode 100644 index 0000000..2ccf2c4 --- /dev/null +++ b/src/tests/apps.h @@ -0,0 +1,49 @@ +#ifndef BERGAMOT_SRC_TESTS_APPS_H +#define BERGAMOT_SRC_TESTS_APPS_H +#include +#include +#include +#include +#include + +#include "common/definitions.h" +#include "common/timer.h" +#include "common/utils.h" +#include "marian.h" +#include "translator/byte_array_util.h" +#include "translator/parser.h" +#include "translator/response.h" +#include "translator/response_options.h" +#include "translator/service.h" + +namespace marian { +namespace bergamot { + +namespace testapp { + +// Utility function, common for all testapps. Reads content from stdin, builds a Service based on options and constructs +// a response containing translation data according responseOptions. +Response translateFromStdin(Ptr options, ResponseOptions responseOptions); + +// Reads from stdin and translates. The quality score for the translations (each sentence) are printed separated by +// empty-lines. The first line contains whole quality scores and the second line word quality scores, for each entry. +void qualityScores(Ptr options); + +// Reads from stdin and translates. Alignments are printed aligned to the source-tokens, following format src-token: +// [possible-target-alignments], if numeric is false. If numeric is true, only alignment probabilities are printed +// instead of the tokens. +void alignmentAggregatedToSource(Ptr options, bool numeric = false); + +// Reads from stdin and translates. Prints the tokens separated by space for each sentence. Prints words from source +// side text annotation if source=true, target annotation otherwise. 
+void annotatedTextWords(Ptr options, bool source = true); + +// Reads from stdin and translates the read content. Prints the sentences in source or target in constructed response +// in each line, depending on source = true or false respectively. +void annotatedTextSentences(Ptr options, bool source = true); + +} // namespace testapp +} // namespace bergamot +} // namespace marian + +#endif // BERGAMOT_SRC_TESTS_APPS_H diff --git a/src/tests/cli.cpp b/src/tests/cli.cpp new file mode 100644 index 0000000..f2f0218 --- /dev/null +++ b/src/tests/cli.cpp @@ -0,0 +1,27 @@ + +#include "apps.h" + +int main(int argc, char *argv[]) { + auto cp = marian::bergamot::createConfigParser(); + auto options = cp.parseOptions(argc, argv, true); + const std::string mode = options->get("bergamot-mode"); + using namespace marian::bergamot; + if (mode == "test-quality-scores") { + testapp::qualityScores(options); + } else if (mode == "test-alignment-scores") { + testapp::alignmentAggregatedToSource(options, /*numeric=*/true); + } else if (mode == "test-alignment-words") { + testapp::alignmentAggregatedToSource(options, /*numeric=*/false); + } else if (mode == "test-response-source-sentences") { + testapp::annotatedTextSentences(options, /*source=*/true); + } else if (mode == "test-response-target-sentences") { + testapp::annotatedTextSentences(options, /*source=*/false); + } else if (mode == "test-response-source-words") { + testapp::annotatedTextWords(options, /*source=*/true); + } else if (mode == "test-response-target-words") { + testapp::annotatedTextWords(options, /*source=*/false); + } else { + ABORT("Unknown --mode {}. 
Please run a valid test", mode); + } + return 0; +} diff --git a/src/tests/intgemm_resolve.cpp b/src/tests/intgemm_resolve.cpp new file mode 100644 index 0000000..f95d0c4 --- /dev/null +++ b/src/tests/intgemm_resolve.cpp @@ -0,0 +1,8 @@ +#include + +#include "intgemm/intgemm.h" + +int main() { + std::cout << static_cast(intgemm::kCPU) << "\n"; + return 0; +} diff --git a/src/tests/units/CMakeLists.txt b/src/tests/units/CMakeLists.txt new file mode 100644 index 0000000..5c1bc00 --- /dev/null +++ b/src/tests/units/CMakeLists.txt @@ -0,0 +1,22 @@ +# Unit tests +set(UNIT_TESTS + annotation_tests +) + +foreach(test ${UNIT_TESTS}) + add_executable("run_${test}" run_tests.cpp "${test}.cpp") + target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src") + + if(CUDA_FOUND) + target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator) + else(CUDA_FOUND) + target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator) + endif(CUDA_FOUND) + + if(msvc) + # disable c4305: truncation from 'double' to '_ty' + target_compile_options("run_${test}" public /wd4305) + endif(msvc) + + add_test(NAME ${test} COMMAND "run_${test}") +endforeach(test) diff --git a/src/tests/annotation_tests.cpp b/src/tests/units/annotation_tests.cpp similarity index 100% rename from src/tests/annotation_tests.cpp rename to src/tests/units/annotation_tests.cpp diff --git a/src/tests/run_tests.cpp b/src/tests/units/run_tests.cpp similarity index 100% rename from src/tests/run_tests.cpp rename to src/tests/units/run_tests.cpp