mirror of
https://github.com/browsermt/bergamot-translator.git
synced 2024-08-15 16:40:26 +03:00
Partial test-apps and tolerance in evaluations (#184)
* Partial test applications

  Previously, service-cli was used to generate output and accomplish regression testing for all of: (1) translated text, (2) alignment tokens + scores, (3) quality scores and (4) indirectly, annotation and tokenization. The --mode native now only outputs the translated text, faithful to the source structure, for the input source read on stdin. Test apps are separated so that each tests only an individual functionality. This can help in independently testing ssplit-cpp, quality scores for the quality estimation implementation, etc. Separating numbers and text has the advantage of being able to compare each with tolerance, using BLEU (text) and some allowed error rates (numbers).
* Removing #mac tag
* Moving test apps to src/tests
* Tests are always on for CI

  Unit tests are turned off looking for WASM_COMPATIBLE_SOURCES.
* Fixing WASM_COMPATIBLE_SOURCE -> USE_WASM_COMPATIBLE_SOURCE
* Workaround for now; CMakeLists.txt horrors are starting to bite
* BRT: use bergamot-test instead of bergamot now
* This should fix issues: CMakeLists.txt has so many paths
* Casing to camelCase and removing legacyServiceCli
* Removing leftover service-cli declaration, some doc updates
* #pragma once is starting to look easier
* All the more reasons to do #pragma once
* Updating marian-dev with intgemm::kCPU print, resolved from INTGEMM_CPUID
* BRT: Use --gemm-highest-arch instead of python script
* Adding intgemm resolve here, where always(?) have intgemm on?
* intgemm-resolve in default binary directory
* BRT: Update to use intgemm-resolve
* marian-dev: Reset to without --gemm-highest-precision

Co-authored-by: Kenneth Heafield <kpu@users.noreply.github.com>
This commit is contained in:
parent
16eb47f47e
commit
e9e5ac6782
8
.github/workflows/native.yml
vendored
8
.github/workflows/native.yml
vendored
@ -30,7 +30,7 @@ jobs:
|
||||
- name: Ubuntu 18.04 minimal
|
||||
os: ubuntu-18.04
|
||||
identifier: ubuntu_1804_minimal
|
||||
cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on
|
||||
cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
|
||||
brt_tags: "'#wasm'"
|
||||
unittests: 'false'
|
||||
- name: Ubuntu 20.04 full
|
||||
@ -42,7 +42,7 @@ jobs:
|
||||
- name: Ubuntu 20.04 minimal
|
||||
os: ubuntu-20.04
|
||||
identifier: ubuntu_2004_minimal
|
||||
cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on
|
||||
cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
|
||||
brt_tags: "'#wasm'"
|
||||
unittests: 'false'
|
||||
name: ${{ matrix.name }}
|
||||
@ -140,12 +140,12 @@ jobs:
|
||||
os: macos-10.15
|
||||
identifier: mac_1015_full
|
||||
cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=off
|
||||
brt_tags: "'#mac'"
|
||||
brt_tags: ""
|
||||
unittests: 'true'
|
||||
- name: MacOS 10.15 minimal
|
||||
os: macos-10.15
|
||||
identifier: mac_1015_minimal
|
||||
cmake: -DCOMPILE_TESTS=off -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
|
||||
cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
|
||||
brt_tags: "'#wasm'"
|
||||
unittests: 'false'
|
||||
name: ${{ matrix.name }}
|
||||
|
2
3rd_party/CMakeLists.txt
vendored
2
3rd_party/CMakeLists.txt
vendored
@ -1,3 +1,5 @@
|
||||
# marian-dev is tested elsewhere in both paths, turning off here.
|
||||
set(COMPILE_TESTS OFF)
|
||||
add_subdirectory(marian-dev)
|
||||
|
||||
if(COMPILE_WASM)
|
||||
|
@ -41,6 +41,9 @@ include(CMakeDependentOption)
|
||||
# Project specific cmake options
|
||||
option(COMPILE_WASM "Compile for WASM" OFF)
|
||||
cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" OFF "NOT COMPILE_WASM" ON)
|
||||
|
||||
# WASM disables a million libraries, which also includes the unit test-library.
|
||||
cmake_dependent_option(COMPILE_UNIT_TESTS "Compile unit tests" OFF "USE_WASM_COMPATIBLE_SOURCE" ON)
|
||||
option(COMPILE_TESTS "Compile bergamot-tests" OFF)
|
||||
|
||||
# Set 3rd party submodule specific cmake options for this project
|
||||
|
51
app/cli.h
51
app/cli.h
@ -1,5 +1,6 @@
|
||||
#ifndef BERGAMOT_APP_CLI_H
|
||||
#define BERGAMOT_APP_CLI_H
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
@ -103,8 +104,7 @@ void decoder(Ptr<Options> options) {
|
||||
/// [brt/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh)
|
||||
///
|
||||
/// * Input: reads from stdin, blob of text, read as a whole ; sentence-splitting etc handled internally.
|
||||
/// * Output: to stdout, translation of the source text and additional information like sentences, alignments between
|
||||
/// source and target tokens and quality scores.
|
||||
/// * Output: to stdout, translation of the source text faithful to source structure.
|
||||
///
|
||||
/// @param [in] options: options to build translator
|
||||
void native(Ptr<Options> options) {
|
||||
@ -124,58 +124,13 @@ void native(Ptr<Options> options) {
|
||||
std::string input = std_input.str();
|
||||
|
||||
ResponseOptions responseOptions;
|
||||
responseOptions.qualityScores = true;
|
||||
responseOptions.alignment = true;
|
||||
responseOptions.alignmentThreshold = 0.2f;
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
|
||||
responseFuture.wait();
|
||||
Response response = responseFuture.get();
|
||||
|
||||
std::cout << "[original]: " << response.source.text << '\n';
|
||||
std::cout << "[translated]: " << response.target.text << '\n';
|
||||
for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
|
||||
std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx) << '\n';
|
||||
std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx) << '\n';
|
||||
std::cout << "Alignments" << '\n';
|
||||
typedef std::pair<size_t, float> Point;
|
||||
|
||||
// Initialize a point vector.
|
||||
std::vector<std::vector<Point>> aggregate(response.source.numWords(sentenceIdx));
|
||||
|
||||
// Handle alignments
|
||||
auto &alignments = response.alignments[sentenceIdx];
|
||||
for (auto &p : alignments) {
|
||||
aggregate[p.src].emplace_back(p.tgt, p.prob);
|
||||
}
|
||||
|
||||
for (size_t src = 0; src < aggregate.size(); src++) {
|
||||
std::cout << response.source.word(sentenceIdx, src) << ": ";
|
||||
for (auto &p : aggregate[src]) {
|
||||
std::cout << response.target.word(sentenceIdx, p.first) << "(" << p.second << ") ";
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
|
||||
// Handle quality.
|
||||
auto &quality = response.qualityScores[sentenceIdx];
|
||||
std::cout << "Quality: whole(" << quality.sequence << "), tokens below:" << '\n';
|
||||
size_t wordIdx = 0;
|
||||
bool first = true;
|
||||
for (auto &p : quality.word) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
std::cout << " ";
|
||||
}
|
||||
std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p << ")";
|
||||
wordIdx++;
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
std::cout << "--------------------------\n";
|
||||
std::cout << '\n';
|
||||
std::cout << response.target.text;
|
||||
}
|
||||
|
||||
} // namespace app
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 020135af1b620caa27929c1403c50ec3299e5bff
|
||||
Subproject commit b0ba62eade4af7752c65c76cb17eab421ea02445
|
@ -1,7 +1,6 @@
|
||||
add_subdirectory(translator)
|
||||
|
||||
if(COMPILE_TESTS)
|
||||
# Catch currently comes from marian sources.
|
||||
add_subdirectory(tests)
|
||||
if (COMPILE_TESTS)
|
||||
add_subdirectory(tests)
|
||||
endif(COMPILE_TESTS)
|
||||
|
||||
|
@ -1,22 +1,32 @@
|
||||
# Unit tests
|
||||
set(UNIT_TESTS
|
||||
annotation_tests
|
||||
)
|
||||
|
||||
foreach(test ${UNIT_TESTS})
|
||||
add_executable("run_${test}" run_tests.cpp "${test}.cpp")
|
||||
target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
|
||||
# Include Catch explicitly from marian.
|
||||
set(CATCH_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rd_party/marian-dev/3rd-party)
|
||||
add_library(Catch INTERFACE)
|
||||
target_include_directories(Catch INTERFACE ${CATCH_INCLUDE_DIR})
|
||||
|
||||
if (COMPILE_UNIT_TESTS)
|
||||
add_subdirectory(units)
|
||||
endif (COMPILE_UNIT_TESTS)
|
||||
|
||||
|
||||
|
||||
if(NOT MSVC)
|
||||
# Testing apps
|
||||
set(APP_TESTS)
|
||||
add_executable("bergamot-test" "cli.cpp" "apps.cpp")
|
||||
|
||||
if(CUDA_FOUND)
|
||||
target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
|
||||
target_link_libraries("bergamot-test" bergamot-translator)
|
||||
else(CUDA_FOUND)
|
||||
target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
|
||||
target_link_libraries("bergamot-test" bergamot-translator)
|
||||
endif(CUDA_FOUND)
|
||||
|
||||
set_target_properties("bergamot-test" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
|
||||
if(msvc)
|
||||
# disable c4305: truncation from 'double' to '_ty'
|
||||
target_compile_options("run_${test}" public /wd4305)
|
||||
endif(msvc)
|
||||
# Adding an intgemm_resolve cmdline
|
||||
add_executable(intgemm-resolve intgemm_resolve.cpp)
|
||||
target_link_libraries(intgemm-resolve PRIVATE bergamot-translator)
|
||||
set_target_properties(intgemm-resolve PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
endif(NOT MSVC)
|
||||
|
||||
add_test(NAME ${test} COMMAND "run_${test}")
|
||||
endforeach(test)
|
||||
|
116
src/tests/apps.cpp
Normal file
116
src/tests/apps.cpp
Normal file
@ -0,0 +1,116 @@
|
||||
#include "apps.h"
|
||||
|
||||
namespace marian {
|
||||
namespace bergamot {
|
||||
namespace testapp {
|
||||
|
||||
// Utility function, common for all testapps.
|
||||
Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions) {
|
||||
// Prepare memories for bytearrays (including model, shortlist and vocabs)
|
||||
MemoryBundle memoryBundle;
|
||||
|
||||
if (options->get<bool>("bytearray")) {
|
||||
// Load legit values into bytearrays.
|
||||
memoryBundle = getMemoryBundleFromConfig(options);
|
||||
}
|
||||
|
||||
Service service(options, std::move(memoryBundle));
|
||||
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream inputStream;
|
||||
inputStream << std::cin.rdbuf();
|
||||
std::string input = inputStream.str();
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
|
||||
responseFuture.wait();
|
||||
Response response = responseFuture.get();
|
||||
return response;
|
||||
}
|
||||
|
||||
// Translates stdin with quality scores enabled and prints them to stdout.
//
// For every sentence two lines are written: the sequence-level score, followed by the word-level scores separated by
// single spaces. Consecutive sentences are separated by an empty line.
//
// @param [in] options: options used to build the translator.
void qualityScores(Ptr<Options> options) {
  ResponseOptions responseOptions;
  responseOptions.qualityScores = true;

  Response response = translateFromStdin(options, responseOptions);

  // Indices are size_t to match the unsigned sizes they are compared against (avoids signed/unsigned comparison).
  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
    const auto &quality = response.qualityScores[sentenceIdx];

    // Blank line between sentences, but not before the first one.
    std::cout << ((sentenceIdx == 0) ? "" : "\n") << quality.sequence << '\n';

    for (size_t wordIdx = 0; wordIdx < quality.word.size(); wordIdx++) {
      std::cout << ((wordIdx == 0) ? "" : " ");
      std::cout << quality.word[wordIdx];
    }
    std::cout << '\n';
  }
}
|
||||
|
||||
// Translates stdin with alignments enabled (threshold 0.2) and prints, for every source word, the target words it is
// aligned to. One line is written per source word; sentences are separated by an empty line.
//
// Aggregation happens on the source side and the aligned targets are ordered by target index. With numeric=false each
// line is "<source-word>: " followed by the aligned target words (each preceded by a space), so the text can be
// compared with BLEU-style metrics. With numeric=true only the alignment probabilities are printed, separated by
// single spaces.
void alignmentAggregatedToSource(Ptr<Options> options, bool numeric) {
  ResponseOptions responseOptions;
  responseOptions.alignment = true;
  responseOptions.alignmentThreshold = 0.2f;
  Response response = translateFromStdin(options, responseOptions);

  // (target index, alignment probability)
  typedef std::pair<size_t, float> Point;
  auto byTargetIndex = [](const Point &lhs, const Point &rhs) { return lhs.first < rhs.first; };

  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
    if (sentenceIdx != 0) {
      std::cout << "\n";
    }

    // One bucket of aligned targets per source word of this sentence.
    std::vector<std::vector<Point>> bySource(response.source.numWords(sentenceIdx));
    auto &links = response.alignments[sentenceIdx];
    for (auto &link : links) {
      bySource[link.src].emplace_back(link.tgt, link.prob);
    }

    for (size_t sourceIdx = 0; sourceIdx < bySource.size(); sourceIdx++) {
      std::vector<Point> &targets = bySource[sourceIdx];
      std::sort(targets.begin(), targets.end(), byTargetIndex);

      if (numeric) {
        for (size_t j = 0; j < targets.size(); j++) {
          if (j != 0) {
            std::cout << " ";
          }
          std::cout << targets[j].second;
        }
      } else {
        std::cout << response.source.word(sentenceIdx, sourceIdx) << ": ";
        for (size_t j = 0; j < targets.size(); j++) {
          std::cout << " ";
          std::cout << response.target.word(sentenceIdx, targets[j].first);
        }
      }
      std::cout << '\n';
    }
  }
}
|
||||
|
||||
// Translates stdin with default response options and prints the words of the chosen side of the response: the source
// annotation when `source` is true, the target annotation otherwise. Each sentence goes on its own line with its words
// separated by tabs.
void annotatedTextWords(Ptr<Options> options, bool source) {
  ResponseOptions responseOptions;
  Response response = translateFromStdin(options, responseOptions);

  AnnotatedText &side = source ? response.source : response.target;
  for (size_t sentenceIdx = 0; sentenceIdx < side.numSentences(); sentenceIdx++) {
    for (size_t wordIdx = 0; wordIdx < side.numWords(sentenceIdx); wordIdx++) {
      if (wordIdx != 0) {
        std::cout << "\t";
      }
      std::cout << side.word(sentenceIdx, wordIdx);
    }
    std::cout << "\n";
  }
}
|
||||
|
||||
// Translates stdin with default response options and prints one sentence per line from the chosen side of the
// response: the source annotation when `source` is true, the target annotation otherwise.
void annotatedTextSentences(Ptr<Options> options, bool source) {
  ResponseOptions responseOptions;
  Response response = translateFromStdin(options, responseOptions);

  AnnotatedText &side = source ? response.source : response.target;
  size_t sentenceIdx = 0;
  while (sentenceIdx < side.numSentences()) {
    std::cout << side.sentence(sentenceIdx) << "\n";
    sentenceIdx++;
  }
}
|
||||
|
||||
} // namespace testapp
|
||||
} // namespace bergamot
|
||||
} // namespace marian
|
49
src/tests/apps.h
Normal file
49
src/tests/apps.h
Normal file
@ -0,0 +1,49 @@
|
||||
#ifndef BERGAMOT_SRC_TESTS_APPS_H
|
||||
#define BERGAMOT_SRC_TESTS_APPS_H
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "common/definitions.h"
|
||||
#include "common/timer.h"
|
||||
#include "common/utils.h"
|
||||
#include "marian.h"
|
||||
#include "translator/byte_array_util.h"
|
||||
#include "translator/parser.h"
|
||||
#include "translator/response.h"
|
||||
#include "translator/response_options.h"
|
||||
#include "translator/service.h"
|
||||
|
||||
namespace marian {
|
||||
namespace bergamot {
|
||||
|
||||
namespace testapp {
|
||||
|
||||
// Utility function, common for all testapps. Reads content from stdin, builds a Service based on options and constructs
|
||||
// a response containing translation data according to responseOptions.
|
||||
Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions);
|
||||
|
||||
// Reads from stdin and translates. The quality scores for the translations (one entry per sentence) are printed separated by
|
||||
// empty-lines. The first line contains whole quality scores and the second line word quality scores, for each entry.
|
||||
void qualityScores(Ptr<Options> options);
|
||||
|
||||
// Reads from stdin and translates. Alignments are printed aligned to the source-tokens, following format src-token:
|
||||
// [possible-target-alignments], if numeric is false. If numeric is true, only alignment probabilities are printed
|
||||
// instead of the tokens.
|
||||
void alignmentAggregatedToSource(Ptr<Options> options, bool numeric = false);
|
||||
|
||||
// Reads from stdin and translates. Prints the tokens separated by tabs, one sentence per line. Prints words from source
|
||||
// side text annotation if source=true, target annotation otherwise.
|
||||
void annotatedTextWords(Ptr<Options> options, bool source = true);
|
||||
|
||||
// Reads from stdin and translates the read content. Prints the sentences in source or target in constructed response
|
||||
// in each line, depending on source = true or false respectively.
|
||||
void annotatedTextSentences(Ptr<Options> options, bool source = true);
|
||||
|
||||
} // namespace testapp
|
||||
} // namespace bergamot
|
||||
} // namespace marian
|
||||
|
||||
#endif // BERGAMOT_SRC_TESTS_APPS_H
|
27
src/tests/cli.cpp
Normal file
27
src/tests/cli.cpp
Normal file
@ -0,0 +1,27 @@
|
||||
|
||||
#include "apps.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
auto cp = marian::bergamot::createConfigParser();
|
||||
auto options = cp.parseOptions(argc, argv, true);
|
||||
const std::string mode = options->get<std::string>("bergamot-mode");
|
||||
using namespace marian::bergamot;
|
||||
if (mode == "test-quality-scores") {
|
||||
testapp::qualityScores(options);
|
||||
} else if (mode == "test-alignment-scores") {
|
||||
testapp::alignmentAggregatedToSource(options, /*numeric=*/true);
|
||||
} else if (mode == "test-alignment-words") {
|
||||
testapp::alignmentAggregatedToSource(options, /*numeric=*/false);
|
||||
} else if (mode == "test-response-source-sentences") {
|
||||
testapp::annotatedTextSentences(options, /*source=*/true);
|
||||
} else if (mode == "test-response-target-sentences") {
|
||||
testapp::annotatedTextSentences(options, /*source=*/false);
|
||||
} else if (mode == "test-response-source-words") {
|
||||
testapp::annotatedTextWords(options, /*source=*/true);
|
||||
} else if (mode == "test-response-target-words") {
|
||||
testapp::annotatedTextWords(options, /*source=*/false);
|
||||
} else {
|
||||
ABORT("Unknown --mode {}. Please run a valid test", mode);
|
||||
}
|
||||
return 0;
|
||||
}
|
8
src/tests/intgemm_resolve.cpp
Normal file
8
src/tests/intgemm_resolve.cpp
Normal file
@ -0,0 +1,8 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "intgemm/intgemm.h"
|
||||
|
||||
// Prints the integer value of intgemm::kCPU followed by a newline.
int main() {
  const int cpu = static_cast<int>(intgemm::kCPU);
  std::cout << cpu << "\n";
  return 0;
}
|
22
src/tests/units/CMakeLists.txt
Normal file
22
src/tests/units/CMakeLists.txt
Normal file
@ -0,0 +1,22 @@
|
||||
# Unit tests
#
# Every entry <name> in UNIT_TESTS is built from run_tests.cpp and <name>.cpp into an executable called run_<name>,
# which is then registered with CTest under <name>.
# NOTE(review): CATCH_INCLUDE_DIR is not set in this file; it is presumably provided by the including directory.
set(UNIT_TESTS
    annotation_tests
)

foreach(test ${UNIT_TESTS})
  add_executable("run_${test}" run_tests.cpp "${test}.cpp")
  target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")

  if(CUDA_FOUND)
    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
  else(CUDA_FOUND)
    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
  endif(CUDA_FOUND)

  # CMake variable names and command keywords are case-sensitive: the compiler variable is MSVC and the scope keyword
  # is PUBLIC. The lowercase spellings made this branch unreachable.
  if(MSVC)
    # Disable C4305: truncation from 'double' to '_Ty'
    target_compile_options("run_${test}" PUBLIC /wd4305)
  endif(MSVC)

  add_test(NAME ${test} COMMAND "run_${test}")
endforeach(test)
|
Loading…
Reference in New Issue
Block a user