Partial test-apps and tolerance in evaluations (#184)

* Partial test applications

Previously service-cli was used to generate output and accomplish
regression testing for all of: (1) translated-text (2) alignment tokens
+ scores (3) quality scores (4) indirectly annotation and tokenizations.

The --mode native now outputs only the translated text for the input read
from stdin, faithful to the structure of the source.

Test apps are separated into testing only individual functionalities.
This can help in independently testing ssplit-cpp, quality-scores for
the quality estimation implementation etc.

Separating numbers and text has the advantage that each can be compared
with its own tolerance: BLEU for text and some allowed error-rate for
numbers.

* Removing #mac tag

* Moving test apps to src/tests

* Tests are always on for CI

Unit tests are turned off when WASM_COMPATIBLE_SOURCES is set.

* Fixing WASM_COMPATIBLE_SOURCE -> USE_WASM_COMPATIBLE_SOURCE

* Workaround for now; CMakeLists.txt horrors are starting to bite

* BRT: use bergamot-test instead of bergamot now

* This should fix issues: CMakeLists.txt has so many paths

* Casing to camelCase and removing legacyServiceCli

* removing leftover service-cli declaration, some doc updates

* #pragma once is starting to look easier

* All the more reasons to do #pragma once

* Updating marian-dev with intgemm::kCPU print, resolved from INTGEMM_CPUID

* BRT: Use --gemm-highest-arch instead of python script

* Adding intgemm resolve here, where we always(?) have intgemm on?

* intgemm-resolve in default binary directory

* BRT: Update to use intgemm-resolve

* marian-dev: Reset to without --gemm-highest-precision

Co-authored-by: Kenneth Heafield <kpu@users.noreply.github.com>
This commit is contained in:
Jerin Philip 2021-06-14 15:02:42 +01:00 committed by GitHub
parent 16eb47f47e
commit e9e5ac6782
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 261 additions and 70 deletions

View File

@ -30,7 +30,7 @@ jobs:
- name: Ubuntu 18.04 minimal
os: ubuntu-18.04
identifier: ubuntu_1804_minimal
cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on
cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
brt_tags: "'#wasm'"
unittests: 'false'
- name: Ubuntu 20.04 full
@ -42,7 +42,7 @@ jobs:
- name: Ubuntu 20.04 minimal
os: ubuntu-20.04
identifier: ubuntu_2004_minimal
cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on
cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
brt_tags: "'#wasm'"
unittests: 'false'
name: ${{ matrix.name }}
@ -140,12 +140,12 @@ jobs:
os: macos-10.15
identifier: mac_1015_full
cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=off
brt_tags: "'#mac'"
brt_tags: ""
unittests: 'true'
- name: MacOS 10.15 minimal
os: macos-10.15
identifier: mac_1015_minimal
cmake: -DCOMPILE_TESTS=off -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
brt_tags: "'#wasm'"
unittests: 'false'
name: ${{ matrix.name }}

View File

@ -1,3 +1,5 @@
# marian-dev is tested elsewhere in both paths, turning off here.
set(COMPILE_TESTS OFF)
add_subdirectory(marian-dev)
if(COMPILE_WASM)

View File

@ -41,6 +41,9 @@ include(CMakeDependentOption)
# Project specific cmake options
option(COMPILE_WASM "Compile for WASM" OFF)
cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" OFF "NOT COMPILE_WASM" ON)
# WASM disables a million libraries, which also includes the unit test-library.
cmake_dependent_option(COMPILE_UNIT_TESTS "Compile unit tests" OFF "USE_WASM_COMPATIBLE_SOURCE" ON)
option(COMPILE_TESTS "Compile bergamot-tests" OFF)
# Set 3rd party submodule specific cmake options for this project

View File

@ -1,5 +1,6 @@
#ifndef BERGAMOT_APP_CLI_H
#define BERGAMOT_APP_CLI_H
#include <algorithm>
#include <cstdlib>
#include <future>
#include <iostream>
@ -103,8 +104,7 @@ void decoder(Ptr<Options> options) {
/// [brt/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh)
///
/// * Input: reads from stdin, blob of text, read as a whole ; sentence-splitting etc handled internally.
/// * Output: to stdout, translation of the source text and additional information like sentences, alignments between
/// source and target tokens and quality scores.
/// * Output: to stdout, translation of the source text faithful to source structure.
///
/// @param [in] options: options to build translator
void native(Ptr<Options> options) {
@ -124,58 +124,13 @@ void native(Ptr<Options> options) {
std::string input = std_input.str();
ResponseOptions responseOptions;
responseOptions.qualityScores = true;
responseOptions.alignment = true;
responseOptions.alignmentThreshold = 0.2f;
// Wait on future until Response is complete
std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
responseFuture.wait();
Response response = responseFuture.get();
std::cout << "[original]: " << response.source.text << '\n';
std::cout << "[translated]: " << response.target.text << '\n';
for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx) << '\n';
std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx) << '\n';
std::cout << "Alignments" << '\n';
typedef std::pair<size_t, float> Point;
// Initialize a point vector.
std::vector<std::vector<Point>> aggregate(response.source.numWords(sentenceIdx));
// Handle alignments
auto &alignments = response.alignments[sentenceIdx];
for (auto &p : alignments) {
aggregate[p.src].emplace_back(p.tgt, p.prob);
}
for (size_t src = 0; src < aggregate.size(); src++) {
std::cout << response.source.word(sentenceIdx, src) << ": ";
for (auto &p : aggregate[src]) {
std::cout << response.target.word(sentenceIdx, p.first) << "(" << p.second << ") ";
}
std::cout << '\n';
}
// Handle quality.
auto &quality = response.qualityScores[sentenceIdx];
std::cout << "Quality: whole(" << quality.sequence << "), tokens below:" << '\n';
size_t wordIdx = 0;
bool first = true;
for (auto &p : quality.word) {
if (first) {
first = false;
} else {
std::cout << " ";
}
std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p << ")";
wordIdx++;
}
std::cout << '\n';
}
std::cout << "--------------------------\n";
std::cout << '\n';
std::cout << response.target.text;
}
} // namespace app

@ -1 +1 @@
Subproject commit 020135af1b620caa27929c1403c50ec3299e5bff
Subproject commit b0ba62eade4af7752c65c76cb17eab421ea02445

View File

@ -1,7 +1,6 @@
add_subdirectory(translator)
if(COMPILE_TESTS)
# Catch currently comes from marian sources.
add_subdirectory(tests)
if (COMPILE_TESTS)
add_subdirectory(tests)
endif(COMPILE_TESTS)

View File

@ -1,22 +1,32 @@
# Unit tests
set(UNIT_TESTS
annotation_tests
)
foreach(test ${UNIT_TESTS})
add_executable("run_${test}" run_tests.cpp "${test}.cpp")
target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
# Include Catch explicitly from marian.
set(CATCH_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rd_party/marian-dev/3rd-party)
add_library(Catch INTERFACE)
target_include_directories(Catch INTERFACE ${CATCH_INCLUDE_DIR})
if (COMPILE_UNIT_TESTS)
add_subdirectory(units)
endif (COMPILE_UNIT_TESTS)
if(NOT MSVC)
# Testing apps
set(APP_TESTS)
add_executable("bergamot-test" "cli.cpp" "apps.cpp")
if(CUDA_FOUND)
target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
target_link_libraries("bergamot-test" bergamot-translator)
else(CUDA_FOUND)
target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
target_link_libraries("bergamot-test" bergamot-translator)
endif(CUDA_FOUND)
set_target_properties("bergamot-test" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
if(msvc)
# disable c4305: truncation from 'double' to '_ty'
target_compile_options("run_${test}" public /wd4305)
endif(msvc)
# Adding an intgemm_resolve cmdline
add_executable(intgemm-resolve intgemm_resolve.cpp)
target_link_libraries(intgemm-resolve PRIVATE bergamot-translator)
set_target_properties(intgemm-resolve PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
endif(NOT MSVC)
add_test(NAME ${test} COMMAND "run_${test}")
endforeach(test)

116
src/tests/apps.cpp Normal file
View File

@ -0,0 +1,116 @@
#include "apps.h"
namespace marian {
namespace bergamot {
namespace testapp {
// Shared driver for all testapps: builds a Service from options, reads the whole of stdin as the source text and blocks
// until the translation requested through responseOptions is complete.
//
// @param [in] options: options used to construct the Service (and, if "bytearray" is set, the MemoryBundle).
// @param [in] responseOptions: forwarded unchanged to Service::translate.
// @returns the completed Response for the text read from stdin.
Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions) {
  // Memories for bytearrays (model, shortlist and vocabs) are only loaded on request; otherwise the bundle stays
  // default-constructed.
  MemoryBundle memoryBundle;
  if (options->get<bool>("bytearray")) {
    memoryBundle = getMemoryBundleFromConfig(options);
  }
  Service service(options, std::move(memoryBundle));

  // Drain stdin into a single text blob.
  std::ostringstream stdinContents;
  stdinContents << std::cin.rdbuf();
  std::string sourceText = stdinContents.str();

  // Block on the future until the Response is ready, then hand it back to the caller.
  std::future<Response> pending = service.translate(std::move(sourceText), responseOptions);
  pending.wait();
  return pending.get();
}
// Translates the text on stdin with quality scores enabled and prints them to stdout.
//
// For every sentence two lines are written: the sequence-level score, then the word-level scores separated by single
// spaces. Entries of consecutive sentences are separated by an empty line.
//
// @param [in] options: options used to build the translator.
void qualityScores(Ptr<Options> options) {
  ResponseOptions responseOptions;
  responseOptions.qualityScores = true;
  Response response = translateFromStdin(options, responseOptions);

  // Indices are size_t to match the unsigned sizes they are compared against (and the other testapps in this file).
  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
    const auto &quality = response.qualityScores[sentenceIdx];
    // A blank line goes before every entry except the first, so the output has no trailing empty line.
    std::cout << ((sentenceIdx == 0) ? "" : "\n") << quality.sequence << '\n';
    for (size_t wordIdx = 0; wordIdx < quality.word.size(); wordIdx++) {
      std::cout << ((wordIdx == 0) ? "" : " ");
      std::cout << quality.word[wordIdx];
    }
    std::cout << '\n';
  }
}
// Translates the text on stdin with alignments enabled (threshold 0.2) and prints, for each source token, the targets
// it is aligned to, ordered by target-token index. Sentences are separated by an empty line.
//
// Aggregation happens on the source side so that the text variant can be compared with BLEU-like measures and the
// numeric variant with an error tolerance.
//
// @param [in] options: options used to build the translator.
// @param [in] numeric: if true, print only the alignment probabilities (space separated) on each line; otherwise print
// the source token followed by ": " and the aligned target tokens, each preceded by a space.
void alignmentAggregatedToSource(Ptr<Options> options, bool numeric) {
  ResponseOptions responseOptions;
  responseOptions.alignment = true;
  responseOptions.alignmentThreshold = 0.2f;
  Response response = translateFromStdin(options, responseOptions);

  // (target-token index, alignment probability)
  using Point = std::pair<size_t, float>;
  auto byTargetIdx = [](const Point &lhs, const Point &rhs) { return lhs.first < rhs.first; };

  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
    if (sentenceIdx != 0) {
      std::cout << "\n";
    }

    // Bucket every alignment point of this sentence under its source token.
    std::vector<std::vector<Point>> perSource(response.source.numWords(sentenceIdx));
    for (auto &point : response.alignments[sentenceIdx]) {
      perSource[point.src].emplace_back(point.tgt, point.prob);
    }

    for (size_t sourceIdx = 0; sourceIdx < perSource.size(); sourceIdx++) {
      std::vector<Point> &targets = perSource[sourceIdx];
      std::sort(targets.begin(), targets.end(), byTargetIdx);

      if (numeric) {
        for (size_t j = 0; j < targets.size(); j++) {
          if (j != 0) {
            std::cout << " ";
          }
          std::cout << targets[j].second;
        }
      } else {
        std::cout << response.source.word(sentenceIdx, sourceIdx) << ": ";
        for (const Point &target : targets) {
          std::cout << " ";
          std::cout << response.target.word(sentenceIdx, target.first);
        }
      }
      std::cout << '\n';
    }
  }
}
// Translates the text on stdin and prints the words of the chosen side of the Response: one line per sentence, words
// separated by tabs.
//
// @param [in] options: options used to build the translator.
// @param [in] source: print words of response.source if true, of response.target otherwise.
void annotatedTextWords(Ptr<Options> options, bool source) {
  ResponseOptions responseOptions;
  Response response = translateFromStdin(options, responseOptions);

  AnnotatedText &side = source ? response.source : response.target;
  for (size_t sentenceIdx = 0; sentenceIdx < side.numSentences(); sentenceIdx++) {
    for (size_t wordIdx = 0; wordIdx < side.numWords(sentenceIdx); wordIdx++) {
      // Tab goes between words only, never before the first one.
      if (wordIdx != 0) {
        std::cout << "\t";
      }
      std::cout << side.word(sentenceIdx, wordIdx);
    }
    std::cout << "\n";
  }
}
// Translates the text on stdin and prints every sentence of the chosen side of the Response on its own line.
//
// @param [in] options: options used to build the translator.
// @param [in] source: print sentences of response.source if true, of response.target otherwise.
void annotatedTextSentences(Ptr<Options> options, bool source) {
  ResponseOptions responseOptions;
  Response response = translateFromStdin(options, responseOptions);

  AnnotatedText &side = source ? response.source : response.target;
  size_t sentenceIdx = 0;
  while (sentenceIdx < side.numSentences()) {
    std::cout << side.sentence(sentenceIdx) << "\n";
    ++sentenceIdx;
  }
}
} // namespace testapp
} // namespace bergamot
} // namespace marian

49
src/tests/apps.h Normal file
View File

@ -0,0 +1,49 @@
#ifndef BERGAMOT_SRC_TESTS_APPS_H
#define BERGAMOT_SRC_TESTS_APPS_H
#include <algorithm>
#include <cstdlib>
#include <future>
#include <iostream>
#include <sstream>
#include "common/definitions.h"
#include "common/timer.h"
#include "common/utils.h"
#include "marian.h"
#include "translator/byte_array_util.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/response_options.h"
#include "translator/service.h"
namespace marian {
namespace bergamot {
namespace testapp {
// Utility function, common for all testapps. Reads all content from stdin, builds a Service based on options and
// returns the Response obtained by translating that content according to responseOptions.
Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions);
// Reads from stdin and translates with quality scores enabled. For each sentence two lines are printed: the first line
// contains the sequence-level quality score and the second line the word-level quality scores separated by spaces.
// Entries of consecutive sentences are separated by an empty line.
void qualityScores(Ptr<Options> options);
// Reads from stdin and translates with alignments enabled. One line is printed per source token, with the targets
// aligned to it ordered by target-token index; sentences are separated by an empty line. If numeric is false, each line
// follows the format src-token: [possible-target-alignments]. If numeric is true, only the alignment probabilities are
// printed (separated by spaces) instead of the tokens.
void alignmentAggregatedToSource(Ptr<Options> options, bool numeric = false);
// Reads from stdin and translates. Prints one line per sentence containing its tokens separated by tabs. Prints words
// from the source side text annotation if source=true, from the target annotation otherwise.
void annotatedTextWords(Ptr<Options> options, bool source = true);
// Reads from stdin and translates the read content. Prints each sentence of the constructed response on its own line,
// taken from the source if source=true and from the target otherwise.
void annotatedTextSentences(Ptr<Options> options, bool source = true);
} // namespace testapp
} // namespace bergamot
} // namespace marian
#endif // BERGAMOT_SRC_TESTS_APPS_H

27
src/tests/cli.cpp Normal file
View File

@ -0,0 +1,27 @@
#include "apps.h"
// Entry point of bergamot-test: parses the command line and runs the single test-app selected by --bergamot-mode.
// Aborts if the mode does not name a known test-app.
int main(int argc, char *argv[]) {
  using namespace marian::bergamot;

  auto configParser = marian::bergamot::createConfigParser();
  auto options = configParser.parseOptions(argc, argv, true);
  const std::string mode = options->get<std::string>("bergamot-mode");

  if (mode == "test-quality-scores") {
    testapp::qualityScores(options);
    return 0;
  }

  // Alignment apps: same routine, numeric or textual output.
  if (mode == "test-alignment-scores") {
    testapp::alignmentAggregatedToSource(options, /*numeric=*/true);
    return 0;
  }
  if (mode == "test-alignment-words") {
    testapp::alignmentAggregatedToSource(options, /*numeric=*/false);
    return 0;
  }

  // Annotation apps: sentences or words, from the source or target side.
  if (mode == "test-response-source-sentences") {
    testapp::annotatedTextSentences(options, /*source=*/true);
    return 0;
  }
  if (mode == "test-response-target-sentences") {
    testapp::annotatedTextSentences(options, /*source=*/false);
    return 0;
  }
  if (mode == "test-response-source-words") {
    testapp::annotatedTextWords(options, /*source=*/true);
    return 0;
  }
  if (mode == "test-response-target-words") {
    testapp::annotatedTextWords(options, /*source=*/false);
    return 0;
  }

  ABORT("Unknown --mode {}. Please run a valid test", mode);
  return 0;
}

View File

@ -0,0 +1,8 @@
#include <iostream>
#include "intgemm/intgemm.h"
// Prints the integer value of intgemm::kCPU, followed by a newline, to stdout.
int main() {
  const int cpuValue = static_cast<int>(intgemm::kCPU);
  std::cout << cpuValue << "\n";
  return 0;
}

View File

@ -0,0 +1,22 @@
# Unit tests
# Every entry <test> in UNIT_TESTS is built from run_tests.cpp and <test>.cpp into an executable run_<test>, which is
# then registered with CTest under the name <test>.
set(UNIT_TESTS
    annotation_tests
)

foreach(test ${UNIT_TESTS})
  add_executable("run_${test}" run_tests.cpp "${test}.cpp")
  target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")

  if(CUDA_FOUND)
    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
  else(CUDA_FOUND)
    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
  endif(CUDA_FOUND)

  # CMake variable names and scope keywords are case-sensitive: the compiler check must be spelled MSVC and the scope
  # PUBLIC, otherwise the branch is never taken and the keyword is not recognised.
  if(MSVC)
    # Disable C4305: truncation from 'double' to '_Ty'
    target_compile_options("run_${test}" PUBLIC /wd4305)
  endif(MSVC)

  add_test(NAME ${test} COMMAND "run_${test}")
endforeach(test)