Partial test-apps and tolerance in evaluations (#184)

* Partial test applications Previously service-cli was used to generate output and accomplish regression testing for all of: (1) translated-text (2) alignment tokens + scores (3) quality scores (4) indirectly, annotation and tokenization. The --mode native now only outputs the translated text of the input read from stdin, faithful to the source structure. Test apps are separated so that each one tests only an individual functionality. This can help in independently testing ssplit-cpp, quality-scores for the quality estimation implementation etc. Separating numbers and text has the advantage of being able to compare each with tolerance, using BLEU (text) and some allowed error-rates (numbers). * Removing #mac tag * Moving test apps to src/tests * Tests are always on for CI Unit tests are turned off by looking for WASM_COMPATIBLE_SOURCES. * Fixing WASM_COMPATIBLE_SOURCE -> USE_WASM_COMPATIBLE_SOURCE * Workaround for now; CMakeLists.txt horrors are starting to bite * BRT: use bergamot-test instead of bergamot now * This should fix issues: CMakeLists.txt has so many paths * Casing to camelCase and removing legacyServiceCli * Removing leftover service-cli declaration, some doc updates * #pragma once is starting to look easier * All the more reasons to do #pragma once * Updating marian-dev with intgemm::kCPU print, resolved from INTGEMM_CPUID * BRT: Use --gemm-highest-arch instead of python script * Adding intgemm resolve here, where we always(?) have intgemm on? * intgemm-resolve in default binary directory * BRT: Update to use intgemm-resolve * marian-dev: Reset to without --gemm-highest-precision Co-authored-by: Kenneth Heafield <kpu@users.noreply.github.com>
2024-08-15 16:40:26 +03:00 · 2021-06-14 15:02:42 +01:00 · 2021-06-14 15:02:42 +01:00 · e9e5ac6782
commit e9e5ac6782
parent 16eb47f47e
14 changed files with 261 additions and 70 deletions
--- a/.github/workflows/native.yml
+++ b/.github/workflows/native.yml
@ -30,7 +30,7 @@ jobs:
        - name: Ubuntu 18.04 minimal
          os: ubuntu-18.04
          identifier: ubuntu_1804_minimal
-          cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on
+          cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
          brt_tags: "'#wasm'"
          unittests: 'false'
        - name: Ubuntu 20.04 full
@ -42,7 +42,7 @@ jobs:
        - name: Ubuntu 20.04 minimal
          os: ubuntu-20.04
          identifier: ubuntu_2004_minimal
-          cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on
+          cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
          brt_tags: "'#wasm'"
          unittests: 'false'
    name: ${{ matrix.name }}
@ -140,12 +140,12 @@ jobs:
          os: macos-10.15
          identifier: mac_1015_full
          cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=off
-          brt_tags: "'#mac'"
+          brt_tags: ""
          unittests: 'true'
        - name: MacOS 10.15 minimal
          os: macos-10.15
          identifier: mac_1015_minimal
-          cmake: -DCOMPILE_TESTS=off -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
+          cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
          brt_tags: "'#wasm'"
          unittests: 'false'
    name: ${{ matrix.name }}
--- a/3rd_party/CMakeLists.txt
+++ b/3rd_party/CMakeLists.txt
@ -1,3 +1,5 @@
+# marian-dev is tested elsewhere in both paths, turning off here.
+set(COMPILE_TESTS OFF)
 add_subdirectory(marian-dev)

 if(COMPILE_WASM)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -41,6 +41,9 @@ include(CMakeDependentOption)
 # Project specific cmake options
 option(COMPILE_WASM "Compile for WASM" OFF)
 cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" OFF "NOT COMPILE_WASM" ON)
+
+# WASM disables a million libraries, which also includes the unit test-library.
+cmake_dependent_option(COMPILE_UNIT_TESTS "Compile unit tests" OFF "USE_WASM_COMPATIBLE_SOURCE" ON)
 option(COMPILE_TESTS "Compile bergamot-tests" OFF)

 # Set 3rd party submodule specific cmake options for this project
--- a/app/cli.h
+++ b/app/cli.h
@ -1,5 +1,6 @@
 #ifndef BERGAMOT_APP_CLI_H
 #define BERGAMOT_APP_CLI_H
+#include <algorithm>
 #include <cstdlib>
 #include <future>
 #include <iostream>
@ -103,8 +104,7 @@ void decoder(Ptr<Options> options) {
 /// [brt/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh)
 ///
 /// * Input: reads from stdin, blob of text, read as a whole ; sentence-splitting etc handled internally.
-/// * Output: to stdout, translation of the source text and additional information like sentences, alignments between
-/// source and target tokens and quality scores.
+/// * Output: to stdout, translation of the source text faithful to source structure.
 ///
 /// @param [in] options: options to build translator
 void native(Ptr<Options> options) {
@ -124,58 +124,13 @@ void native(Ptr<Options> options) {
  std::string input = std_input.str();

  ResponseOptions responseOptions;
-  responseOptions.qualityScores = true;
-  responseOptions.alignment = true;
-  responseOptions.alignmentThreshold = 0.2f;

  // Wait on future until Response is complete
  std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
  responseFuture.wait();
  Response response = responseFuture.get();

-  std::cout << "[original]: " << response.source.text << '\n';
-  std::cout << "[translated]: " << response.target.text << '\n';
-  for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
-    std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx) << '\n';
-    std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx) << '\n';
-    std::cout << "Alignments" << '\n';
-    typedef std::pair<size_t, float> Point;
-
-    // Initialize a point vector.
-    std::vector<std::vector<Point>> aggregate(response.source.numWords(sentenceIdx));
-
-    // Handle alignments
-    auto &alignments = response.alignments[sentenceIdx];
-    for (auto &p : alignments) {
-      aggregate[p.src].emplace_back(p.tgt, p.prob);
-    }
-
-    for (size_t src = 0; src < aggregate.size(); src++) {
-      std::cout << response.source.word(sentenceIdx, src) << ": ";
-      for (auto &p : aggregate[src]) {
-        std::cout << response.target.word(sentenceIdx, p.first) << "(" << p.second << ") ";
-      }
-      std::cout << '\n';
-    }
-
-    // Handle quality.
-    auto &quality = response.qualityScores[sentenceIdx];
-    std::cout << "Quality: whole(" << quality.sequence << "), tokens below:" << '\n';
-    size_t wordIdx = 0;
-    bool first = true;
-    for (auto &p : quality.word) {
-      if (first) {
-        first = false;
-      } else {
-        std::cout << " ";
-      }
-      std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p << ")";
-      wordIdx++;
-    }
-    std::cout << '\n';
-  }
-  std::cout << "--------------------------\n";
-  std::cout << '\n';
+  std::cout << response.target.text;
 }

 }  // namespace app
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 020135af1b620caa27929c1403c50ec3299e5bff
+Subproject commit b0ba62eade4af7752c65c76cb17eab421ea02445
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -1,7 +1,6 @@
 add_subdirectory(translator)

-if(COMPILE_TESTS)
-  # Catch currently comes from marian sources.
-  add_subdirectory(tests)
+if (COMPILE_TESTS)
+    add_subdirectory(tests)
 endif(COMPILE_TESTS)

--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@ -1,22 +1,32 @@
 # Unit tests
-set(UNIT_TESTS
-    annotation_tests
-)

-foreach(test ${UNIT_TESTS})
-  add_executable("run_${test}" run_tests.cpp "${test}.cpp")
-  target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
+# Include Catch explicitly from marian. The path is anchored at the top-level
+# source directory because 3rd_party/ lives at the repository root, not under
+# src/tests (which is what CMAKE_CURRENT_SOURCE_DIR would resolve to here).
+set(CATCH_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rd_party/marian-dev/3rd-party)
+add_library(Catch INTERFACE)
+target_include_directories(Catch INTERFACE ${CATCH_INCLUDE_DIR})

+if (COMPILE_UNIT_TESTS)
+    add_subdirectory(units)
+endif (COMPILE_UNIT_TESTS)
+
+
+
+if(NOT MSVC)
+  # Testing apps
+  set(APP_TESTS)
+  add_executable("bergamot-test" "cli.cpp" "apps.cpp")
+  
  if(CUDA_FOUND)
-    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
+    target_link_libraries("bergamot-test" bergamot-translator)
  else(CUDA_FOUND)
-    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
+    target_link_libraries("bergamot-test" bergamot-translator)
  endif(CUDA_FOUND)
+  
+  set_target_properties("bergamot-test" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")

-  if(msvc)
-    # disable c4305: truncation from 'double' to '_ty'
-    target_compile_options("run_${test}" public /wd4305)
-  endif(msvc)
+  # Adding an intgemm_resolve cmdline
+  add_executable(intgemm-resolve intgemm_resolve.cpp)
+  target_link_libraries(intgemm-resolve PRIVATE bergamot-translator)
+  set_target_properties(intgemm-resolve PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+endif(NOT MSVC)

-  add_test(NAME ${test} COMMAND "run_${test}")
-endforeach(test)
--- a/src/tests/apps.cpp
+++ b/src/tests/apps.cpp
@ -0,0 +1,116 @@
+#include "apps.h"
+
+namespace marian {
+namespace bergamot {
+namespace testapp {
+
+// Shared driver for all testapps: builds a Service from options, reads the whole of stdin as one blob, translates it
+// with the supplied responseOptions and returns the completed Response.
+Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions) {
+  // Memories for bytearrays (including model, shortlist and vocabs). They are only filled in when the "bytearray"
+  // option is set; otherwise the bundle stays default-constructed.
+  MemoryBundle memories;
+  const bool loadFromByteArrays = options->get<bool>("bytearray");
+  if (loadFromByteArrays) {
+    memories = getMemoryBundleFromConfig(options);
+  }
+
+  Service service(options, std::move(memories));
+
+  // Slurp everything available on stdin into a single string.
+  std::ostringstream slurped;
+  slurped << std::cin.rdbuf();
+  std::string text = slurped.str();
+
+  // Block until the translation has finished, then hand the Response back to the caller.
+  std::future<Response> pending = service.translate(std::move(text), responseOptions);
+  pending.wait();
+  return pending.get();
+}
+
+// Translates stdin with quality scores enabled and prints them to stdout. For each sentence, the first line holds the
+// sequence-level score and the second line holds the word-level scores separated by single spaces. Consecutive
+// sentences are separated by an empty line.
+void qualityScores(Ptr<Options> options) {
+  ResponseOptions responseOptions;
+  responseOptions.qualityScores = true;
+
+  Response response = translateFromStdin(options, responseOptions);
+  // size_t indices avoid comparing a signed int against container sizes and match the index type used by
+  // alignmentAggregatedToSource.
+  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
+    auto &quality = response.qualityScores[sentenceIdx];
+    // An empty line goes before every sentence except the first.
+    std::cout << ((sentenceIdx == 0) ? "" : "\n") << quality.sequence << '\n';
+    for (size_t wordIdx = 0; wordIdx < quality.word.size(); wordIdx++) {
+      std::cout << ((wordIdx == 0) ? "" : " ");
+      std::cout << quality.word[wordIdx];
+    }
+    std::cout << '\n';
+  }
+}
+
+// Translates stdin with alignments enabled (alignmentThreshold 0.2) and prints one line per source token, with an
+// empty line between sentences. If numeric is true, each line holds the alignment probabilities separated by spaces.
+// Otherwise each line holds the source token, ": ", and the aligned target tokens. Entries on a line are ordered by
+// target token index.
+void alignmentAggregatedToSource(Ptr<Options> options, bool numeric) {
+  ResponseOptions responseOptions;
+  responseOptions.alignment = true;
+  responseOptions.alignmentThreshold = 0.2f;
+  Response response = translateFromStdin(options, responseOptions);
+
+  // (target token index, alignment probability)
+  typedef std::pair<size_t, float> Point;
+  auto byTargetIndex = [](const Point &lhs, const Point &rhs) { return lhs.first < rhs.first; };
+
+  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
+    if (sentenceIdx != 0) {
+      std::cout << "\n";
+    }
+
+    // Alignments are aggregated at the source side, which does not depend on matrix-multiplications. Only target
+    // tokens are printed as the varying part, so the text can be compared with BLEU based tooling.
+    std::vector<std::vector<Point>> bySource(response.source.numWords(sentenceIdx));
+    for (auto &link : response.alignments[sentenceIdx]) {
+      bySource[link.src].emplace_back(link.tgt, link.prob);
+    }
+
+    for (size_t sourceIdx = 0; sourceIdx < bySource.size(); sourceIdx++) {
+      std::vector<Point> &targets = bySource[sourceIdx];
+      std::sort(targets.begin(), targets.end(), byTargetIndex);
+
+      if (numeric) {
+        for (size_t j = 0; j < targets.size(); j++) {
+          if (j != 0) {
+            std::cout << " ";
+          }
+          std::cout << targets[j].second;
+        }
+      } else {
+        std::cout << response.source.word(sentenceIdx, sourceIdx) << ": ";
+        for (const Point &target : targets) {
+          std::cout << " ";
+          std::cout << response.target.word(sentenceIdx, target.first);
+        }
+      }
+      std::cout << '\n';
+    }
+  }
+}
+
+// Translates stdin with default ResponseOptions and prints one line per sentence, holding the words of that sentence
+// separated by tabs. Words are taken from the source annotation if source is true, from the target annotation
+// otherwise.
+void annotatedTextWords(Ptr<Options> options, bool source) {
+  ResponseOptions defaultOptions;
+  Response response = translateFromStdin(options, defaultOptions);
+  AnnotatedText &side = source ? response.source : response.target;
+
+  for (size_t sentenceIdx = 0; sentenceIdx < side.numSentences(); ++sentenceIdx) {
+    for (size_t wordIdx = 0; wordIdx < side.numWords(sentenceIdx); ++wordIdx) {
+      if (wordIdx != 0) {
+        std::cout << "\t";
+      }
+      std::cout << side.word(sentenceIdx, wordIdx);
+    }
+    std::cout << "\n";
+  }
+}
+
+// Translates stdin with default ResponseOptions and prints every sentence on its own line. Sentences are taken from
+// the source annotation if source is true, from the target annotation otherwise.
+void annotatedTextSentences(Ptr<Options> options, bool source) {
+  ResponseOptions defaultOptions;
+  Response response = translateFromStdin(options, defaultOptions);
+  AnnotatedText &side = source ? response.source : response.target;
+
+  size_t sentenceIdx = 0;
+  while (sentenceIdx < side.numSentences()) {
+    std::cout << side.sentence(sentenceIdx) << "\n";
+    ++sentenceIdx;
+  }
+}
+
+}  // namespace testapp
+}  // namespace bergamot
+}  // namespace marian
--- a/src/tests/apps.h
+++ b/src/tests/apps.h
@ -0,0 +1,49 @@
+#ifndef BERGAMOT_SRC_TESTS_APPS_H
+#define BERGAMOT_SRC_TESTS_APPS_H
+#include <algorithm>
+#include <cstdlib>
+#include <future>
+#include <iostream>
+#include <sstream>
+
+#include "common/definitions.h"
+#include "common/timer.h"
+#include "common/utils.h"
+#include "marian.h"
+#include "translator/byte_array_util.h"
+#include "translator/parser.h"
+#include "translator/response.h"
+#include "translator/response_options.h"
+#include "translator/service.h"
+
+namespace marian {
+namespace bergamot {
+
+namespace testapp {
+
+// Utility function, common for all testapps. Reads content from stdin, builds a Service based on options and constructs
+// a response containing translation data according to responseOptions.
+Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions);
+
+// Reads from stdin and translates. The quality scores for each translated sentence are printed, with entries separated
+// by empty lines. For each entry, the first line contains the whole-sentence quality score and the second line the
+// word quality scores.
+void qualityScores(Ptr<Options> options);
+
+// Reads from stdin and translates. Alignments are printed aligned to the source-tokens, following format src-token:
+// [possible-target-alignments], if numeric is false. If numeric is true, only alignment probabilities are printed
+// instead of the tokens.
+void alignmentAggregatedToSource(Ptr<Options> options, bool numeric = false);
+
+// Reads from stdin and translates. Prints the words of each sentence on one line, separated by tabs. Prints words from
+// the source-side text annotation if source=true, target annotation otherwise.
+void annotatedTextWords(Ptr<Options> options, bool source = true);
+
+// Reads from stdin and translates the read content. Prints each sentence of the constructed response on its own line,
+// taken from the source if source = true and from the target if source = false.
+void annotatedTextSentences(Ptr<Options> options, bool source = true);
+
+}  // namespace testapp
+}  // namespace bergamot
+}  // namespace marian
+
+#endif  // BERGAMOT_SRC_TESTS_APPS_H
--- a/src/tests/cli.cpp
+++ b/src/tests/cli.cpp
@ -0,0 +1,27 @@
+
+#include "apps.h"
+
+// Entry point of the bergamot-test binary: parses the command line and runs the test-app selected by the
+// "bergamot-mode" option. An unrecognised mode aborts with an error message.
+int main(int argc, char *argv[]) {
+  auto configParser = marian::bergamot::createConfigParser();
+  auto options = configParser.parseOptions(argc, argv, true);
+  const std::string mode = options->get<std::string>("bergamot-mode");
+  using namespace marian::bergamot;
+
+  if (mode == "test-quality-scores") {
+    testapp::qualityScores(options);
+    return 0;
+  }
+
+  // Alignment apps: same routine, printing either probabilities or words.
+  if (mode == "test-alignment-scores" || mode == "test-alignment-words") {
+    const bool numeric = (mode == "test-alignment-scores");
+    testapp::alignmentAggregatedToSource(options, numeric);
+    return 0;
+  }
+
+  // Sentence-level annotation apps, for the source or the target side.
+  if (mode == "test-response-source-sentences" || mode == "test-response-target-sentences") {
+    const bool source = (mode == "test-response-source-sentences");
+    testapp::annotatedTextSentences(options, source);
+    return 0;
+  }
+
+  // Word-level annotation apps, for the source or the target side.
+  if (mode == "test-response-source-words" || mode == "test-response-target-words") {
+    const bool source = (mode == "test-response-source-words");
+    testapp::annotatedTextWords(options, source);
+    return 0;
+  }
+
+  ABORT("Unknown --mode {}. Please run a valid test", mode);
+  return 0;
+}
--- a/src/tests/intgemm_resolve.cpp
+++ b/src/tests/intgemm_resolve.cpp
@ -0,0 +1,8 @@
+#include <iostream>
+
+#include "intgemm/intgemm.h"
+
+// Prints the integer value of intgemm::kCPU to stdout, followed by a newline.
+int main() {
+  const int cpuValue = static_cast<int>(intgemm::kCPU);
+  std::cout << cpuValue << "\n";
+  return 0;
+}
--- a/src/tests/units/CMakeLists.txt
+++ b/src/tests/units/CMakeLists.txt
@ -0,0 +1,22 @@
+# Unit tests
+set(UNIT_TESTS
+    annotation_tests
+)
+
+# Build one "run_<test>" executable per entry in UNIT_TESTS and register it with CTest.
+foreach(test ${UNIT_TESTS})
+  add_executable("run_${test}" run_tests.cpp "${test}.cpp")
+  target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
+
+  if(CUDA_FOUND)
+    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
+  else(CUDA_FOUND)
+    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
+  endif(CUDA_FOUND)
+
+  # CMake variable names and command keywords are case-sensitive: the compiler
+  # check must read MSVC and the scope keyword must read PUBLIC, otherwise the
+  # warning suppression below is never applied.
+  if(MSVC)
+    # Disable C4305: truncation from 'double' to '_Ty'
+    target_compile_options("run_${test}" PUBLIC /wd4305)
+  endif(MSVC)
+
+  add_test(NAME ${test} COMMAND "run_${test}")
+endforeach(test)
--- a/src/tests/units/annotation_tests.cpp
+++ b/src/tests/units/annotation_tests.cpp
--- a/src/tests/units/run_tests.cpp
+++ b/src/tests/units/run_tests.cpp