From e9e5ac6782a57b9ac8eb98afd9c988870fb4c798 Mon Sep 17 00:00:00 2001
From: Jerin Philip <jphilip@ed.ac.uk>
Date: Mon, 14 Jun 2021 15:02:42 +0100
Subject: [PATCH] Partial test-apps and tolerance in evaluations (#184)

* Partial test applications

Previously service-cli was used to generate output and accomplish
regression testing for all of: (1) translated-text (2) alignment tokens
+ scores (3) quality scores (4) indirectly annotation and tokenizations.

With --mode native, the CLI now outputs only the translated text of the
input read from stdin, faithful to the structure of the source.

Test apps are separated into testing only individual functionalities.
This can help in independently testing ssplit-cpp, quality-scores for
the quality estimation implementation etc.

Separating numbers and text has the advantage that each can be compared
with its own tolerance: BLEU for text and an allowed error-rate for
numbers.

* Removing #mac tag

* Moving test apps to src/tests

* Tests are always on for CI

Unit tests are turned off looking for WASM_COMPATIBLE_SOURCES.

* Fixing WASM_COMPATIBLE_SOURCE -> USE_WASM_COMPATIBLE_SOURCE

* Workaround for now; CMakeLists.txt horrors are starting to bite

* BRT: use bergamot-test instead of bergamot now

* This should fix issues: CMakeLists.txt has so many paths

* Casing to camelCase and removing legacyServiceCli

* removing leftover service-cli declaration, some doc updates

* #pragma once is starting to look easier

* All the more reasons to do #pragma once

* Updating marian-dev with intgemm::kCPU print, resolved from INTGEMM_CPUID

* BRT: Use --gemm-highest-arch instead of python script

* Adding intgemm resolve here, where always(?) have intgemm on?

* intgemm-resolve in default binary directory

* BRT: Update to use intgemm-resolve

* marian-dev: Reset to without --gemm-highest-precision

Co-authored-by: Kenneth Heafield <kpu@users.noreply.github.com>
---
 .github/workflows/native.yml               |   8 +-
 3rd_party/CMakeLists.txt                   |   2 +
 CMakeLists.txt                             |   3 +
 app/cli.h                                  |  51 +--------
 bergamot-translator-tests                  |   2 +-
 src/CMakeLists.txt                         |   5 +-
 src/tests/CMakeLists.txt                   |  38 ++++---
 src/tests/apps.cpp                         | 116 +++++++++++++++++++++
 src/tests/apps.h                           |  49 +++++++++
 src/tests/cli.cpp                          |  27 +++++
 src/tests/intgemm_resolve.cpp              |   8 ++
 src/tests/units/CMakeLists.txt             |  22 ++++
 src/tests/{ => units}/annotation_tests.cpp |   0
 src/tests/{ => units}/run_tests.cpp        |   0
 14 files changed, 261 insertions(+), 70 deletions(-)
 create mode 100644 src/tests/apps.cpp
 create mode 100644 src/tests/apps.h
 create mode 100644 src/tests/cli.cpp
 create mode 100644 src/tests/intgemm_resolve.cpp
 create mode 100644 src/tests/units/CMakeLists.txt
 rename src/tests/{ => units}/annotation_tests.cpp (100%)
 rename src/tests/{ => units}/run_tests.cpp (100%)

diff --git a/.github/workflows/native.yml b/.github/workflows/native.yml
index eb3f4d4..c572cb5 100644
--- a/.github/workflows/native.yml
+++ b/.github/workflows/native.yml
@@ -30,7 +30,7 @@ jobs:
         - name: Ubuntu 18.04 minimal
           os: ubuntu-18.04
           identifier: ubuntu_1804_minimal
-          cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on
+          cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
           brt_tags: "'#wasm'"
           unittests: 'false'
         - name: Ubuntu 20.04 full
@@ -42,7 +42,7 @@ jobs:
         - name: Ubuntu 20.04 minimal
           os: ubuntu-20.04
           identifier: ubuntu_2004_minimal
-          cmake: -DCOMPILE_TESTS=off -DUSE_WASM_COMPATIBLE_SOURCE=on
+          cmake: -DCOMPILE_TESTS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
           brt_tags: "'#wasm'"
           unittests: 'false'
     name: ${{ matrix.name }}
@@ -140,12 +140,12 @@ jobs:
           os: macos-10.15
           identifier: mac_1015_full
           cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=off
-          brt_tags: "'#mac'"
+          brt_tags: ""
           unittests: 'true'
         - name: MacOS 10.15 minimal
           os: macos-10.15
           identifier: mac_1015_minimal
-          cmake: -DCOMPILE_TESTS=off -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
+          cmake: -DCOMPILE_TESTS=on -DUSE_APPLE_ACCELERATE=off -DUSE_FBGEMM=off -DUSE_STATIC_LIBS=on -DUSE_WASM_COMPATIBLE_SOURCE=on
           brt_tags: "'#wasm'"
           unittests: 'false'
     name: ${{ matrix.name }}
diff --git a/3rd_party/CMakeLists.txt b/3rd_party/CMakeLists.txt
index 74ce906..70e50d6 100644
--- a/3rd_party/CMakeLists.txt
+++ b/3rd_party/CMakeLists.txt
@@ -1,3 +1,5 @@
+# marian-dev is tested elsewhere in both paths, turning off here.
+set(COMPILE_TESTS OFF)
 add_subdirectory(marian-dev)
 
 if(COMPILE_WASM)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e561ed9..c58ddd4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,6 +41,9 @@ include(CMakeDependentOption)
 # Project specific cmake options
 option(COMPILE_WASM "Compile for WASM" OFF)
 cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" OFF "NOT COMPILE_WASM" ON)
+
+# WASM disables a million libraries, which also includes the unit test-library.
+cmake_dependent_option(COMPILE_UNIT_TESTS "Compile unit tests" OFF "USE_WASM_COMPATIBLE_SOURCE" ON)
 option(COMPILE_TESTS "Compile bergamot-tests" OFF)
 
 # Set 3rd party submodule specific cmake options for this project
diff --git a/app/cli.h b/app/cli.h
index 292d21c..d6e930f 100644
--- a/app/cli.h
+++ b/app/cli.h
@@ -1,5 +1,6 @@
 #ifndef BERGAMOT_APP_CLI_H
 #define BERGAMOT_APP_CLI_H
+#include <algorithm>
 #include <cstdlib>
 #include <future>
 #include <iostream>
@@ -103,8 +104,7 @@ void decoder(Ptr<Options> options) {
 /// [brt/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh)
 ///
 /// * Input: reads from stdin, blob of text, read as a whole ; sentence-splitting etc handled internally.
-/// * Output: to stdout, translation of the source text and additional information like sentences, alignments between
-/// source and target tokens and quality scores.
+/// * Output: to stdout, translation of the source text faithful to source structure.
 ///
 /// @param [in] options: options to build translator
 void native(Ptr<Options> options) {
@@ -124,58 +124,13 @@ void native(Ptr<Options> options) {
   std::string input = std_input.str();
 
   ResponseOptions responseOptions;
-  responseOptions.qualityScores = true;
-  responseOptions.alignment = true;
-  responseOptions.alignmentThreshold = 0.2f;
 
   // Wait on future until Response is complete
   std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
   responseFuture.wait();
   Response response = responseFuture.get();
 
-  std::cout << "[original]: " << response.source.text << '\n';
-  std::cout << "[translated]: " << response.target.text << '\n';
-  for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
-    std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx) << '\n';
-    std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx) << '\n';
-    std::cout << "Alignments" << '\n';
-    typedef std::pair<size_t, float> Point;
-
-    // Initialize a point vector.
-    std::vector<std::vector<Point>> aggregate(response.source.numWords(sentenceIdx));
-
-    // Handle alignments
-    auto &alignments = response.alignments[sentenceIdx];
-    for (auto &p : alignments) {
-      aggregate[p.src].emplace_back(p.tgt, p.prob);
-    }
-
-    for (size_t src = 0; src < aggregate.size(); src++) {
-      std::cout << response.source.word(sentenceIdx, src) << ": ";
-      for (auto &p : aggregate[src]) {
-        std::cout << response.target.word(sentenceIdx, p.first) << "(" << p.second << ") ";
-      }
-      std::cout << '\n';
-    }
-
-    // Handle quality.
-    auto &quality = response.qualityScores[sentenceIdx];
-    std::cout << "Quality: whole(" << quality.sequence << "), tokens below:" << '\n';
-    size_t wordIdx = 0;
-    bool first = true;
-    for (auto &p : quality.word) {
-      if (first) {
-        first = false;
-      } else {
-        std::cout << " ";
-      }
-      std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p << ")";
-      wordIdx++;
-    }
-    std::cout << '\n';
-  }
-  std::cout << "--------------------------\n";
-  std::cout << '\n';
+  std::cout << response.target.text;
 }
 
 }  // namespace app
diff --git a/bergamot-translator-tests b/bergamot-translator-tests
index 020135a..b0ba62e 160000
--- a/bergamot-translator-tests
+++ b/bergamot-translator-tests
@@ -1 +1 @@
-Subproject commit 020135af1b620caa27929c1403c50ec3299e5bff
+Subproject commit b0ba62eade4af7752c65c76cb17eab421ea02445
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c2d62ef..856831b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_subdirectory(translator)
 
-if(COMPILE_TESTS)
-  # Catch currently comes from marian sources.
-  add_subdirectory(tests)
+if (COMPILE_TESTS)
+    add_subdirectory(tests)
 endif(COMPILE_TESTS)
 
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index 5c1bc00..483bd07 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,22 +1,32 @@
 # Unit tests
-set(UNIT_TESTS
-    annotation_tests
-)
 
-foreach(test ${UNIT_TESTS})
-  add_executable("run_${test}" run_tests.cpp "${test}.cpp")
-  target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
+# Include Catch explicitly from marian.
+set(CATCH_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rd_party/marian-dev/3rd-party)
+add_library(Catch INTERFACE)
+target_include_directories(Catch INTERFACE ${CATCH_INCLUDE_DIR})
 
+if (COMPILE_UNIT_TESTS)
+    add_subdirectory(units)
+endif (COMPILE_UNIT_TESTS)
+
+
+
+if(NOT MSVC)
+  # Testing apps
+  set(APP_TESTS)
+  add_executable("bergamot-test" "cli.cpp" "apps.cpp")
+  
   if(CUDA_FOUND)
-    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
+    target_link_libraries("bergamot-test" bergamot-translator)
   else(CUDA_FOUND)
-    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
+    target_link_libraries("bergamot-test" bergamot-translator)
   endif(CUDA_FOUND)
+  
+  set_target_properties("bergamot-test" PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
 
-  if(msvc)
-    # disable c4305: truncation from 'double' to '_ty'
-    target_compile_options("run_${test}" public /wd4305)
-  endif(msvc)
+  # Adding an intgemm_resolve cmdline
+  add_executable(intgemm-resolve intgemm_resolve.cpp)
+  target_link_libraries(intgemm-resolve PRIVATE bergamot-translator)
+  set_target_properties(intgemm-resolve PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+endif(NOT MSVC)
 
-  add_test(NAME ${test} COMMAND "run_${test}")
-endforeach(test)
diff --git a/src/tests/apps.cpp b/src/tests/apps.cpp
new file mode 100644
index 0000000..9c00bff
--- /dev/null
+++ b/src/tests/apps.cpp
@@ -0,0 +1,116 @@
+#include "apps.h"
+
+namespace marian {
+namespace bergamot {
+namespace testapp {
+
+// Shared by all test apps: builds a Service from options, translates all of stdin and returns the Response.
+Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions) {
+  // Bundle of bytearrays (model, shortlist and vocabs); stays default-constructed unless "bytearray" is set.
+  MemoryBundle memoryBundle;
+
+  if (options->get<bool>("bytearray")) {
+    // Replace the default-constructed bundle with the one built from the config.
+    memoryBundle = getMemoryBundleFromConfig(options);
+  }
+
+  Service service(options, std::move(memoryBundle));
+
+  // Read everything available on stdin into one string.
+  std::ostringstream inputStream;
+  inputStream << std::cin.rdbuf();
+  std::string input = inputStream.str();
+
+  // Hand the whole text to the service and block until the Response is complete.
+  std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
+  responseFuture.wait();
+  Response response = responseFuture.get();
+  return response;
+}
+
+void qualityScores(Ptr<Options> options) {
+  // Per sentence: sequence score, then space-separated word scores on the next line; blank line between sentences.
+  ResponseOptions responseOptions;
+  responseOptions.qualityScores = true;
+  Response response = translateFromStdin(options, responseOptions);
+  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
+    auto &quality = response.qualityScores[sentenceIdx];
+    std::cout << ((sentenceIdx == 0) ? "" : "\n") << quality.sequence << '\n';
+    for (size_t wordIdx = 0; wordIdx < quality.word.size(); wordIdx++) {
+      std::cout << ((wordIdx == 0) ? "" : " ");
+      std::cout << quality.word[wordIdx];
+    }
+    std::cout << '\n';
+  }
+}
+
+void alignmentAggregatedToSource(Ptr<Options> options, bool numeric) {
+  ResponseOptions responseOptions;
+  responseOptions.alignment = true;
+  responseOptions.alignmentThreshold = 0.2f;
+  Response response = translateFromStdin(options, responseOptions);
+  // One output line per source token: probabilities if numeric, else the source word and its aligned target words.
+  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
+    std::cout << (sentenceIdx == 0 ? "" : "\n");
+
+    // Alignment points are grouped per source token: aggregate[src] collects the (target index, probability) pairs
+    // aligned to that source token. The original author notes that the source side does not depend on
+    // matrix-multiplications, so only the printed target words/scores vary and can be compared with tolerance.
+    typedef std::pair<size_t, float> Point;
+
+    std::vector<std::vector<Point>> aggregate(response.source.numWords(sentenceIdx));
+    auto &alignments = response.alignments[sentenceIdx];
+    for (auto &p : alignments) {
+      aggregate[p.src].emplace_back(p.tgt, p.prob);
+    }
+
+    for (size_t sourceIdx = 0; sourceIdx < aggregate.size(); sourceIdx++) {
+      // Sort this source token's points by ascending target index.
+      auto cmp = [](const Point &p, const Point &q) { return p.first < q.first; };
+      std::sort(aggregate[sourceIdx].begin(), aggregate[sourceIdx].end(), cmp);
+
+      if (!numeric) {
+        std::cout << response.source.word(sentenceIdx, sourceIdx) << ": ";
+      }
+
+      for (size_t j = 0; j < aggregate[sourceIdx].size(); j++) {
+        if (numeric) {
+          float alignmentScore = aggregate[sourceIdx][j].second;
+          std::cout << (j == 0 ? "" : " ");
+          std::cout << alignmentScore;
+        } else {
+          std::cout << " ";
+          size_t targetIdx = aggregate[sourceIdx][j].first;
+          std::cout << response.target.word(sentenceIdx, targetIdx);
+        }
+      }
+      std::cout << '\n';
+    }
+  }
+}
+
+void annotatedTextWords(Ptr<Options> options, bool source) {
+  ResponseOptions responseOptions;  // no fields are set: only the annotated text is printed below
+  Response response = translateFromStdin(options, responseOptions);
+  AnnotatedText &annotatedText = source ? response.source : response.target;  // side of the response to print
+  for (size_t s = 0; s < annotatedText.numSentences(); s++) {
+    for (size_t w = 0; w < annotatedText.numWords(s); w++) {
+      std::cout << (w == 0 ? "" : "\t");  // tab between words, none before the first
+      std::cout << annotatedText.word(s, w);
+    }
+    std::cout << "\n";  // one sentence per output line
+  }
+}
+
+void annotatedTextSentences(Ptr<Options> options, bool source) {
+  ResponseOptions responseOptions;  // no fields are set: only the annotated text is printed below
+  Response response = translateFromStdin(options, responseOptions);
+  AnnotatedText &annotatedText = source ? response.source : response.target;  // side of the response to print
+  for (size_t s = 0; s < annotatedText.numSentences(); s++) {
+    std::cout << annotatedText.sentence(s) << "\n";  // one sentence per output line
+  }
+}
+
+}  // namespace testapp
+}  // namespace bergamot
+}  // namespace marian
diff --git a/src/tests/apps.h b/src/tests/apps.h
new file mode 100644
index 0000000..2ccf2c4
--- /dev/null
+++ b/src/tests/apps.h
@@ -0,0 +1,49 @@
+#ifndef BERGAMOT_SRC_TESTS_APPS_H
+#define BERGAMOT_SRC_TESTS_APPS_H
+#include <algorithm>
+#include <cstdlib>
+#include <future>
+#include <iostream>
+#include <sstream>
+
+#include "common/definitions.h"
+#include "common/timer.h"
+#include "common/utils.h"
+#include "marian.h"
+#include "translator/byte_array_util.h"
+#include "translator/parser.h"
+#include "translator/response.h"
+#include "translator/response_options.h"
+#include "translator/service.h"
+
+namespace marian {
+namespace bergamot {
+
+namespace testapp {
+
+// Utility function, common for all testapps. Reads content from stdin, builds a Service based on options and constructs
+// a response containing translation data according to responseOptions.
+Response translateFromStdin(Ptr<Options> options, ResponseOptions responseOptions);
+
+// Reads from stdin and translates. The quality scores for each translated sentence are printed, with entries separated
+// by empty lines. For each entry, the first line holds the whole-sentence score and the second the per-word scores.
+void qualityScores(Ptr<Options> options);
+
+// Reads from stdin and translates. Alignments are printed aggregated per source token, one line each, in the format
+// "src-token: [aligned-target-tokens]" if numeric is false. If numeric is true, only the alignment probabilities are
+// printed instead of the tokens.
+void alignmentAggregatedToSource(Ptr<Options> options, bool numeric = false);
+
+// Reads from stdin and translates. Prints one sentence per line with its words separated by tabs. Prints words from
+// the source-side text annotation if source=true, target annotation otherwise.
+void annotatedTextWords(Ptr<Options> options, bool source = true);
+
+// Reads from stdin and translates the read content. Prints the sentences of the constructed response, one per line,
+// taken from the source if source is true and from the target otherwise.
+void annotatedTextSentences(Ptr<Options> options, bool source = true);
+
+}  // namespace testapp
+}  // namespace bergamot
+}  // namespace marian
+
+#endif  // BERGAMOT_SRC_TESTS_APPS_H
diff --git a/src/tests/cli.cpp b/src/tests/cli.cpp
new file mode 100644
index 0000000..f2f0218
--- /dev/null
+++ b/src/tests/cli.cpp
@@ -0,0 +1,27 @@
+
+#include "apps.h"
+// Test driver: runs the test app selected by the "bergamot-mode" option and aborts on an unrecognised mode.
+int main(int argc, char *argv[]) {
+  auto cp = marian::bergamot::createConfigParser();
+  auto options = cp.parseOptions(argc, argv, true);
+  const std::string mode = options->get<std::string>("bergamot-mode");
+  using namespace marian::bergamot;
+  if (mode == "test-quality-scores") {
+    testapp::qualityScores(options);
+  } else if (mode == "test-alignment-scores") {
+    testapp::alignmentAggregatedToSource(options, /*numeric=*/true);
+  } else if (mode == "test-alignment-words") {
+    testapp::alignmentAggregatedToSource(options, /*numeric=*/false);
+  } else if (mode == "test-response-source-sentences") {
+    testapp::annotatedTextSentences(options, /*source=*/true);
+  } else if (mode == "test-response-target-sentences") {
+    testapp::annotatedTextSentences(options, /*source=*/false);
+  } else if (mode == "test-response-source-words") {
+    testapp::annotatedTextWords(options, /*source=*/true);
+  } else if (mode == "test-response-target-words") {
+    testapp::annotatedTextWords(options, /*source=*/false);
+  } else {
+    ABORT("Unknown --bergamot-mode {}. Please run a valid test", mode);
+  }
+  return 0;
+}
diff --git a/src/tests/intgemm_resolve.cpp b/src/tests/intgemm_resolve.cpp
new file mode 100644
index 0000000..f95d0c4
--- /dev/null
+++ b/src/tests/intgemm_resolve.cpp
@@ -0,0 +1,8 @@
+#include <iostream>
+
+#include "intgemm/intgemm.h"
+// Prints intgemm::kCPU as an integer on stdout; presumably read by the regression tests (BRT) - TODO confirm.
+int main() {
+  std::cout << static_cast<int>(intgemm::kCPU) << "\n";
+  return 0;
+}
diff --git a/src/tests/units/CMakeLists.txt b/src/tests/units/CMakeLists.txt
new file mode 100644
index 0000000..5c1bc00
--- /dev/null
+++ b/src/tests/units/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Unit tests: every entry of UNIT_TESTS is built as the executable run_<name> and registered with CTest as <name>.
+set(UNIT_TESTS
+    annotation_tests
+)
+
+foreach(test ${UNIT_TESTS})
+  add_executable("run_${test}" run_tests.cpp "${test}.cpp")
+  target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
+
+  if(CUDA_FOUND)
+    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
+  else(CUDA_FOUND)
+    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
+  endif(CUDA_FOUND)
+
+  if(MSVC)
+    # MSVC only: disable C4305 (truncation from 'double' to '_Ty').
+    target_compile_options("run_${test}" PRIVATE /wd4305)
+  endif(MSVC)
+
+  add_test(NAME ${test} COMMAND "run_${test}")
+endforeach(test)
diff --git a/src/tests/annotation_tests.cpp b/src/tests/units/annotation_tests.cpp
similarity index 100%
rename from src/tests/annotation_tests.cpp
rename to src/tests/units/annotation_tests.cpp
diff --git a/src/tests/run_tests.cpp b/src/tests/units/run_tests.cpp
similarity index 100%
rename from src/tests/run_tests.cpp
rename to src/tests/units/run_tests.cpp