Merge remote-tracking branch 'upstream/main' into upstream-sync

2024-08-15 16:40:26 +03:00 · 2021-04-14 14:35:47 +02:00 · 2021-04-14 14:35:47 +02:00 · 1574a4586c
commit 1574a4586c
parent f5dffeb5ca e4b58357db
62 changed files with 4302 additions and 755 deletions
--- a/.github/workflows/macos-custom-marian-native.yml
+++ b/.github/workflows/macos-custom-marian-native.yml
@ -1,32 +0,0 @@
-name: MacOS Native (Custom)
-
-on:
-  push:
-    branches: [ main, ci-sandbox ]
-  pull_request:
-    branches: [ main, ci-sandbox ]
-
-jobs:
-  build-macos:
-    name: Native (With Custom Marian)
-    runs-on: macos-10.15
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-        with:
-          submodules: recursive
-
-      - name: Configure CMake
-        run: |
-          mkdir -p build-native
-          cd build-native
-          cmake ..
-
-      - name: Compile
-        working-directory: build-native
-        run: make -j2
-
-      - name: Print versions
-        working-directory: build-native
-        run: ./app/bergamot-translator-app --version
--- a/.github/workflows/native-custom_marian-mac.yml
+++ b/.github/workflows/native-custom_marian-mac.yml
@ -0,0 +1,33 @@
+name: Native (Custom Marian) MacOS
+
+on:  # triggers: pushes to, and pull requests targeting, main or ci-sandbox
+  push:
+    branches: [ main, ci-sandbox ]
+  pull_request:
+    branches: [ main, ci-sandbox ]
+
+jobs:
+  build-macos:  # single job: configure, compile, then run the built app once
+    name: MacOS
+    runs-on: macos-10.15
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+      with:
+        submodules: recursive  # submodules are fetched together with the repository
+
+    - name: Configure CMake  # out-of-source build in ./build; no -D flags, so CMakeLists.txt defaults apply
+      run: |
+        mkdir -p build
+        cd build
+        cmake ..
+
+    - name: Compile
+      working-directory: build
+      run: make -j2
+
+    - name: Print versions  # smoke test: runs the freshly built binary with --version
+      working-directory: build
+      run: |
+        ./app/bergamot-translator-app --version
--- a/.github/workflows/native-custom_marian-ubuntu.yml
+++ b/.github/workflows/native-custom_marian-ubuntu.yml
@ -0,0 +1,33 @@
+name: Native (Custom Marian) Ubuntu
+
+on:  # triggers: pushes to, and pull requests targeting, main or ci-sandbox
+  push:
+    branches: [ main, ci-sandbox ]
+  pull_request:
+    branches: [ main, ci-sandbox ]
+
+jobs:
+  build-ubuntu:  # job id names the platform this job actually runs on
+    name: Ubuntu
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+      with:
+        submodules: recursive  # submodules are fetched together with the repository
+
+    - name: Configure CMake  # out-of-source build in ./build; no -D flags, so CMakeLists.txt defaults apply
+      run: |
+        mkdir -p build
+        cd build
+        cmake ..
+
+    - name: Compile
+      working-directory: build
+      run: make -j2
+
+    - name: Print versions  # smoke test: runs the freshly built binary with --version
+      working-directory: build
+      run: |
+        ./app/bergamot-translator-app --version
--- a/.github/workflows/native-full_marian-mac.yml
+++ b/.github/workflows/native-full_marian-mac.yml
@ -1,4 +1,4 @@
-name: MacOS
+name: Native (Full Marian) MacOS

 on:
  push:
@ -39,16 +39,15 @@ jobs:
          -DUSE_FBGEMM=on \
          -DUSE_SENTENCEPIECE=on \
          -DUSE_STATIC_LIBS=off \
-          -DUSE_WASM_COMPATIBLE_SOURCES=off
+          -DUSE_WASM_COMPATIBLE_SOURCE=off

    - name: Compile
      working-directory: build
      run: make -j2

-    # Removing unit-tests, taken care of in browsermt/marian-dev
-    # - name: Run unit tests
-    # - working-directory: build
-    # - run: make test
+    - name: Run unit tests
+      working-directory: build
+      run: make test

    - name: Print versions
      working-directory: build
--- a/.github/workflows/native-full_marian-ubuntu.yml
+++ b/.github/workflows/native-full_marian-ubuntu.yml
@ -1,10 +1,10 @@
-name: Ubuntu
+name: Native (Full Marian) Ubuntu

 on:
  push:
-    branches: [ main, ci-sandbox ]
+    branches: [ main, ci-test ]
  pull_request:
-    branches: [ main, ci-sandbox ]
+    branches: [ main, ci-test ]

 jobs:
  build-ubuntu:
@ -15,7 +15,7 @@ jobs:
          - name: "Ubuntu CPU-only"
            os: ubuntu-latest
            cuda: ""
-            gcc: 7
+            gcc: 8
            cpu: true
            gpu: false
          # GPU Builds are commented out, for bergamot-translator CI runs.
@ -62,7 +62,7 @@ jobs:
    # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because
    # it is installed together with libprotobuf-dev
    - name: Install dependencies
-      run: sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-all-dev
+      run: sudo apt-get update && sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-all-dev g++-8

    # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
    - name: Install MKL
@ -97,19 +97,17 @@ jobs:
          -DUSE_FBGEMM=${{ matrix.cpu }} \
          -DUSE_SENTENCEPIECE=on \
          -DUSE_STATIC_LIBS=on \
-          -DUSE_WASM_COMPATIBLE_SOURCES=off
+          -DUSE_WASM_COMPATIBLE_SOURCE=off

    - name: Compile
      working-directory: build
      run: make -j2

-    # Removing unit-tests, taken care of in browsermt/marian-dev
-    # TODO: add a flag to CMake to compile unit tests only on CPU
-    # - name: Run unit tests
-    #   working-directory: build
-    #   run: make test
-    #   # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds
-    #   if: matrix.gpu == false
+    - name: Run unit tests
+      working-directory: build
+      run: make test
+      # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds
+      if: matrix.gpu == false

    - name: Print versions
      working-directory: build
--- a/.github/workflows/macos-custom-marian-wasm.yml
+++ b/.github/workflows/macos-custom-marian-wasm.yml
@ -1,4 +1,4 @@
-name: MacOS WASM (Custom)
+name: WASM (Custom Marian) MacOS

 on:
  push:
@ -8,7 +8,7 @@ on:

 jobs:
  build-wasm:
-    name: WASM (With Custom Marian)
+    name: WASM (Custom Marian) MacOS
    runs-on: macos-10.15

    steps:
@ -35,10 +35,7 @@ jobs:

      - name: Instantiate simd wormhole
        working-directory: build-wasm
-        run: |
-          sed -i.bak 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
-          sed -i.bak 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
-          sed -i.bak 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
+        run: bash ../wasm/patch-artifacts-enable-wormhole.sh

      - name: Check artifacts
        working-directory: build-wasm
--- a/.github/workflows/wasm-custom_marian-ubuntu.yml
+++ b/.github/workflows/wasm-custom_marian-ubuntu.yml
@ -0,0 +1,51 @@
+name: WASM (Custom Marian) Ubuntu
+
+on:  # triggers: pushes to, and pull requests targeting, main
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build-wasm:  # single job: build with Emscripten, patch the artifacts, verify they exist
+    name: WASM (Custom Marian) Ubuntu
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Setup Emscripten toolchain  # provides the emcc/emcmake/emmake commands used below
+        uses: mymindstorm/setup-emsdk@v8
+
+      - name: Verify Emscripten setup
+        run: emcc -v
+
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          submodules: recursive  # submodules are fetched together with the repository
+
+      - name: Configure builds  # out-of-source build in ./build-wasm with COMPILE_WASM enabled
+        run: |
+          mkdir -p build-wasm
+          cd build-wasm
+          emcmake cmake -DCOMPILE_WASM=on ..
+
+      - name: Compile
+        working-directory: build-wasm
+        run: emmake make -j2
+
+      - name: Instantiate simd wormhole  # runs the repository's patch script from inside build-wasm
+        working-directory: build-wasm
+        run: bash ../wasm/patch-artifacts-enable-wormhole.sh
+
+      - name: Check artifacts  # fails the job (exit 1) unless wasm/ holds at least one .wasm and one .js file
+        working-directory: build-wasm
+        run: |
+          export WASM_ARTIFACTS_DIR=wasm
+          ls -all ${WASM_ARTIFACTS_DIR}
+          if ls ${WASM_ARTIFACTS_DIR}/*.wasm &>/dev/null && ls ${WASM_ARTIFACTS_DIR}/*.js &>/dev/null
+          then
+            echo "Artifacts Successfully Generated"
+          else
+            echo "Failure: Artifacts Not Present"
+            exit 1
+          fi
--- a/3rd_party/marian-dev
+++ b/3rd_party/marian-dev
@ -1 +1 @@
-Subproject commit 8ddb73fad1001ae4c1697d2514ac1e5bd43e2ed3
+Subproject commit 0f0bcf99626c660227bb68b76267a8d2451e7172
--- a/1
+++ b/1
@ -0,0 +1 @@
+v0.0.0
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -13,29 +13,21 @@ include(CMakeDependentOption)

 # Project specific cmake options
 option(COMPILE_WASM "Compile for WASM" OFF)
-option(USE_WASM_COMPATIBLE_SOURCES "Use wasm compatible sources" ON)
+option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" ON)
+option(COMPILE_TESTS "Compile bergamot-tests" OFF)
+
 SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be packaged (pre-loaded) in wasm builds")

-# Set marian (3rd party submodule) cmake options to compile for this project
+# Set 3rd party submodule specific cmake options for this project
 SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
 SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
 SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
-if (USE_WASM_COMPATIBLE_SOURCES)
-  # If using wasm compatible marian then set following flags
+if (USE_WASM_COMPATIBLE_SOURCE)
  SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
  SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
-  SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only")
-  SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support")
-  SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds")
-  SET(COMPILE_WITHOUT_EXCEPTIONS ON CACHE BOOL "Compile without exceptions")
-  if(COMPILE_WASM)
-    # Set WORMHOLE to ON for marian whenever compiling for wasm platform
-    SET(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")
-  endif()
+  # Set the ssplit-cpp submodule specific cmake options for wasm
+  SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
 endif()
-# Set ssplit (3rd party submodule) cmake options to compile for this project
-CMAKE_DEPENDENT_OPTION(USE_INTERNAL_PCRE2 "Use internal PCRE2 instead of system PCRE2" ON
-                       "USE_WASM_COMPATIBLE_SOURCES" OFF)

 # Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html
 # Ensures the submodules are set correctly during a build.
@ -60,11 +52,17 @@ if(NOT COMPILE_WASM)
 endif()

 if(COMPILE_WASM)
+  set(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")
  list(APPEND WASM_COMPILE_FLAGS -pthread -O3 -g2 -fPIC -mssse3 -msimd128)
  list(APPEND WASM_COMPILE_FLAGS "SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=0" "SHELL:-s DISABLE_EXCEPTION_CATCHING=1" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1")
  list(APPEND WASM_COMPILE_FLAGS -Wno-error=pthreads-mem-growth)
 endif(COMPILE_WASM)

+# Needs to be enabled before including the folder containing tests (src/tests)
+if(COMPILE_TESTS)
+    enable_testing()
+endif(COMPILE_TESTS)
+
 add_subdirectory(3rd_party)
 add_subdirectory(src)

@ -73,3 +71,4 @@ if(COMPILE_WASM)
 else()
  add_subdirectory(app)
 endif(COMPILE_WASM)
+
--- a/Doxyfile.in
+++ b/Doxyfile.in
--- a/README.md
+++ b/README.md
@ -77,17 +77,16 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.
            emmake make -j
            ```

+        The wasm artifacts (.js and .wasm files) will be available in the `wasm` folder of the build directory ("build-wasm" in this case).
+
    3. Enable SIMD Wormhole via Wasm instantiation API in generated artifacts
+        ```bash
+        bash ../wasm/patch-artifacts-enable-wormhole.sh
        ```
-        sed -i.bak 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
-        sed -i.bak 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
-        sed -i.bak 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
-        ```
-    The artefacts (.js and .wasm files) will be available in `wasm` folder of build directory ("build-wasm" in this case).

 #### Recompiling
-As long as you don't update any submodule, just follow steps in `4.ii` to recompile.\
-If you update a submodule, execute following command before executing steps in `4.ii` to recompile.
+As long as you don't update any submodule, just follow the steps in `4.ii` and `4.iii` to recompile.\
+If you update a submodule, execute the following command before executing the steps in `4.ii` and `4.iii` to recompile.
 ```bash
 git submodule update --init --recursive
 ```
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@ -1,10 +1,16 @@
-add_executable(bergamot-translator-app main.cpp)
+add_executable(bergamot-translator-app bergamot-translator-app.cpp)
 target_link_libraries(bergamot-translator-app PRIVATE bergamot-translator)

-if (NOT USE_WASM_COMPATIBLE_SOURCES)
-    add_executable(service-cli main-mts.cpp)
+add_executable(bergamot-translator-app-bytearray bergamot-translator-app-bytearray.cpp)
+target_link_libraries(bergamot-translator-app-bytearray PRIVATE bergamot-translator)
+
+if (NOT USE_WASM_COMPATIBLE_SOURCE)
+    add_executable(service-cli service-cli.cpp)
    target_link_libraries(service-cli PRIVATE bergamot-translator)

+    add_executable(service-cli-bytearray service-cli-bytearray.cpp)
+    target_link_libraries(service-cli-bytearray PRIVATE bergamot-translator)
+
    add_executable(marian-decoder-new marian-decoder-new.cpp)
    target_link_libraries(marian-decoder-new PRIVATE bergamot-translator)
 endif()
--- a/app/bergamot-translator-app-bytearray.cpp
+++ b/app/bergamot-translator-app-bytearray.cpp
@ -0,0 +1,42 @@
+/*
+ * bergamot-translator-app-bytearray.cpp
+ *
+ * An example application to demonstrate the use of Bergamot translator.
+ *
+ */
+
+#include <iostream>
+
+#include "TranslationModel.h"
+#include "translator/parser.h"
+#include "translator/byte_array_util.h"
+
+int main(int argc, char **argv) {
+
+  // Create a configParser and load command line parameters into a YAML config
+  // string.
+  auto configParser = marian::bergamot::createConfigParser();
+  auto options = configParser.parseOptions(argc, argv, true);
+  std::string config = options->asYamlString();
+
+  // Build the model from the YAML config plus the memory returned by getModelMemoryFromConfig(options)
+  TranslationModel model(config, marian::bergamot::getModelMemoryFromConfig(options));
+
+  TranslationRequest translationRequest;
+  std::vector<std::string> texts;
+  // Read stdin until EOF; every input line becomes one text to translate.
+  for (std::string line; std::getline(std::cin, line);) {
+        texts.emplace_back(line);
+  }
+
+  auto results = model.translate(std::move(texts), translationRequest);
+
+  // translate() hands back the results directly (they are iterated as-is
+  // below), so there is no future to resolve here.
+
+  for (auto &result : results) {
+    std::cout << result.getTranslatedText() << std::endl;  // one translated text per output line
+  }
+
+  return 0;
+}
--- a/app/bergamot-translator-app.cpp
+++ b/app/bergamot-translator-app.cpp
@ -0,0 +1,43 @@
+/*
+ * bergamot-translator-app.cpp
+ *
+ * An application which accepts line separated texts in stdin and returns translated ones in stdout.
+ * It is convenient for batch processing and can be used with tools like SacreBLEU.
+ *
+ */
+
+#include <iostream>
+#include <string>
+
+#include "TranslationModel.h"
+#include "translator/parser.h"
+
+int main(int argc, char **argv) {
+
+  // Create a configParser and load command line parameters into a YAML config
+  // string.
+  auto configParser = marian::bergamot::createConfigParser();
+  auto options = configParser.parseOptions(argc, argv, true);
+  std::string config = options->asYamlString();
+
+  // Build the model from the YAML config string alone
+  TranslationModel model(config);
+
+  TranslationRequest translationRequest;
+  std::vector<std::string> texts;
+  // Read stdin until EOF; every input line becomes one text to translate.
+  for (std::string line; std::getline(std::cin, line);) {
+        texts.emplace_back(line);
+  }
+
+  auto results = model.translate(std::move(texts), translationRequest);
+
+  // translate() hands back the results directly (they are iterated as-is
+  // below), so there is no future to resolve here.
+
+  for (auto &result : results) {
+    std::cout << result.getTranslatedText() << std::endl;  // one translated text per output line
+  }
+
+  return 0;
+}
--- a/app/main-mts.cpp
+++ b/app/main-mts.cpp
@ -1,33 +0,0 @@
-#include <cstdlib>
-#include <future>
-#include <iostream>
-#include <sstream>
-
-#include "common/definitions.h"
-#include "common/utils.h"
-#include "marian.h"
-#include "translator/parser.h"
-#include "translator/response.h"
-#include "translator/service.h"
-
-int main(int argc, char *argv[]) {
-  auto cp = marian::bergamot::createConfigParser();
-  auto options = cp.parseOptions(argc, argv, true);
-  marian::bergamot::Service service(options);
-
-  // Read a large input text blob from stdin
-  std::ostringstream std_input;
-  std_input << std::cin.rdbuf();
-  std::string input = std_input.str();
-  using marian::bergamot::Response;
-
-  // Wait on future until Response is complete
-  std::future<Response> responseFuture = service.translate(std::move(input));
-  responseFuture.wait();
-  Response response = responseFuture.get();
-  std::cout << response.translation() << std::endl;
-
-  // Stop Service.
-  service.stop();
-  return 0;
-}
--- a/app/main.cpp
+++ b/app/main.cpp
@ -1,67 +0,0 @@
-/*
- * main.cpp
- *
- * An example application to demonstrate the use of Bergamot translator.
- *
- */
-
-#include <iostream>
-
-#include "AbstractTranslationModel.h"
-#include "TranslationRequest.h"
-#include "TranslationResult.h"
-#include "translator/parser.h"
-
-int main(int argc, char **argv) {
-
-  // Create a configParser and load command line parameters into a YAML config
-  // string.
-  auto configParser = marian::bergamot::createConfigParser();
-  auto options = configParser.parseOptions(argc, argv, true);
-  std::string config = options->asYamlString();
-
-  // Route the config string to construct marian model through
-  // AbstractTranslationModel
-  std::shared_ptr<AbstractTranslationModel> model =
-      AbstractTranslationModel::createInstance(config);
-
-  TranslationRequest translationRequest;
-  std::vector<std::string> texts;
-  texts.emplace_back(
-      "The Bergamot project will add and improve client-side machine "
-      "translation in a web browser.  Unlike current cloud-based "
-      "options, running directly on users’ machines empowers citizens to "
-      "preserve their privacy and increases the uptake of language "
-      "technologies in Europe in various sectors that require "
-      "confidentiality.");
-  texts.emplace_back(
-      "Free software integrated with an open-source web "
-      "browser, such as Mozilla Firefox, will enable bottom-up adoption "
-      "by non-experts, resulting in cost savings for private and public "
-      "sector users who would otherwise procure translation or operate "
-      "monolingually.  Bergamot is a consortium coordinated by the "
-      "University of Edinburgh with partners Charles University in "
-      "Prague, the University of Sheffield, University of Tartu, and "
-      "Mozilla.");
-
-  auto results = model->translate(std::move(texts), translationRequest);
-
-  // Resolve the future and get the actual result
-  //std::vector<TranslationResult> results = futureResults.get();
-
-  for (auto &result : results) {
-    std::cout << "[original]: " << result.getOriginalText() << std::endl;
-    std::cout << "[translated]: " << result.getTranslatedText() << std::endl;
-    auto mappings = result.getSentenceMappings();
-    for (auto &p : mappings) {
-      std::string_view src = p.first;
-      std::string_view tgt = p.second;
-
-      std::cout << " [src Sentence]: " << src << std::endl;
-      std::cout << " [tgt Sentence]: " << tgt << std::endl;
-    }
-    std::cout << std::endl;
-  }
-
-  return 0;
-}
--- a/app/marian-decoder-new.cpp
+++ b/app/marian-decoder-new.cpp
@ -14,25 +14,11 @@
 #include "translator/response.h"
 #include "translator/service.h"

-void marian_decoder_minimal(const marian::Histories &histories,
-                            marian::Ptr<marian::Vocab const> targetVocab,
+void marian_decoder_minimal(const marian::bergamot::Response &response,
                            marian::Ptr<marian::Options> options) {
-
-  bool doNbest = options->get<bool>("n-best");
-  auto collector =
-      marian::New<marian::OutputCollector>(options->get<std::string>("output"));
-
-  // There is a dependency of vocabs here.
-  auto printer = marian::New<marian::OutputPrinter>(options, targetVocab);
-  if (options->get<bool>("quiet-translation"))
-    collector->setPrintingStrategy(marian::New<marian::QuietPrinting>());
-
-  for (auto &history : histories) {
-    std::stringstream best1;
-    std::stringstream bestn;
-    printer->print(history, best1, bestn);
-    collector->Write((long)history->getLineNum(), best1.str(), bestn.str(),
-                     doNbest);
+  // We are no longer marian-decoder compatible. Server ideas are on hold.
+  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
+    std::cout << response.target.sentence(sentenceIdx) << "\n";
  }
 }

@ -53,9 +39,8 @@ int main(int argc, char *argv[]) {
  responseFuture.wait();
  const Response &response = responseFuture.get();

-  marian_decoder_minimal(response.histories(), service.targetVocab(), options);
+  marian_decoder_minimal(response, options);

  LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed());
-  service.stop();
  return 0;
 }
--- a/app/service-cli-bytearray.cpp
+++ b/app/service-cli-bytearray.cpp
@ -0,0 +1,86 @@
+#include <cstdlib>
+#include <future>
+#include <iostream>
+#include <sstream>
+
+#include "common/definitions.h"
+#include "common/utils.h"
+#include "marian.h"
+#include "translator/parser.h"
+#include "translator/response.h"
+#include "translator/service.h"
+#include "translator/byte_array_util.h"
+
+int main(int argc, char *argv[]) {  // translate stdin via byte-array loaded model; print a per-sentence report
+  auto cp = marian::bergamot::createConfigParser();
+  auto options = cp.parseOptions(argc, argv, true);
+
+  // Prepare memories for model and shortlist; both are moved into the Service below
+  marian::bergamot::AlignedMemory modelBytes = marian::bergamot::getModelMemoryFromConfig(options);
+  marian::bergamot::AlignedMemory shortlistBytes = marian::bergamot::getShortlistMemoryFromConfig(options);
+
+  marian::bergamot::Service service(options, std::move(modelBytes), std::move(shortlistBytes));
+
+  // Read all of stdin as one input text blob
+  std::ostringstream std_input;
+  std_input << std::cin.rdbuf();
+  std::string input = std_input.str();
+  using marian::bergamot::Response;
+
+  // Wait on future until Response is complete
+  std::future<Response> responseFuture = service.translate(std::move(input));
+  responseFuture.wait();
+  Response response = responseFuture.get();
+
+  std::cout << "[original]: " << response.source.text << '\n';
+  std::cout << "[translated]: " << response.target.text << '\n';
+  // size_t index: same loop form as marian-decoder-new.cpp, no signed/unsigned comparison
+  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
+    std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx)
+              << '\n';
+    std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx)
+              << '\n';
+    std::cout << "Alignments" << '\n';
+    typedef std::pair<size_t, float> Point;
+
+    // One bucket per source word; each bucket collects (target word index, probability).
+    std::vector<std::vector<Point>> aggregate(
+        response.source.numWords(sentenceIdx));
+
+    // Group this sentence's alignment points by their source word
+    auto &alignments = response.alignments[sentenceIdx];
+    for (auto &p : alignments) {
+      aggregate[p.src].emplace_back(p.tgt, p.prob);
+    }
+
+    for (size_t src = 0; src < aggregate.size(); src++) {
+      std::cout << response.source.word(sentenceIdx, src) << ": ";
+      for (auto &p : aggregate[src]) {
+        std::cout << response.target.word(sentenceIdx, p.first) << "("
+                  << p.second << ") ";
+      }
+      std::cout << '\n';
+    }
+
+    // Quality: sequence-level score first, then one score per target word.
+    auto &quality = response.qualityScores[sentenceIdx];
+    std::cout << "Quality: whole(" << quality.sequence
+              << "), tokens below:" << '\n';
+    size_t wordIdx = 0;  // walks the target words in step with quality.word
+    bool first = true;   // suppresses the separator before the first token
+    for (auto &p : quality.word) {
+      if (first) {
+        first = false;
+      } else {
+        std::cout << " ";
+      }
+      std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p
+                << ")";
+      wordIdx++;
+    }
+    std::cout << '\n';
+  }
+  std::cout << "--------------------------\n";
+  std::cout << '\n';
+
+  return 0;
+}
--- a/app/service-cli.cpp
+++ b/app/service-cli.cpp
@ -0,0 +1,80 @@
+#include <cstdlib>
+#include <future>
+#include <iostream>
+#include <sstream>
+
+#include "common/definitions.h"
+#include "common/utils.h"
+#include "marian.h"
+#include "translator/parser.h"
+#include "translator/response.h"
+#include "translator/service.h"
+
+int main(int argc, char *argv[]) {  // translate stdin with a Service built from options; print a per-sentence report
+  auto cp = marian::bergamot::createConfigParser();
+  auto options = cp.parseOptions(argc, argv, true);
+  marian::bergamot::Service service(options);
+
+  // Read all of stdin as one input text blob
+  std::ostringstream std_input;
+  std_input << std::cin.rdbuf();
+  std::string input = std_input.str();
+  using marian::bergamot::Response;
+
+  // Wait on future until Response is complete
+  std::future<Response> responseFuture = service.translate(std::move(input));
+  responseFuture.wait();
+  Response response = responseFuture.get();
+
+  std::cout << "[original]: " << response.source.text << '\n';
+  std::cout << "[translated]: " << response.target.text << '\n';
+  // size_t index: same loop form as marian-decoder-new.cpp, no signed/unsigned comparison
+  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
+    std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx)
+              << '\n';
+    std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx)
+              << '\n';
+    std::cout << "Alignments" << '\n';
+    typedef std::pair<size_t, float> Point;
+
+    // One bucket per source word; each bucket collects (target word index, probability).
+    std::vector<std::vector<Point>> aggregate(
+        response.source.numWords(sentenceIdx));
+
+    // Group this sentence's alignment points by their source word
+    auto &alignments = response.alignments[sentenceIdx];
+    for (auto &p : alignments) {
+      aggregate[p.src].emplace_back(p.tgt, p.prob);
+    }
+
+    for (size_t src = 0; src < aggregate.size(); src++) {
+      std::cout << response.source.word(sentenceIdx, src) << ": ";
+      for (auto &p : aggregate[src]) {
+        std::cout << response.target.word(sentenceIdx, p.first) << "("
+                  << p.second << ") ";
+      }
+      std::cout << '\n';
+    }
+
+    // Quality: sequence-level score first, then one score per target word.
+    auto &quality = response.qualityScores[sentenceIdx];
+    std::cout << "Quality: whole(" << quality.sequence
+              << "), tokens below:" << '\n';
+    size_t wordIdx = 0;  // walks the target words in step with quality.word
+    bool first = true;   // suppresses the separator before the first token
+    for (auto &p : quality.word) {
+      if (first) {
+        first = false;
+      } else {
+        std::cout << " ";
+      }
+      std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p
+                << ")";
+      wordIdx++;
+    }
+    std::cout << '\n';
+  }
+  std::cout << "--------------------------\n";
+  std::cout << '\n';
+
+  return 0;
+}
--- a/doc/.gitignore
+++ b/doc/.gitignore
@ -0,0 +1,4 @@
+api
+build
+doxygen
+venv
--- a/doc/README.md
+++ b/doc/README.md
@ -0,0 +1,51 @@
+# Marian NMT code documentation and library API
+
+This directory contains code documentation and library API for developers of Marian NMT.
+
+The documentation is generated using
+[Sphinx](https://www.sphinx-doc.org/en/master/usage/quickstart.html) +
+[Breathe](https://breathe.readthedocs.io/en/latest/directives.html) +
+[Doxygen](http://www.doxygen.nl/manual/docblocks.html) +
+[Exhale](https://exhale.readthedocs.io/en/latest/usage.html).
+The documentation source is written in `.rst` or `.md` files with special directives that allow
+referencing C++ source code and its documentation. The source documents are then built into static
+HTML pages.
+
+
+## Installation
+
+On Ubuntu 20.04, install the following packages:
+
+    sudo apt-get install python3 python3-pip python3-setuptools doxygen
+
+Then set up a Python environment and install modules:
+
+    pip3 install virtualenv
+    virtualenv venv -p python3
+    source venv/bin/activate
+    pip install -r requirements.txt
+
+Documentation building should also work on Windows, but it has not been tested.
+
+
+## Generation
+
+The documentation can be generated by running:
+
+    make html
+
+The website will be generated into `build/html` and accessible by opening _index.html_ in your
+browser.
+
+Directories:
+
+- `build` - automatically generated output directory for HTML documentation
+- `doxygen` - automatically generated Doxygen XML files
+- `api` - automatic library API generated with Exhale
+- `.rst` and `.md` files in this directory and its subdirectories are documentation source files
+- `_static` - custom CSS and JavaScript files
+
+
+## Writing documentation
+
+To be documented...
--- a/doc/_static/css/custom.css
+++ b/doc/_static/css/custom.css
@ -0,0 +1,4 @@
+.wy-body-for-nav > .wy-grid-for-nav > .wy-nav-side {
+    border-bottom: 5px solid #28bbee;
+    /*background-color: #494d55;*/
+}
--- a/doc/conf.py
+++ b/doc/conf.py
@ -0,0 +1,120 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import datetime
+import sys
+
+sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- Project information -----------------------------------------------------
+
+project = 'Bergamot Translator'
+copyright = '2021, Bergamot Translator Team'
+author = 'Bergamot Translator Team'
+
+# The full version, including alpha/beta/rc tags, is read from the BERGAMOT_VERSION file one directory above this file
+# TODO: add GitHub commit hash to the version
+version_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'BERGAMOT_VERSION')
+with open(os.path.abspath(version_file)) as f:
+    version = f.read().strip()  # file content with surrounding whitespace removed
+release = version + ' ' + str(datetime.date.today())  # version followed by the date this config is evaluated
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.imgmath',
+    'sphinx.ext.todo',
+    'breathe',
+    'exhale',
+    'recommonmark',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = [
+    'build',
+    'doxygen',
+    'venv',
+    'README.md',
+]
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+htmlhelp_basename = 'bergamot-translator'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+html_css_files = ['css/custom.css']
+
+# The base URL which points to the root of the HTML documentation
+html_baseurl = 'http://jerinphilip.github.io/bergamot-translator'
+
+
+# -- Extension configuration -------------------------------------------------
+
+breathe_projects = { 'bergamot-translator': './doxygen/xml' }
+breathe_default_project = 'bergamot-translator'
+
+doxygen_config = """
+INPUT                = ../src
+EXCLUDE             += ../3rd_party
+EXCLUDE             += ../src/tests
+EXCLUDE_PATTERNS     = *.md *.txt
+FILE_PATTERNS       += *.cu
+EXTENSION_MAPPING   += cu=C++ inc=C++
+ENABLE_PREPROCESSING = YES
+JAVADOC_AUTOBRIEF    = YES
+WARN_IF_UNDOCUMENTED = NO
+"""
+
+exhale_args = {
+    'containmentFolder'     : './api',
+    'rootFileName'          : 'library_index.rst',
+    'rootFileTitle'         : 'Library API',
+    'doxygenStripFromPath'  : '..',
+    'createTreeView'        : True,
+    'exhaleExecutesDoxygen' : True,
+    'exhaleDoxygenStdin'    : doxygen_config.strip(),
+}
+
+primary_domain = 'cpp'
+highlight_language = 'cpp'
+
+# A trick to include markdown files from outside the source directory using
+# 'mdinclude'. Warning: all other markdown files not included via 'mdinclude'
+# will be rendered using recommonmark as recommended by Sphinx
+from m2r import MdInclude
+
+def setup(app):  # registers m2r's config values and directive on `app`
+    # Taken from m2r to make `mdinclude` work: four config values (default False, rebuild 'env') plus the directive
+    app.add_config_value('no_underscore_emphasis', False, 'env')
+    app.add_config_value('m2r_parse_relative_links', False, 'env')
+    app.add_config_value('m2r_anonymous_references', False, 'env')
+    app.add_config_value('m2r_disable_inline_math', False, 'env')
+    app.add_directive('mdinclude', MdInclude)  # MdInclude is imported from m2r above
--- a/doc/index.rst
+++ b/doc/index.rst
@ -0,0 +1,38 @@
+Welcome to Bergamot Translator's documentation!
+===============================================
+
+|buildcpu| |tests| |license|
+
+Bergamot translator provides a unified API for (Marian NMT framework based)
+neural machine translation functionality in accordance with the Bergamot
+project that focuses on improving client-side machine translation in a web
+browser.
+
+This is developer documentation. 
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   marian-integration
+   api/library_index
+
+
+
+Indices and tables
+------------------
+
+* :ref:`genindex`
+
+
+.. |buildcpu| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/browsermt/job/bergamot-translator.svg?label=CPU%20Build
+   :target: http://vali.inf.ed.ac.uk/jenkins/job/bergamot-translator
+   :alt: CPU build status
+
+.. |tests| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/bergamot-translator-regression-tests.svg?label=Tests
+   :target: http://vali.inf.ed.ac.uk/jenkins/job/bergamot-translator-regression-tests/
+   :alt: Tests status
+
+.. |license| image:: https://img.shields.io/badge/License-MPL%202.0-brightgreen.svg
+   :target: https://opensource.org/licenses/MPL-2.0
+   :alt: License: MPL
--- a/doc/make.bat
+++ b/doc/make.bat
@ -0,0 +1,35 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation.
+REM Usage: make.bat TARGET - run without a target to list the available ones.
+
+REM Work from the directory that contains this script so relative paths resolve.
+pushd %~dp0
+
+REM Honour a user-supplied SPHINXBUILD, otherwise rely on sphinx-build in PATH.
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+REM index.rst lives next to this script, so the Sphinx source directory is ".".
+set SOURCEDIR=.
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+REM Probe for the executable: errorlevel 9009 means the command was not found.
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	REM Restore the caller's working directory before bailing out.
+	popd
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
--- a/doc/marian-integration.md
+++ b/doc/marian-integration.md
@ -1,4 +1,4 @@
-# Marian Integration
+# Building marian code for bergamot

 This document summarizes the minimal build instructions develop for the
 marian-code powering bergamot-translator.
@ -10,7 +10,7 @@ $ git clone https://github.com/browsermt/bergamot-translator
 $ cd bergamot-translator
 $ mkdir build
 $ cd build
-$ cmake .. -DUSE_WASM_COMPATIBLE_SOURCES=off -DCMAKE_BUILD_TYPE=Release
+$ cmake .. -DUSE_WASM_COMPATIBLE_SOURCE=off -DCMAKE_BUILD_TYPE=Release
 $ make -j
 ```

--- a/doc/references.bib
+++ b/doc/references.bib
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@ -0,0 +1,6 @@
+sphinx==2.4.4
+breathe==4.13.0
+exhale
+sphinx_rtd_theme
+recommonmark
+m2r
--- a/src/AbstractTranslationModel.h
+++ b/src/AbstractTranslationModel.h
@ -1,68 +0,0 @@
-/*
- * AbstractTranslationModel.h
- *
- * An interface for a translation model for translating a plain (without any
- * markups and emojis) UTF-8 encoded text. The model supports translation from 1
- * source language to 1 target language. There can be different implementations
- * of this interface.
- */
-
-#ifndef SRC_TRANSLATOR_ABSTRACTTRANSLATIONMODEL_H_
-#define SRC_TRANSLATOR_ABSTRACTTRANSLATIONMODEL_H_
-
-#include <future>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "TranslationRequest.h"
-#include "TranslationResult.h"
-
-/* An interface for a translation model for translating a plain (without any
- * markups and emojis) UTF-8 encoded text. The model supports translation from 1
- * source language to 1 target language.
- */
-class AbstractTranslationModel {
-public:
-  /* A Factory method to create and return an instance of an implementation of
-   * AbstractTranslationModel. The instance is created using translation model
-   * configuration provided as yaml-formatted string.
-   */
-  static std::shared_ptr<AbstractTranslationModel>
-  createInstance(const std::string &config);
-
-  AbstractTranslationModel() = default;
-
-  virtual ~AbstractTranslationModel() = default;
-
-  /* This method performs translation on a list of (UTF-8 encoded) texts and
-   * returns a list of results in the same order. Each text entry can either be
-   * a word, a phrase, a sentence or a list of sentences and should contain
-   * plain text (without any markups or emojis). Additional information related
-   * to the translated text can be requested via TranslationRequest which is
-   * applied equally to each text entry.
-   *
-   * The translated text corresponding to each text entry and the additional
-   * information (as specified in the TranslationRequest) is encapsulated and
-   * returned in TranslationResult.
-   *
-   * The API splits each text entry into sentences internally, which are then
-   * translated independent of each other. The translated sentences are then
-   * joined together and returned in TranslationResult. Please refer to the
-   * TranslationRequest class to find out what additional information can be
-   * requested. The alignment information can only be requested if the model
-   * supports it (check isAlignmentSupported() API).
-   *
-   * The texts argument will become empty after the execution of this API (each
-   * entry of texts list will be moved to its corresponding TranslationResult
-   * object).
-   */
-  virtual std::vector<TranslationResult>
-  translate(std::vector<std::string> &&texts, TranslationRequest request) = 0;
-
-  /* Check if the model can provide alignment information b/w original and
-   * translated text. */
-  virtual bool isAlignmentSupported() const = 0;
-};
-
-#endif /* SRC_TRANSLATOR_ABSTRACTTRANSLATIONMODEL_H_ */
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -1 +1,7 @@
-add_subdirectory(translator)
+add_subdirectory(translator)
+
+if(COMPILE_TESTS)
+  # Catch currently comes from marian sources.
+  add_subdirectory(tests)
+endif(COMPILE_TESTS)
+
--- a/src/translator/TranslationModel.h
+++ b/src/translator/TranslationModel.h
@ -1,7 +1,7 @@
 /*
 * TranslationModel.h
 *
- *  A implementation of AbstractTranslationModel interface.
+ * Main interface for translation API.
 */

 #ifndef SRC_TRANSLATOR_TRANSLATIONMODEL_H_
@ -15,19 +15,28 @@
 #include "3rd_party/marian-dev/src/common/options.h"

 // All local project includes
-#include "AbstractTranslationModel.h"
-#include "translator/service_base.h"
+#include "TranslationRequest.h"
+#include "TranslationResult.h"
+#include "translator/definitions.h"
+#include "translator/service.h"

 /* A Translation model that translates a plain (without any markups and emojis)
 * UTF-8 encoded text. This implementation supports translation from 1 source
 * language to 1 target language.
 */
-class TranslationModel : public AbstractTranslationModel {
+class TranslationModel {
 public:
  /* Construct the model using the model configuration options as yaml-formatted
   * string
   */
-  TranslationModel(const std::string &config);
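+  /**
+   * @param config Marian yml config file in the form of a string
+   * @param modelMemory optional byte array (aligned to 256!!!) that contains
+   * the bytes of a model.bin.
+   * @param shortlistMemory optional byte array (aligned to 64) that contains
+   * the bytes of a binary shortlist.
+   */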
+  TranslationModel(const std::string &config,
+                   marian::bergamot::AlignedMemory modelMemory = marian::bergamot::AlignedMemory(),
+                   marian::bergamot::AlignedMemory shortlistMemory = marian::bergamot::AlignedMemory());

  ~TranslationModel();

@ -56,16 +65,16 @@ public:
   * object).
   */
  std::vector<TranslationResult> translate(std::vector<std::string> &&texts,
-                                           TranslationRequest request) override;
+                                           TranslationRequest request);

  /* Check if the model can provide alignment information b/w original and
   * translated text. */
-  bool isAlignmentSupported() const override;
+  bool isAlignmentSupported() const;

 private:
  // Model configuration options
  std::shared_ptr<marian::Options> configOptions_; // ORDER DEPENDECNY
-  marian::bergamot::NonThreadedService service_;   // ORDER DEPENDENCY
+  marian::bergamot::Service service_;              // ORDER DEPENDENCY
 };

 #endif /* SRC_TRANSLATOR_TRANSLATIONMODEL_H_ */
--- a/src/TranslationRequest.h
+++ b/src/TranslationRequest.h
@ -2,7 +2,7 @@
 * TranslationRequest.h
 *
 *  This file defines the translation request class to be used in
- * AbstractTranslationModel::translate() API.
+ *  TranslationModel::translate() API.
 */

 #ifndef SRC_TRANSLATOR_TRANSLATIONREQUEST_H_
--- a/src/TranslationResult.h
+++ b/src/TranslationResult.h
@ -1,7 +1,7 @@
 /*
 * TranslationResult.h
 *
- * The class that represents the result of AbstractTranslationModel::translate()
+ * The class that represents the result of TranslationModel::translate()
 * API for each of its text entry and TranslationRequest.
 */

@ -13,7 +13,7 @@

 #include "QualityScore.h"

-/* This class represents the result of AbstractTranslationModel::translate() API
+/* This class represents the result of TranslationModel::translate() API
 * for each of its text entry and TranslationRequest.
 */
 class TranslationResult {
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@ -0,0 +1,22 @@
+# Unit tests
+set(UNIT_TESTS
+    annotation_tests
+)
+
+foreach(test ${UNIT_TESTS})
+  # One Catch-based executable per test source; run_tests.cpp supplies main().
+  add_executable("run_${test}" run_tests.cpp "${test}.cpp")
+  target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
+
+  if(CUDA_FOUND)
+    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
+  else(CUDA_FOUND)
+    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
+  endif(CUDA_FOUND)
+
+  # CMake variable names and scope keywords are case-sensitive: the compiler
+  # check has to be MSVC and the keyword PUBLIC, otherwise the warning
+  # suppression below is never applied.
+  if(MSVC)
+    # disable c4305: truncation from 'double' to '_ty'
+    target_compile_options("run_${test}" PUBLIC /wd4305)
+  endif(MSVC)
+
+  # Register the executable with CTest under the bare test name.
+  add_test(NAME ${test} COMMAND "run_${test}")
+endforeach(test)
--- a/src/tests/annotation_tests.cpp
+++ b/src/tests/annotation_tests.cpp
@ -0,0 +1,220 @@
+#include "catch.hpp"
+#include "translator/sentence_ranges.h"
+#include <iostream>
+#include <random>
+#include <string>
+#include <vector>
+
+using namespace marian::bergamot;
+
+TEST_CASE("Test Annotation API with random sentences") {
+  /// Objective here is to test insertion for sentences, and that whatever comes
+  /// out adheres to the way it was inserted. Towards this, we keep externally
+  /// which sentence went in where and try to use accessor methods on
+  /// AnnotatedText to check if what we have as ground-truth by construction is
+  /// consistent with what is returned.
+  size_t sentences = 500;
+  size_t maxWords = 40;
+
+  // Set in case needed to see output. The output is in lines of #sentences +
+  // header, which can be split and compared for easy understanding. The ideal
+  // way to inspect what is going wrong is to redirect output and use to split
+  // the different stages by sentences + 1 lines and check the diff.
+  bool debug{false};
+
+  // Fixed seed so that the generated text is reproducible across runs.
+  std::mt19937 randomIntGen_;
+  randomIntGen_.seed(42);
+
+  AnnotatedText testAnnotation; // This the container we add through API and
+                                // check if the access is correct.
+
+  // External book-keeping so we have ground truths. Each element represents a
+  // sentence.
+
+  // word byte ranges - for testAnnotation.word(sId, wId)
+  std::vector<std::vector<ByteRange>> groundTruthWords;
+  // sentence byte ranges - for testAnnotation.sentence(sId)
+  std::vector<ByteRange> groundTruthSentences;
+
+  // Prepare the text and construct ByteRanges as intended for sentences and
+  // words. The ByteRanges we construct here are expected to be the
+  // ground-truths for words and sentences. The string being constructed is like
+  // as follows:
+  //
+  //     0-0 0-1 0-2 0-3
+  //     1-0 1-1 1-2 1-3 1-4
+  //     2-0 2-1
+  //
+  //     4-0 4-1 4-2 4-3
+  //
+  // Words are separated by space units.
+  //
+  // Below, we accumulate the text with intended structure as above, and
+  // ground-truth tables populated to be aware of the ByteRanges where they are
+  // meant to be.
+  if (debug) {
+    std::cout << "Preparing text and ground truth-tables" << std::endl;
+  }
+  for (size_t idx = 0; idx < sentences; idx++) {
+    if (idx != 0)
+      testAnnotation.text += "\n";
+
+    // Words can be zero, we need to support empty word sentences as well.
+    size_t numWords = randomIntGen_() % maxWords;
+
+    std::vector<ByteRange> wordByteRanges;
+    wordByteRanges.reserve(numWords);
+
+    // For empty sentence, we expect it to be empty and marked in position where
+    // the existing string is if needed to be pointed out. The subtraction is
+    // guarded so that it cannot wrap around while the text is still empty.
+    size_t before =
+        testAnnotation.text.empty() ? 0 : testAnnotation.text.size() - 1;
+    size_t sentenceBegin{before}, sentenceEnd{before};
+
+    for (size_t idw = 0; idw < numWords; idw++) {
+      if (idw != 0) {
+        testAnnotation.text += " ";
+        if (debug) {
+          std::cout << " ";
+        }
+      }
+
+      // Get new beginning, accounting for space above.
+      before = testAnnotation.text.size();
+
+      // Add the word
+      std::string word = std::to_string(idx) + "-" + std::to_string(idw);
+      testAnnotation.text += word;
+
+      // Do math, before, before + new-word's size. Standard brace
+      // initialisation is used instead of a (GNU-only) compound literal.
+      wordByteRanges.push_back(ByteRange{before, before + word.size()});
+
+      if (debug) {
+        std::cout << word;
+      }
+
+      if (idw == 0) {
+        sentenceBegin = before;
+      }
+      if (idw == numWords - 1) {
+        sentenceEnd = before + word.size();
+      }
+    }
+    if (debug) {
+      std::cout << std::endl;
+    }
+
+    groundTruthWords.push_back(wordByteRanges);
+    groundTruthSentences.push_back(ByteRange{sentenceBegin, sentenceEnd});
+  }
+
+  // We prepare string_views now with the known ByteRanges and use the
+  // string_view based AnnotatedText.addSentence(...) API to add sentences to
+  // transparently convert from string_views to ByteRanges, rebasing/working out
+  // the math underneath.
+
+  if (debug) {
+    std::cout << "Inserting words onto container and save ground-truth-table:"
+              << std::endl;
+  }
+
+  std::vector<std::vector<marian::string_view>> wordStringViews;
+  for (auto &sentence : groundTruthWords) {
+    // Views over the words of this sentence, pointing into testAnnotation.text.
+    std::vector<marian::string_view> wordViews;
+    bool first{true};
+    for (auto &word : sentence) {
+      marian::string_view wordView(&testAnnotation.text[word.begin],
+                                   word.size());
+      wordViews.push_back(wordView);
+      if (debug) {
+        if (first) {
+          first = false;
+        } else {
+          std::cout << " ";
+        }
+        std::cout << std::string(wordView);
+      }
+    }
+    testAnnotation.addSentence(wordViews);
+    wordStringViews.push_back(wordViews);
+    if (debug) {
+      std::cout << std::endl;
+    }
+  }
+
+  if (debug) {
+    std::cout
+        << "Inserting sentences onto container and save ground-truth-table"
+        << std::endl;
+  }
+  std::vector<marian::string_view> sentenceStringViews;
+  for (auto &sentenceByteRange : groundTruthSentences) {
+    char *data = &(testAnnotation.text[sentenceByteRange.begin]);
+    marian::string_view sentenceView(data, sentenceByteRange.size());
+    sentenceStringViews.push_back(sentenceView);
+
+    if (debug) {
+      std::cout << sentenceView << std::endl;
+    }
+  }
+
+  // Access from the sentence(sentenceIdx) API and confirm that the ground truth
+  // we expect is same as what comes out of the container.
+  if (debug) {
+    std::cout << "From container: Sentences" << std::endl;
+  }
+  for (size_t idx = 0; idx < groundTruthSentences.size(); idx++) {
+    ByteRange expected = groundTruthSentences[idx];
+    ByteRange obtained = testAnnotation.sentenceAsByteRange(idx);
+    if (debug) {
+      std::cout << std::string(testAnnotation.sentence(idx)) << std::endl;
+    }
+    CHECK(expected.begin == obtained.begin);
+    CHECK(expected.end == obtained.end);
+    std::string expected_string = std::string(sentenceStringViews[idx]);
+    std::string obtained_string = std::string(testAnnotation.sentence(idx));
+    CHECK(expected_string == obtained_string);
+  }
+
+  /// Access the word(sentenceIdx, wordIdx) API and confirm what we hold as
+  /// expected words are the same as those obtained from the container.
+  if (debug) {
+    std::cout << "From container: Words" << std::endl;
+  }
+
+  CHECK(groundTruthWords.size() == testAnnotation.numSentences());
+  for (size_t idx = 0; idx < groundTruthWords.size(); idx++) {
+    CHECK(groundTruthWords[idx].size() == testAnnotation.numWords(idx));
+  }
+
+  for (size_t idx = 0; idx < groundTruthWords.size(); idx++) {
+    for (size_t idw = 0; idw < groundTruthWords[idx].size(); idw++) {
+      ByteRange expected = groundTruthWords[idx][idw];
+      ByteRange obtained = testAnnotation.wordAsByteRange(idx, idw);
+      if (debug) {
+        std::cout << std::string(testAnnotation.word(idx, idw)) << " ";
+      }
+      CHECK(expected.begin == obtained.begin);
+      CHECK(expected.end == obtained.end);
+
+      std::string expected_string = std::string(wordStringViews[idx][idw]);
+      std::string obtained_string = std::string(testAnnotation.word(idx, idw));
+      CHECK(expected_string == obtained_string);
+    }
+    if (debug) {
+      std::cout << std::endl;
+    }
+  }
+
+  // Try inserting an empty Sentence. This is ensuring we check for empty
+  // Sentence if the random test above does not cover it for some reason.
+  size_t emptySentenceIdx = sentences;
+  std::vector<marian::string_view> emptySentence;
+  testAnnotation.addSentence(emptySentence);
+
+  // There are no words.
+  CHECK(testAnnotation.numWords(emptySentenceIdx) == 0);
+
+  // Empty sentence expected at output.
+  std::string expectedEmptyString = "";
+  marian::string_view emptyView = testAnnotation.sentence(emptySentenceIdx);
+  std::string obtainedString = std::string(emptyView.data(), emptyView.size());
+  CHECK(expectedEmptyString == obtainedString);
+}
--- a/src/tests/run_tests.cpp
+++ b/src/tests/run_tests.cpp
@ -0,0 +1,2 @@
+#define CATCH_CONFIG_MAIN
+#include "catch.hpp"
--- a/src/translator/AbstractTranslationModel.cpp
+++ b/src/translator/AbstractTranslationModel.cpp
@ -1,14 +0,0 @@
-/*
- * AbstractTranslationModel.cpp
- *
- */
-#include <memory>
-
-// All local includes
-#include "AbstractTranslationModel.h"
-#include "TranslationModel.h"
-
-std::shared_ptr<AbstractTranslationModel>
-AbstractTranslationModel::createInstance(const std::string &config) {
-  return std::make_shared<TranslationModel>(config);
-}
--- a/src/translator/CMakeLists.txt
+++ b/src/translator/CMakeLists.txt
@ -1,32 +1,26 @@
-if (NOT USE_WASM_COMPATIBLE_SOURCES)
-    set(MULTITHREADED_SERVICE_SOURCE "service.cpp")
-endif()
-
 add_library(bergamot-translator STATIC
-    AbstractTranslationModel.cpp
    TranslationModel.cpp
-
-    # Following files added from browsermt/mts@nuke
+    byte_array_util.cpp
    text_processor.cpp
    sentence_splitter.cpp
    batch_translator.cpp 
    multifactor_priority.cpp 
    request.cpp 
-    service_base.cpp
-    ${MULTITHREADED_SERVICE_SOURCE}
    batcher.cpp
    response.cpp
    batch.cpp
    sentence_ranges.cpp
+    service.cpp
 )
-if (COMPILE_DECODER_ONLY)
-  # A dirty hack because of marian's bad cmake practices
-  target_compile_definitions(bergamot-translator PUBLIC DECODER_ONLY)
+if (USE_WASM_COMPATIBLE_SOURCE)
+  # Using wasm compatible sources should include this compile definition;
+  # Has to be done here because we are including marian headers + some sources
+  # in local repository use these definitions
+  target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM_COMPATIBLE_SOURCE)
 endif()

 if(COMPILE_WASM)
-  # A dirty hack because of marian's bad cmake practices
-  target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM)
+  target_compile_definitions(bergamot-translator PUBLIC WASM)
  # Enable code that is required for generating JS bindings
  target_compile_definitions(bergamot-translator PRIVATE WASM_BINDINGS)
  target_compile_options(bergamot-translator PRIVATE ${WASM_COMPILE_FLAGS})
@ -35,7 +29,5 @@ endif(COMPILE_WASM)
 target_link_libraries(bergamot-translator marian ssplit)

 target_include_directories(bergamot-translator
-    PRIVATE ${CMAKE_SOURCE_DIR}
+    PUBLIC ${CMAKE_SOURCE_DIR}
    PUBLIC ${CMAKE_SOURCE_DIR}/src)
-
-
--- a/src/translator/TranslationModel.cpp
+++ b/src/translator/TranslationModel.cpp
@ -6,53 +6,15 @@
 #include <future>
 #include <vector>

-// All 3rd party includes
-#include "3rd_party/marian-dev/src/3rd_party/yaml-cpp/yaml.h"
-#include "3rd_party/marian-dev/src/common/config_parser.h"
-#include "common/config_validator.h"
-#include "common/options.h"
-
 // All local project includes
 #include "TranslationModel.h"
 #include "translator/parser.h"
-#include "translator/service_base.h"
+#include "translator/service.h"

-std::shared_ptr<marian::Options> parseOptions(const std::string &config) {
-  marian::Options options;
-
-  // @TODO(jerinphilip) There's something off here, @XapaJIaMnu suggests
-  // that should not be using the defaultConfig. This function only has access
-  // to std::string config and needs to be able to construct Options from the
-  // same.
-
-  // Absent the following code-segment, there is a parsing exception thrown on
-  // rebuilding YAML.
-  //
-  // Error: Unhandled exception of type 'N4YAML11InvalidNodeE': invalid node;
-  // this may result from using a map iterator as a sequence iterator, or
-  // vice-versa
-  //
-  // Error: Aborted from void unhandledException() in
-  // 3rd_party/marian-dev/src/common/logging.cpp:113
-
-  marian::ConfigParser configParser = marian::bergamot::createConfigParser();
-  const YAML::Node &defaultConfig = configParser.getConfig();
-
-  options.merge(defaultConfig);
-
-  // Parse configs onto defaultConfig.
-  options.parse(config);
-  YAML::Node configCopy = options.cloneToYamlNode();
-
-  marian::ConfigValidator validator(configCopy);
-  validator.validateOptions(marian::cli::mode::translation);
-
-  return std::make_shared<marian::Options>(options);
-}
-
-TranslationModel::TranslationModel(const std::string &config)
-    : configOptions_(std::move(parseOptions(config))),
-      AbstractTranslationModel(), service_(configOptions_) {}
+// Builds the underlying Service from the yaml config string. The model and
+// shortlist byte arrays are taken by value and moved into the Service;
+// parameter names match the declaration in TranslationModel.h.
+TranslationModel::TranslationModel(const std::string &config,
+                                   marian::bergamot::AlignedMemory modelMemory,
+                                   marian::bergamot::AlignedMemory shortlistMemory)
+    : service_(config, std::move(modelMemory), std::move(shortlistMemory)) {}

 TranslationModel::~TranslationModel() {}

@ -71,24 +33,19 @@ TranslationModel::translate(std::vector<std::string> &&texts,
    intermediate.wait();
    auto marianResponse(std::move(intermediate.get()));

-    // This mess because marian::string_view != std::string_view
-    std::string source, translation;
-    marian::bergamot::Response::SentenceMappings mSentenceMappings;
-    marianResponse.move(source, translation, mSentenceMappings);
-
-    // Convert to UnifiedAPI::TranslationResult
    TranslationResult::SentenceMappings sentenceMappings;
-    for (auto &p : mSentenceMappings) {
-      std::string_view src(p.first.data(), p.first.size()),
-          tgt(p.second.data(), p.second.size());
-      sentenceMappings.emplace_back(src, tgt);
+    for (size_t idx = 0; idx < marianResponse.size(); idx++) {
+      marian::string_view src = marianResponse.source.sentence(idx);
+      marian::string_view tgt = marianResponse.target.sentence(idx);
+      sentenceMappings.emplace_back(std::string_view(src.data(), src.size()),
+                                    std::string_view(tgt.data(), tgt.size()));
    }

    // In place construction.
    translationResults.emplace_back(
-        std::move(source),          // &&marianResponse.source_
-        std::move(translation),     // &&marianResponse.translation_
-        std::move(sentenceMappings) // &&sentenceMappings
+        std::move(marianResponse.source.text), // &&marianResponse.source_
+        std::move(marianResponse.target.text), // &&marianResponse.translation_
+        std::move(sentenceMappings)            // &&sentenceMappings
    );
  }

--- a/src/translator/aligned.h
+++ b/src/translator/aligned.h
@ -0,0 +1,71 @@
+#pragma once
+#include <cstdlib>
+#include <new>
+#ifdef _MSC_VER
+#include <malloc.h>
+#endif
+
+// Aligned simple vector.
+
+namespace marian {
+namespace bergamot {
+
+/**
+ * Minimal move-only buffer that owns `size` elements of T in storage aligned
+ * to a caller-chosen boundary (64 bytes by default).
+ *
+ * The storage is raw memory obtained from posix_memalign / _aligned_malloc:
+ * elements are neither constructed on allocation nor destroyed on release.
+ */
+template <class T> class AlignedVector {
+public:
+  /// Creates an empty vector that owns no memory.
+  AlignedVector() : mem_(nullptr), size_(0) {}
+
+  /**
+   * Allocates uninitialised storage for `size` elements.
+   * @param size number of elements of T
+   * @param alignment alignment of the allocation in bytes
+   * @throws std::bad_alloc if the allocation fails or size * sizeof(T) would
+   * overflow std::size_t
+   */
+  explicit AlignedVector(std::size_t size, std::size_t alignment = 64 /* CPU cares about this */)
+          : mem_(nullptr), size_(size) {
+    // Reject byte counts that do not fit in size_t instead of silently
+    // allocating a too-small buffer.
+    if (size > static_cast<std::size_t>(-1) / sizeof(T)) throw std::bad_alloc();
+#ifdef _MSC_VER
+    mem_ = static_cast<T*>(_aligned_malloc(size * sizeof(T), alignment));
+    if (!mem_) throw std::bad_alloc();
+#else
+    if (posix_memalign(reinterpret_cast<void **>(&mem_), alignment, size * sizeof(T))) {
+      throw std::bad_alloc();
+    }
+#endif
+  }
+
+  /// Takes over the storage of `from`, leaving it empty.
+  AlignedVector(AlignedVector &&from) noexcept : mem_(from.mem_), size_(from.size_) {
+    from.mem_ = nullptr;
+    from.size_ = 0;
+  }
+
+  /// Releases the currently owned storage, then takes over the storage of
+  /// `from`, leaving it empty. Self-assignment is a no-op.
+  AlignedVector &operator=(AlignedVector &&from) noexcept {
+    if (this != &from) {
+      release(); // without this the previously owned buffer would leak
+      mem_ = from.mem_;
+      size_ = from.size_;
+      from.mem_ = nullptr;
+      from.size_ = 0;
+    }
+    return *this;
+  }
+
+  // Copying is disabled: the buffer has a single owner.
+  AlignedVector(const AlignedVector&) = delete;
+  AlignedVector& operator=(const AlignedVector&) = delete;
+
+  ~AlignedVector() { release(); }
+
+  /// Number of elements (not bytes) in the buffer.
+  std::size_t size() const { return size_; }
+
+  /// Unchecked element access.
+  T &operator[](std::size_t offset) { return mem_[offset]; }
+  const T &operator[](std::size_t offset) const { return mem_[offset]; }
+
+  T *begin() { return mem_; }
+  const T *begin() const { return mem_; }
+  T *end() { return mem_ + size_; }
+  const T *end() const { return mem_ + size_; }
+
+  /// Reinterprets the start of the buffer as a pointer to ReturnType.
+  template <typename ReturnType>
+  ReturnType *as() { return reinterpret_cast<ReturnType*>(mem_); }
+
+private:
+  /// Frees the owned storage with the deallocator matching the allocator used
+  /// in the constructor; freeing a nullptr is a no-op.
+  void release() noexcept {
+#ifdef _MSC_VER
+    _aligned_free(mem_);
+#else
+    std::free(mem_);
+#endif
+  }
+
+  T *mem_;
+  std::size_t size_;
+};
+} // namespace bergamot
+} // namespace marian
--- a/src/translator/batch_translator.cpp
+++ b/src/translator/batch_translator.cpp
@ -10,26 +10,46 @@ namespace bergamot {

 BatchTranslator::BatchTranslator(DeviceId const device,
                                 std::vector<Ptr<Vocab const>> &vocabs,
-                                 Ptr<Options> options)
-    : device_(device), options_(options), vocabs_(&vocabs) {}
+                                 Ptr<Options> options,
+                                 const AlignedMemory* modelMemory,
+                                 const AlignedMemory* shortlistMemory)
+    : device_(device), options_(options), vocabs_(&vocabs),
+    modelMemory_(modelMemory), shortlistMemory_(shortlistMemory) {}

 void BatchTranslator::initialize() {
  // Initializes the graph.
  if (options_->hasAndNotEmpty("shortlist")) {
    int srcIdx = 0, trgIdx = 1;
    bool shared_vcb = vocabs_->front() == vocabs_->back();
-    slgen_ = New<data::LexicalShortlistGenerator>(options_, vocabs_->front(),
-                                                  vocabs_->back(), srcIdx,
-                                                  trgIdx, shared_vcb);
+    if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) {
+      bool check = options_->get<bool>("check-bytearray",true);
+      slgen_ = New<data::BinaryShortlistGenerator>(shortlistMemory_->begin(), shortlistMemory_->size(),
+                                                     vocabs_->front(), vocabs_->back(),
+                                                     srcIdx, trgIdx, shared_vcb, check);
+    }
+    else {
+      // Changed to BinaryShortlistGenerator to enable loading binary shortlist file
+      // This class also supports text shortlist file
+      slgen_ = New<data::BinaryShortlistGenerator>(options_, vocabs_->front(),
+                                                    vocabs_->back(), srcIdx,
+                                                    trgIdx, shared_vcb);
+    }
  }

-  graph_ = New<ExpressionGraph>(true); // always optimize
+  graph_ = New<ExpressionGraph>(true); // set the graph to be inference only
  auto prec = options_->get<std::vector<std::string>>("precision", {"float32"});
  graph_->setDefaultElementType(typeFromString(prec[0]));
  graph_->setDevice(device_);
  graph_->getBackend()->configureDevice(options_);
  graph_->reserveWorkspaceMB(options_->get<size_t>("workspace"));
-  scorers_ = createScorers(options_);
+  if (modelMemory_->size() > 0 && modelMemory_->begin() != nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file
+    ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0,
+             "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
+    const std::vector<const void *> container = {modelMemory_->begin()}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding.
+    scorers_ = createScorers(options_, container);
+  } else {
+    scorers_ = createScorers(options_);
+  }
  for (auto scorer : scorers_) {
    scorer->init(graph_);
    if (slgen_) {
--- a/src/translator/batch_translator.h
+++ b/src/translator/batch_translator.h
@ -12,7 +12,7 @@
 #include "translator/history.h"
 #include "translator/scorers.h"

-#ifdef WITH_PTHREADS
+#ifndef WASM_COMPATIBLE_SOURCE
 #include "pcqueue.h"
 #endif

@ -26,8 +26,16 @@ class BatchTranslator {
  // shut down in Service which calls join() on the threads.

 public:
-  BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
-                  Ptr<Options> options);
+  /**
+   * Initialise the marian translator.
+   * @param device DeviceId that performs translation. Could be CPU or GPU
+   * @param vocabs Vector that contains ptrs to two vocabs
+   * @param options Marian options object
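+   * @param modelMemory byte array (aligned to 256!!!) that contains the bytes of a model.bin. The pointer is dereferenced in initialize(), so it must not be a nullptr; pass an empty AlignedMemory if not used.
+   * @param shortlistMemory byte array of shortlist (aligned to 64). Must not be a nullptr either; pass an empty AlignedMemory if not used.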
+   */
+  explicit BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
+                  Ptr<Options> options, const AlignedMemory* modelMemory, const AlignedMemory* shortlistMemory);

  // convenience function for logging. TODO(jerin)
  std::string _identifier() { return "worker" + std::to_string(device_.no); }
@ -41,6 +49,8 @@ private:
  Ptr<ExpressionGraph> graph_;
  std::vector<Ptr<Scorer>> scorers_;
  Ptr<data::ShortlistGenerator const> slgen_;
+  const AlignedMemory* modelMemory_{nullptr};
+  const AlignedMemory* shortlistMemory_{nullptr};
 };

 } // namespace bergamot
--- a/src/translator/batcher.h
+++ b/src/translator/batcher.h
@ -7,7 +7,7 @@
 #include "definitions.h"
 #include "request.h"

-#ifdef WITH_PTHREADS
+#ifndef WASM_COMPATIBLE_SOURCE
 #include "pcqueue.h"
 #endif

--- a/src/translator/byte_array_util.cpp
+++ b/src/translator/byte_array_util.cpp
@ -0,0 +1,108 @@
+#include "byte_array_util.h"
+#include <stdlib.h>
+#include <iostream>
+
+namespace marian {
+namespace bergamot {
+
+namespace {
+
+// This is a basic validator that checks if the file has not been truncated
+// it basically loads up the header and checks
+
+// This struct and the getter are copied from the marian source, because it's located
+// inside src/common/binary.cpp:15 and we can't include it.
+// Per-item header of a binary model file. validateBinaryModel() reads these
+// by reinterpreting raw file bytes as Header objects, so the field order and
+// widths must not be changed.
+struct Header {
+  uint64_t nameLength;  // length of the item's name, counted in chars
+  uint64_t type;        // not inspected by the validator in this file
+  uint64_t shapeLength; // number of ints in the item's shape
+  uint64_t dataLength;  // size of the item's data, counted in bytes
+};
+
+// Interprets `current` as a pointer to T and returns it, then moves `current`
+// forward past `num` elements of T.
+template <typename T>
+const T* get(const void*& current, uint64_t num = 1) {
+  const T* const head = static_cast<const T*>(current);
+  current = head + num;
+  return head;
+}
+
+// Checks that a binary model of `fileSize` bytes is large enough to hold
+// everything its own headers announce, i.e. that it has not been truncated.
+// Every length taken from the file is compared against the bytes still
+// available before it is used, so corrupted (huge) values can neither overflow
+// the running total nor move a read past the end of the buffer.
+// Returns true when the file is at least big enough to contain the model.
+bool validateBinaryModel(AlignedMemory& model, uint64_t fileSize) {
+  // Bytes a complete file must contain up to the point parsed so far.
+  // Invariant maintained below: memoryNeeded <= fileSize.
+  uint64_t memoryNeeded = sizeof(uint64_t)*2; // file version + header count
+  if (fileSize < memoryNeeded) {
+    return false;
+  }
+  const void * current = &model[0];
+  get<uint64_t>(current); // skip the binary file version, it is not validated
+  const uint64_t numHeaders = *get<uint64_t>(current); // number of item headers that follow
+
+  // Compare by division so that a bogus header count cannot overflow the
+  // multiplication and slip past the size check.
+  if (numHeaders > (fileSize - memoryNeeded) / sizeof(Header)) {
+    return false;
+  }
+  memoryNeeded += numHeaders*sizeof(Header);
+  const Header* headers = get<Header>(current, numHeaders); // read that many headers
+
+  // Account for the names and shapes that follow the headers.
+  for (uint64_t i = 0; i < numHeaders; i++) {
+    uint64_t remaining = fileSize - memoryNeeded;
+    if (headers[i].nameLength > remaining) {
+      return false;
+    }
+    remaining -= headers[i].nameLength;
+    if (headers[i].shapeLength > remaining / sizeof(int)) {
+      return false;
+    }
+    memoryNeeded += headers[i].nameLength + headers[i].shapeLength*sizeof(int);
+    // Advance the pointer past this item's name and shape.
+    get<char>(current, headers[i].nameLength);
+    get<int>(current, headers[i].shapeLength);
+  }
+
+  // Before the tensor data there is a uint64_t holding the size of a padding
+  // that follows it. The field itself must fit in the file before it is read.
+  if (fileSize - memoryNeeded < sizeof(uint64_t)) {
+    return false;
+  }
+  const uint64_t aligned_offset = *get<uint64_t>(current); // Offset to align memory to 256 size
+  memoryNeeded += sizeof(uint64_t);
+  if (aligned_offset > fileSize - memoryNeeded) {
+    return false;
+  }
+  memoryNeeded += aligned_offset;
+
+  // Finally the tensor data itself.
+  for (uint64_t i = 0; i < numHeaders; i++) {
+    if (headers[i].dataLength > fileSize - memoryNeeded) {
+      return false;
+    }
+    memoryNeeded += headers[i].dataLength;
+  }
+
+  // Everything announced by the headers fits into the file.
+  return true;
+}
+
+} // Anonymous namespace
+
+// Reads the whole file at `path` into a newly created AlignedMemory that is
+// constructed with the requested `alignment`, and returns that buffer.
+// Aborts if the stream reports bad() after construction or if the final size
+// comparison fails.
+AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
+  uint64_t fileSize = filesystem::fileSize(path);
+  io::InputFileStream in(path);
+  ABORT_IF(in.bad(), "Failed opening file stream: {}", path);
+  // The buffer is sized from the file size queried above.
+  AlignedMemory alignedMemory(fileSize, alignment);
+  in.read(reinterpret_cast<char *>(alignedMemory.begin()), fileSize);
+  // NOTE(review): this compares the buffer's size with fileSize, not the
+  // number of bytes actually read, so a short read does not appear to be
+  // detected here - confirm against the stream type's API.
+  ABORT_IF(alignedMemory.size() != fileSize, "Error reading file {}", path);
+  return alignedMemory;
+}
+
+// Loads the single binary model named by the "models" option into memory
+// (alignment 256) and returns it.
+// Aborts unless exactly one model is configured, its path has the extension
+// ".bin", and validateBinaryModel() accepts the loaded bytes.
+AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options){
+    auto models = options->get<std::vector<std::string>>("models");
+    ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
+    marian::filesystem::Path modelPath(models[0]);
+    ABORT_IF(modelPath.extension() != marian::filesystem::Path(".bin"), "The file of binary model should end with .bin");
+    AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
+    // The size of the loaded buffer is passed as the file size to validate against.
+    ABORT_IF(!validateBinaryModel(alignedMemory, alignedMemory.size()), "The binary file is invalid. Incomplete or corrupted download?");
+    return alignedMemory;
+}
+
+// Loads the first file listed under the "shortlist" option into memory
+// (alignment 64) and returns it. Aborts when the option holds no entries.
+// Any further entries of the option are not used by this function.
+AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options){
+  auto shortlist = options->get<std::vector<std::string>>("shortlist");
+  ABORT_IF(shortlist.empty(), "No path to shortlist file is given.");
+  return loadFileToMemory(shortlist[0], 64);
+}
+
+} // namespace bergamot
+} // namespace marian
--- a/src/translator/byte_array_util.h
+++ b/src/translator/byte_array_util.h
@ -0,0 +1,12 @@
+#ifndef SRC_BERGAMOT_BYTE_ARRAY_UTIL_H_
+#define SRC_BERGAMOT_BYTE_ARRAY_UTIL_H_
+
+#include "marian.h"
+#include "definitions.h"
+
+namespace marian {
+namespace bergamot {
+
+/// Reads the file at `path` into memory created with the given `alignment`.
+AlignedMemory loadFileToMemory(const std::string& path, size_t alignment);
+
+/// Loads the binary model named by the "models" option (alignment 256) and
+/// validates it; aborts on an invalid configuration or file.
+AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
+
+/// Loads the first file named by the "shortlist" option (alignment 64).
+AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options);
+
+} // namespace bergamot
+} // namespace marian
+
+#endif // SRC_BERGAMOT_BYTE_ARRAY_UTIL_H_
--- a/src/translator/definitions.h
+++ b/src/translator/definitions.h
@ -3,6 +3,7 @@

 #include "data/types.h"
 #include "data/vocab_base.h"
+#include "aligned.h"
 #include <vector>

 namespace marian {
@ -21,6 +22,9 @@ template <class T, typename... Args> UPtr<T> UNew(Args &&... args) {

 template <class T> UPtr<T> UNew(UPtr<T> p) { return UPtr<T>(p); }

+/// Shortcut to AlignedVector<const void*> for byte arrays
+typedef AlignedVector<const void*> AlignedMemory;
+
 } // namespace bergamot
 } // namespace marian

--- a/src/translator/parser.h
+++ b/src/translator/parser.h
@ -1,6 +1,10 @@
 #ifndef SRC_BERGAMOT_PARSER_H
 #define SRC_BERGAMOT_PARSER_H

+#include "3rd_party/yaml-cpp/yaml.h"
+#include "common/config_parser.h"
+#include "common/config_validator.h"
+#include "common/options.h"
 #include "marian.h"

 namespace marian {
@ -19,7 +23,45 @@ inline marian::ConfigParser createConfigParser() {
      "--max-length-break", "Bergamot Options",
      "Maximum input tokens to be processed in a single sentence.", 128);

-  return cp;
+  cp.addOption<bool>(
+      "--check-bytearray", "Bergamot Options",
+      "Flag holds whether to check the content of the bytearray (true by default)", true);
+
+    return cp;
+}
+
+/// Builds an Options object from a YAML configuration string: the defaults
+/// registered by createConfigParser() are merged in first, `config` is parsed
+/// on top of them, and the result is validated for translation mode.
+inline std::shared_ptr<marian::Options>
+parseOptions(const std::string &config) {
+  // @TODO(jerinphilip) Seeding with the default config is questionable
+  // (@XapaJIaMnu suggests it should not be used), but this function only has
+  // the std::string config to construct Options from. Without the defaults,
+  // rebuilding the YAML throws:
+  //
+  //   Error: Unhandled exception of type 'N4YAML11InvalidNodeE': invalid node;
+  //   this may result from using a map iterator as a sequence iterator, or
+  //   vice-versa
+  //
+  //   Error: Aborted from void unhandledException() in
+  //   3rd_party/marian-dev/src/common/logging.cpp:113
+  marian::ConfigParser parser = createConfigParser();
+  const YAML::Node &defaults = parser.getConfig();
+
+  // Defaults first, then the caller's configuration on top of them.
+  marian::Options parsed;
+  parsed.merge(defaults);
+  parsed.parse(config);
+
+  // Validation runs on a YAML clone of the merged options.
+  YAML::Node merged = parsed.cloneToYamlNode();
+  marian::ConfigValidator checker(merged);
+  checker.validateOptions(marian::cli::mode::translation);
+
+  return std::make_shared<marian::Options>(parsed);
 }

 } //  namespace bergamot
--- a/src/translator/request.cpp
+++ b/src/translator/request.cpp
@ -12,12 +12,10 @@ namespace bergamot {

 // -----------------------------------------------------------------
 Request::Request(size_t Id, size_t lineNumberBegin,
-                 std::vector<Ptr<Vocab const>> &vocabs, std::string &&source,
-                 Segments &&segments, SentenceRanges &&sourceRanges,
-                 std::promise<Response> responsePromise)
+                 std::vector<Ptr<Vocab const>> &vocabs, AnnotatedText &&source,
+                 Segments &&segments, std::promise<Response> responsePromise)
    : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs),
      source_(std::move(source)), segments_(std::move(segments)),
-      sourceRanges_(std::move(sourceRanges)),
      response_(std::move(responsePromise)) {

  counter_ = segments_.size();
@ -48,8 +46,7 @@ void Request::processHistory(size_t index, Ptr<History> history) {
 void Request::completeRequest() {
  // Request no longer needs to hold the content, can transfer it to
  // Response.
-  Response response(std::move(source_), std::move(sourceRanges_),
-                    std::move(histories_), *vocabs_);
+  Response response(std::move(source_), std::move(histories_), *vocabs_);
  response_.set_value(std::move(response));
 }

--- a/src/translator/request.h
+++ b/src/translator/request.h
@ -1,9 +1,9 @@
 //
 // Defines:
 //
-// Request: holds the input blob of a text, Segments (vector<Words>) which are
+// Request: holds the input text, Segments (vector<Words>) which are
 // to go to the batching mechanism and alignments between the processed
-// segments and the input blob (sourceTokenRanges). In addition, Request takes
+// segments and the input text (sourceTokenRanges). In addition, Request takes
 // care of the barrier which fires when all the Segments in a request are done
 // translating by the workers (BatchTranslator).
 // TODO(jerinphilip):  Extend Request with notions of Priority (sequence,
@ -36,9 +36,8 @@ namespace bergamot {
 class Request {
 public:
  Request(size_t Id, size_t lineNumberBegin,
-          std::vector<Ptr<Vocab const>> &vocabs_, std::string &&source,
-          Segments &&segments, SentenceRanges &&sourceTokenRanges,
-          std::promise<Response> responsePromise);
+          std::vector<Ptr<Vocab const>> &vocabs_, AnnotatedText &&source,
+          Segments &&segments, std::promise<Response> responsePromise);

  // Obtain the count of tokens in the segment correponding to index. Used to
  // insert sentence from multiple requests into the corresponding size bucket.
@ -77,9 +76,8 @@ private:
  // string_views of the text corresponding to these words, pointing to
  // sequences in source_. histories_ is a buffer which eventually stores the
  // translations of each segment in the corresponding index.
-  std::string source_;
+  AnnotatedText source_;
  Segments segments_;
-  SentenceRanges sourceRanges_;
  std::vector<Ptr<History>> histories_;

  // Members above are moved into newly constructed Response on completion
--- a/src/translator/response.cpp
+++ b/src/translator/response.cpp
@ -1,97 +1,105 @@
 #include "response.h"
-#include "sentence_ranges.h"
 #include "common/logging.h"
 #include "data/alignment.h"
+#include "sentence_ranges.h"

 #include <utility>

 namespace marian {
 namespace bergamot {

-Response::Response(std::string &&source, SentenceRanges &&sourceRanges,
-                   Histories &&histories, std::vector<Ptr<Vocab const>> &vocabs)
-    : source_(std::move(source)), sourceRanges_(std::move(sourceRanges)),
-      histories_(std::move(histories)), vocabs_(&vocabs) {}
-
-void Response::move(std::string &source, std::string &translation,
-                    SentenceMappings &sentenceMappings) {
-
-  // Construct required stuff first.
-  constructTranslation();
-  constructSentenceMappings(sentenceMappings);
-
-  // Move content out.
-  source = std::move(source_);
-  translation = std::move(translation_);
-
-  // The above assignment expects source, target be moved.
-  // which makes the following invalid, hence required to be cleared.
-  sourceRanges_.clear();
-  targetRanges_.clear();
-  histories_.clear();
-}
-
-void Response::constructTranslation() {
-  if (translationConstructed_) {
-    return;
-  }
-
+// Builds the Response for a finished request: takes ownership of the
+// annotated source text, decodes the best hypothesis of every history into
+// `target`, and records alignments and quality scores per sentence.
+Response::Response(AnnotatedText &&source, Histories &&histories,
+                   std::vector<Ptr<Vocab const>> &vocabs)
+    : source(std::move(source)) {
  // Reserving length at least as much as source_ seems like a reasonable thing
  // to do to avoid reallocations.
-  translation_.reserve(source_.size());
+  // Inside this body the bare name `source` is the constructor parameter,
+  // which has just been moved from; read the member through `this` instead.
+  target.text.reserve(this->source.text.size());

  // In a first step, the decoded units (individual senteneces) are compiled
  // into a huge string. This is done by computing indices first and appending
  // to the string as each sentences are decoded.
  std::vector<std::pair<size_t, size_t>> translationRanges;
+  std::vector<size_t> sentenceBegins;

  size_t offset{0};
  bool first{true};

-  for (auto &history : histories_) {
+  for (auto &history : histories) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList onebest = history->nBest(1);

    Result result = onebest[0]; // Expecting only one result;
    Words words = std::get<0>(result);
-    auto targetVocab = vocabs_->back();
-    std::string decoded = targetVocab->decode(words);
+    auto targetVocab = vocabs.back();
+
+    std::string decoded;
+    std::vector<string_view> targetMappings;
+    targetVocab->decodeWithByteRanges(words, decoded, targetMappings);
+
    if (first) {
      first = false;
    } else {
-      translation_ += " ";
+      target.text += " ";
      ++offset;
    }

-    translation_ += decoded;
-    translationRanges.emplace_back(offset, decoded.size());
+    sentenceBegins.push_back(translationRanges.size());
+    target.text += decoded;
+    auto decodedStringBeginMarker = targetMappings.front().begin();
+    for (auto &sview : targetMappings) {
+      size_t startIdx = offset + sview.begin() - decodedStringBeginMarker;
+      translationRanges.emplace_back(startIdx, startIdx + sview.size());
+    }
+
    offset += decoded.size();
+
+    // Alignments
+    // TODO(jerinphilip): The following double conversion might not be
+    // necessary. Hard alignment can directly be exported, but this would mean
+    // WASM bindings for a structure deep within marian source.
+    auto hyp = std::get<1>(result);
+    auto softAlignment = hyp->tracebackAlignment();
+    auto hardAlignment = data::ConvertSoftAlignToHardAlign(
+        softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a
+                                            // configurable parameter.
+
+    Alignment unified_alignment;
+    for (auto &p : hardAlignment) {
+      unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob});
+    }
+
+    alignments.push_back(std::move(unified_alignment));
+
+    // Quality scores: Sequence level is obtained as normalized path scores.
+    // Word level using hypothesis traceback. These are most-likely logprobs.
+    auto normalizedPathScore = std::get<2>(result);
+    auto wordQualities = hyp->tracebackWordScores();
+    wordQualities.pop_back();
+    qualityScores.push_back((Quality){normalizedPathScore, wordQualities});
  }

-  // Once the entire string is constructed, there are no further possibility of
-  // reallocation in the string's storage, the indices are converted into
-  // string_views.
+  // Once we have the indices in translation (which might be resized a few
+  // times) ready, we can prepare and store the string_view as annotations
+  // instead. This is accomplished by iterating over available sentences using
+  // sentenceBegin and using addSentence(...) API from Annotation.

-  for (auto &range : translationRanges) {
-    // TODO(@jerinphilip):  Currently considers target tokens as whole text.
-    // Needs to be further enhanced in marian-dev to extract alignments.
+  for (size_t i = 1; i <= sentenceBegins.size(); i++) {
    std::vector<string_view> targetMappings;
+    size_t begin = sentenceBegins[i - 1];
+    size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size()
+                                                   : sentenceBegins[i];

-    const char *begin = &translation_[range.first];
-    targetMappings.emplace_back(begin, range.second);
-    targetRanges_.addSentence(targetMappings);
-  }
+    for (size_t idx = begin; idx < safe_end; idx++) {
+      auto &p = translationRanges[idx];
+      size_t begin_idx = p.first;
+      size_t end_idx = p.second;

-  translationConstructed_ = true;
-}
+      const char *data = &target.text[begin_idx];
+      size_t size = end_idx - begin_idx;
+      targetMappings.emplace_back(data, size);
+    }

-void Response::constructSentenceMappings(
-    Response::SentenceMappings &sentenceMappings) {
-
-  for (size_t i = 0; i < sourceRanges_.numSentences(); i++) {
-    string_view src = sourceRanges_.sentence(i);
-    string_view tgt = targetRanges_.sentence(i);
-    sentenceMappings.emplace_back(src, tgt);
+    target.addSentence(targetMappings);
  }
 }
 } // namespace bergamot
--- a/src/translator/response.h
+++ b/src/translator/response.h
@ -1,9 +1,10 @@
 #ifndef SRC_BERGAMOT_RESPONSE_H_
 #define SRC_BERGAMOT_RESPONSE_H_

-#include "sentence_ranges.h"
+#include "data/alignment.h"
 #include "data/types.h"
 #include "definitions.h"
+#include "sentence_ranges.h"
 #include "translator/beam_search.h"

 #include <cassert>
@ -12,86 +13,79 @@

 namespace marian {
 namespace bergamot {
+
+/// Alignment is stored as a sparse matrix, this pretty much aligns with marian
+/// internals but is brought here to maintain translator
+/// agnosticism/independence.
+struct Point {
+  size_t src; ///< Index pointing to source ByteRange
+  size_t tgt; ///< Index pointing to target ByteRange
+  float prob; ///< Score in [0, 1] indicating the degree of alignment.
+};
+
+/// Alignment is a sparse matrix, where Points represent entries with values.
+typedef std::vector<Point> Alignment;
+
+/// -loglikelihoods of the sequence components as proxy to quality.
+struct Quality {
+  /// Certainty/uncertainty score for sequence.
+  float sequence;
+  /// Certainty/uncertainty for each word in the sequence.
+  std::vector<float> word;
+};
+
+/// Response holds AnnotatedText(s) of source-text and translated text,
+/// alignment information between source and target sub-words and sentences.
+///
+/// AnnotatedText provides an API to access markings of (sub)-word and
+/// sentences boundaries, which are required to interpret Quality and
+/// Alignment (s) at the moment.
 class Response {
-  // Response is a marian internal class (not a bergamot-translator class)
-  // holding source blob of text, vector of TokenRanges corresponding to each
-  // sentence in the source text blob and histories obtained from translating
-  // these sentences.
-  //
-  // This class provides an API at a higher level in comparison to History to
-  // access translations and additionally use string_view manipulations to
-  // recover structure in translation from source-text's structure known through
-  // reference string and string_view. As many of these computations are not
-  // required until invoked, they are computed as required and stored in data
-  // members where it makes sense to do so (translation,translationTokenRanges).
-  //
-  // Examples of such use-cases are:
-  //    translation()
-  //    translationInSourceStructure() TODO(@jerinphilip)
-  //    alignment(idx) TODO(@jerinphilip)
-  //    sentenceMappings (for bergamot-translator)

 public:
-  Response(std::string &&source, SentenceRanges &&sourceRanges,
-           Histories &&histories,
-           // Required for constructing translation and TokenRanges within
-           // translation lazily.
+  ///
+  Response(AnnotatedText &&source, Histories &&histories,
           std::vector<Ptr<Vocab const>> &vocabs);

+  /// \cond HIDDEN_PUBLIC
  // Move constructor.
  Response(Response &&other)
-      : source_(std::move(other.source_)),
-        translation_(std::move(other.translation_)),
-        sourceRanges_(std::move(other.sourceRanges_)),
-        targetRanges_(std::move(other.targetRanges_)),
-        histories_(std::move(other.histories_)),
-        vocabs_(std::move(other.vocabs_)){};
+      : source(std::move(other.source)), target(std::move(other.target)),
+        alignments(std::move(other.alignments)),
+        qualityScores(std::move(other.qualityScores)){};
+
+  // The following copy bans are not strictly required anymore, since
+  // Annotation is composed of the ByteRange primitive (it previously held
+  // string_views, which had to stay bound to their string). They are kept so
+  // that the compiler complains about accidental copies and moves are used.

-  // Prevents CopyConstruction and CopyAssignment. sourceRanges_ is constituted
-  // by string_view and copying invalidates the data member.
  Response(const Response &) = delete;
  Response &operator=(const Response &) = delete;

-  typedef std::vector<std::pair<const string_view, const string_view>>
-      SentenceMappings;
+  /// \endcond

-  // Moves source sentence into source, translated text into translation.
-  // Pairs of string_views to corresponding sentences in
-  // source and translation are loaded into sentenceMappings. These string_views
-  // reference the new source and translation.
-  //
-  // Calling move() invalidates the Response object as ownership is transferred.
-  // Exists for moving strc
-  void move(std::string &source, std::string &translation,
-            SentenceMappings &sentenceMappings);
+  /// Number of sentences translated. Splitting a text into sentences is
+  /// handled internally, and this information can be used to iterate
+  /// through meaningful units of translation for which alignment and quality
+  /// information are available.
+  const size_t size() const { return source.numSentences(); }

-  const Histories &histories() const { return histories_; }
-  const std::string &source() const { return source_; }
-  const std::string &translation() {
-    constructTranslation();
-    return translation_;
-  }
+  /// source text and annotations of (sub-)words and sentences.
+  AnnotatedText source;

-  // A convenience function provided to return translated text placed within
-  // source's structure. This is useful when the source text is a multi-line
-  // paragraph or string_views extracted from structured text like HTML and it's
-  // desirable to place the individual sentences in the locations of the source
-  // sentences.
-  // const std::string translationInSourceStructure();
-  // const PendingAlignmentType alignment(size_t idx);
+  /// translated text and annotations of (sub-)words and sentences.
+  AnnotatedText target;

-private:
-  void constructTranslation();
-  void constructSentenceMappings(SentenceMappings &);
+  /// -logprob of each word and negative log likelihood of sequence (sentence)
+  /// normalized by length, for each sentence processed by the translator.
+  /// Indices correspond to ranges accessible through respective Annotation on
+  /// source or target.
+  std::vector<Quality> qualityScores;

-  std::string source_;
-  SentenceRanges sourceRanges_;
-  Histories histories_;
-
-  std::vector<Ptr<Vocab const>> *vocabs_;
-  bool translationConstructed_{false};
-  std::string translation_;
-  SentenceRanges targetRanges_;
+  /// Alignments between source and target. Each Alignment is a
+  /// sparse matrix representation with indices corresponding
+  /// to (sub-)words accessible through Annotation.
+  std::vector<Alignment> alignments;
 };
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/sentence_ranges.cpp
+++ b/src/translator/sentence_ranges.cpp
@ -5,40 +5,83 @@
 namespace marian {
 namespace bergamot {

-void SentenceRanges::addSentence(std::vector<string_view> &wordRanges) {
-  addSentence(std::begin(wordRanges), std::end(wordRanges));
-}
-
-void SentenceRanges::addSentence(WordIterator begin, WordIterator end) {
+void Annotation::addSentence(std::vector<ByteRange> &sentence) {
+  flatByteRanges_.insert(std::end(flatByteRanges_), std::begin(sentence),
+                         std::end(sentence));
  size_t size = flatByteRanges_.size();
-  flatByteRanges_.insert(std::end(flatByteRanges_), begin, end);
-  sentenceBeginIds_.push_back(size);
+  sentenceEndIds_.push_back(size);
 }

-string_view SentenceRanges::sentence(size_t index) const {
-  size_t bos_id;
-  string_view eos, bos;
+size_t Annotation::numWords(size_t sentenceIdx) const {
+  // Sentence `sentenceIdx` occupies the half-open range of word indices
+  // [sentenceEndIds_[sentenceIdx], sentenceEndIds_[sentenceIdx + 1]); its
+  // word count is the width of that range.
+  const size_t firstWord = sentenceEndIds_[sentenceIdx];
+  const size_t pastLastWord = sentenceEndIds_[sentenceIdx + 1];
+  return pastLastWord - firstWord;
+}

-  bos_id = sentenceBeginIds_[index];
-  bos = flatByteRanges_[bos_id];
+// Returns the ByteRange spanning sentence `sentenceIdx`: from the begin of its
+// first word to the end of its last word. An empty sentence yields an empty
+// range placed at the end of the word preceding it, or at offset 0 when no
+// word precedes it.
+ByteRange Annotation::sentence(size_t sentenceIdx) const {
+  size_t bosId, eosId;
+  bosId = sentenceEndIds_[sentenceIdx]; // Half interval, so;
+  eosId = sentenceEndIds_[sentenceIdx + 1];
+  ByteRange sentenceByteRange;

-  if (index + 1 == numSentences()) {
-    eos = flatByteRanges_.back();
+  if (bosId == eosId) {
+    // We have an empty sentence. However, we want to be able to point where in
+    // target this happened through the ranges. We are looking for the end of
+    // the flatByteRange and non-empty sentence before this happened and
+    // construct empty string-view equivalent ByteRange.
+    // When the empty sentence comes before any word (eosId == 0) there is no
+    // preceding ByteRange; anchor at 0 rather than indexing with eosId - 1,
+    // which would wrap around and read out of bounds.
+    size_t anchor = (eosId == 0) ? 0 : flatByteRanges_[eosId - 1].end;
+    sentenceByteRange = (ByteRange){anchor, anchor};
  } else {
-    assert(index < numSentences());
-    size_t eos_id = sentenceBeginIds_[index + 1];
-    --eos_id;
-    eos = flatByteRanges_[eos_id];
+    ByteRange bos = flatByteRanges_[bosId];
+    ByteRange eos = flatByteRanges_[eosId - 1];
+    sentenceByteRange = (ByteRange){bos.begin, eos.end};
  }
-
-  return sentenceBetween(bos, eos);
+  return sentenceByteRange;
 }

-string_view SentenceRanges::sentenceBetween(string_view firstWord,
-                                            string_view lastWord) const {
+ByteRange Annotation::word(size_t sentenceIdx, size_t wordIdx) const {
+  // The words of a sentence are stored contiguously, starting at the index
+  // where the previous sentence ends.
+  const size_t firstWord = sentenceEndIds_[sentenceIdx];
+  return flatByteRanges_[firstWord + wordIdx];
+}

-  const char *data = firstWord.data();
-  size_t size = lastWord.data() + lastWord.size() - firstWord.data();
+string_view AnnotatedText::word(size_t sentenceIdx, size_t wordIdx) const {
+  // Look up the word's ByteRange and view the matching bytes of `text`.
+  return asStringView(annotation.word(sentenceIdx, wordIdx));
+}
+
+string_view AnnotatedText::sentence(size_t sentenceIdx) const {
+  // Look up the sentence's ByteRange and view the matching bytes of `text`.
+  return asStringView(annotation.sentence(sentenceIdx));
+}
+
+void AnnotatedText::addSentence(std::vector<string_view> &wordRanges) {
+  // Whole-container convenience: forward to the iterator overload.
+  addSentence(wordRanges.begin(), wordRanges.end());
+}
+
+void AnnotatedText::addSentence(std::vector<string_view>::iterator begin,
+                                std::vector<string_view>::iterator end) {
+  // Rebase every string_view in [begin, end) onto `text` as a ByteRange of
+  // offsets, then hand the collected ranges to the annotation as one sentence.
+  std::vector<ByteRange> byteRanges;
+  for (auto it = begin; it != end; ++it) {
+    size_t first = it->data() - &text[0];
+    size_t pastLast = first + it->size();
+    byteRanges.push_back((ByteRange){first, pastLast});
+  }
+  annotation.addSentence(byteRanges);
+}
+
+// Index-based counterpart of word(): returns the word's ByteRange as stored
+// in the underlying Annotation, without creating a string_view.
+ByteRange AnnotatedText::wordAsByteRange(size_t sentenceIdx,
+                                         size_t wordIdx) const {
+  return annotation.word(sentenceIdx, wordIdx);
+}
+
+// Index-based counterpart of sentence(): returns the sentence's ByteRange as
+// computed by the underlying Annotation, without creating a string_view.
+ByteRange AnnotatedText::sentenceAsByteRange(size_t sentenceIdx) const {
+  return annotation.sentence(sentenceIdx);
+}
+
+// Returns a string_view over the bytes of `text` covered by `byteRange`,
+// starting at byteRange.begin and byteRange.size() bytes long. No bounds
+// check is performed here.
+string_view AnnotatedText::asStringView(const ByteRange &byteRange) const {
+  const char *data = &text[byteRange.begin];
+  size_t size = byteRange.size();
  return string_view(data, size);
 }

--- a/src/translator/sentence_ranges.h
+++ b/src/translator/sentence_ranges.h
@ -3,50 +3,165 @@

 #include "data/types.h"
 #include <cassert>
+#include <utility>
 #include <vector>

 namespace marian {
 namespace bergamot {

-class SentenceRanges {
-  // SentenceRanges stores string_views into a source text, with additional
-  // annotations to mark sentence boundaries.
-  //
-  // Given the availability annotations, this container provides capabilty to
-  // add sentences, and access individual sentences.
+/// ByteRange stores indices for half-interval [begin, end) in a string. Can be
+/// used to represent a sentence, word.
+struct ByteRange {
+  size_t begin; ///< Index of the first byte in the range.
+  size_t end;   ///< Index one past the last byte in the range.
+  /// Number of bytes covered by the range, i.e. `end - begin`.
+  size_t size() const { return end - begin; }
+};
+
+/// An Annotation is a collection of ByteRanges used to denote ancillary
+/// information of sentences and words on a text of string. Annotation is meant
+/// for consumption on platforms where `string_view` creates problems (eg:
+/// exports through WASM) conveniently rebasing them as required into
+/// ByteRanges. See AnnotatedText for cases where this is a non-issue.
+///
+/// **Usage**
+///
+/// To ensure rebasing is consistent during creation and updates, use
+/// `Annotation` best through `AnnotatedText`, which also holds the reference
+/// string and can work with `string_views`.
+///
+/// If used separately, it is on the user to ensure the reference string
+/// is the same as what the Annotation refers to. For best results, an instance
+/// is expected to be read only in this mode of operation.
+///
+/// **Idea**
+///
+/// Annotation is intended to be the same structure conceptually as below,
+/// except the `std::vector<std::vector<ByteRange>>` hammered into a flat
+/// structure to avoid multiple reallocs keeping efficiency in mind. This is
+/// achieved by having markers of where sentence ends in the flat container
+/// storing word ByteRanges.
+///
+/// ```cpp
+/// typedef ByteRange Word;
+/// // std::vector<ByteRange>, a single sentence
+/// typedef std::vector<Word> Sentence;
+/// // std::vector<std::vector<ByteRange>>, multiple sentences
+/// typedef std::vector<Sentence> Annotation;
+///
+/// Annotation example;
+/// ```
+/// This structure exists to provide a consistent API to access the nested
+/// sentences of varying lengths, which occur in source-text processed into
+/// multiple sentences, and target-text translated from source as multiple
+/// sentences, both composed of (sub)-words, providing a List[List] like access
+/// while storing it in a compact and efficient manner.
+class Annotation {
 public:
-  typedef std::vector<string_view>::iterator WordIterator;
-
-  void addSentence(std::vector<string_view> &wordRanges);
-  void addSentence(WordIterator begin, WordIterator end);
-
-  void clear() {
-    flatByteRanges_.clear();
-    sentenceBeginIds_.clear();
+  /// Annotation is constructed empty. See `addSentence()` to populate it with
+  /// annotations.
+  Annotation() {
+    // The -1-th sentence ends at 0.
+    sentenceEndIds_.push_back(0);
  }

-  size_t numSentences() const { return sentenceBeginIds_.size(); }
+  /// Returns the number of sentences annotated in a text.
+  size_t numSentences() const { return sentenceEndIds_.size() - 1; }

-  // Returns a string_view into the ith sentence.
-  string_view sentence(size_t index) const;
+  /// Returns number of words in the sentence identified by `sentenceIdx`.
+  size_t numWords(size_t sentenceIdx) const;
+
+  /// Adds a sentence from its `vector<ByteRange>` representation, internally doing
+  /// extra book-keeping for the sentence terminal markings. Sentences are
+  /// expected to be added in order as they occur in text.
+  void addSentence(std::vector<ByteRange> &sentence);
+
+  /// Returns a ByteRange representing `wordIdx` in sentence indexed by
+  /// `sentenceIdx`. `wordIdx` follows 0-based indexing, and should be less than
+  /// `.numWords()` for `sentenceIdx` for defined behaviour.
+  ByteRange word(size_t sentenceIdx, size_t wordIdx) const;
+
+  /// Returns a ByteRange representing sentence corresponding to `sentenceIdx`.
+  /// `sentenceIdx` follows 0-based indexing, and behaviour is defined only when
+  /// less than `.numSentences()`.
+  ByteRange sentence(size_t sentenceIdx) const;

 private:
-  // A flat storage for string_views. Can be words or sentences.
-  std::vector<string_view> flatByteRanges_;
+  /// A flat storage for ByteRanges. Composed of word ByteRanges, extra
+  /// information in sentenceEndIds_ to denote sentence boundary markers as
+  /// indices.
+  std::vector<ByteRange> flatByteRanges_;

-  // The container grows dynamically with addSentence. size_t marking index is
-  // used to ensure the sentence boundaries stay same while underlying storage
-  // might be changed during reallocation.
-  std::vector<size_t> sentenceBeginIds_;
+  /// Stores indices onto flatByteRanges_ of where sentences end (not inclusive,
+  /// aligned with C++ half interval notions). There is a 0 marker to simplify
+  /// sources, indicating where the -1-th sentence ends.
+  std::vector<size_t> sentenceEndIds_;
+};

-  // Utility function to extract the string starting at firstWord and ending at
-  // lastWord as a single string-view.
-  string_view sentenceBetween(string_view firstWord,
-                              string_view lastWord) const;
+/// AnnotatedText is effectively std::string text + Annotation, providing the
+/// following additional desiderata.
+///
+/// 1. Access to processed string_views for convenience rather than ByteRanges
+/// (which only provides index information).
+///
+/// 2. Transparently convert string_views into ByteRanges for the Annotation
+/// referring to the text bound by this structure.
+///
+/// 3. Bind the text and annotations together, to move around as a meaningful
+/// unit.
+
+struct AnnotatedText {
+public:
+  std::string text;      ///< Text that the ByteRanges in `annotation` refer to.
+  Annotation annotation; ///< sentence and (sub-) word annotations.
+
+  /// Construct an empty AnnotatedText. This is useful when the target string or
+  /// ByteRanges are not known yet, but the public members can be used to
+  /// populate it. One use-case is translated text, which is created by
+  /// decoding from histories, with the ByteRanges only known after the string
+  /// has been constructed.
+  AnnotatedText() {}
+
+  /// Construct moving in a string (for efficiency purposes, copying string
+  /// constructor is disallowed).
+  AnnotatedText(std::string &&text) : text(std::move(text)){};
+
+  /// Move constructor: takes over both the text and its annotation.
+  AnnotatedText(AnnotatedText &&annotatedBlob)
+      : text(std::move(annotatedBlob.text)),
+        annotation(std::move(annotatedBlob.annotation)) {}
+
+  /// Returns the number of sentences in the annotation structure.
+  const size_t numSentences() const { return annotation.numSentences(); }
+
+  /// Returns number of words in the sentence identified by sentenceIdx.
+  const size_t numWords(size_t sentenceIdx) const {
+    return annotation.numWords(sentenceIdx);
+  }
+
+  /// Adds a sentence, used to load from SentencePiece annotations conveniently.
+  void addSentence(std::vector<string_view> &wordRanges);
+
+  /// Adds a sentence between two iterators, often useful while constructing
+  /// from parts of a container.
+  void addSentence(std::vector<string_view>::iterator begin,
+                   std::vector<string_view>::iterator end);
+
+  /// Returns a string_view representing wordIdx in sentenceIdx
+  string_view word(size_t sentenceIdx, size_t wordIdx) const;
+
+  /// Returns a string_view representing sentence corresponding to sentenceIdx.
+  string_view sentence(size_t sentenceIdx) const;
+
+  /// Returns a ByteRange representing wordIdx in sentenceIdx
+  ByteRange wordAsByteRange(size_t sentenceIdx, size_t wordIdx) const;
+
+  /// Returns a ByteRange representing sentence corresponding to sentenceIdx.
+  ByteRange sentenceAsByteRange(size_t sentenceIdx) const;
+
+private:
+  /// Converts a ByteRange of offsets into a string_view over `text`.
+  string_view asStringView(const ByteRange &byteRange) const;
 };

 } // namespace bergamot
-
 } // namespace marian

 #endif //  BERGAMOT_SENTENCE_RANGES_H_
--- a/src/translator/service.cpp
+++ b/src/translator/service.cpp
@ -5,25 +5,78 @@
 #include <string>
 #include <utility>

+/// Loads the vocabularies named by the "vocabs" option, in the order listed.
+/// A path that appears more than once is loaded a single time and the same
+/// Vocab instance is returned for every occurrence.
+inline std::vector<marian::Ptr<const marian::Vocab>>
+loadVocabularies(marian::Ptr<marian::Options> options) {
+  // @TODO: parallelize vocab loading for faster startup
+  auto vocabPaths = options->get<std::vector<std::string>>("vocabs");
+  // with the current setup, we need at least two vocabs: src and trg
+  ABORT_IF(vocabPaths.size() < 2, "Insufficient number of vocabularies.");
+
+  // Cache of already-loaded vocabularies, keyed by file path.
+  std::unordered_map<std::string, marian::Ptr<marian::Vocab>> loaded;
+  std::vector<marian::Ptr<marian::Vocab const>> vocabs;
+  vocabs.reserve(vocabPaths.size());
+  for (size_t idx = 0; idx < vocabPaths.size(); ++idx) {
+    const std::string &path = vocabPaths[idx];
+    auto cached = loaded.find(path);
+    if (cached == loaded.end()) { // first occurrence: load the vocab
+      auto vocab = marian::New<marian::Vocab>(options, idx);
+      vocab->load(path);
+      cached = loaded.emplace(path, vocab).first;
+    }
+    vocabs.push_back(cached->second);
+  }
+  return vocabs;
+}
+
 namespace marian {
 namespace bergamot {

-Service::Service(Ptr<Options> options)
-    : ServiceBase(options), numWorkers_(options->get<int>("cpu-threads")),
-      pcqueue_(numWorkers_) {
-  if (numWorkers_ == 0) {
-    ABORT("Fatal: Attempt to create multithreaded instance with --cpu-threads "
-          "0. ");
-  }
+// Builds the service and its translators. The initializers are written in the
+// order the members are declared in service.h, because that is the order in
+// which they are actually constructed.
+Service::Service(Ptr<Options> options, AlignedMemory modelMemory,
+                 AlignedMemory shortlistMemory)
+    : numWorkers_(options->get<int>("cpu-threads")),
+      modelMemory_(std::move(modelMemory)),
+      shortlistMemory_(std::move(shortlistMemory)), requestId_(0),
+      // loadVocabularies returns by value, so no std::move is needed here.
+      vocabs_(loadVocabularies(options)), text_processor_(vocabs_, options),
+      batcher_(options)
+#ifndef WASM_COMPATIBLE_SOURCE
+      // 0 elements in PCQueue is illegal and can lead to failures. Adding a
+      // guard to have at least one entry allocated. In the single-threaded
+      // case, while initialized pcqueue_ remains unused.
+      ,
+      pcqueue_(std::max<size_t>(1, numWorkers_))
+#endif
+{

-  translators_.reserve(numWorkers_);
+  if (numWorkers_ == 0) {
+    // No worker threads requested: a single translator is driven directly
+    // from translate() without std::thread.
+    build_translators(options, /*numTranslators=*/1);
+    initialize_blocking_translator();
+  } else {
+    // One translator per worker thread.
+    build_translators(options, numWorkers_);
+    initialize_async_translators();
+  }
+}
+
+/// Appends numTranslators BatchTranslator instances to translators_, using
+/// CPU device ids 0 .. numTranslators - 1. Every translator is given the
+/// vocabularies, the options, and pointers to the model and shortlist memory
+/// held by this Service.
+void Service::build_translators(Ptr<Options> options, size_t numTranslators) {
+  translators_.reserve(numTranslators);
+  size_t deviceNo = 0;
+  while (deviceNo < numTranslators) {
+    marian::DeviceId device(deviceNo, DeviceType::cpu);
+    translators_.emplace_back(device, vocabs_, options, &modelMemory_,
+                              &shortlistMemory_);
+    ++deviceNo;
+  }
+}
+
+/// Initializes the last translator in translators_ on the calling thread.
+void Service::initialize_blocking_translator() {
+  auto &translator = translators_.back();
+  translator.initialize();
+}
+
+/// Pulls batches out of batcher_ until it yields no more, translating each one
+/// with the last translator in translators_ on the calling thread.
+void Service::blocking_translate() {
+  for (Batch batch; batcher_ >> batch;) {
+    translators_.back().translate(batch);
+  }
+}
+
+#ifndef WASM_COMPATIBLE_SOURCE
+void Service::initialize_async_translators() {
  workers_.reserve(numWorkers_);

  for (size_t cpuId = 0; cpuId < numWorkers_; cpuId++) {
-    marian::DeviceId deviceId(cpuId, DeviceType::cpu);
-    translators_.emplace_back(deviceId, vocabs_, options);
-    auto &translator = translators_.back();
-
+    auto &translator = translators_[cpuId];
    workers_.emplace_back([&translator, this] {
      translator.initialize();

@ -42,29 +95,58 @@ Service::Service(Ptr<Options> options)
  }
 }

-void Service::enqueue() {
+void Service::async_translate() {
  Batch batch;
  while (batcher_ >> batch) {
    pcqueue_.ProduceSwap(batch);
  }
 }
+#else  // WASM_COMPATIBLE_SOURCE
+/// Variant compiled when WASM_COMPATIBLE_SOURCE is defined: always aborts,
+/// because async mode requires multithreading.
+void Service::initialize_async_translators() {
+  ABORT("Cannot run in async mode without multithreading.");
+}
+
+/// Variant compiled when WASM_COMPATIBLE_SOURCE is defined: always aborts,
+/// because async mode requires multithreading.
+void Service::async_translate() {
+  ABORT("Cannot run in async mode without multithreading.");
+}
+#endif // WASM_COMPATIBLE_SOURCE
+
+/// Takes ownership of input, splits it into segments, queues the resulting
+/// request with the batcher and triggers translation. Returns the future
+/// paired with the promise that is handed to the request.
+std::future<Response> Service::translate(std::string &&input) {
+  // The annotated source owns the text; process() fills in the segments and
+  // records the sentences on the annotation.
+  AnnotatedText annotatedSource(std::move(input));
+  Segments sentenceSegments;
+  text_processor_.process(annotatedSource, sentenceSegments);
+
+  // The future has to be taken before the promise is moved into the request.
+  std::promise<Response> promise;
+  std::future<Response> pendingResponse = promise.get_future();
+
+  Ptr<Request> request =
+      New<Request>(requestId_++, /* lineNumberBegin = */ 0, vocabs_,
+                   std::move(annotatedSource), std::move(sentenceSegments),
+                   std::move(promise));
+  batcher_.addWholeRequest(request);
+
+  // Zero workers means translation happens right here, on this thread;
+  // otherwise batches are handed over to the worker threads.
+  if (numWorkers_ != 0) {
+    async_translate();
+  } else {
+    blocking_translate();
+  }
+  return pendingResponse;
+}
+
+Service::~Service() {
+#ifndef WASM_COMPATIBLE_SOURCE
+  for (size_t workerId = 0; workerId < numWorkers_; workerId++) {

-void Service::stop() {
-  for (auto &worker : workers_) {
    Batch poison = Batch::poison();
    pcqueue_.ProduceSwap(poison);
  }

-  for (auto &worker : workers_) {
-    if (worker.joinable()) {
-      worker.join();
+  for (size_t workerId = 0; workerId < numWorkers_; workerId++) {
+    if (workers_[workerId].joinable()) {
+      workers_[workerId].join();
    }
  }
-
-  workers_.clear();
+#endif
 }

-Service::~Service() { stop(); }
-
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/service.h
+++ b/src/translator/service.h
@ -4,10 +4,13 @@
 #include "batch_translator.h"
 #include "batcher.h"
 #include "data/types.h"
-#include "pcqueue.h"
 #include "response.h"
-#include "service_base.h"
 #include "text_processor.h"
+#include "translator/parser.h"
+
+#ifndef WASM_COMPATIBLE_SOURCE
+#include "pcqueue.h"
+#endif

 #include <queue>
 #include <vector>
@ -15,39 +18,104 @@
 namespace marian {
 namespace bergamot {

-class Service : public ServiceBase {
-
-  // Service exposes methods to translate an incoming blob of text to the
-  // Consumer of bergamot API.
-  //
-  // An example use of this API looks as follows:
-  //
-  //  options = ...;
-  //  service = Service(options);
-  //  std::string input_blob = "Hello World";
-  //  std::future<Response>
-  //      response = service.translate(std::move(input_blob));
-  //  response.wait();
-  //  Response result = response.get();
+/// Service exposes methods to translate an incoming blob of text to the
+/// Consumer of bergamot API.
+///
+/// An example use of this API looks as follows:
+///
+///  options = ...;
+///  service = Service(options);
+///  std::string input_text = "Hello World";
+///  std::future<Response>
+///      response = service.translate(std::move(input_text));
+///  response.wait();
+///  Response result = response.get();
+///
+/// Optionally, Service can be initialized by also passing modelMemory for
+/// efficiency (it defaults to a default-constructed AlignedMemory, in which
+/// case the model is read from the file supplied through config).
+class Service {

 public:
-  explicit Service(Ptr<Options> options);
-  // Implements enqueue and top through blocking methods.
-  void stop() override;
+  /// @param options Marian options object
+  /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
+  /// of a model.bin. Pass a default-constructed AlignedMemory when not used.
+  /// @param shortlistMemory byte array of shortlist (aligned to 64)
+  explicit Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory);
+
+  explicit Service(Ptr<Options> options) : Service(options, AlignedMemory(), AlignedMemory()){}
+
+  /// Construct Service from a string configuration.
+  /// @param [in] config string parsable as YAML expected to adhere with marian
+  /// config
+  /// @param [in] modelMemory byte array (aligned to 256!!!) that contains the
+  /// bytes of a model.bin. Optional.
+  /// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
+  explicit Service(const std::string &config,
+                   AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory())
+      : Service(parseOptions(config), std::move(modelMemory), std::move(shortlistMemory)) {}
+
+  /// Explicit destructor to clean up after any threads initialized in
+  /// asynchronous operation mode.
  ~Service();

+  /// To stay efficient and to refer to the string for alignments, expects
+  /// ownership be moved through std::move(..)
+  ///
+  ///  @param [in] input rvalue reference to the string to be translated.
+  std::future<Response> translate(std::string &&input);
+
 private:
-  void enqueue() override;
+  /// Build numTranslators number of translators with options from options
+  void build_translators(Ptr<Options> options, size_t numTranslators);
+  /// Initializes a blocking translator without using std::thread
+  void initialize_blocking_translator();
+  /// Translates through direct interaction between batcher_ and translators_
+  void blocking_translate();

-  // In addition to the common members (text_processor, requestId, vocabs_,
-  // batcher) extends with a producer-consumer queue, vector of translator
-  // instances owned by service each listening to the pcqueue in separate
-  // threads.
+  /// Launches multiple workers of translators using std::thread
+  /// Reduces to ABORT if called when compiled with WASM_COMPATIBLE_SOURCE
+  void initialize_async_translators();
+  /// Async translate produces to a producer-consumer queue as batches are
+  /// generated by Batcher. In another thread, the translators consume from
+  /// producer-consumer queue.
+  /// Reduces to ABORT if called when compiled with WASM_COMPATIBLE_SOURCE
+  void async_translate();

-  size_t numWorkers_;      // ORDER DEPENDENCY
-  PCQueue<Batch> pcqueue_; // ORDER DEPENDENCY
+  /// Number of workers to launch.
+  size_t numWorkers_;              // ORDER DEPENDENCY (pcqueue_)
+  /// Model memory to load model passed as bytes.
+  AlignedMemory modelMemory_;      // ORDER DEPENDENCY (translators_)
+  /// Shortlist memory passed as bytes.
+  AlignedMemory shortlistMemory_;  // ORDER DEPENDENCY (translators_)
+
+  /// Holds instances of batch translators, just one in case
+  /// of single-threaded application, numWorkers_ in case of multithreaded
+  /// setting.
+  std::vector<BatchTranslator> translators_;  // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
+
+  /// Stores requestId of active request. Used to establish
+  /// ordering among requests and logging/book-keeping.
+
+  size_t requestId_;
+
+  /// Store vocabs representing source and target.
+  std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY (text_processor_)
+
+  /// TextProcessor takes a blob of text and converts into format consumable by
+  /// the batch-translator and annotates sentences and words.
+  TextProcessor text_processor_; // ORDER DEPENDENCY (vocabs_)
+
+  /// Batcher handles generation of batches from a request, subject to
+  /// packing-efficiency and priority optimization heuristics.
+  Batcher batcher_;
+
+  // The following constructs are available providing full capabilities on a non
+  // WASM platform, where one does not have to hide threads.
+#ifndef WASM_COMPATIBLE_SOURCE
+  PCQueue<Batch> pcqueue_; // ORDER DEPENDENCY (numWorkers_)
  std::vector<std::thread> workers_;
-  std::vector<BatchTranslator> translators_;
+#endif // WASM_COMPATIBLE_SOURCE
 };

 } // namespace bergamot
--- a/src/translator/service_base.cpp
+++ b/src/translator/service_base.cpp
@ -1,42 +0,0 @@
-#include "service_base.h"
-
-namespace marian {
-namespace bergamot {
-
-ServiceBase::ServiceBase(Ptr<Options> options)
-    : requestId_(0), vocabs_(std::move(loadVocabularies(options))),
-      text_processor_(vocabs_, options), batcher_(options) {}
-
-std::future<Response> ServiceBase::translate(std::string &&input) {
-  Segments segments;
-  SentenceRanges sourceRanges;
-  text_processor_.process(input, segments, sourceRanges);
-
-  std::promise<Response> responsePromise;
-  auto future = responsePromise.get_future();
-
-  Ptr<Request> request = New<Request>(
-      requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(input),
-      std::move(segments), std::move(sourceRanges), std::move(responsePromise));
-
-  batcher_.addWholeRequest(request);
-  enqueue();
-  return future;
-}
-
-NonThreadedService::NonThreadedService(Ptr<Options> options)
-    : ServiceBase(options),
-      translator_(DeviceId(0, DeviceType::cpu), vocabs_, options) {
-  translator_.initialize();
-}
-
-void NonThreadedService::enqueue() {
-  // Queue single-threaded
-  Batch batch;
-  while (batcher_ >> batch) {
-    translator_.translate(batch);
-  }
-}
-
-} // namespace bergamot
-} // namespace marian
--- a/src/translator/service_base.h
+++ b/src/translator/service_base.h
@ -1,80 +0,0 @@
-#ifndef SRC_BERGAMOT_SERVICE_BASE_H_
-#define SRC_BERGAMOT_SERVICE_BASE_H_
-#include "batch_translator.h"
-#include "batcher.h"
-#include "data/types.h"
-#include "response.h"
-#include "text_processor.h"
-
-#include <queue>
-#include <vector>
-
-namespace marian {
-namespace bergamot {
-// This file describes the base class ServiceBase, and a non-threaded subclass
-// implementing translation functionality called NonThreadedService.
-
-class ServiceBase {
-public:
-  explicit ServiceBase(Ptr<Options> options);
-
-  // Transfers ownership of input string to Service, returns a future containing
-  // an object which provides access to translations, other features like
-  // sentencemappings and (tentatively) alignments.
-  std::future<Response> translate(std::string &&input);
-
-  // Convenience accessor methods to extract these vocabulary outside service.
-  // e.g: For use in decoding histories for marian-decoder replacement.
-  Ptr<Vocab const> sourceVocab() const { return vocabs_.front(); }
-  Ptr<Vocab const> targetVocab() const { return vocabs_.back(); }
-
-  // Wraps up any thread related destruction code.
-  virtual void stop() = 0;
-
-protected:
-  // Enqueue queues a request for translation, this can be synchronous, blocking
-  // or asynchronous and queued in the background.
-  virtual void enqueue() = 0;
-
-  size_t requestId_;
-  std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY
-  TextProcessor text_processor_;         // ORDER DEPENDENCY
-  Batcher batcher_;
-};
-
-class NonThreadedService : public ServiceBase {
-public:
-  explicit NonThreadedService(Ptr<Options> options);
-  void stop() override{};
-
-private:
-  // NonThreaded service overrides unimplemented functions in base-class using
-  // blocking mechanisms.
-  void enqueue() override;
-  // There's a single translator, launched as part of the main process.
-  BatchTranslator translator_;
-};
-
-// Used across Services
-inline std::vector<Ptr<const Vocab>> loadVocabularies(Ptr<Options> options) {
-  // @TODO: parallelize vocab loading for faster startup
-  auto vfiles = options->get<std::vector<std::string>>("vocabs");
-  // with the current setup, we need at least two vocabs: src and trg
-  ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
-  std::vector<Ptr<Vocab const>> vocabs(vfiles.size());
-  std::unordered_map<std::string, Ptr<Vocab>> vmap;
-  for (size_t i = 0; i < vocabs.size(); ++i) {
-    auto m = vmap.emplace(std::make_pair(vfiles[i], Ptr<Vocab>()));
-    if (m.second) { // new: load the vocab
-      m.first->second = New<Vocab>(options, i);
-      m.first->second->load(vfiles[i]);
-    }
-    vocabs[i] = m.first->second;
-  }
-  return vocabs;
-}
-
-} // namespace bergamot
-} // namespace marian
-
-#endif // SRC_BERGAMOT_SERVICE_BASE_H_
--- a/src/translator/text_processor.cpp
+++ b/src/translator/text_processor.cpp
@ -25,9 +25,9 @@ TextProcessor::TextProcessor(std::vector<Ptr<Vocab const>> &vocabs,
  ABORT_IF(max_length_break_ < 0, "max-length-break cannot be < 0");
 }

-void TextProcessor::process(const string_view &query, Segments &segments,
-                            SentenceRanges &sourceRanges) {
+void TextProcessor::process(AnnotatedText &source, Segments &segments) {

+  string_view query = string_view(source.text);
  auto sentenceStream = sentence_splitter_.createSentenceStream(query);
  std::string_view sentenceStringPiece;

@ -42,14 +42,14 @@ void TextProcessor::process(const string_view &query, Segments &segments,
    // after normalization. 0 prevents any empty entries from being added.
    if (segment.size() > 0) {
      // Truncate segment into max_input_size segments.
-      truncate(segment, wordRanges, segments, sourceRanges);
+      truncate(segment, wordRanges, segments, source);
    }
  }
 }

 void TextProcessor::truncate(Segment &segment,
                             std::vector<string_view> &wordRanges,
-                             Segments &segments, SentenceRanges &sourceRanges) {
+                             Segments &segments, AnnotatedText &source) {
  for (size_t offset = 0; offset < segment.size();
       offset += max_length_break_) {
    auto start = segment.begin() + offset;
@ -61,7 +61,7 @@ void TextProcessor::truncate(Segment &segment,
    segments.back().push_back(sourceEosId());

    auto astart = wordRanges.begin() + offset;
-    sourceRanges.addSentence(astart, astart + diff);
+    source.addSentence(astart, astart + diff);
  }
 }

--- a/src/translator/text_processor.h
+++ b/src/translator/text_processor.h
@ -23,8 +23,7 @@ class TextProcessor {
 public:
  explicit TextProcessor(std::vector<Ptr<Vocab const>> &vocabs, Ptr<Options>);

-  void process(const string_view &query, Segments &segments,
-               SentenceRanges &sourceRanges);
+  void process(AnnotatedText &source, Segments &segments);

 private:
  // Tokenizes an input string, returns Words corresponding. Loads the
@ -34,7 +33,7 @@ private:

  // Truncate sentence into max_input_size segments.
  void truncate(Segment &sentence, std::vector<string_view> &tokenRanges,
-                Segments &segments, SentenceRanges &sourceRanges);
+                Segments &segments, AnnotatedText &source);

  // shorthand, used only in truncate()
  const Word sourceEosId() const { return vocabs_->front()->getEosId(); }
--- a/wasm/patch-artifacts-enable-wormhole.sh
+++ b/wasm/patch-artifacts-enable-wormhole.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+#
+# Patches the generated worker script so that each API that compiles or
+# instantiates the wasm module is passed {simdWormhole:true}.
+# The target path is relative: run this from the directory containing wasm/.
+
+echo "Patching wasm artifacts to enable wormhole via APIs that compile and instantiate wasm module"
+# All three substitutions go through one sed invocation so that the .bak file
+# left behind is the unpatched original, not a partially patched copy.
+sed -i.bak \
+  -e 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' \
+  -e 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' \
+  -e 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' \
+  wasm/bergamot-translator-worker.js
+echo "Done"
--- a/wasm/test_page/start_server.sh
+++ b/wasm/test_page/start_server.sh
@ -4,13 +4,6 @@ cp ../../build-wasm/wasm/bergamot-translator-worker.data .
 cp ../../build-wasm/wasm/bergamot-translator-worker.js .
 cp ../../build-wasm/wasm/bergamot-translator-worker.wasm .
 cp ../../build-wasm/wasm/bergamot-translator-worker.worker.js .
-echo "Done----"
-
-echo "Start: Enabling wormhole via APIs that compile and instantiate wasm module-------"
-sed -i.bak 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' bergamot-translator-worker.js
-sed -i.bak 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' bergamot-translator-worker.js
-sed -i.bak 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' bergamot-translator-worker.js
-echo "Done: Enabling wormhole via APIs that compile and instantiate wasm module--------"

 npm install
 echo "Start httpserver"