Merge remote-tracking branch 'upstream/main' into upstream-sync

2024-10-26 05:43:59 +03:00 · 2021-04-14 14:35:47 +02:00 · 2021-04-14 14:35:47 +02:00 · 1574a4586c
commit 1574a4586c
parent f5dffeb5ca e4b58357db
62 changed files with 4302 additions and 755 deletions
--- a/.github/workflows/macos-custom-marian-native.yml
+++ b/.github/workflows/macos-custom-marian-native.yml
@ -1,32 +0,0 @@
 # CI workflow: native macOS build ("With Custom Marian" job).
 name: MacOS Native (Custom)
 on:
  push:
    branches:
      - main
      - ci-sandbox
  pull_request:
    branches:
      - main
      - ci-sandbox
 jobs:
  build-macos:
    name: Native (With Custom Marian)
    runs-on: macos-10.15
    steps:
      # Submodules are fetched recursively as part of the checkout.
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive
      # Out-of-source configure inside build-native/ with default options.
      - name: Configure CMake
        run: |
          mkdir -p build-native
          cd build-native
          cmake ..
      - name: Compile
        working-directory: build-native
        run: make -j2
      # Runs the freshly built app with --version.
      - name: Print versions
        working-directory: build-native
        run: ./app/bergamot-translator-app --version
--- a/.github/workflows/native-custom_marian-mac.yml
+++ b/.github/workflows/native-custom_marian-mac.yml
@ -0,0 +1,33 @@
 # CI workflow: native macOS build using the default CMake configuration.
 name: Native (Custom Marian) MacOS
 on:
  push:
    branches:
      - main
      - ci-sandbox
  pull_request:
    branches:
      - main
      - ci-sandbox
 jobs:
  build-macos:
    name: MacOS
    runs-on: macos-10.15
    steps:
    # Submodules are fetched recursively as part of the checkout.
    - name: Checkout
      uses: actions/checkout@v2
      with:
        submodules: recursive
    # Out-of-source configure inside build/ with default options.
    - name: Configure CMake
      run: |
        mkdir -p build
        cd build
        cmake ..
    - name: Compile
      working-directory: build
      run: make -j2
    # Runs the freshly built app with --version.
    - name: Print versions
      working-directory: build
      run: |
        ./app/bergamot-translator-app --version
--- a/.github/workflows/native-custom_marian-ubuntu.yml
+++ b/.github/workflows/native-custom_marian-ubuntu.yml
@ -0,0 +1,33 @@
 # CI workflow: native Ubuntu build using the default CMake configuration.
 name: Native (Custom Marian) Ubuntu
 on:
  push:
    branches: [ main, ci-sandbox ]
  pull_request:
    branches: [ main, ci-sandbox ]
 jobs:
  # Job id names the platform this job actually runs on (ubuntu-latest);
  # the display name shown in the checks UI is the `name` key below.
  build-ubuntu:
    name: Ubuntu
    runs-on: ubuntu-latest
    steps:
    # Submodules are fetched recursively as part of the checkout.
    - name: Checkout
      uses: actions/checkout@v2
      with:
        submodules: recursive
    # Out-of-source configure inside build/ with default options.
    - name: Configure CMake
      run: |
        mkdir -p build
        cd build
        cmake ..
    - name: Compile
      working-directory: build
      run: make -j2
    # Runs the freshly built app with --version.
    - name: Print versions
      working-directory: build
      run: |
        ./app/bergamot-translator-app --version
--- a/.github/workflows/native-full_marian-mac.yml
+++ b/.github/workflows/native-full_marian-mac.yml
@ -1,4 +1,4 @@
-name: MacOS
+name: Native (Full Marian) MacOS
 on:
  push:
@ -39,16 +39,15 @@ jobs:
          -DUSE_FBGEMM=on \
          -DUSE_SENTENCEPIECE=on \
          -DUSE_STATIC_LIBS=off \
-          -DUSE_WASM_COMPATIBLE_SOURCES=off
+          -DUSE_WASM_COMPATIBLE_SOURCE=off
    - name: Compile
      working-directory: build
      run: make -j2
-    # Removing unit-tests, taken care of in browsermt/marian-dev
+    - name: Run unit tests
-    # - name: Run unit tests
+      working-directory: build
-    # - working-directory: build
+      run: make test
    # - run: make test
    - name: Print versions
      working-directory: build
--- a/.github/workflows/native-full_marian-ubuntu.yml
+++ b/.github/workflows/native-full_marian-ubuntu.yml
@ -1,10 +1,10 @@
-name: Ubuntu
+name: Native (Full Marian) Ubuntu
 on:
  push:
-    branches: [ main, ci-sandbox ]
+    branches: [ main, ci-test ]
  pull_request:
-    branches: [ main, ci-sandbox ]
+    branches: [ main, ci-test ]
 jobs:
  build-ubuntu:
@ -15,7 +15,7 @@ jobs:
          - name: "Ubuntu CPU-only"
            os: ubuntu-latest
            cuda: ""
-            gcc: 7
+            gcc: 8
            cpu: true
            gpu: false
          # GPU Builds are commented out, for bergamot-translator CI runs.
@ -62,7 +62,7 @@ jobs:
    # No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because
    # it is installed together with libprotobuf-dev
    - name: Install dependencies
-      run: sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-all-dev
+      run: sudo apt-get update && sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-all-dev g++-8
    # https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
    - name: Install MKL
@ -97,19 +97,17 @@ jobs:
          -DUSE_FBGEMM=${{ matrix.cpu }} \
          -DUSE_SENTENCEPIECE=on \
          -DUSE_STATIC_LIBS=on \
-          -DUSE_WASM_COMPATIBLE_SOURCES=off
+          -DUSE_WASM_COMPATIBLE_SOURCE=off
    - name: Compile
      working-directory: build
      run: make -j2
-    # Removing unit-tests, taken care of in browsermt/marian-dev
+    - name: Run unit tests
-    # TODO: add a flag to CMake to compile unit tests only on CPU
+      working-directory: build
-    # - name: Run unit tests
+      run: make test
-    #   working-directory: build
+      # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds
-    #   run: make test
+      if: matrix.gpu == false
    #   # GitHub-hosted VMs do not have GPUs, so can not be run in CUDA builds
    #   if: matrix.gpu == false
    - name: Print versions
      working-directory: build
--- a/.github/workflows/macos-custom-marian-wasm.yml
+++ b/.github/workflows/macos-custom-marian-wasm.yml
@ -1,4 +1,4 @@
-name: MacOS WASM (Custom)
+name: WASM (Custom Marian) MacOS
 on:
  push:
@ -8,7 +8,7 @@ on:
 jobs:
  build-wasm:
-    name: WASM (With Custom Marian)
+    name: WASM (Custom Marian) MacOS
    runs-on: macos-10.15
    steps:
@ -35,10 +35,7 @@ jobs:
      - name: Instantiate simd wormhole
        working-directory: build-wasm
-        run: |
+        run: bash ../wasm/patch-artifacts-enable-wormhole.sh
          sed -i.bak 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
          sed -i.bak 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
          sed -i.bak 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
      - name: Check artifacts
        working-directory: build-wasm
--- a/.github/workflows/wasm-custom_marian-ubuntu.yml
+++ b/.github/workflows/wasm-custom_marian-ubuntu.yml
@ -0,0 +1,51 @@
 # CI workflow: WebAssembly build on Ubuntu via the Emscripten toolchain.
 name: WASM (Custom Marian) Ubuntu
 on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
 jobs:
  build-wasm:
    name: WASM (Custom Marian) Ubuntu
    runs-on: ubuntu-latest
    steps:
      # The toolchain is installed before checkout; `emcc -v` confirms it is
      # on PATH.
      - name: Setup Emscripten toolchain
        uses: mymindstorm/setup-emsdk@v8
      - name: Verify Emscripten setup
        run: emcc -v
      # Submodules are fetched recursively as part of the checkout.
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive
      # Out-of-source configure inside build-wasm/ with COMPILE_WASM enabled.
      - name: Configure builds
        run: |
          mkdir -p build-wasm
          cd build-wasm
          emcmake cmake -DCOMPILE_WASM=on ..
      - name: Compile
        working-directory: build-wasm
        run: emmake make -j2
      # Runs the repository's wormhole patch script from the build directory.
      - name: Instantiate simd wormhole
        working-directory: build-wasm
        run: bash ../wasm/patch-artifacts-enable-wormhole.sh
      # Fails the job unless at least one .wasm and one .js file exist in
      # build-wasm/wasm.
      - name: Check artifacts
        working-directory: build-wasm
        run: |
          export WASM_ARTIFACTS_DIR=wasm
          ls -all ${WASM_ARTIFACTS_DIR}
          if ls ${WASM_ARTIFACTS_DIR}/*.wasm &>/dev/null && ls ${WASM_ARTIFACTS_DIR}/*.js &>/dev/null
          then
            echo "Artifacts Successfully Generated"
          else
            echo "Failure: Artifacts Not Present"
            exit 1
          fi
--- a/3rd_party/marian-dev
+++ b/3rd_party/marian-dev
@ -1 +1 @@
-Subproject commit 8ddb73fad1001ae4c1697d2514ac1e5bd43e2ed3
+Subproject commit 0f0bcf99626c660227bb68b76267a8d2451e7172
--- a/1
+++ b/1
@ -0,0 +1 @@
 v0.0.0
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -13,29 +13,21 @@ include(CMakeDependentOption)
 # Project specific cmake options
 option(COMPILE_WASM "Compile for WASM" OFF)
-option(USE_WASM_COMPATIBLE_SOURCES "Use wasm compatible sources" ON)
+option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" ON)
 option(COMPILE_TESTS "Compile bergamot-tests" OFF)
 SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be packaged (pre-loaded) in wasm builds")
-# Set marian (3rd party submodule) cmake options to compile for this project
+# Set 3rd party submodule specific cmake options for this project
 SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
 SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
 SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
-if (USE_WASM_COMPATIBLE_SOURCES)
+if (USE_WASM_COMPATIBLE_SOURCE)
  # If using wasm compatible marian then set following flags
  SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
  SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
-  SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only")
+  # Setting the ssplit-cpp submodule specific cmake options for wasm
-  SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support")
+  SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
  SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds")
  SET(COMPILE_WITHOUT_EXCEPTIONS ON CACHE BOOL "Compile without exceptions")
  if(COMPILE_WASM)
    # Set WORMHOLE to ON for marian whenever compiling for wasm platform
    SET(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")
 endif()
 endif()
 # Set ssplit (3rd party submodule) cmake options to compile for this project
 CMAKE_DEPENDENT_OPTION(USE_INTERNAL_PCRE2 "Use internal PCRE2 instead of system PCRE2" ON
                       "USE_WASM_COMPATIBLE_SOURCES" OFF)
 # Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html
 # Ensures the submodules are set correctly during a build.
@ -60,11 +52,17 @@ if(NOT COMPILE_WASM)
 endif()
 if(COMPILE_WASM)
  set(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")
  list(APPEND WASM_COMPILE_FLAGS -pthread -O3 -g2 -fPIC -mssse3 -msimd128)
  list(APPEND WASM_COMPILE_FLAGS "SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=0" "SHELL:-s DISABLE_EXCEPTION_CATCHING=1" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1")
  list(APPEND WASM_COMPILE_FLAGS -Wno-error=pthreads-mem-growth)
 endif(COMPILE_WASM)
 # Needs to be enabled before including the folder containing tests (src/tests)
 if(COMPILE_TESTS)
    enable_testing()
 endif(COMPILE_TESTS)
 add_subdirectory(3rd_party)
 add_subdirectory(src)
@ -73,3 +71,4 @@ if(COMPILE_WASM)
 else()
  add_subdirectory(app)
 endif(COMPILE_WASM)
--- a/Doxyfile.in
+++ b/Doxyfile.in
--- a/README.md
+++ b/README.md
@ -77,17 +77,16 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.
            emmake make -j
            ```
        The wasm artifacts (.js and .wasm files) will be available in `wasm` folder of build directory ("build-wasm" in this case).
    3. Enable SIMD Wormhole via Wasm instantiation API in generated artifacts
        ```bash
        bash ../wasm/patch-artifacts-enable-wormhole.sh
        ```
        sed -i.bak 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
        sed -i.bak 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
        sed -i.bak 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' wasm/bergamot-translator-worker.js
        ```
    The artefacts (.js and .wasm files) will be available in `wasm` folder of build directory ("build-wasm" in this case).
 #### Recompiling
-As long as you don't update any submodule, just follow steps in `4.ii` to recompile.\
+As long as you don't update any submodule, just follow steps in `4.ii` and `4.iii` to recompile.\
-If you update a submodule, execute following command before executing steps in `4.ii` to recompile.
+If you update a submodule, execute following command before executing steps in `4.ii` and `4.iii` to recompile.
 ```bash
 git submodule update --init --recursive
 ```
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@ -1,10 +1,16 @@
-add_executable(bergamot-translator-app main.cpp)
+add_executable(bergamot-translator-app bergamot-translator-app.cpp)
 target_link_libraries(bergamot-translator-app PRIVATE bergamot-translator)
-if (NOT USE_WASM_COMPATIBLE_SOURCES)
+add_executable(bergamot-translator-app-bytearray bergamot-translator-app-bytearray.cpp)
-    add_executable(service-cli main-mts.cpp)
+target_link_libraries(bergamot-translator-app-bytearray PRIVATE bergamot-translator)
 if (NOT USE_WASM_COMPATIBLE_SOURCE)
    add_executable(service-cli service-cli.cpp)
    target_link_libraries(service-cli PRIVATE bergamot-translator)
    add_executable(service-cli-bytearray service-cli-bytearray.cpp)
    target_link_libraries(service-cli-bytearray PRIVATE bergamot-translator)
    add_executable(marian-decoder-new marian-decoder-new.cpp)
    target_link_libraries(marian-decoder-new PRIVATE bergamot-translator)
 endif()
--- a/app/bergamot-translator-app-bytearray.cpp
+++ b/app/bergamot-translator-app-bytearray.cpp
@ -0,0 +1,42 @@
 /*
 * bergamot-translator-app-bytearray.cpp
 *
 * An example application to demonstrate the use of Bergamot translator.
 *
 */
 #include <iostream>
 #include "TranslationModel.h"
 #include "translator/parser.h"
 #include "translator/byte_array_util.h"
 int main(int argc, char **argv) {
  // Create a configParser and load command line parameters into a YAML config
  // string.
  auto configParser = marian::bergamot::createConfigParser();
  auto options = configParser.parseOptions(argc, argv, true);
  std::string config = options->asYamlString();
  // Route the config string to construct marian model through TranslationModel
  TranslationModel model(config, marian::bergamot::getModelMemoryFromConfig(options));
  TranslationRequest translationRequest;
  std::vector<std::string> texts;
  for (std::string line; std::getline(std::cin, line);) {
        texts.emplace_back(line);
  }
  auto results = model.translate(std::move(texts), translationRequest);
  // Resolve the future and get the actual result
  //std::vector<TranslationResult> results = futureResults.get();
  for (auto &result : results) {
    std::cout << result.getTranslatedText() << std::endl;
  }
  return 0;
 }
--- a/app/bergamot-translator-app.cpp
+++ b/app/bergamot-translator-app.cpp
@ -0,0 +1,43 @@
 /*
 * bergamot-translator-app.cpp
 *
 * An application which accepts line separated texts in stdin and returns translated ones in stdout.
 * It is convenient for batch processing and can be used with tools like SacreBLEU.
 *
 */
 #include <iostream>
 #include <string>
 #include "TranslationModel.h"
 #include "translator/parser.h"
 int main(int argc, char **argv) {
  // Create a configParser and load command line parameters into a YAML config
  // string.
  auto configParser = marian::bergamot::createConfigParser();
  auto options = configParser.parseOptions(argc, argv, true);
  std::string config = options->asYamlString();
  // Route the config string to construct marian model through TranslationModel
  TranslationModel model(config);
  TranslationRequest translationRequest;
  std::vector<std::string> texts;
  for (std::string line; std::getline(std::cin, line);) {
        texts.emplace_back(line);
  }
  auto results = model.translate(std::move(texts), translationRequest);
  // Resolve the future and get the actual result
  //std::vector<TranslationResult> results = futureResults.get();
  for (auto &result : results) {
    std::cout << result.getTranslatedText() << std::endl;
  }
  return 0;
 }
--- a/app/main-mts.cpp
+++ b/app/main-mts.cpp
@ -1,33 +0,0 @@
 #include <cstdlib>
 #include <future>
 #include <iostream>
 #include <sstream>
 #include "common/definitions.h"
 #include "common/utils.h"
 #include "marian.h"
 #include "translator/parser.h"
 #include "translator/response.h"
 #include "translator/service.h"
 int main(int argc, char *argv[]) {
  auto cp = marian::bergamot::createConfigParser();
  auto options = cp.parseOptions(argc, argv, true);
  marian::bergamot::Service service(options);
  // Read a large input text blob from stdin
  std::ostringstream std_input;
  std_input << std::cin.rdbuf();
  std::string input = std_input.str();
  using marian::bergamot::Response;
  // Wait on future until Response is complete
  std::future<Response> responseFuture = service.translate(std::move(input));
  responseFuture.wait();
  Response response = responseFuture.get();
  std::cout << response.translation() << std::endl;
  // Stop Service.
  service.stop();
  return 0;
 }
--- a/app/main.cpp
+++ b/app/main.cpp
@ -1,67 +0,0 @@
 /*
 * main.cpp
 *
 * An example application to demonstrate the use of Bergamot translator.
 *
 */
 #include <iostream>
 #include "AbstractTranslationModel.h"
 #include "TranslationRequest.h"
 #include "TranslationResult.h"
 #include "translator/parser.h"
 int main(int argc, char **argv) {
  // Create a configParser and load command line parameters into a YAML config
  // string.
  auto configParser = marian::bergamot::createConfigParser();
  auto options = configParser.parseOptions(argc, argv, true);
  std::string config = options->asYamlString();
  // Route the config string to construct marian model through
  // AbstractTranslationModel
  std::shared_ptr<AbstractTranslationModel> model =
      AbstractTranslationModel::createInstance(config);
  TranslationRequest translationRequest;
  std::vector<std::string> texts;
  texts.emplace_back(
      "The Bergamot project will add and improve client-side machine "
      "translation in a web browser.  Unlike current cloud-based "
      "options, running directly on users’ machines empowers citizens to "
      "preserve their privacy and increases the uptake of language "
      "technologies in Europe in various sectors that require "
      "confidentiality.");
  texts.emplace_back(
      "Free software integrated with an open-source web "
      "browser, such as Mozilla Firefox, will enable bottom-up adoption "
      "by non-experts, resulting in cost savings for private and public "
      "sector users who would otherwise procure translation or operate "
      "monolingually.  Bergamot is a consortium coordinated by the "
      "University of Edinburgh with partners Charles University in "
      "Prague, the University of Sheffield, University of Tartu, and "
      "Mozilla.");
  auto results = model->translate(std::move(texts), translationRequest);
  // Resolve the future and get the actual result
  //std::vector<TranslationResult> results = futureResults.get();
  for (auto &result : results) {
    std::cout << "[original]: " << result.getOriginalText() << std::endl;
    std::cout << "[translated]: " << result.getTranslatedText() << std::endl;
    auto mappings = result.getSentenceMappings();
    for (auto &p : mappings) {
      std::string_view src = p.first;
      std::string_view tgt = p.second;
      std::cout << " [src Sentence]: " << src << std::endl;
      std::cout << " [tgt Sentence]: " << tgt << std::endl;
    }
    std::cout << std::endl;
  }
  return 0;
 }
--- a/app/marian-decoder-new.cpp
+++ b/app/marian-decoder-new.cpp
@ -14,25 +14,11 @@
 #include "translator/response.h"
 #include "translator/service.h"
-void marian_decoder_minimal(const marian::Histories &histories,
+void marian_decoder_minimal(const marian::bergamot::Response &response,
                            marian::Ptr<marian::Vocab const> targetVocab,
                            marian::Ptr<marian::Options> options) {
-
+  // We are no longer marian-decoder compatible. Server ideas are on hold.
-  bool doNbest = options->get<bool>("n-best");
+  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
-  auto collector =
+    std::cout << response.target.sentence(sentenceIdx) << "\n";
      marian::New<marian::OutputCollector>(options->get<std::string>("output"));
  // There is a dependency of vocabs here.
  auto printer = marian::New<marian::OutputPrinter>(options, targetVocab);
  if (options->get<bool>("quiet-translation"))
    collector->setPrintingStrategy(marian::New<marian::QuietPrinting>());
  for (auto &history : histories) {
    std::stringstream best1;
    std::stringstream bestn;
    printer->print(history, best1, bestn);
    collector->Write((long)history->getLineNum(), best1.str(), bestn.str(),
                     doNbest);
  }
 }
@ -53,9 +39,8 @@ int main(int argc, char *argv[]) {
  responseFuture.wait();
  const Response &response = responseFuture.get();
-  marian_decoder_minimal(response.histories(), service.targetVocab(), options);
+  marian_decoder_minimal(response, options);
  LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed());
  service.stop();
  return 0;
 }
--- a/app/service-cli-bytearray.cpp
+++ b/app/service-cli-bytearray.cpp
@ -0,0 +1,86 @@
 #include <cstdlib>
 #include <future>
 #include <iostream>
 #include <sstream>
 #include "common/definitions.h"
 #include "common/utils.h"
 #include "marian.h"
 #include "translator/parser.h"
 #include "translator/response.h"
 #include "translator/service.h"
 #include "translator/byte_array_util.h"
 // Translates all of stdin through a Service constructed from in-memory model
 // and shortlist bytes, then prints the text, per-sentence alignments and
 // quality scores of the response to stdout.
 int main(int argc, char *argv[]) {
  auto cp = marian::bergamot::createConfigParser();
  auto options = cp.parseOptions(argc, argv, true);
  // Prepare memories for model and shortlist
  marian::bergamot::AlignedMemory modelBytes = marian::bergamot::getModelMemoryFromConfig(options);
  marian::bergamot::AlignedMemory shortlistBytes = marian::bergamot::getShortlistMemoryFromConfig(options);
  marian::bergamot::Service service(options, std::move(modelBytes), std::move(shortlistBytes));
  // Read a large input text blob from stdin
  std::ostringstream std_input;
  std_input << std::cin.rdbuf();
  std::string input = std_input.str();
  using marian::bergamot::Response;
  // Wait on future until Response is complete
  std::future<Response> responseFuture = service.translate(std::move(input));
  responseFuture.wait();
  Response response = responseFuture.get();
  std::cout << "[original]: " << response.source.text << '\n';
  std::cout << "[translated]: " << response.target.text << '\n';
  // Unsigned index avoids a signed/unsigned comparison in the loop condition.
  // NOTE(review): response.size() is presumably size_t - confirm.
  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
    std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx)
              << '\n';
    std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx)
              << '\n';
    std::cout << "Alignments" << '\n';
    // (target word index, probability) for one alignment point.
    typedef std::pair<size_t, float> Point;
    // One bucket of points per source word of this sentence.
    std::vector<std::vector<Point>> aggregate(
        response.source.numWords(sentenceIdx));
    // Group the alignment points by their source word index.
    auto &alignments = response.alignments[sentenceIdx];
    for (auto &p : alignments) {
      aggregate[p.src].emplace_back(p.tgt, p.prob);
    }
    // Print each source word followed by its aligned target words.
    for (size_t src = 0; src < aggregate.size(); src++) {
      std::cout << response.source.word(sentenceIdx, src) << ": ";
      for (auto &p : aggregate[src]) {
        std::cout << response.target.word(sentenceIdx, p.first) << "("
                  << p.second << ") ";
      }
      std::cout << '\n';
    }
    // Handle quality.
    auto &quality = response.qualityScores[sentenceIdx];
    std::cout << "Quality: whole(" << quality.sequence
              << "), tokens below:" << '\n';
    // Word scores are printed space-separated, paired with target words in
    // iteration order.
    size_t wordIdx = 0;
    bool first = true;
    for (auto &p : quality.word) {
      if (first) {
        first = false;
      } else {
        std::cout << " ";
      }
      std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p
                << ")";
      wordIdx++;
    }
    std::cout << '\n';
  }
  std::cout << "--------------------------\n";
  std::cout << '\n';
  return 0;
 }
--- a/app/service-cli.cpp
+++ b/app/service-cli.cpp
@ -0,0 +1,80 @@
 #include <cstdlib>
 #include <future>
 #include <iostream>
 #include <sstream>
 #include "common/definitions.h"
 #include "common/utils.h"
 #include "marian.h"
 #include "translator/parser.h"
 #include "translator/response.h"
 #include "translator/service.h"
 // Translates all of stdin through a Service built from the command-line
 // options, then prints the text, per-sentence alignments and quality scores
 // of the response to stdout.
 int main(int argc, char *argv[]) {
  auto cp = marian::bergamot::createConfigParser();
  auto options = cp.parseOptions(argc, argv, true);
  marian::bergamot::Service service(options);
  // Read a large input text blob from stdin
  std::ostringstream std_input;
  std_input << std::cin.rdbuf();
  std::string input = std_input.str();
  using marian::bergamot::Response;
  // Wait on future until Response is complete
  std::future<Response> responseFuture = service.translate(std::move(input));
  responseFuture.wait();
  Response response = responseFuture.get();
  std::cout << "[original]: " << response.source.text << '\n';
  std::cout << "[translated]: " << response.target.text << '\n';
  // Unsigned index avoids a signed/unsigned comparison in the loop condition.
  // NOTE(review): response.size() is presumably size_t - confirm.
  for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
    std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx)
              << '\n';
    std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx)
              << '\n';
    std::cout << "Alignments" << '\n';
    // (target word index, probability) for one alignment point.
    typedef std::pair<size_t, float> Point;
    // One bucket of points per source word of this sentence.
    std::vector<std::vector<Point>> aggregate(
        response.source.numWords(sentenceIdx));
    // Group the alignment points by their source word index.
    auto &alignments = response.alignments[sentenceIdx];
    for (auto &p : alignments) {
      aggregate[p.src].emplace_back(p.tgt, p.prob);
    }
    // Print each source word followed by its aligned target words.
    for (size_t src = 0; src < aggregate.size(); src++) {
      std::cout << response.source.word(sentenceIdx, src) << ": ";
      for (auto &p : aggregate[src]) {
        std::cout << response.target.word(sentenceIdx, p.first) << "("
                  << p.second << ") ";
      }
      std::cout << '\n';
    }
    // Handle quality.
    auto &quality = response.qualityScores[sentenceIdx];
    std::cout << "Quality: whole(" << quality.sequence
              << "), tokens below:" << '\n';
    // Word scores are printed space-separated, paired with target words in
    // iteration order.
    size_t wordIdx = 0;
    bool first = true;
    for (auto &p : quality.word) {
      if (first) {
        first = false;
      } else {
        std::cout << " ";
      }
      std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p
                << ")";
      wordIdx++;
    }
    std::cout << '\n';
  }
  std::cout << "--------------------------\n";
  std::cout << '\n';
  return 0;
 }
--- a/doc/.gitignore
+++ b/doc/.gitignore
@ -0,0 +1,4 @@
 api
 build
 doxygen
 venv
--- a/doc/README.md
+++ b/doc/README.md
@ -0,0 +1,51 @@
 # Marian NMT code documentation and library API
 This directory contains code documentation and library API for developers of Marian NMT.
 The documentation is generated using
 [Sphinx](https://www.sphinx-doc.org/en/master/usage/quickstart.html) +
 [Breathe](https://breathe.readthedocs.io/en/latest/directives.html) +
 [Doxygen](http://www.doxygen.nl/manual/docblocks.html) +
 [Exhale](https://exhale.readthedocs.io/en/latest/usage.html).
 The documentation source code is written in `.rst` or `.md` files with special directives that allow
 referencing C++ source code and documentation. The source documents are then built into static
 HTML pages.
 ## Installation
 On Ubuntu 20.04, install the following packages:
    sudo apt-get install python3 python3-pip python3-setuptools doxygen
 Then set up a Python environment and install modules:
    pip3 install virtualenv
    virtualenv venv -p python3
    source venv/bin/activate
    pip install -r requirements.txt
 Documentation building should also work on Windows, but it has not been tested.
 ## Generation
 The documentation can be generated by running:
    make html
 The website will be generated into `build/html` and is accessible by opening _index.html_ in your
 browser.
 Directories:
 - `build` - automatically generated output directory for HTML documentation
 - `doxygen` - automatically generated Doxygen XML files
 - `api` - automatic library API generated with Exhale
 - `.rst` and `.md` files in this directory and its subdirectories are documentation source files
 - `_static` - custom CSS and JavaScript files
 ## Writing documentation
 To be documented...
--- a/doc/_static/css/custom.css
+++ b/doc/_static/css/custom.css
@ -0,0 +1,4 @@
 /* Adds a 5px solid bottom border to the .wy-nav-side element. */
 .wy-body-for-nav > .wy-grid-for-nav > .wy-nav-side {
    border-bottom: 5px solid #28bbee;
    /*background-color: #494d55;*/
 }
--- a/doc/conf.py
+++ b/doc/conf.py
@ -0,0 +1,120 @@
 # Configuration file for the Sphinx documentation builder.
 #
 # This file only contains a selection of the most common options. For a full
 # list see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
 # -- Path setup --------------------------------------------------------------
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import os
 import datetime
 import sys
 sys.path.insert(0, os.path.abspath('.'))
 # -- Project information -----------------------------------------------------
 project = 'Bergamot Translator'
 copyright = '2021, Bergamot Translator Team'
 author = 'Bergamot Translator Team'
 # The full version, including alpha/beta/rc tags
 # TODO: add GitHub commit hash to the version
 # BERGAMOT_VERSION is looked up one directory above the one holding this file.
 version_file = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    'BERGAMOT_VERSION',
 )
 with open(os.path.abspath(version_file)) as f:
    version = f.read().strip()
 # release is the version followed by a space and today's date.
 release = '{} {}'.format(version, datetime.date.today())
 # -- General configuration ---------------------------------------------------
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
    'sphinx.ext.imgmath',
    'sphinx.ext.todo',
    'breathe',
    'exhale',
    'recommonmark',
 ]
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = [
    'build',
    'doxygen',
    'venv',
    'README.md',
 ]
 # -- Options for HTML output -------------------------------------------------
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
 html_theme = 'sphinx_rtd_theme'
 htmlhelp_basename = 'bergamot-translator'
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
 html_css_files = ['css/custom.css']
 # The base URL which points to the root of the HTML documentation
 html_baseurl = 'http://jerinphilip.github.io/bergamot-translator'
 # -- Extension configuration -------------------------------------------------
 breathe_projects = { 'bergamot-translator': './doxygen/xml' }
 breathe_default_project = 'bergamot-translator'
 doxygen_config = """
 INPUT                = ../src
 EXCLUDE             += ../3rd_party
 EXCLUDE             += ../src/tests
 EXCLUDE_PATTERNS     = *.md *.txt
 FILE_PATTERNS       += *.cu
 EXTENSION_MAPPING   += cu=C++ inc=C++
 ENABLE_PREPROCESSING = YES
 JAVADOC_AUTOBRIEF    = YES
 WARN_IF_UNDOCUMENTED = NO
 """
 exhale_args = {
    'containmentFolder'     : './api',
    'rootFileName'          : 'library_index.rst',
    'rootFileTitle'         : 'Library API',
    'doxygenStripFromPath'  : '..',
    'createTreeView'        : True,
    'exhaleExecutesDoxygen' : True,
    'exhaleDoxygenStdin'    : doxygen_config.strip(),
 }
 primary_domain = 'cpp'
 highlight_language = 'cpp'
 # A trick to include markdown files from outside the source directory using
 # 'mdinclude'. Warning: all other markdown files not included via 'mdinclude'
 # will be rendered using recommonmark as recommended by Sphinx
 from m2r import MdInclude
 def setup(app):
    """Register the m2r config values and the ``mdinclude`` directive on ``app``."""
    # from m2r to make `mdinclude` work: each option is registered with a
    # default of False and the 'env' rebuild setting.
    m2r_options = (
        'no_underscore_emphasis',
        'm2r_parse_relative_links',
        'm2r_anonymous_references',
        'm2r_disable_inline_math',
    )
    for option_name in m2r_options:
        app.add_config_value(option_name, False, 'env')
    app.add_directive('mdinclude', MdInclude)
--- a/doc/index.rst
+++ b/doc/index.rst
@ -0,0 +1,38 @@
 Welcome to Bergamot Translator's documentation!
 ===============================================
 |buildcpu| |tests| |license|
 Bergamot translator provides a unified API for (Marian NMT framework based)
 neural machine translation functionality in accordance with the Bergamot
 project that focuses on improving client-side machine translation in a web
 browser.
 This is developer documentation. 
 .. toctree::
   :maxdepth: 2
   :caption: Contents:
   marian-integration
   api/library_index
 Indices and tables
 ------------------
 * :ref:`genindex`
 .. |buildcpu| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/browsermt/job/bergamot-translator.svg?label=CPU%20Build
   :target: http://vali.inf.ed.ac.uk/jenkins/job/bergamot-translator
   :alt: CPU build status
 .. |tests| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/bergamot-translator-regression-tests.svg?label=Tests
   :target: http://vali.inf.ed.ac.uk/jenkins/job/bergamot-translator-regression-tests/
   :alt: Tests status
 .. |license| image:: https://img.shields.io/badge/License-MPL%202.0-brightgreen.svg
   :target: https://opensource.org/licenses/MPL-2.0
   :alt: License: MPL
--- a/doc/make.bat
+++ b/doc/make.bat
@ -0,0 +1,35 @@
@ECHO OFF
 pushd %~dp0
 REM Command file for Sphinx documentation.
 REM Usage: make.bat <target>   (run without a target to list the targets)
 REM Honour a caller-provided SPHINXBUILD, otherwise fall back to the one on PATH.
 if "%SPHINXBUILD%" == "" (
 	set SPHINXBUILD=sphinx-build
 )
 REM index.rst lives next to this script, so the Sphinx source directory is
 REM this directory itself.
 set SOURCEDIR=.
 set BUILDDIR=build
 if "%1" == "" goto help
 REM Probe for sphinx-build; errorlevel 9009 means the command was not found.
 %SPHINXBUILD% >NUL 2>NUL
 if errorlevel 9009 (
 	echo.
 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 	echo.installed, then set the SPHINXBUILD environment variable to point
 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 	echo.may add the Sphinx directory to PATH.
 	echo.
 	echo.If you don't have Sphinx installed, grab it from
 	echo.http://sphinx-doc.org/
 	exit /b 1
 )
 REM Forward the requested target to sphinx-build's make mode.
 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 goto end
 :help
 %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 :end
 popd
--- a/doc/marian-integration.md
+++ b/doc/marian-integration.md
@ -1,4 +1,4 @@
-# Marian Integration
+# Building marian code for bergamot
 This document summarizes the minimal build instructions developed for the
 marian-code powering bergamot-translator.
@ -10,7 +10,7 @@ $ git clone https://github.com/browsermt/bergamot-translator
 $ cd bergamot-translator
 $ mkdir build
 $ cd build
-$ cmake .. -DUSE_WASM_COMPATIBLE_SOURCES=off -DCMAKE_BUILD_TYPE=Release
+$ cmake .. -DUSE_WASM_COMPATIBLE_SOURCE=off -DCMAKE_BUILD_TYPE=Release
 $ make -j
 ```
--- a/doc/references.bib
+++ b/doc/references.bib
--- a/doc/requirements.txt
+++ b/doc/requirements.txt
@ -0,0 +1,6 @@
 sphinx==2.4.4
 breathe==4.13.0
 exhale
 sphinx_rtd_theme
 recommonmark
 m2r
--- a/src/AbstractTranslationModel.h
+++ b/src/AbstractTranslationModel.h
@ -1,68 +0,0 @@
 /*
 * AbstractTranslationModel.h
 *
 * An interface for a translation model for translating a plain (without any
 * markups and emojis) UTF-8 encoded text. The model supports translation from 1
 * source language to 1 target language. There can be different implementations
 * of this interface.
 */
 #ifndef SRC_TRANSLATOR_ABSTRACTTRANSLATIONMODEL_H_
 #define SRC_TRANSLATOR_ABSTRACTTRANSLATIONMODEL_H_
 #include <future>
 #include <memory>
 #include <string>
 #include <vector>
 #include "TranslationRequest.h"
 #include "TranslationResult.h"
 /* An interface for a translation model for translating a plain (without any
 * markups and emojis) UTF-8 encoded text. The model supports translation from 1
 * source language to 1 target language.
 *
 * Concrete models are obtained through createInstance(); callers interact with
 * them only through the pure-virtual translate() and isAlignmentSupported().
 */
 class AbstractTranslationModel {
 public:
  /* A Factory method to create and return an instance of an implementation of
   * AbstractTranslationModel. The instance is created using translation model
   * configuration provided as yaml-formatted string.
   *
   * config: the translation model configuration, as a yaml-formatted string.
   * Returns a shared pointer to the newly created model.
   */
  static std::shared_ptr<AbstractTranslationModel>
  createInstance(const std::string &config);
  AbstractTranslationModel() = default;
  // Virtual so that implementations are destroyed correctly through a pointer
  // to this interface.
  virtual ~AbstractTranslationModel() = default;
  /* This method performs translation on a list of (UTF-8 encoded) texts and
   * returns a list of results in the same order. Each text entry can either be
   * a word, a phrase, a sentence or a list of sentences and should contain
   * plain text (without any markups or emojis). Additional information related
   * to the translated text can be requested via TranslationRequest which is
   * applied equally to each text entry.
   *
   * The translated text corresponding to each text entry and the additional
   * information (as specified in the TranslationRequest) is encapsulated and
   * returned in TranslationResult.
   *
   * The API splits each text entry into sentences internally, which are then
   * translated independent of each other. The translated sentences are then
   * joined together and returned in TranslationResult. Please refer to the
   * TranslationRequest class to find out what additional information can be
   * requested. The alignment information can only be requested if the model
   * supports it (check isAlignmentSupported() API).
   *
   * The texts argument will become empty after the execution of this API (each
   * entry of texts list will be moved to its corresponding TranslationResult
   * object).
   *
   * texts:   the entries to translate; taken by rvalue reference and consumed.
   * request: the additional information wanted for every entry.
   * Returns one TranslationResult per entry of texts, in the same order.
   */
  virtual std::vector<TranslationResult>
  translate(std::vector<std::string> &&texts, TranslationRequest request) = 0;
  /* Check if the model can provide alignment information b/w original and
   * translated text. Should be consulted before requesting alignments through
   * a TranslationRequest. */
  virtual bool isAlignmentSupported() const = 0;
 };
 #endif /* SRC_TRANSLATOR_ABSTRACTTRANSLATIONMODEL_H_ */
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -1 +1,7 @@
 # The translator library is always built.
 add_subdirectory(translator)
 # Unit tests are opt-in through COMPILE_TESTS.
 # Catch currently comes from marian sources.
 if(COMPILE_TESTS)
  add_subdirectory(tests)
 endif()
--- a/src/translator/TranslationModel.h
+++ b/src/translator/TranslationModel.h
@ -1,7 +1,7 @@
 /*
 * TranslationModel.h
 *
- *  A implementation of AbstractTranslationModel interface.
+ * Main interface for translation API.
 */
 #ifndef SRC_TRANSLATOR_TRANSLATIONMODEL_H_
@ -15,19 +15,28 @@
 #include "3rd_party/marian-dev/src/common/options.h"
 // All local project includes
-#include "AbstractTranslationModel.h"
+#include "TranslationRequest.h"
-#include "translator/service_base.h"
+#include "TranslationResult.h"
 #include "translator/definitions.h"
 #include "translator/service.h"
 /* A Translation model that translates a plain (without any markups and emojis)
 * UTF-8 encoded text. This implementation supports translation from 1 source
 * language to 1 target language.
 */
-class TranslationModel : public AbstractTranslationModel {
+class TranslationModel {
 public:
  /* Construct the model using the model configuration options as yaml-formatted
   * string
   */
-  TranslationModel(const std::string &config);
+  /**
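   * @param config Marian yml config file in the form of a string
   * @param modelMemory optional byte array (aligned to 256!!!) that contains
   * the bytes of a model.bin.
   * @param shortlistMemory optional byte array (aligned to 64) that contains
   * the bytes of a binary shortlist.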
   */
  TranslationModel(const std::string &config,
                   marian::bergamot::AlignedMemory modelMemory = marian::bergamot::AlignedMemory(),
                   marian::bergamot::AlignedMemory shortlistMemory = marian::bergamot::AlignedMemory());
  ~TranslationModel();
@ -56,16 +65,16 @@ public:
   * object).
   */
  std::vector<TranslationResult> translate(std::vector<std::string> &&texts,
-                                           TranslationRequest request) override;
+                                           TranslationRequest request);
  /* Check if the model can provide alignment information b/w original and
   * translated text. */
-  bool isAlignmentSupported() const override;
+  bool isAlignmentSupported() const;
 private:
  // Model configuration options
  std::shared_ptr<marian::Options> configOptions_; // ORDER DEPENDENCY
-  marian::bergamot::NonThreadedService service_;   // ORDER DEPENDENCY
+  marian::bergamot::Service service_;              // ORDER DEPENDENCY
 };
 #endif /* SRC_TRANSLATOR_TRANSLATIONMODEL_H_ */
--- a/src/TranslationRequest.h
+++ b/src/TranslationRequest.h
@ -2,7 +2,7 @@
 * TranslationRequest.h
 *
 *  This file defines the translation request class to be used in
- * AbstractTranslationModel::translate() API.
+ *  TranslationModel::translate() API.
 */
 #ifndef SRC_TRANSLATOR_TRANSLATIONREQUEST_H_
--- a/src/TranslationResult.h
+++ b/src/TranslationResult.h
@ -1,7 +1,7 @@
 /*
 * TranslationResult.h
 *
- * The class that represents the result of AbstractTranslationModel::translate()
+ * The class that represents the result of TranslationModel::translate()
 * API for each of its text entry and TranslationRequest.
 */
@ -13,7 +13,7 @@
 #include "QualityScore.h"
-/* This class represents the result of AbstractTranslationModel::translate() API
+/* This class represents the result of TranslationModel::translate() API
 * for each of its text entry and TranslationRequest.
 */
 class TranslationResult {
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@ -0,0 +1,22 @@
 # Unit tests
 # Each entry <name> is compiled from <name>.cpp together with the shared
 # Catch entry point (run_tests.cpp) into an executable called run_<name>.
 set(UNIT_TESTS
    annotation_tests
 )
 foreach(test ${UNIT_TESTS})
  add_executable("run_${test}" run_tests.cpp "${test}.cpp")
  target_include_directories("run_${test}" PRIVATE ${CATCH_INCLUDE_DIR} "${CMAKE_SOURCE_DIR}/src")
  if(CUDA_FOUND)
    target_link_libraries("run_${test}" ${EXT_LIBS} marian ${EXT_LIBS} marian_cuda ${EXT_LIBS} Catch bergamot-translator)
  else(CUDA_FOUND)
    target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch bergamot-translator)
  endif(CUDA_FOUND)
  # CMake variable names and command keywords are case-sensitive: the compiler
  # check must use MSVC and the scope keyword must be PUBLIC.
  if(MSVC)
    # Disable C4305: truncation from 'double' to '_Ty'
    target_compile_options("run_${test}" PUBLIC /wd4305)
  endif(MSVC)
  # Register the executable with CTest under the bare test name.
  add_test(NAME ${test} COMMAND "run_${test}")
 endforeach(test)
--- a/src/tests/annotation_tests.cpp
+++ b/src/tests/annotation_tests.cpp
@ -0,0 +1,220 @@
 #include "catch.hpp"
 #include "translator/sentence_ranges.h"
 #include <random>
 #include <vector>
 using namespace marian::bergamot;
 TEST_CASE("Test Annotation API with random sentences") {
  /// Objective here is to test insertion for sentences, and that whatever comes
  /// out adheres to the way it was inserted. Towards this, we keep externally
  /// which sentence went in where and try to use accessor methods on
  /// AnnotatedText to check if what we have as ground-truth by construction is
  /// consistent with what is returned.
  size_t sentences = 500;
  size_t maxWords = 40;
  // Set in case needed to see output. The output is in lines of #sentences +
  // header, which can be split and compared for easy understanding. The ideal
  // way to inspect what is going wrong is to redirect output and use to split
  // the different stages by sentences + 1 lines and check the diff.
  bool debug{false};
  // Fixed seed so that the generated sentences are reproducible across runs.
  std::mt19937 randomIntGen_;
  randomIntGen_.seed(42);
  AnnotatedText testAnnotation; // This is the container we add through API
                                // and check if the access is correct.
  // External book-keeping so we have ground truths. Each element represents a
  // sentence.
  // word byte ranges - for testAnnotation.word(sId, wId)
  std::vector<std::vector<ByteRange>> groundTruthWords;
  // sentence byte ranges - for testAnnotation.sentence(sId)
  std::vector<ByteRange> groundTruthSentences;
  // Prepare the text and construct ByteRanges as intended for sentences and
  // words. The ByteRanges we construct here are expected to be the
  // ground-truths for words and sentences. The string being constructed is like
  // as follows:
  //
  //     0-0 0-1 0-2 0-3
  //     1-0 1-1 1-2 1-3 1-4
  //     2-0 2-1
  //
  //     4-0 4-1 4-2 4-3
  //
  // Words are separated by space units.
  //
  // Below, we accumulate the text with intended structure as above, and
  // ground-truth tables populated to be aware of the ByteRanges where they are
  // meant to be.
  if (debug) {
    std::cout << "Preparing text and ground truth-tables" << std::endl;
  }
  for (size_t idx = 0; idx < sentences; idx++) {
    if (idx != 0)
      testAnnotation.text += "\n";
    // Words can be zero, we need to support empty word sentences as well.
    size_t numWords = randomIntGen_() % maxWords;
    std::vector<ByteRange> wordByteRanges;
    wordByteRanges.reserve(numWords);
    // For empty sentence, we expect it to be empty and marked in position where
    // the existing string is if needed to be pointed out.
    // NOTE(review): this wraps around when the text is still empty, i.e. if
    // the very first sentence were to have zero words - confirm that case is
    // not expected to be covered here.
    size_t before = testAnnotation.text.size() - 1;
    size_t sentenceBegin{before}, sentenceEnd{before};
    for (size_t idw = 0; idw < numWords; idw++) {
      if (idw != 0) {
        testAnnotation.text += " ";
        if (debug) {
          std::cout << " ";
        }
      }
      // Get new beginning, accounting for space above.
      before = testAnnotation.text.size();
      // Add the word
      std::string word = std::to_string(idx) + "-" + std::to_string(idw);
      testAnnotation.text += word;
      // Do math, before, before + new-word's size.
      wordByteRanges.push_back((ByteRange){before, before + word.size()});
      if (debug) {
        std::cout << word;
      }
      // The sentence spans from the first word's begin to the last word's end.
      if (idw == 0) {
        sentenceBegin = before;
      }
      if (idw == numWords - 1) {
        sentenceEnd = before + word.size();
      }
    }
    if (debug) {
      std::cout << std::endl;
    }
    groundTruthWords.push_back(wordByteRanges);
    groundTruthSentences.push_back((ByteRange){sentenceBegin, sentenceEnd});
  }
  // We prepare string_views now with the known ByteRanges and use the
  // string_view based AnnotatedText.addSentence(...) API to add sentences to
  // transparently convert from string_views to ByteRanges, rebasing/working out
  // the math underneath.
  if (debug) {
    std::cout << "Inserting words onto container and save ground-truth-table:"
              << std::endl;
  }
  std::vector<std::vector<marian::string_view>> wordStringViews;
  for (auto &sentence : groundTruthWords) {
    std::vector<marian::string_view> wordByteRanges;
    bool first{true};
    for (auto &word : sentence) {
      marian::string_view wordView(&testAnnotation.text[word.begin],
                                   word.size());
      wordByteRanges.push_back(wordView);
      if (debug) {
        if (first) {
          first = false;
        } else {
          std::cout << " ";
        }
        std::cout << std::string(wordView);
      }
    }
    testAnnotation.addSentence(wordByteRanges);
    wordStringViews.push_back(wordByteRanges);
    if (debug) {
      std::cout << std::endl;
    }
  }
  if (debug) {
    std::cout
        << "Inserting sentences onto container and save ground-truth-table"
        << std::endl;
  }
  std::vector<marian::string_view> sentenceStringViews;
  for (auto &sentenceByteRange : groundTruthSentences) {
    char *data = &(testAnnotation.text[sentenceByteRange.begin]);
    marian::string_view sentenceView(data, sentenceByteRange.size());
    sentenceStringViews.push_back(sentenceView);
    if (debug) {
      std::cout << sentenceView << std::endl;
    }
  }
  // Access from the sentence(sentenceIdx) API and confirm that the ground truth
  // we expect is same as what comes out of the container.
  if (debug) {
    std::cout << "From container: Sentences" << std::endl;
  }
  // Indices are size_t throughout to match the containers' size() type.
  for (size_t idx = 0; idx < groundTruthSentences.size(); idx++) {
    ByteRange expected = groundTruthSentences[idx];
    ByteRange obtained = testAnnotation.sentenceAsByteRange(idx);
    if (debug) {
      std::cout << std::string(testAnnotation.sentence(idx)) << std::endl;
    }
    CHECK(expected.begin == obtained.begin);
    CHECK(expected.end == obtained.end);
    std::string expected_string = std::string(sentenceStringViews[idx]);
    std::string obtained_string = std::string(testAnnotation.sentence(idx));
    CHECK(expected_string == obtained_string);
  }
  /// Access the word(sentenceIdx, wordIdx) API and confirm what we hold as
  /// expected words are the same as those obtained from the container.
  if (debug) {
    std::cout << "From container: Words" << std::endl;
  }
  CHECK(groundTruthWords.size() == testAnnotation.numSentences());
  for (size_t idx = 0; idx < groundTruthWords.size(); idx++) {
    CHECK(groundTruthWords[idx].size() == testAnnotation.numWords(idx));
  }
  for (size_t idx = 0; idx < groundTruthWords.size(); idx++) {
    for (size_t idw = 0; idw < groundTruthWords[idx].size(); idw++) {
      ByteRange expected = groundTruthWords[idx][idw];
      ByteRange obtained = testAnnotation.wordAsByteRange(idx, idw);
      if (debug) {
        std::cout << std::string(testAnnotation.word(idx, idw)) << " ";
      }
      CHECK(expected.begin == obtained.begin);
      CHECK(expected.end == obtained.end);
      std::string expected_string = std::string(wordStringViews[idx][idw]);
      std::string obtained_string = std::string(testAnnotation.word(idx, idw));
      CHECK(expected_string == obtained_string);
    }
    if (debug) {
      std::cout << std::endl;
    }
  }
  // Try inserting an empty Sentence. This is ensuring we check for empty
  // Sentence if the random test above does not cover it for some reason.
  size_t emptySentenceIdx = sentences;
  std::vector<marian::string_view> emptySentence;
  testAnnotation.addSentence(emptySentence);
  // There are no words.
  CHECK(testAnnotation.numWords(emptySentenceIdx) == 0);
  // Empty sentence expected at output.
  std::string expectedEmptyString = "";
  marian::string_view emptyView = testAnnotation.sentence(emptySentenceIdx);
  std::string obtainedString = std::string(emptyView.data(), emptyView.size());
  CHECK(expectedEmptyString == obtainedString);
 }
--- a/src/tests/run_tests.cpp
+++ b/src/tests/run_tests.cpp
@ -0,0 +1,2 @@
 #define CATCH_CONFIG_MAIN
 #include "catch.hpp"
--- a/src/translator/AbstractTranslationModel.cpp
+++ b/src/translator/AbstractTranslationModel.cpp
@ -1,14 +0,0 @@
 /*
 * AbstractTranslationModel.cpp
 *
 */
 #include <memory>
 // All local includes
 #include "AbstractTranslationModel.h"
 #include "TranslationModel.h"
 // Factory: builds a TranslationModel from the given configuration string and
 // hands it back through a pointer to the abstract interface.
 std::shared_ptr<AbstractTranslationModel>
 AbstractTranslationModel::createInstance(const std::string &config) {
  std::shared_ptr<AbstractTranslationModel> instance =
      std::make_shared<TranslationModel>(config);
  return instance;
 }
--- a/src/translator/CMakeLists.txt
+++ b/src/translator/CMakeLists.txt
@ -1,32 +1,26 @@
 if (NOT USE_WASM_COMPATIBLE_SOURCES)
    set(MULTITHREADED_SERVICE_SOURCE "service.cpp")
 endif()
 add_library(bergamot-translator STATIC
    AbstractTranslationModel.cpp
    TranslationModel.cpp
-
+    byte_array_util.cpp
    # Following files added from browsermt/mts@nuke
    text_processor.cpp
    sentence_splitter.cpp
    batch_translator.cpp 
    multifactor_priority.cpp 
    request.cpp 
    service_base.cpp
    ${MULTITHREADED_SERVICE_SOURCE}
    batcher.cpp
    response.cpp
    batch.cpp
    sentence_ranges.cpp
    service.cpp
 )
-if (COMPILE_DECODER_ONLY)
+if (USE_WASM_COMPATIBLE_SOURCE)
-  # A dirty hack because of marian's bad cmake practices
+  # Using wasm compatible sources should include this compile definition;
-  target_compile_definitions(bergamot-translator PUBLIC DECODER_ONLY)
+  # Has to be done here because we are including marian headers + some sources
  # in local repository use these definitions
  target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM_COMPATIBLE_SOURCE)
 endif()
 if(COMPILE_WASM)
-  # A dirty hack because of marian's bad cmake practices
+  target_compile_definitions(bergamot-translator PUBLIC WASM)
  target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM)
  # Enable code that is required for generating JS bindings
  target_compile_definitions(bergamot-translator PRIVATE WASM_BINDINGS)
  target_compile_options(bergamot-translator PRIVATE ${WASM_COMPILE_FLAGS})
@ -35,7 +29,5 @@ endif(COMPILE_WASM)
 target_link_libraries(bergamot-translator marian ssplit)
 target_include_directories(bergamot-translator
-    PRIVATE ${CMAKE_SOURCE_DIR}
+    PUBLIC ${CMAKE_SOURCE_DIR}
    PUBLIC ${CMAKE_SOURCE_DIR}/src)
--- a/src/translator/TranslationModel.cpp
+++ b/src/translator/TranslationModel.cpp
@ -6,53 +6,15 @@
 #include <future>
 #include <vector>
 // All 3rd party includes
 #include "3rd_party/marian-dev/src/3rd_party/yaml-cpp/yaml.h"
 #include "3rd_party/marian-dev/src/common/config_parser.h"
 #include "common/config_validator.h"
 #include "common/options.h"
 // All local project includes
 #include "TranslationModel.h"
 #include "translator/parser.h"
-#include "translator/service_base.h"
+#include "translator/service.h"
-std::shared_ptr<marian::Options> parseOptions(const std::string &config) {
+TranslationModel::TranslationModel(const std::string &config,
-  marian::Options options;
+                                   marian::bergamot::AlignedMemory model_memory,
-
+                                   marian::bergamot::AlignedMemory lexical_memory)
-  // @TODO(jerinphilip) There's something off here, @XapaJIaMnu suggests
+    : service_(config, std::move(model_memory), std::move(lexical_memory)) {}
  // that should not be using the defaultConfig. This function only has access
  // to std::string config and needs to be able to construct Options from the
  // same.
  // Absent the following code-segment, there is a parsing exception thrown on
  // rebuilding YAML.
  //
  // Error: Unhandled exception of type 'N4YAML11InvalidNodeE': invalid node;
  // this may result from using a map iterator as a sequence iterator, or
  // vice-versa
  //
  // Error: Aborted from void unhandledException() in
  // 3rd_party/marian-dev/src/common/logging.cpp:113
  marian::ConfigParser configParser = marian::bergamot::createConfigParser();
  const YAML::Node &defaultConfig = configParser.getConfig();
  options.merge(defaultConfig);
  // Parse configs onto defaultConfig.
  options.parse(config);
  YAML::Node configCopy = options.cloneToYamlNode();
  marian::ConfigValidator validator(configCopy);
  validator.validateOptions(marian::cli::mode::translation);
  return std::make_shared<marian::Options>(options);
 }
 TranslationModel::TranslationModel(const std::string &config)
    : configOptions_(std::move(parseOptions(config))),
      AbstractTranslationModel(), service_(configOptions_) {}
 TranslationModel::~TranslationModel() {}
@ -71,23 +33,18 @@ TranslationModel::translate(std::vector<std::string> &&texts,
    intermediate.wait();
    auto marianResponse(std::move(intermediate.get()));
    // This mess because marian::string_view != std::string_view
    std::string source, translation;
    marian::bergamot::Response::SentenceMappings mSentenceMappings;
    marianResponse.move(source, translation, mSentenceMappings);
    // Convert to UnifiedAPI::TranslationResult
    TranslationResult::SentenceMappings sentenceMappings;
-    for (auto &p : mSentenceMappings) {
+    for (size_t idx = 0; idx < marianResponse.size(); idx++) {
-      std::string_view src(p.first.data(), p.first.size()),
+      marian::string_view src = marianResponse.source.sentence(idx);
-          tgt(p.second.data(), p.second.size());
+      marian::string_view tgt = marianResponse.target.sentence(idx);
-      sentenceMappings.emplace_back(src, tgt);
+      sentenceMappings.emplace_back(std::string_view(src.data(), src.size()),
                                    std::string_view(tgt.data(), tgt.size()));
    }
    // In place construction.
    translationResults.emplace_back(
-        std::move(source),          // &&marianResponse.source_
+        std::move(marianResponse.source.text), // &&marianResponse.source_
-        std::move(translation),     // &&marianResponse.translation_
+        std::move(marianResponse.target.text), // &&marianResponse.translation_
        std::move(sentenceMappings)            // &&sentenceMappings
    );
  }
--- a/src/translator/aligned.h
+++ b/src/translator/aligned.h
@ -0,0 +1,71 @@
 #pragma once
 #include <cstdlib>
 #include <new>
 #ifdef _MSC_VER
 #include <malloc.h>
 #endif
 // Aligned simple vector.
 namespace marian {
 namespace bergamot {
 /* A minimal, move-only owner of a heap buffer of `size` elements of T whose
  * start address is aligned to `alignment` bytes.
  *
  * The buffer is raw storage: elements are neither constructed on allocation
  * nor destroyed on release. A default-constructed or moved-from vector owns
  * nothing (begin() == nullptr, size() == 0).
  */
 template <class T> class AlignedVector {
 public:
  AlignedVector() : mem_(nullptr), size_(0) {}
  /* Allocates storage for `size` elements aligned to `alignment` bytes.
   * Throws std::bad_alloc if the allocation fails.
   */
  explicit AlignedVector(std::size_t size, std::size_t alignment = 64 /* CPU cares about this */)
          : mem_(nullptr), size_(size) {
 #ifdef _MSC_VER
    mem_ = static_cast<T*>(_aligned_malloc(size * sizeof(T), alignment));
    if (!mem_) throw std::bad_alloc();
 #else
    if (posix_memalign(reinterpret_cast<void **>(&mem_), alignment, size * sizeof(T))) {
      throw std::bad_alloc();
    }
 #endif
  }
  // Takes over the buffer of `from`, leaving it empty.
  AlignedVector(AlignedVector &&from) noexcept : mem_(from.mem_), size_(from.size_) {
    from.mem_ = nullptr;
    from.size_ = 0;
  }
  // Takes over the buffer of `from`, leaving it empty. The buffer currently
  // held is released first so it does not leak; self-assignment is a no-op.
  AlignedVector &operator=(AlignedVector &&from) noexcept {
    if (this != &from) {
      release();
      mem_ = from.mem_;
      size_ = from.size_;
      from.mem_ = nullptr;
      from.size_ = 0;
    }
    return *this;
  }
  // Copying is disabled: there is exactly one owner per buffer.
  AlignedVector(const AlignedVector&) = delete;
  AlignedVector& operator=(const AlignedVector&) = delete;
  ~AlignedVector() { release(); }
  // Number of elements the buffer was allocated for.
  std::size_t size() const { return size_; }
  // Unchecked element access.
  T &operator[](std::size_t offset) { return mem_[offset]; }
  const T &operator[](std::size_t offset) const { return mem_[offset]; }
  T *begin() { return mem_; }
  const T *begin() const { return mem_; }
  T *end() { return mem_ + size_; }
  const T *end() const { return mem_ + size_; }
  // Reinterprets the start of the buffer as a pointer to ReturnType.
  template <typename ReturnType>
  ReturnType *as() { return reinterpret_cast<ReturnType*>(mem_); }
 private:
  // Returns the owned buffer (if any) to the allocator it came from. Does not
  // reset mem_/size_; callers overwrite them or are being destroyed.
  void release() noexcept {
 #ifdef _MSC_VER
    _aligned_free(mem_);
 #else
    std::free(mem_);
 #endif
  }
  T *mem_;
  std::size_t size_;
 };
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/batch_translator.cpp
+++ b/src/translator/batch_translator.cpp
@ -10,26 +10,46 @@ namespace bergamot {
 BatchTranslator::BatchTranslator(DeviceId const device,
                                 std::vector<Ptr<Vocab const>> &vocabs,
-                                 Ptr<Options> options)
+                                 Ptr<Options> options,
-    : device_(device), options_(options), vocabs_(&vocabs) {}
+                                 const AlignedMemory* modelMemory,
                                 const AlignedMemory* shortlistMemory)
    : device_(device), options_(options), vocabs_(&vocabs),
    modelMemory_(modelMemory), shortlistMemory_(shortlistMemory) {}
 void BatchTranslator::initialize() {
  // Initializes the graph.
  if (options_->hasAndNotEmpty("shortlist")) {
    int srcIdx = 0, trgIdx = 1;
    bool shared_vcb = vocabs_->front() == vocabs_->back();
-    slgen_ = New<data::LexicalShortlistGenerator>(options_, vocabs_->front(),
+    if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) {
      bool check = options_->get<bool>("check-bytearray",true);
      slgen_ = New<data::BinaryShortlistGenerator>(shortlistMemory_->begin(), shortlistMemory_->size(),
                                                     vocabs_->front(), vocabs_->back(),
                                                     srcIdx, trgIdx, shared_vcb, check);
    }
    else {
      // Changed to BinaryShortlistGenerator to enable loading binary shortlist file
      // This class also supports text shortlist file
      slgen_ = New<data::BinaryShortlistGenerator>(options_, vocabs_->front(),
                                                    vocabs_->back(), srcIdx,
                                                    trgIdx, shared_vcb);
    }
  }
-  graph_ = New<ExpressionGraph>(true); // always optimize
+  graph_ = New<ExpressionGraph>(true); // set the graph to be inference only
  auto prec = options_->get<std::vector<std::string>>("precision", {"float32"});
  graph_->setDefaultElementType(typeFromString(prec[0]));
  graph_->setDevice(device_);
  graph_->getBackend()->configureDevice(options_);
  graph_->reserveWorkspaceMB(options_->get<size_t>("workspace"));
  if (modelMemory_->size() > 0 && modelMemory_->begin() != nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file
    ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0,
             "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
    const std::vector<const void *> container = {modelMemory_->begin()}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding.
    scorers_ = createScorers(options_, container);
  } else {
    scorers_ = createScorers(options_);
  }
  for (auto scorer : scorers_) {
    scorer->init(graph_);
    if (slgen_) {
--- a/src/translator/batch_translator.h
+++ b/src/translator/batch_translator.h
@ -12,7 +12,7 @@
 #include "translator/history.h"
 #include "translator/scorers.h"
-#ifdef WITH_PTHREADS
+#ifndef WASM_COMPATIBLE_SOURCE
 #include "pcqueue.h"
 #endif
@ -26,8 +26,16 @@ class BatchTranslator {
  // shut down in Service which calls join() on the threads.
 public:
-  BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
+  /**
-                  Ptr<Options> options);
+   * Initialise the marian translator.
   * @param device DeviceId that performs translation. Could be CPU or GPU
   * @param vocabs Vector that contains ptrs to two vocabs
   * @param options Marian options object
   * @param modelMemory byte array (aligned to 256!!!) that contains the bytes of a model.bin. Provide a nullptr if not used.
   * @param shortlistMemory byte array of shortlist (aligned to 64)
   */
  explicit BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
                  Ptr<Options> options, const AlignedMemory* modelMemory, const AlignedMemory* shortlistMemory);
  // convenience function for logging. TODO(jerin)
  std::string _identifier() { return "worker" + std::to_string(device_.no); }
@ -41,6 +49,8 @@ private:
  Ptr<ExpressionGraph> graph_;
  std::vector<Ptr<Scorer>> scorers_;
  Ptr<data::ShortlistGenerator const> slgen_;
  const AlignedMemory* modelMemory_{nullptr};
  const AlignedMemory* shortlistMemory_{nullptr};
 };
 } // namespace bergamot
--- a/src/translator/batcher.h
+++ b/src/translator/batcher.h
@ -7,7 +7,7 @@
 #include "definitions.h"
 #include "request.h"
-#ifdef WITH_PTHREADS
+#ifndef WASM_COMPATIBLE_SOURCE
 #include "pcqueue.h"
 #endif
--- a/src/translator/byte_array_util.cpp
+++ b/src/translator/byte_array_util.cpp
@ -0,0 +1,108 @@
 #include "byte_array_util.h"
 #include <stdlib.h>
 #include <iostream>
 namespace marian {
 namespace bergamot {
 namespace {
 // This is a basic validator that checks that a binary model file has not been
 // truncated: it walks the headers and checks that the file is large enough to
 // contain everything the headers describe.
 // The Header struct and the getter below are copied from the marian source, because
 // they are located inside src/common/binary.cpp:15 and we can't include it.
 // Fixed-size record describing one item stored in a binary model file.
 // validateBinaryModel() reads an array of these right after the two leading
 // uint64_t fields of the file.
 struct Header {
  uint64_t nameLength;  // number of chars in the item's name
  uint64_t type;        // not inspected by the validator in this file
  uint64_t shapeLength; // number of ints in the item's shape
  uint64_t dataLength;  // number of bytes of payload belonging to the item
 };
 // cast current void pointer to T pointer and move forward by num elements
 // Interpret the cursor as an array of T, hand back a pointer to its first
 // element and advance the cursor past `num` elements.
 template <typename T>
 const T* get(const void*& current, uint64_t num = 1) {
  const T* const head = static_cast<const T*>(current);
  current = head + num;
  return head;
 }
 // Checks that a binary model held in `model` is at least large enough to
 // contain everything its own headers announce, i.e. that it was not truncated.
 //
 // @param model    memory holding the bytes of the model file.
 // @param fileSize number of valid bytes in `model`.
 // @return true if every section described by the headers fits into fileSize
 //         bytes, false otherwise.
 //
 // Every read is preceded by a check that the bytes about to be read lie inside
 // the file, and every size is compared against the bytes still remaining
 // instead of being summed up first, so corrupted (huge) length fields can
 // neither wrap the running total nor trigger an out-of-bounds read.
 bool validateBinaryModel(AlignedMemory& model, uint64_t fileSize) {
  const void * current = &model[0];
  // Bytes consumed so far. Invariant from here on: memoryNeeded <= fileSize.
  uint64_t memoryNeeded = sizeof(uint64_t)*2; // binary file version + number of headers
  if (fileSize < memoryNeeded) {
    return false;
  }
  get<uint64_t>(current); // binary file version, not checked here
  const uint64_t numHeaders = *get<uint64_t>(current); // number of item headers that follow

  // The header table has to fit in what is left of the file. Dividing instead
  // of multiplying keeps a bogus numHeaders from overflowing.
  if (numHeaders > (fileSize - memoryNeeded) / sizeof(Header)) {
    return false;
  }
  memoryNeeded += numHeaders*sizeof(Header);
  const Header* headers = get<Header>(current, numHeaders); // read that many headers

  // Names and shapes of all items follow the header table.
  for (uint64_t i = 0; i < numHeaders; i++) {
    const uint64_t remaining = fileSize - memoryNeeded;
    if (headers[i].nameLength > remaining) {
      return false;
    }
    if (headers[i].shapeLength > (remaining - headers[i].nameLength) / sizeof(int)) {
      return false;
    }
    memoryNeeded += headers[i].nameLength + headers[i].shapeLength*sizeof(int);
    // Advance the pointers.
    get<char>(current, headers[i].nameLength);
    get<int>(current, headers[i].shapeLength);
  }

  // Before the tensor data there is a uint64_t holding the size of a small
  // padding that ensures alignment. Make sure that field itself is inside the
  // file before dereferencing it.
  if (fileSize - memoryNeeded < sizeof(uint64_t)) {
    return false;
  }
  const uint64_t aligned_offset = *get<uint64_t>(current); // Offset to align memory to 256 size
  memoryNeeded += sizeof(uint64_t);
  if (aligned_offset > fileSize - memoryNeeded) {
    return false;
  }
  memoryNeeded += aligned_offset;

  // Finally the tensor data of every item.
  for (uint64_t i = 0; i < numHeaders; i++) {
    if (headers[i].dataLength > fileSize - memoryNeeded) {
      return false;
    }
    memoryNeeded += headers[i].dataLength;
  }
  // Everything announced by the headers fits: the file is at least big enough
  // to contain the model.
  return true;
 }
 } // Anonymous namespace
 // Reads the whole file at `path` into a freshly allocated AlignedMemory.
 //
 // @param path      file to load.
 // @param alignment alignment passed to the AlignedMemory constructor.
 // @return the memory holding the file's content.
 //
 // Aborts if the stream reports an error after opening, if fewer than
 // fileSize bytes could be read, or if the buffer size differs from fileSize.
 AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
  uint64_t fileSize = filesystem::fileSize(path);
  io::InputFileStream in(path);
  ABORT_IF(in.bad(), "Failed opening file stream: {}", path);
  AlignedMemory alignedMemory(fileSize, alignment);
  in.read(reinterpret_cast<char *>(alignedMemory.begin()), fileSize);
  // read() flags the stream as failed when it could not deliver all requested
  // bytes; the size comparison below cannot detect a short read on its own
  // because the buffer was allocated with exactly fileSize.
  ABORT_IF(in.fail(), "Error reading file {}", path);
  ABORT_IF(alignedMemory.size() != fileSize, "Error reading file {}", path);
  return alignedMemory;
 }
 // Loads the single binary model named by the "models" option into memory
 // aligned to 256 and validates it. Aborts unless exactly one model is
 // configured, its file name ends in .bin, and the loaded bytes pass
 // validateBinaryModel().
 AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options){
  auto modelFiles = options->get<std::vector<std::string>>("models");
  ABORT_IF(modelFiles.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");

  marian::filesystem::Path binaryPath(modelFiles[0]);
  const marian::filesystem::Path expectedExtension(".bin");
  ABORT_IF(binaryPath.extension() != expectedExtension, "The file of binary model should end with .bin");

  AlignedMemory modelBytes = loadFileToMemory(modelFiles[0], 256);
  const bool intact = validateBinaryModel(modelBytes, modelBytes.size());
  ABORT_IF(!intact, "The binary file is invalid. Incomplete or corrupted download?");
  return modelBytes;
 }
 // Loads the file named by the first entry of the "shortlist" option into
 // memory aligned to 64. Aborts when the option holds no entries.
 AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options){
  auto shortlistArgs = options->get<std::vector<std::string>>("shortlist");
  ABORT_IF(shortlistArgs.empty(), "No path to shortlist file is given.");
  const std::string& shortlistFile = shortlistArgs[0];
  return loadFileToMemory(shortlistFile, 64);
 }
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/byte_array_util.h
+++ b/src/translator/byte_array_util.h
@ -0,0 +1,12 @@
 #ifndef SRC_BERGAMOT_BYTE_ARRAY_UTIL_H_
 #define SRC_BERGAMOT_BYTE_ARRAY_UTIL_H_
 #include "marian.h"
 #include "definitions.h"
 #include <cstddef>
 #include <string>
 namespace marian {
 namespace bergamot {
 /// Reads the whole file at `path` into memory allocated with the requested
 /// `alignment`. Aborts if the file cannot be read.
 AlignedMemory loadFileToMemory(const std::string& path, size_t alignment);
 /// Loads the single `.bin` model listed under the "models" option into memory
 /// aligned to 256 and aborts if it fails the truncation check.
 AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
 /// Loads the first file listed under the "shortlist" option into memory
 /// aligned to 64. Aborts if no shortlist path is configured.
 AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options);
 } // namespace bergamot
 } // namespace marian
 #endif // SRC_BERGAMOT_BYTE_ARRAY_UTIL_H_
--- a/src/translator/definitions.h
+++ b/src/translator/definitions.h
@ -3,6 +3,7 @@
 #include "data/types.h"
 #include "data/vocab_base.h"
 #include "aligned.h"
 #include <vector>
 namespace marian {
@ -21,6 +22,9 @@ template <class T, typename... Args> UPtr<T> UNew(Args &&... args) {
 template <class T> UPtr<T> UNew(UPtr<T> p) { return UPtr<T>(p); }
 /// Shortcut to AlignedVector<const void*> for byte arrays
 // NOTE(review): the element type is a pointer, not a byte. If
 // AlignedVector<T>(n, alignment) allocates n elements of T, then
 // loadFileToMemory()'s AlignedMemory(fileSize, alignment) reserves
 // fileSize * sizeof(const void*) bytes instead of fileSize bytes - confirm
 // against aligned.h whether a byte-sized element type was intended.
 typedef AlignedVector<const void*> AlignedMemory;
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/parser.h
+++ b/src/translator/parser.h
@ -1,6 +1,10 @@
 #ifndef SRC_BERGAMOT_PARSER_H
 #define SRC_BERGAMOT_PARSER_H
 #include "3rd_party/yaml-cpp/yaml.h"
 #include "common/config_parser.h"
 #include "common/config_validator.h"
 #include "common/options.h"
 #include "marian.h"
 namespace marian {
@ -19,9 +23,47 @@ inline marian::ConfigParser createConfigParser() {
      "--max-length-break", "Bergamot Options",
      "Maximum input tokens to be processed in a single sentence.", 128);
  cp.addOption<bool>(
      "--check-bytearray", "Bergamot Options",
      "Flag holds whether to check the content of the bytearray (true by default)", true);
    return cp;
 }
 inline std::shared_ptr<marian::Options>
 parseOptions(const std::string &config) {
  marian::Options options;
  // @TODO(jerinphilip) There's something off here, @XapaJIaMnu suggests
  // that should not be using the defaultConfig. This function only has access
  // to std::string config and needs to be able to construct Options from the
  // same.
  // Absent the following code-segment, there is a parsing exception thrown on
  // rebuilding YAML.
  //
  // Error: Unhandled exception of type 'N4YAML11InvalidNodeE': invalid node;
  // this may result from using a map iterator as a sequence iterator, or
  // vice-versa
  //
  // Error: Aborted from void unhandledException() in
  // 3rd_party/marian-dev/src/common/logging.cpp:113
  marian::ConfigParser configParser = createConfigParser();
  const YAML::Node &defaultConfig = configParser.getConfig();
  options.merge(defaultConfig);
  // Parse configs onto defaultConfig.
  options.parse(config);
  YAML::Node configCopy = options.cloneToYamlNode();
  marian::ConfigValidator validator(configCopy);
  validator.validateOptions(marian::cli::mode::translation);
  return std::make_shared<marian::Options>(options);
 }
 } //  namespace bergamot
 } //  namespace marian
--- a/src/translator/request.cpp
+++ b/src/translator/request.cpp
@ -12,12 +12,10 @@ namespace bergamot {
 // -----------------------------------------------------------------
 Request::Request(size_t Id, size_t lineNumberBegin,
-                 std::vector<Ptr<Vocab const>> &vocabs, std::string &&source,
+                 std::vector<Ptr<Vocab const>> &vocabs, AnnotatedText &&source,
-                 Segments &&segments, SentenceRanges &&sourceRanges,
+                 Segments &&segments, std::promise<Response> responsePromise)
                 std::promise<Response> responsePromise)
    : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs),
      source_(std::move(source)), segments_(std::move(segments)),
      sourceRanges_(std::move(sourceRanges)),
      response_(std::move(responsePromise)) {
  counter_ = segments_.size();
@ -48,8 +46,7 @@ void Request::processHistory(size_t index, Ptr<History> history) {
 void Request::completeRequest() {
  // Request no longer needs to hold the content, can transfer it to
  // Response.
-  Response response(std::move(source_), std::move(sourceRanges_),
+  Response response(std::move(source_), std::move(histories_), *vocabs_);
                    std::move(histories_), *vocabs_);
  response_.set_value(std::move(response));
 }
--- a/src/translator/request.h
+++ b/src/translator/request.h
@ -1,9 +1,9 @@
 //
 // Defines:
 //
-// Request: holds the input blob of a text, Segments (vector<Words>) which are
+// Request: holds the input text of a text, Segments (vector<Words>) which are
 // to go to the batching mechanism and alignments between the processed
-// segments and the input blob (sourceTokenRanges). In addition, Request takes
+// segments and the input text (sourceTokenRanges). In addition, Request takes
 // care of the barrier which fires when all the Segments in a request are done
 // translating by the workers (BatchTranslator).
 // TODO(jerinphilip):  Extend Request with notions of Priority (sequence,
@ -36,9 +36,8 @@ namespace bergamot {
 class Request {
 public:
  Request(size_t Id, size_t lineNumberBegin,
-          std::vector<Ptr<Vocab const>> &vocabs_, std::string &&source,
+          std::vector<Ptr<Vocab const>> &vocabs_, AnnotatedText &&source,
-          Segments &&segments, SentenceRanges &&sourceTokenRanges,
+          Segments &&segments, std::promise<Response> responsePromise);
          std::promise<Response> responsePromise);
  // Obtain the count of tokens in the segment corresponding to index. Used to
  // insert sentence from multiple requests into the corresponding size bucket.
@ -77,9 +76,8 @@ private:
  // string_views of the text corresponding to these words, pointing to
  // sequences in source_. histories_ is a buffer which eventually stores the
  // translations of each segment in the corresponding index.
-  std::string source_;
+  AnnotatedText source_;
  Segments segments_;
  SentenceRanges sourceRanges_;
  std::vector<Ptr<History>> histories_;
  // Members above are moved into newly constructed Response on completion
--- a/src/translator/response.cpp
+++ b/src/translator/response.cpp
@ -1,97 +1,105 @@
 #include "response.h"
 #include "sentence_ranges.h"
 #include "common/logging.h"
 #include "data/alignment.h"
 #include "sentence_ranges.h"
 #include <utility>
 namespace marian {
 namespace bergamot {
-Response::Response(std::string &&source, SentenceRanges &&sourceRanges,
+Response::Response(AnnotatedText &&source, Histories &&histories,
-                   Histories &&histories, std::vector<Ptr<Vocab const>> &vocabs)
+                   std::vector<Ptr<Vocab const>> &vocabs)
-    : source_(std::move(source)), sourceRanges_(std::move(sourceRanges)),
+    : source(std::move(source)) {
      histories_(std::move(histories)), vocabs_(&vocabs) {}
 void Response::move(std::string &source, std::string &translation,
                    SentenceMappings &sentenceMappings) {
  // Construct required stuff first.
  constructTranslation();
  constructSentenceMappings(sentenceMappings);
  // Move content out.
  source = std::move(source_);
  translation = std::move(translation_);
  // The above assignment expects source, target be moved.
  // which makes the following invalid, hence required to be cleared.
  sourceRanges_.clear();
  targetRanges_.clear();
  histories_.clear();
 }
 void Response::constructTranslation() {
  if (translationConstructed_) {
    return;
  }
  // Reserving length at least as much as source_ seems like a reasonable thing
  // to do to avoid reallocations.
-  translation_.reserve(source_.size());
+  target.text.reserve(source.text.size());
  // In a first step, the decoded units (individual sentences) are compiled
  // into a huge string. This is done by computing indices first and appending
  // to the string as each sentence is decoded.
  std::vector<std::pair<size_t, size_t>> translationRanges;
  std::vector<size_t> sentenceBegins;
  size_t offset{0};
  bool first{true};
-  for (auto &history : histories_) {
+  for (auto &history : histories) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList onebest = history->nBest(1);
    Result result = onebest[0]; // Expecting only one result;
    Words words = std::get<0>(result);
-    auto targetVocab = vocabs_->back();
+    auto targetVocab = vocabs.back();
-    std::string decoded = targetVocab->decode(words);
+
    std::string decoded;
    std::vector<string_view> targetMappings;
    targetVocab->decodeWithByteRanges(words, decoded, targetMappings);
    if (first) {
      first = false;
    } else {
-      translation_ += " ";
+      target.text += " ";
      ++offset;
    }
-    translation_ += decoded;
+    sentenceBegins.push_back(translationRanges.size());
-    translationRanges.emplace_back(offset, decoded.size());
+    target.text += decoded;
    auto decodedStringBeginMarker = targetMappings.front().begin();
    for (auto &sview : targetMappings) {
      size_t startIdx = offset + sview.begin() - decodedStringBeginMarker;
      translationRanges.emplace_back(startIdx, startIdx + sview.size());
    }
    offset += decoded.size();
    // Alignments
    // TODO(jerinphilip): The following double conversion might not be
    // necessary. Hard alignment can directly be exported, but this would mean
    // WASM bindings for a structure deep within marian source.
    auto hyp = std::get<1>(result);
    auto softAlignment = hyp->tracebackAlignment();
    auto hardAlignment = data::ConvertSoftAlignToHardAlign(
        softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a
                                            // configurable parameter.
    Alignment unified_alignment;
    for (auto &p : hardAlignment) {
      unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob});
    }
-  // Once the entire string is constructed, there are no further possibility of
+    alignments.push_back(std::move(unified_alignment));
  // reallocation in the string's storage, the indices are converted into
  // string_views.
-  for (auto &range : translationRanges) {
+    // Quality scores: Sequence level is obtained as normalized path scores.
-    // TODO(@jerinphilip):  Currently considers target tokens as whole text.
+    // Word level using hypothesis traceback. These are most-likely logprobs.
-    // Needs to be further enhanced in marian-dev to extract alignments.
+    auto normalizedPathScore = std::get<2>(result);
    auto wordQualities = hyp->tracebackWordScores();
    wordQualities.pop_back();
    qualityScores.push_back((Quality){normalizedPathScore, wordQualities});
  }
  // Once we have the indices in translation (which might be resized a few
  // times) ready, we can prepare and store the string_view as annotations
  // instead. This is accomplished by iterating over available sentences using
  // sentenceBegin and using addSentence(...) API from Annotation.
  for (size_t i = 1; i <= sentenceBegins.size(); i++) {
    std::vector<string_view> targetMappings;
    size_t begin = sentenceBegins[i - 1];
    size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size()
                                                   : sentenceBegins[i];
-    const char *begin = &translation_[range.first];
+    for (size_t idx = begin; idx < safe_end; idx++) {
-    targetMappings.emplace_back(begin, range.second);
+      auto &p = translationRanges[idx];
-    targetRanges_.addSentence(targetMappings);
+      size_t begin_idx = p.first;
      size_t end_idx = p.second;
      const char *data = &target.text[begin_idx];
      size_t size = end_idx - begin_idx;
      targetMappings.emplace_back(data, size);
    }
-  translationConstructed_ = true;
+    target.addSentence(targetMappings);
 }
 void Response::constructSentenceMappings(
    Response::SentenceMappings &sentenceMappings) {
  for (size_t i = 0; i < sourceRanges_.numSentences(); i++) {
    string_view src = sourceRanges_.sentence(i);
    string_view tgt = targetRanges_.sentence(i);
    sentenceMappings.emplace_back(src, tgt);
  }
 }
 } // namespace bergamot
--- a/src/translator/response.h
+++ b/src/translator/response.h
@ -1,9 +1,10 @@
 #ifndef SRC_BERGAMOT_RESPONSE_H_
 #define SRC_BERGAMOT_RESPONSE_H_
-#include "sentence_ranges.h"
+#include "data/alignment.h"
 #include "data/types.h"
 #include "definitions.h"
 #include "sentence_ranges.h"
 #include "translator/beam_search.h"
 #include <cassert>
@ -12,86 +13,79 @@
 namespace marian {
 namespace bergamot {
 /// Alignment is stored as a sparse matrix, this pretty much aligns with marian
 /// internals but is brought here to maintain translator
 /// agnosticism/independence.
 /// One non-zero entry of the sparse alignment matrix: a source/target pair
 /// together with its alignment score.
 struct Point {
  size_t src; ///< Index pointing to source ByteRange
  size_t tgt; ///< Index pointing to target ByteRange
  float prob; ///< Score in [0, 1] indicating the degree of alignment.
 };
 /// Alignment is a sparse matrix, where Points represent entries with values.
 typedef std::vector<Point> Alignment;
 /// -loglikelihoods of the sequence components as proxy to quality.
 struct Quality {
  /// Certainty/uncertainty score for sequence.
  float sequence;
  /// Certainty/uncertainty for each word in the sequence.
  std::vector<float> word;
 };
 /// Response holds AnnotatedText(s) of source-text and translated text,
 /// alignment information between source and target sub-words and sentences.
 ///
 /// AnnotatedText provides an API to access markings of (sub)-word and
 /// sentences boundaries, which are required to interpret Quality and
 /// Alignment (s) at the moment.
 class Response {
  // Response is a marian internal class (not a bergamot-translator class)
  // holding source blob of text, vector of TokenRanges corresponding to each
  // sentence in the source text blob and histories obtained from translating
  // these sentences.
  //
  // This class provides an API at a higher level in comparison to History to
  // access translations and additionally use string_view manipulations to
  // recover structure in translation from source-text's structure known through
  // reference string and string_view. As many of these computations are not
  // required until invoked, they are computed as required and stored in data
  // members where it makes sense to do so (translation,translationTokenRanges).
  //
  // Examples of such use-cases are:
  //    translation()
  //    translationInSourceStructure() TODO(@jerinphilip)
  //    alignment(idx) TODO(@jerinphilip)
  //    sentenceMappings (for bergamot-translator)
 public:
-  Response(std::string &&source, SentenceRanges &&sourceRanges,
+  ///
-           Histories &&histories,
+  Response(AnnotatedText &&source, Histories &&histories,
           // Required for constructing translation and TokenRanges within
           // translation lazily.
           std::vector<Ptr<Vocab const>> &vocabs);
  /// \cond HIDDEN_PUBLIC
  // Move constructor.
  Response(Response &&other)
-      : source_(std::move(other.source_)),
+      : source(std::move(other.source)), target(std::move(other.target)),
-        translation_(std::move(other.translation_)),
+        alignments(std::move(other.alignments)),
-        sourceRanges_(std::move(other.sourceRanges_)),
+        qualityScores(std::move(other.qualityScores)){};
-        targetRanges_(std::move(other.targetRanges_)),
+
-        histories_(std::move(other.histories_)),
+  // The following copy bans are not stricitly required anymore since Annotation
-        vocabs_(std::move(other.vocabs_)){};
+  // is composed of the ByteRange primitive (which was previously string_view
  // and required to be bound to string), but makes movement efficient by
  // banning these letting compiler complain about copies.
  // Prevents CopyConstruction and CopyAssignment. sourceRanges_ is constituted
  // by string_view and copying invalidates the data member.
  Response(const Response &) = delete;
  Response &operator=(const Response &) = delete;
-  typedef std::vector<std::pair<const string_view, const string_view>>
+  /// \endcond
      SentenceMappings;
-  // Moves source sentence into source, translated text into translation.
+  /// Number of sentences translated. The processing of a text of into sentences
-  // Pairs of string_views to corresponding sentences in
+  /// are handled internally, and this information can be used to iterate
-  // source and translation are loaded into sentenceMappings. These string_views
+  /// through meaningful units of translation for which alignment and quality
-  // reference the new source and translation.
+  /// information are available.
-  //
+  const size_t size() const { return source.numSentences(); }
  // Calling move() invalidates the Response object as ownership is transferred.
  // Exists for moving strc
  void move(std::string &source, std::string &translation,
            SentenceMappings &sentenceMappings);
-  const Histories &histories() const { return histories_; }
+  /// source text and annotations of (sub-)words and sentences.
-  const std::string &source() const { return source_; }
+  AnnotatedText source;
  const std::string &translation() {
    constructTranslation();
    return translation_;
  }
-  // A convenience function provided to return translated text placed within
+  /// translated text and annotations of (sub-)words and sentences.
-  // source's structure. This is useful when the source text is a multi-line
+  AnnotatedText target;
  // paragraph or string_views extracted from structured text like HTML and it's
  // desirable to place the individual sentences in the locations of the source
  // sentences.
  // const std::string translationInSourceStructure();
  // const PendingAlignmentType alignment(size_t idx);
-private:
+  /// -logprob of each word and negative log likelihood of sequence (sentence)
-  void constructTranslation();
+  /// normalized by length, for each sentence processed by the translator.
-  void constructSentenceMappings(SentenceMappings &);
+  /// Indices correspond to ranges accessible through respective Annotation on
  /// source or target.
  std::vector<Quality> qualityScores;
-  std::string source_;
+  /// Alignments between source and target. Each Alignment is a
-  SentenceRanges sourceRanges_;
+  /// sparse matrix representation with indices corresponding
-  Histories histories_;
+  /// to (sub-)words accessible through Annotation.
-
+  std::vector<Alignment> alignments;
  std::vector<Ptr<Vocab const>> *vocabs_;
  bool translationConstructed_{false};
  std::string translation_;
  SentenceRanges targetRanges_;
 };
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/sentence_ranges.cpp
+++ b/src/translator/sentence_ranges.cpp
@ -5,40 +5,83 @@
 namespace marian {
 namespace bergamot {
-void SentenceRanges::addSentence(std::vector<string_view> &wordRanges) {
+void Annotation::addSentence(std::vector<ByteRange> &sentence) {
-  addSentence(std::begin(wordRanges), std::end(wordRanges));
+  flatByteRanges_.insert(std::end(flatByteRanges_), std::begin(sentence),
-}
+                         std::end(sentence));
 void SentenceRanges::addSentence(WordIterator begin, WordIterator end) {
  size_t size = flatByteRanges_.size();
-  flatByteRanges_.insert(std::end(flatByteRanges_), begin, end);
+  sentenceEndIds_.push_back(size);
  sentenceBeginIds_.push_back(size);
 }
-string_view SentenceRanges::sentence(size_t index) const {
+size_t Annotation::numWords(size_t sentenceIdx) const {
-  size_t bos_id;
+  size_t bosId, eosId;
-  string_view eos, bos;
+  bosId = sentenceEndIds_[sentenceIdx]; // Half interval, so;
  eosId = sentenceEndIds_[sentenceIdx + 1];
  // Difference between eosId and bosId is the number of words.
  return eosId - bosId;
 }
-  bos_id = sentenceBeginIds_[index];
+ByteRange Annotation::sentence(size_t sentenceIdx) const {
-  bos = flatByteRanges_[bos_id];
+  size_t bosId, eosId;
  bosId = sentenceEndIds_[sentenceIdx]; // Half interval, so;
  eosId = sentenceEndIds_[sentenceIdx + 1];
  ByteRange sentenceByteRange;
-  if (index + 1 == numSentences()) {
+  if (bosId == eosId) {
-    eos = flatByteRanges_.back();
+    // We have an empty sentence. However, we want to be able to point where in
    // target this happened through the ranges. We are looking for the end of
    // the flatByteRange and non-empty sentence before this happened and
    // construct empty string-view equivalent ByteRange.
    ByteRange eos = flatByteRanges_[eosId - 1];
    sentenceByteRange = (ByteRange){eos.end, eos.end};
  } else {
-    assert(index < numSentences());
+    ByteRange bos = flatByteRanges_[bosId];
-    size_t eos_id = sentenceBeginIds_[index + 1];
+    ByteRange eos = flatByteRanges_[eosId - 1];
-    --eos_id;
+    sentenceByteRange = (ByteRange){bos.begin, eos.end};
-    eos = flatByteRanges_[eos_id];
+  }
  return sentenceByteRange;
 }
-  return sentenceBetween(bos, eos);
+ByteRange Annotation::word(size_t sentenceIdx, size_t wordIdx) const {
  size_t bosOffset = sentenceEndIds_[sentenceIdx];
  return flatByteRanges_[bosOffset + wordIdx];
 }
-string_view SentenceRanges::sentenceBetween(string_view firstWord,
+string_view AnnotatedText::word(size_t sentenceIdx, size_t wordIdx) const {
-                                            string_view lastWord) const {
+  auto terminals = annotation.word(sentenceIdx, wordIdx);
  return string_view(&text[terminals.begin], terminals.size());
 }
-  const char *data = firstWord.data();
+string_view AnnotatedText::sentence(size_t sentenceIdx) const {
-  size_t size = lastWord.data() + lastWord.size() - firstWord.data();
+  auto sentenceAsByteRange = annotation.sentence(sentenceIdx);
  return asStringView(sentenceAsByteRange);
 }
 void AnnotatedText::addSentence(std::vector<string_view> &wordRanges) {
  addSentence(std::begin(wordRanges), std::end(wordRanges));
 };
 void AnnotatedText::addSentence(std::vector<string_view>::iterator begin,
                                std::vector<string_view>::iterator end) {
  std::vector<ByteRange> sentence;
  for (auto p = begin; p != end; p++) {
    size_t begin_offset = p->data() - &text[0];
    sentence.push_back((ByteRange){begin_offset, begin_offset + p->size()});
  }
  annotation.addSentence(sentence);
 };
 // ByteRange of word `wordIdx` within sentence `sentenceIdx`, as stored in the
 // annotation.
 ByteRange AnnotatedText::wordAsByteRange(size_t sentenceIdx,
                                          size_t wordIdx) const {
  const ByteRange wordRange = annotation.word(sentenceIdx, wordIdx);
  return wordRange;
 }
 // ByteRange of sentence `sentenceIdx`, as stored in the annotation.
 ByteRange AnnotatedText::sentenceAsByteRange(size_t sentenceIdx) const {
  const ByteRange sentenceRange = annotation.sentence(sentenceIdx);
  return sentenceRange;
 }
 // Builds a string_view over the bytes of `text` covered by byteRange.
 string_view AnnotatedText::asStringView(const ByteRange &byteRange) const {
  return string_view(&text[byteRange.begin], byteRange.size());
 }
--- a/src/translator/sentence_ranges.h
+++ b/src/translator/sentence_ranges.h
@ -3,50 +3,165 @@
 #include "data/types.h"
 #include <cassert>
 #include <utility>
 #include <vector>
 namespace marian {
 namespace bergamot {
-class SentenceRanges {
+/// ByteRange stores indices for half-interval [begin, end) in a string. Can be
-  // SentenceRanges stores string_views into a source text, with additional
+/// used to represent a sentence, word.
-  // annotations to mark sentence boundaries.
+struct ByteRange {
-  //
+  size_t begin;
-  // Given the availability annotations, this container provides capabilty to
+  size_t end;
-  // add sentences, and access individual sentences.
+  const size_t size() const { return end - begin; }
 };
 /// An Annotation is a collection of ByteRanges used to denote ancillary
 /// information of sentences and words on a text of string. Annotation is meant
 /// for consumption on platforms where `string_view` creates problems (eg:
 /// exports through WASM) conveniently rebasing them as required into
 /// ByteRanges. See AnnotatedText for cases where this is a non-issue.
 ///
 /// **Usage**
 ///
 /// To ensure rebasing is consistent during creation and updation, use
 /// `Annotation` best through `AnnotatedText`, which also holds the reference
 /// string and can work with `string_views`.
 ///
 /// If used separately, it is on the user to ensure the reference string
 /// is the same as what the Annotation refers to. For best results, an instance
 /// is expected to be read only in this mode of operation.
 ///
 /// **Idea**
 ///
 /// Annotation is intended to be the same structure conceptually as below,
 /// except the `std::vector<std::vector<ByteRange>>` hammered into a flat
 /// structure to avoid multiple reallocs keeping efficiency in mind. This is
 /// achieved by having markers of where sentence ends in the flat container
 /// storing word ByteRanges.
 ///
 /// ```cpp
 /// typedef ByteRange Word;
 /// // std::vector<ByteRange>, a single sentence
 /// typedef std::vector<Word> Sentence;
 /// std::vector<std::vector<ByteRange> // multiple sentences
 /// typedef std::vector<Sentence> Annotation;
 ///
 /// Annotation example;
 /// ```
 /// This structure exists to provide a consistent API to access the nested
 /// sentences of varying lengths, which occur in source-text processed into
 /// multiple sentences, and target-text translated from source as multiple
 /// sentences, both composed of (sub)-words, providing a List[List] like access
 /// while storing it in a compact and efficient manner.
 class Annotation {
 public:
-  typedef std::vector<string_view>::iterator WordIterator;
+  /// Annotation is constructed empty. See `addSentence()` to populate it with
-
+  /// annotations.
-  void addSentence(std::vector<string_view> &wordRanges);
+  Annotation() {
-  void addSentence(WordIterator begin, WordIterator end);
+    // The -1-th sentence ends at 0.
-
+    sentenceEndIds_.push_back(0);
  void clear() {
    flatByteRanges_.clear();
    sentenceBeginIds_.clear();
  }
-  size_t numSentences() const { return sentenceBeginIds_.size(); }
+  /// Returns the number of sentences annotated in a text.
  size_t numSentences() const { return sentenceEndIds_.size() - 1; }
-  // Returns a string_view into the ith sentence.
+  /// Returns number of words in the sentence identified by `sentenceIdx`.
-  string_view sentence(size_t index) const;
+  size_t numWords(size_t sentenceIdx) const;
  /// Adds a sentences from `vector<ByteRange>` representation, internally doing
  /// extra book-keeping for the sentence terminal markings. Sentences are
  /// expected to be added in order as they occur in text.
  void addSentence(std::vector<ByteRange> &sentence);
  /// Returns a ByteRange representing `wordIdx` in sentence indexed by
  /// `sentenceIdx`. `wordIdx` follows 0-based indexing, and should be less than
  /// `.numWords()` for `sentenceIdx` for defined behaviour.
  ByteRange word(size_t sentenceIdx, size_t wordIdx) const;
  /// Returns a ByteRange representing sentence corresponding to `sentenceIdx`.
  /// `sentenceIdx` follows 0-based indexing, and behaviour is defined only when
  /// less than `.numSentences()`.
  ByteRange sentence(size_t sentenceIdx) const;
 private:
-  // A flat storage for string_views. Can be words or sentences.
+  /// A flat storage for ByteRanges. Composed of word ByteRanges, extra
-  std::vector<string_view> flatByteRanges_;
+  /// information in sentenceEndIds_ to denote sentence boundary markers as
  /// indices.
  std::vector<ByteRange> flatByteRanges_;
-  // The container grows dynamically with addSentence. size_t marking index is
+  /// Stores indices onto flatByteRanges_ of where sentences end (not inclusive,
-  // used to ensure the sentence boundaries stay same while underlying storage
+  /// aligned with C++ half interval notions). There is a 0 marker to simplify
-  // might be changed during reallocation.
+  /// sources, indicating where the -1-th sentence ends.
-  std::vector<size_t> sentenceBeginIds_;
+  std::vector<size_t> sentenceEndIds_;
 };
-  // Utility function to extract the string starting at firstWord and ending at
+/// AnnotatedText is effectively std::string text + Annotation, providing the
-  // lastWord as a single string-view.
+/// following additional desiderata.
-  string_view sentenceBetween(string_view firstWord,
+///
-                              string_view lastWord) const;
+/// 1. Access to processed string_views for convenience rather than ByteRanges
 /// (which only provides index information).
 ///
 /// 2. Transparently convert string_views into ByteRanges for the Annotation
 /// referring to the text bound by this structure.
 ///
 /// 3. Bind the text and annotations together, to move around as a meaningful
 /// unit.
 struct AnnotatedText {
  std::string text;      ///< Blob of string elements in annotation refers to.
  Annotation annotation; ///< sentence and (sub-) word annotations.
  /// Construct an empty AnnotatedText. This is useful when the target string or
  /// ByteRanges are not known yet, but the public members can be used to
  /// populate it. One use-case: translated text is created by decoding from
  /// histories and the ByteRanges are only known after the string has been
  /// constructed.
  AnnotatedText() {}
  /// Construct moving in a string (for efficiency purposes, copying string
  /// constructor is disallowed).
  AnnotatedText(std::string &&text) : text(std::move(text)) {}
  /// Move constructor: takes over both the text and its annotation.
  AnnotatedText(AnnotatedText &&annotatedBlob)
      : text(std::move(annotatedBlob.text)),
        annotation(std::move(annotatedBlob.annotation)) {}
  /// Returns the number of sentences in the annotation structure.
  size_t numSentences() const { return annotation.numSentences(); }
  /// Returns number of words in the sentence identified by sentenceIdx.
  size_t numWords(size_t sentenceIdx) const {
    return annotation.numWords(sentenceIdx);
  }
  /// Adds a sentence, used to load from SentencePiece annotations conveniently.
  /// The string_views are expected to point into `text`; they are stored as
  /// offsets relative to its beginning.
  void addSentence(std::vector<string_view> &wordRanges);
  /// Adds a sentence between two iterators, often useful while constructing
  /// from parts of a container.
  void addSentence(std::vector<string_view>::iterator begin,
                   std::vector<string_view>::iterator end);
  /// Returns a string_view representing wordIdx in sentenceIdx
  string_view word(size_t sentenceIdx, size_t wordIdx) const;
  /// Returns a string_view representing sentence corresponding to sentenceIdx.
  string_view sentence(size_t sentenceIdx) const;
  /// Returns a ByteRange representing wordIdx in sentenceIdx
  ByteRange wordAsByteRange(size_t sentenceIdx, size_t wordIdx) const;
  /// Returns a ByteRange representing sentence corresponding to sentenceIdx.
  ByteRange sentenceAsByteRange(size_t sentenceIdx) const;
 private:
  /// Converts a ByteRange on `text` into the string_view covering those bytes.
  string_view asStringView(const ByteRange &byteRange) const;
 };
 } // namespace bergamot
 } // namespace marian
 #endif //  BERGAMOT_SENTENCE_RANGES_H_
--- a/src/translator/service.cpp
+++ b/src/translator/service.cpp
@ -5,25 +5,78 @@
 #include <string>
 #include <utility>
 inline std::vector<marian::Ptr<const marian::Vocab>>
 loadVocabularies(marian::Ptr<marian::Options> options) {
  // Vocabulary paths are read from the "vocabs" option.
  // @TODO: parallelize vocab loading for faster startup
  auto vocabPaths = options->get<std::vector<std::string>>("vocabs");
  // Both a source and a target vocabulary are required.
  ABORT_IF(vocabPaths.size() < 2, "Insufficient number of vocabularies.");
  std::vector<marian::Ptr<marian::Vocab const>> result(vocabPaths.size());
  // Cache keyed by path: a file listed more than once is loaded a single time
  // and the same Vocab instance is stored at every position that names it.
  std::unordered_map<std::string, marian::Ptr<marian::Vocab>> loadedByPath;
  for (size_t idx = 0; idx < result.size(); ++idx) {
    const std::string &path = vocabPaths[idx];
    auto cached = loadedByPath.find(path);
    if (cached == loadedByPath.end()) {
      marian::Ptr<marian::Vocab> fresh =
          marian::New<marian::Vocab>(options, idx);
      fresh->load(path);
      cached = loadedByPath.emplace(path, fresh).first;
    }
    result[idx] = cached->second;
  }
  return result;
 }
 namespace marian {
 namespace bergamot {
-Service::Service(Ptr<Options> options)
+Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory)
-    : ServiceBase(options), numWorkers_(options->get<int>("cpu-threads")),
+    : requestId_(0), vocabs_(loadVocabularies(options)),
-      pcqueue_(numWorkers_) {
+      text_processor_(vocabs_, options), batcher_(options),
      numWorkers_(options->get<int>("cpu-threads")),
      modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))
 #ifndef WASM_COMPATIBLE_SOURCE
      // 0 elements in PCQueue is illegal and can lead to failures. Adding a
      // guard to have at least one entry allocated. In the single-threaded
      // case, while initialized pcqueue_ remains unused.
      ,
      pcqueue_(std::max<size_t>(1, numWorkers_))
 #endif
 {
  // vocabs_ is initialized directly from the vector returned by
  // loadVocabularies(); wrapping that temporary in std::move would only
  // prevent copy elision.
  // cpu-threads == 0 selects blocking mode: a single translator is built and
  // initialized on the calling thread. Any other value builds one translator
  // per worker and starts the worker threads.
  if (numWorkers_ == 0) {
-    ABORT("Fatal: Attempt to create multithreaded instance with --cpu-threads "
+    build_translators(options, /*numTranslators=*/1);
-          "0. ");
+    initialize_blocking_translator();
  } else {
    build_translators(options, numWorkers_);
    initialize_async_translators();
  }
 }
-  translators_.reserve(numWorkers_);
+void Service::build_translators(Ptr<Options> options, size_t numTranslators) {
  // Creates numTranslators translators, one per CPU DeviceId in
  // [0, numTranslators). Each translator receives vocabs_, the options, and
  // pointers to the service-owned modelMemory_ and shortlistMemory_.
  translators_.reserve(numTranslators);
  for (size_t cpuId = 0; cpuId < numTranslators; cpuId++) {
    marian::DeviceId deviceId(cpuId, DeviceType::cpu);
    translators_.emplace_back(deviceId, vocabs_, options, &modelMemory_, &shortlistMemory_);
  }
 }
 void Service::initialize_blocking_translator() {
  translators_.back().initialize();
 }
 void Service::blocking_translate() {
  Batch batch;
  while (batcher_ >> batch) {
    auto &translator = translators_.back();
    translator.translate(batch);
  }
 }
 #ifndef WASM_COMPATIBLE_SOURCE
 void Service::initialize_async_translators() {
  workers_.reserve(numWorkers_);
  for (size_t cpuId = 0; cpuId < numWorkers_; cpuId++) {
-    marian::DeviceId deviceId(cpuId, DeviceType::cpu);
+    auto &translator = translators_[cpuId];
    translators_.emplace_back(deviceId, vocabs_, options);
    auto &translator = translators_.back();
    workers_.emplace_back([&translator, this] {
      translator.initialize();
@ -42,29 +95,58 @@ Service::Service(Ptr<Options> options)
  }
 }
-void Service::enqueue() {
+void Service::async_translate() {
  // Keep pulling batches while `batcher_ >> batch` succeeds and hand each one
  // to the producer-consumer queue via ProduceSwap.
  Batch batch;
  while (batcher_ >> batch) {
    pcqueue_.ProduceSwap(batch);
  }
 }
 #else  // WASM_COMPATIBLE_SOURCE
 // WASM_COMPATIBLE_SOURCE variant: async mode is unavailable in this build, so
 // any attempt to initialize async translators aborts.
 void Service::initialize_async_translators() {
  ABORT("Cannot run in async mode without multithreading.");
 }
 // WASM_COMPATIBLE_SOURCE variant: async mode is unavailable in this build, so
 // any attempt to translate asynchronously aborts.
 void Service::async_translate() {
  ABORT("Cannot run in async mode without multithreading.");
 }
 #endif // WASM_COMPATIBLE_SOURCE
 std::future<Response> Service::translate(std::string &&input) {
  // Ownership of the input string moves into `source`; the text processor
  // then fills in `segments` and annotates `source`.
  AnnotatedText source(std::move(input));
  Segments segments;
  text_processor_.process(source, segments);
  // The caller gets the future; the promise travels with the request.
  std::promise<Response> responsePromise;
  std::future<Response> future = responsePromise.get_future();
  Ptr<Request> request =
      New<Request>(requestId_++, /* lineNumberBegin = */ 0, vocabs_,
                   std::move(source), std::move(segments),
                   std::move(responsePromise));
  batcher_.addWholeRequest(request);
  // With workers configured the batches go through the async path; with zero
  // workers they are translated right here before returning.
  if (numWorkers_ != 0) {
    async_translate();
  } else {
    blocking_translate();
  }
  return future;
 }
 Service::~Service() {
 #ifndef WASM_COMPATIBLE_SOURCE
  for (size_t workerId = 0; workerId < numWorkers_; workerId++) {
 void Service::stop() {
  for (auto &worker : workers_) {
    // One poison batch is produced per worker. NOTE(review): presumably a
    // worker exits its loop on receiving it - confirm in the worker lambda.
    Batch poison = Batch::poison();
    pcqueue_.ProduceSwap(poison);
  }
  // Wait for every worker thread that is still joinable.
-  for (auto &worker : workers_) {
+  for (size_t workerId = 0; workerId < numWorkers_; workerId++) {
-    if (worker.joinable()) {
+    if (workers_[workerId].joinable()) {
-      worker.join();
+      workers_[workerId].join();
    }
  }
-
+#endif
  workers_.clear();
 }
 Service::~Service() { stop(); }
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/service.h
+++ b/src/translator/service.h
@ -4,10 +4,13 @@
 #include "batch_translator.h"
 #include "batcher.h"
 #include "data/types.h"
 #include "pcqueue.h"
 #include "response.h"
 #include "service_base.h"
 #include "text_processor.h"
 #include "translator/parser.h"
 #ifndef WASM_COMPATIBLE_SOURCE
 #include "pcqueue.h"
 #endif
 #include <queue>
 #include <vector>
@ -15,39 +18,104 @@
 namespace marian {
 namespace bergamot {
-class Service : public ServiceBase {
+/// Service exposes methods to translate an incoming blob of text to the
-
+/// Consumer of bergamot API.
-  // Service exposes methods to translate an incoming blob of text to the
+///
-  // Consumer of bergamot API.
+/// An example use of this API looks as follows:
-  //
+///
-  // An example use of this API looks as follows:
+///  options = ...;
-  //
+///  service = Service(options);
-  //  options = ...;
+///  std::string input_text = "Hello World";
-  //  service = Service(options);
+///  std::future<Response>
-  //  std::string input_blob = "Hello World";
+///      response = service.translate(std::move(input_text));
-  //  std::future<Response>
+///  response.wait();
-  //      response = service.translate(std::move(input_blob));
+///  Response result = response.get();
-  //  response.wait();
+///
-  //  Response result = response.get();
+/// Optionally Service can be initialized by also passing model_memory for
 /// purposes of efficiency (which defaults to nullpointer and then reads from
 /// file supplied through config).
 class Service {
 public:
-  explicit Service(Ptr<Options> options);
+  /// @param options Marian options object
-  // Implements enqueue and top through blocking methods.
+  /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
-  void stop() override;
+  /// of a model.bin. Optional, defaults to nullptr when not used
  /// @param shortlistMemory byte array of shortlist (aligned to 64)
  explicit Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory);
  explicit Service(Ptr<Options> options) : Service(options, AlignedMemory(), AlignedMemory()){}
  /// Construct Service from a string configuration.
  /// @param [in] config string parsable as YAML expected to adhere with marian
  /// config
  /// @param [in] modelMemory byte array (aligned to 256!!!) that contains the
  /// bytes of a model.bin. Optional.
  /// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
  explicit Service(const std::string &config,
                   AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory())
      : Service(parseOptions(config), std::move(modelMemory), std::move(shortlistMemory)) {}
  /// Explicit destructor to clean up after any threads initialized in
  /// asynchronous operation mode.
  ~Service();
  /// To stay efficient and to refer to the string for alignments, expects
  /// ownership be moved through std::move(..)
  ///
  ///  @param [in] input rvalue reference of the string to be translated.
  std::future<Response> translate(std::string &&input);
 private:
-  void enqueue() override;
+  /// Build numTranslators number of translators with options from options
  void build_translators(Ptr<Options> options, size_t numTranslators);
  /// Initializes a blocking translator without using std::thread
  void initialize_blocking_translator();
  /// Translates through direct interaction between batcher_ and translators_
  void blocking_translate();
-  // In addition to the common members (text_processor, requestId, vocabs_,
+  /// Launches multiple workers of translators using std::thread
-  // batcher) extends with a producer-consumer queue, vector of translator
+  /// Reduces to ABORT if called when compiled with WASM_COMPATIBLE_SOURCE
-  // instances owned by service each listening to the pcqueue in separate
+  void initialize_async_translators();
-  // threads.
+  /// Async translate produces to a producer-consumer queue as batches are
  /// generated by Batcher. In another thread, the translators consume from
  /// producer-consumer queue.
  /// Reduces to ABORT if called when compiled with WASM_COMPATIBLE_SOURCE
  void async_translate();
-  size_t numWorkers_;      // ORDER DEPENDENCY
+  /// Number of workers to launch.
-  PCQueue<Batch> pcqueue_; // ORDER DEPENDENCY
+  size_t numWorkers_;              // ORDER DEPENDENCY (pcqueue_)
  /// Model memory to load model passed as bytes.
  AlignedMemory modelMemory_;      // ORDER DEPENDENCY (translators_)
  /// Shortlist memory passed as bytes.
  AlignedMemory shortlistMemory_;  // ORDER DEPENDENCY (translators_)
  /// Holds instances of batch translators, just one in case
  /// of single-threaded application, numWorkers_ in case of multithreaded
  /// setting.
  std::vector<BatchTranslator> translators_;  // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
  /// Stores requestId of active request. Used to establish
  /// ordering among requests and logging/book-keeping.
  size_t requestId_;
  /// Store vocabs representing source and target.
  std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY (text_processor_)
  /// TextProcessor takes a blob of text and converts into format consumable by
  /// the batch-translator and annotates sentences and words.
  TextProcessor text_processor_; // ORDER DEPENDENCY (vocabs_)
  /// Batcher handles generation of batches from a request, subject to
  /// packing-efficiency and priority optimization heuristics.
  Batcher batcher_;
  // The following constructs are available providing full capabilities on a non
  // WASM platform, where one does not have to hide threads.
 #ifndef WASM_COMPATIBLE_SOURCE
  PCQueue<Batch> pcqueue_; // ORDER DEPENDENCY (numWorkers_)
  std::vector<std::thread> workers_;
-  std::vector<BatchTranslator> translators_;
+#endif // WASM_COMPATIBLE_SOURCE
 };
 } // namespace bergamot
--- a/src/translator/service_base.cpp
+++ b/src/translator/service_base.cpp
@ -1,42 +0,0 @@
 #include "service_base.h"
 namespace marian {
 namespace bergamot {
 // Starts request ids at 0, loads the vocabularies named in options, then
 // builds the text processor (which is handed vocabs_) and the batcher.
 // vocabs_ is initialized directly from the returned vector; wrapping that
 // temporary in std::move would only prevent copy elision.
 ServiceBase::ServiceBase(Ptr<Options> options)
    : requestId_(0), vocabs_(loadVocabularies(options)),
      text_processor_(vocabs_, options), batcher_(options) {}
 std::future<Response> ServiceBase::translate(std::string &&input) {
  // Split the input into segments and record the matching sentence ranges.
  SentenceRanges sourceRanges;
  Segments segments;
  text_processor_.process(input, segments, sourceRanges);
  // The caller gets the future; the promise travels with the request.
  std::promise<Response> responsePromise;
  std::future<Response> future = responsePromise.get_future();
  Ptr<Request> request =
      New<Request>(requestId_++, /* lineNumberBegin = */ 0, vocabs_,
                   std::move(input), std::move(segments),
                   std::move(sourceRanges), std::move(responsePromise));
  batcher_.addWholeRequest(request);
  // How the queued batches get translated is decided by the subclass.
  enqueue();
  return future;
 }
 // Builds the ServiceBase state, then a single translator on CPU device 0,
 // and initializes that translator immediately.
 NonThreadedService::NonThreadedService(Ptr<Options> options)
    : ServiceBase(options),
      translator_(DeviceId(0, DeviceType::cpu), vocabs_, options) {
  translator_.initialize();
 }
 void NonThreadedService::enqueue() {
  // Single-threaded path: every batch the batcher yields is translated on the
  // calling thread by the one translator.
  for (Batch batch; batcher_ >> batch;) {
    translator_.translate(batch);
  }
 }
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/service_base.h
+++ b/src/translator/service_base.h
@ -1,80 +0,0 @@
 #ifndef SRC_BERGAMOT_SERVICE_BASE_H_
 #define SRC_BERGAMOT_SERVICE_BASE_H_
 #include "batch_translator.h"
 #include "batcher.h"
 #include "data/types.h"
 #include "response.h"
 #include "text_processor.h"
 #include <queue>
 #include <vector>
 namespace marian {
 namespace bergamot {
 // This file describes the base class ServiceBase, and a non-threaded subclass
 // implementing translation functionality called NonThreadedService.
 class ServiceBase {
 public:
  explicit ServiceBase(Ptr<Options> options);
  // ServiceBase is a polymorphic base (stop() and enqueue() are pure virtual),
  // so the destructor is virtual to make destruction through a ServiceBase
  // pointer well-defined.
  virtual ~ServiceBase() = default;
  // Transfers ownership of input string to Service, returns a future containing
  // an object which provides access to translations, other features like
  // sentencemappings and (tentatively) alignments.
  std::future<Response> translate(std::string &&input);
  // Convenience accessor methods to extract these vocabulary outside service.
  // e.g: For use in decoding histories for marian-decoder replacement.
  Ptr<Vocab const> sourceVocab() const { return vocabs_.front(); }
  Ptr<Vocab const> targetVocab() const { return vocabs_.back(); }
  // Wraps up any thread related destruction code.
  virtual void stop() = 0;
 protected:
  // Enqueue queues a request for translation, this can be synchronous, blocking
  // or asynchronous and queued in the background.
  virtual void enqueue() = 0;
  // Id given to the next request; incremented once per translate() call.
  size_t requestId_;
  // vocabs_ must be declared before text_processor_, which is constructed
  // from it.
  std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY
  TextProcessor text_processor_;         // ORDER DEPENDENCY
  Batcher batcher_;
 };
 class NonThreadedService : public ServiceBase {
 public:
  explicit NonThreadedService(Ptr<Options> options);
  // Nothing to wrap up here, so stop() is an empty override.
  void stop() override {}
 private:
  // Implements the base-class enqueue() hook with blocking translation.
  void enqueue() override;
  // The single translator, launched as part of the main process.
  BatchTranslator translator_;
 };
 // Shared by the services: loads the vocabularies named by the "vocabs" option.
 inline std::vector<Ptr<const Vocab>> loadVocabularies(Ptr<Options> options) {
  // @TODO: parallelize vocab loading for faster startup
  auto vocabPaths = options->get<std::vector<std::string>>("vocabs");
  // Both a source and a target vocabulary are required.
  ABORT_IF(vocabPaths.size() < 2, "Insufficient number of vocabularies.");
  std::vector<Ptr<Vocab const>> result(vocabPaths.size());
  // Cache keyed by path: a file listed more than once is loaded a single time
  // and the same Vocab instance is stored at every position that names it.
  std::unordered_map<std::string, Ptr<Vocab>> loadedByPath;
  for (size_t idx = 0; idx < result.size(); ++idx) {
    const std::string &path = vocabPaths[idx];
    auto cached = loadedByPath.find(path);
    if (cached == loadedByPath.end()) {
      Ptr<Vocab> fresh = New<Vocab>(options, idx);
      fresh->load(path);
      cached = loadedByPath.emplace(path, fresh).first;
    }
    result[idx] = cached->second;
  }
  return result;
 }
 } // namespace bergamot
 } // namespace marian
 #endif // SRC_BERGAMOT_SERVICE_BASE_H_
--- a/src/translator/text_processor.cpp
+++ b/src/translator/text_processor.cpp
@ -25,9 +25,9 @@ TextProcessor::TextProcessor(std::vector<Ptr<Vocab const>> &vocabs,
  ABORT_IF(max_length_break_ < 0, "max-length-break cannot be < 0");
 }
-void TextProcessor::process(const string_view &query, Segments &segments,
+void TextProcessor::process(AnnotatedText &source, Segments &segments) {
                            SentenceRanges &sourceRanges) {
  string_view query = string_view(source.text);
  auto sentenceStream = sentence_splitter_.createSentenceStream(query);
  std::string_view sentenceStringPiece;
@ -42,14 +42,14 @@ void TextProcessor::process(const string_view &query, Segments &segments,
    // after normalization. 0 prevents any empty entries from being added.
    if (segment.size() > 0) {
      // Truncate segment into max_input_size segments.
-      truncate(segment, wordRanges, segments, sourceRanges);
+      truncate(segment, wordRanges, segments, source);
    }
  }
 }
 void TextProcessor::truncate(Segment &segment,
                             std::vector<string_view> &wordRanges,
-                             Segments &segments, SentenceRanges &sourceRanges) {
+                             Segments &segments, AnnotatedText &source) {
  for (size_t offset = 0; offset < segment.size();
       offset += max_length_break_) {
    auto start = segment.begin() + offset;
@ -61,7 +61,7 @@ void TextProcessor::truncate(Segment &segment,
    segments.back().push_back(sourceEosId());
    auto astart = wordRanges.begin() + offset;
-    sourceRanges.addSentence(astart, astart + diff);
+    source.addSentence(astart, astart + diff);
  }
 }
--- a/src/translator/text_processor.h
+++ b/src/translator/text_processor.h
@ -23,8 +23,7 @@ class TextProcessor {
 public:
  explicit TextProcessor(std::vector<Ptr<Vocab const>> &vocabs, Ptr<Options>);
-  void process(const string_view &query, Segments &segments,
+  void process(AnnotatedText &source, Segments &segments);
               SentenceRanges &sourceRanges);
 private:
  // Tokenizes an input string, returns Words corresponding. Loads the
@ -34,7 +33,7 @@ private:
  // Truncate sentence into max_input_size segments.
  void truncate(Segment &sentence, std::vector<string_view> &tokenRanges,
-                Segments &segments, SentenceRanges &sourceRanges);
+                Segments &segments, AnnotatedText &source);
  // Shorthand for the end-of-sentence id reported by the first vocabulary in
  // vocabs_; used only in truncate().
  const Word sourceEosId() const { return vocabs_->front()->getEosId(); }
--- a/wasm/patch-artifacts-enable-wormhole.sh
+++ b/wasm/patch-artifacts-enable-wormhole.sh
@ -0,0 +1,7 @@
 #!/bin/bash
 # Patches wasm/bergamot-translator-worker.js (path relative to the current
 # directory) so that each WebAssembly compile/instantiate call it contains
 # also passes {simdWormhole:true}.
 # Abort on the first failure so that "Done" is printed only after the file
 # has actually been patched.
 set -euo pipefail
 WORKER_JS="wasm/bergamot-translator-worker.js"
 echo "Patching wasm artifacts to enable wormhole via APIs that compile and instantiate wasm module"
 # All three substitutions run in a single sed invocation: separate
 # `sed -i.bak` runs would each overwrite the .bak file, leaving a backup that
 # is already partially patched instead of the original.
 sed -i.bak \
   -e 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' \
   -e 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' \
   -e 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' \
   "$WORKER_JS"
 echo "Done"
--- a/wasm/test_page/start_server.sh
+++ b/wasm/test_page/start_server.sh
@ -4,13 +4,6 @@ cp ../../build-wasm/wasm/bergamot-translator-worker.data .
 cp ../../build-wasm/wasm/bergamot-translator-worker.js .
 cp ../../build-wasm/wasm/bergamot-translator-worker.wasm .
 cp ../../build-wasm/wasm/bergamot-translator-worker.worker.js .
 echo "Done----"
 echo "Start: Enabling wormhole via APIs that compile and instantiate wasm module-------"
 sed -i.bak 's/var result = WebAssembly.instantiateStreaming(response, info);/var result = WebAssembly.instantiateStreaming(response, info, {simdWormhole:true});/g' bergamot-translator-worker.js
 sed -i.bak 's/return WebAssembly.instantiate(binary, info);/return WebAssembly.instantiate(binary, info, {simdWormhole:true});/g' bergamot-translator-worker.js
 sed -i.bak 's/var module = new WebAssembly.Module(bytes);/var module = new WebAssembly.Module(bytes, {simdWormhole:true});/g' bergamot-translator-worker.js
 echo "Done: Enabling wormhole via APIs that compile and instantiate wasm module--------"
 npm install
 echo "Start httpserver"
		`@ -1 +1 @@`
			`Subproject commit 8ddb73fad1001ae4c1697d2514ac1e5bd43e2ed3`				`Subproject commit 0f0bcf99626c660227bb68b76267a8d2451e7172`
		`@ -0,0 +1,2 @@`
							`#define CATCH_CONFIG_MAIN`
							`#include "catch.hpp"`