Merge remote-tracking branch 'upstream/main' into main

2024-10-10 12:07:39 +03:00 · 2021-05-12 08:53:25 +02:00 · 2021-05-12 08:53:25 +02:00 · 451ab047ff
commit 451ab047ff
parent 743ebcd3bc d7cb859ab7
29 changed files with 252 additions and 242 deletions
--- a/.github/workflows/native-mac.yml
+++ b/.github/workflows/native-mac.yml
@ -14,7 +14,7 @@ jobs:
        include:
          - name: "full-marian"
            os: macos-10.15
-            test_tags: ""
+            test_tags: "'#mac'"
            cmake: 
              CMAKE_BUILD_TYPE: "Release"
              COMPILE_TESTS: "ON"
@ -23,6 +23,7 @@ jobs:
              USE_STATIC_LIBS: "OFF"
              COMPILE_SERVER: "OFF"
              COMPILE_EXAMPLES: "OFF"
+              USE_APPLE_ACCELERATE: "OFF"

          - name: "minimal-marian"
            os: macos-10.15
@ -37,6 +38,8 @@ jobs:
              USE_STATIC_LIBS: "ON" 
              COMPILE_SERVER: "OFF"
              COMPILE_EXAMPLES: "OFF"
+              USE_APPLE_ACCELERATE: "OFF"
+
        
    name: ${{ matrix.name }}
    runs-on: ${{ matrix.os }}
@ -66,13 +69,14 @@ jobs:
      run: |
        mkdir -p build
        cd build
-        cmake .. \
+        cmake -L .. \
          -DCMAKE_BUILD_TYPE=${{ matrix.cmake.CMAKE_BUILD_TYPE }}\
          -DCOMPILE_TESTS=${{ matrix.cmake.COMPILE_TESTS }}\
          -DCOMPILE_EXAMPLES=${{ matrix.cmake.COMPILE_EXAMPLES }} \
          -DCOMPILE_SERVER=${{ matrix.cmake.COMPILE_SERVER }} \
          -DUSE_STATIC_LIBS=${{ matrix.cmake.USE_STATIC_LIBS }} \
          -DUSE_WASM_COMPATIBLE_SOURCE=${{ matrix.cmake.USE_WASM_COMPATIBLE_SOURCE }} \
+          -DUSE_APPLE_ACCELERATE=${{ matrix.cmake.USE_APPLE_ACCELERATE }} \
          -DUSE_FBGEMM=${{ matrix.cmake.USE_FBGEMM }}

    - name: Compile
--- a/3rd_party/marian-dev
+++ b/3rd_party/marian-dev
@ -1 +1 @@
-Subproject commit 94aeaa4616a0fb01ac95a23f0e74a214a94e7609
+Subproject commit 03db505fda750fdecf8000d7ef7dd78dae65861c
--- a/3rd_party/ssplit-cpp
+++ b/3rd_party/ssplit-cpp
@ -1 +1 @@
-Subproject commit 8d338ed5c77d22f8c86f60554596fa57bf5091e6
+Subproject commit 177ee2a326b733f8395842f01b197637047de9f6
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -4,6 +4,10 @@ if (POLICY CMP0074)
  cmake_policy(SET CMP0074 NEW) # CMake 3.12
 endif ()

+if (POLICY CMP0077)
+  cmake_policy(SET CMP0077 NEW)
+endif()
+
 project(bergamot_translator CXX C)

 set(CMAKE_CXX_STANDARD 17)
@ -35,7 +39,7 @@ include(CMakeDependentOption)

 # Project specific cmake options
 option(COMPILE_WASM "Compile for WASM" OFF)
-option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" ON)
+cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" OFF "NOT COMPILE_WASM" ON)
 option(COMPILE_TESTS "Compile bergamot-tests" OFF)

 SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be packaged (pre-loaded) in wasm builds")
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@ -1,16 +1,10 @@
 add_executable(bergamot-translator-app bergamot-translator-app.cpp)
 target_link_libraries(bergamot-translator-app PRIVATE bergamot-translator)

-add_executable(bergamot-translator-app-bytearray bergamot-translator-app-bytearray.cpp)
-target_link_libraries(bergamot-translator-app-bytearray PRIVATE bergamot-translator)
-
 if (NOT USE_WASM_COMPATIBLE_SOURCE)
    add_executable(service-cli service-cli.cpp)
    target_link_libraries(service-cli PRIVATE bergamot-translator)

-    add_executable(service-cli-bytearray service-cli-bytearray.cpp)
-    target_link_libraries(service-cli-bytearray PRIVATE bergamot-translator)
-
    add_executable(marian-decoder-new marian-decoder-new.cpp)
    target_link_libraries(marian-decoder-new PRIVATE bergamot-translator)
 endif()
--- a/app/bergamot-translator-app-bytearray.cpp
+++ b/app/bergamot-translator-app-bytearray.cpp
@ -1,40 +0,0 @@
-/*
- * main.cpp
- *
- * An example application to demonstrate the use of Bergamot translator.
- *
- */
-
-#include <iostream>
-
-#include "translator/byte_array_util.h"
-#include "translator/parser.h"
-#include "translator/service.h"
-
-int main(int argc, char **argv) {
-
-  // Create a configParser and load command line parameters into a YAML config
-  // string.
-  auto configParser = marian::bergamot::createConfigParser();
-  auto options = configParser.parseOptions(argc, argv, true);
-  std::string config = options->asYamlString();
-
-  // Route the config string to construct marian model through TranslationModel
-  marian::bergamot::Service model(
-      config, marian::bergamot::getModelMemoryFromConfig(options));
-
-  TranslationRequest translationRequest;
-  std::vector<std::string> texts;
-
-  for (std::string line; std::getline(std::cin, line);) {
-    texts.emplace_back(line);
-  }
-
-  auto results = model.translateMultiple(std::move(texts), translationRequest);
-
-  for (auto &result : results) {
-    std::cout << result.getTranslatedText() << std::endl;
-  }
-
-  return 0;
-}
--- a/app/service-cli-bytearray.cpp
+++ b/app/service-cli-bytearray.cpp
@ -1,92 +0,0 @@
-#include <cstdlib>
-#include <future>
-#include <iostream>
-#include <sstream>
-
-#include "common/definitions.h"
-#include "common/utils.h"
-#include "marian.h"
-#include "translator/parser.h"
-#include "translator/response.h"
-#include "translator/service.h"
-#include "translator/byte_array_util.h"
-
-int main(int argc, char *argv[]) {
-  auto cp = marian::bergamot::createConfigParser();
-  auto options = cp.parseOptions(argc, argv, true);
-
-  // Prepare memories for model and shortlist
-  marian::bergamot::AlignedMemory modelBytes = marian::bergamot::getModelMemoryFromConfig(options);
-  marian::bergamot::AlignedMemory shortlistBytes = marian::bergamot::getShortlistMemoryFromConfig(options);
-
-  marian::bergamot::Service service(options, std::move(modelBytes), std::move(shortlistBytes));
-
-  // Read a large input text blob from stdin
-  std::ostringstream std_input;
-  std_input << std::cin.rdbuf();
-  std::string input = std_input.str();
-  using marian::bergamot::Response;
-
-  marian::bergamot::ResponseOptions responseOptions;
-  responseOptions.qualityScores = true;
-  responseOptions.alignment = true;
-  responseOptions.alignmentThreshold = 0.2f;
-
-  // Wait on future until Response is complete
-  std::future<Response> responseFuture =
-      service.translate(std::move(input), responseOptions);
-  responseFuture.wait();
-  Response response = responseFuture.get();
-
-  std::cout << "[original]: " << response.source.text << '\n';
-  std::cout << "[translated]: " << response.target.text << '\n';
-  for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
-    std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx)
-              << '\n';
-    std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx)
-              << '\n';
-    std::cout << "Alignments" << '\n';
-    typedef std::pair<size_t, float> Point;
-
-    // Initialize a point vector.
-    std::vector<std::vector<Point>> aggregate(
-        response.source.numWords(sentenceIdx));
-
-    // Handle alignments
-    auto &alignments = response.alignments[sentenceIdx];
-    for (auto &p : alignments) {
-      aggregate[p.src].emplace_back(p.tgt, p.prob);
-    }
-
-    for (size_t src = 0; src < aggregate.size(); src++) {
-      std::cout << response.source.word(sentenceIdx, src) << ": ";
-      for (auto &p : aggregate[src]) {
-        std::cout << response.target.word(sentenceIdx, p.first) << "("
-                  << p.second << ") ";
-      }
-      std::cout << '\n';
-    }
-
-    // Handle quality.
-    auto &quality = response.qualityScores[sentenceIdx];
-    std::cout << "Quality: whole(" << quality.sequence
-              << "), tokens below:" << '\n';
-    size_t wordIdx = 0;
-    bool first = true;
-    for (auto &p : quality.word) {
-      if (first) {
-        first = false;
-      } else {
-        std::cout << " ";
-      }
-      std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p
-                << ")";
-      wordIdx++;
-    }
-    std::cout << '\n';
-  }
-  std::cout << "--------------------------\n";
-  std::cout << '\n';
-
-  return 0;
-}
--- a/app/service-cli.cpp
+++ b/app/service-cli.cpp
@ -6,6 +6,7 @@
 #include "common/definitions.h"
 #include "common/utils.h"
 #include "marian.h"
+#include "translator/byte_array_util.h"
 #include "translator/parser.h"
 #include "translator/response.h"
 #include "translator/response_options.h"
@ -14,7 +15,20 @@
 int main(int argc, char *argv[]) {
  auto cp = marian::bergamot::createConfigParser();
  auto options = cp.parseOptions(argc, argv, true);
-  marian::bergamot::Service service(options);
+
+  // Prepare memories for model and shortlist
+  marian::bergamot::AlignedMemory modelBytes, shortlistBytes;
+  std::vector<std::shared_ptr<marian::bergamot::AlignedMemory>> vocabsBytes;
+
+  if (options->get<bool>("check-bytearray")) {
+    // Load legit values into bytearrays.
+    modelBytes = marian::bergamot::getModelMemoryFromConfig(options);
+    shortlistBytes = marian::bergamot::getShortlistMemoryFromConfig(options);
+    marian::bergamot::getVocabsMemoryFromConfig(options, vocabsBytes);
+  }
+
+  marian::bergamot::Service service(options, std::move(modelBytes),
+                                    std::move(shortlistBytes), std::move(vocabsBytes));

  // Read a large input text blob from stdin
  std::ostringstream std_input;
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 3771001720a8f01bba185ee5d5d908b7c266ef31
+Subproject commit 9209aa51e71f57b90172ffd259cf3021c4890bcf
--- a/src/tests/annotation_tests.cpp
+++ b/src/tests/annotation_tests.cpp
@ -1,5 +1,5 @@
 #include "catch.hpp"
-#include "translator/sentence_ranges.h"
+#include "translator/annotation.h"
 #include <random>
 #include <vector>

--- a/src/translator/CMakeLists.txt
+++ b/src/translator/CMakeLists.txt
@ -7,7 +7,7 @@ add_library(bergamot-translator STATIC
    batcher.cpp
    response_builder.cpp
    batch.cpp
-    sentence_ranges.cpp
+    annotation.cpp
    service.cpp
 )
 if (USE_WASM_COMPATIBLE_SOURCE)
--- a/src/translator/sentence_ranges.cpp
+++ b/src/translator/sentence_ranges.cpp
@ -1,4 +1,4 @@
-#include "sentence_ranges.h"
+#include "annotation.h"
 #include <cassert>
 #include <iostream>

@ -63,7 +63,7 @@ void AnnotatedText::appendSentence(std::string prefix, std::string &reference,
  text += reference;           // Append reference to text
  std::vector<ByteRange> sentence;
  for (auto &wordView : wordRanges) {
-    size_t thisWordBegin = offset + wordView.data() - &reference[0];
+    size_t thisWordBegin = offset + wordView.data() - reference.data();
    sentence.push_back(
        ByteRange{thisWordBegin, thisWordBegin + wordView.size()});
  }
@ -78,7 +78,7 @@ void AnnotatedText::addSentence(std::vector<string_view>::iterator begin,
                                std::vector<string_view>::iterator end) {
  std::vector<ByteRange> sentence;
  for (auto p = begin; p != end; p++) {
-    size_t begin_offset = p->data() - &text[0];
+    size_t begin_offset = p->data() - text.data();
    sentence.push_back(ByteRange{begin_offset, begin_offset + p->size()});
  }
  annotation.addSentence(sentence);
@ -99,5 +99,33 @@ string_view AnnotatedText::asStringView(const ByteRange &byteRange) const {
  return string_view(data, size);
 }

+string_view AnnotatedText::gap(size_t sentenceIdx) const {
+  // Locate the start of the filler text that precedes sentence `sentenceIdx`;
+  // the corner case is when there is no sentence before it.
+  const char *start = nullptr;
+  if (sentenceIdx == 0) {
+    // No previous sentence: the filler begins at the start of the whole text.
+    start = text.data();
+  } else {
+    // Otherwise the filler begins right after sentence `sentenceIdx - 1` ends.
+    string_view sentenceBefore = sentence(sentenceIdx - 1);
+    start = sentenceBefore.data() + sentenceBefore.size();
+  }
+
+  // Locate the end of the filler text; the corner case is the trailing gap.
+  const char *end = nullptr;
+  if (sentenceIdx == numSentences()) {
+    // One past the last sentence: the filler runs to the end of the whole text.
+    const char *begin = text.data();
+    end = begin + text.size();
+  } else {
+    // Otherwise the filler ends where sentence `sentenceIdx` begins.
+    string_view sentenceAfter = sentence(sentenceIdx);
+    end = sentenceAfter.data();
+  }
+
+  return string_view(start, end - start);
+}
+
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/sentence_ranges.h
+++ b/src/translator/sentence_ranges.h
@ -151,6 +151,19 @@ public:
  /// Returns a string_view representing sentence corresponding to sentenceIdx.
  string_view sentence(size_t sentenceIdx) const;

+  /// Returns the string_view of the gap between two sentences in the container.
+  ///
+  /// More precisely, with `i = sentenceIdx` and `N = numSentences()` for brevity:
+  ///
+  /// * For `i = 0`: the gap between the start of text and the first sentence.
+  /// * For `i = 1...N-1`: the text comprising the gap
+  ///   between the `i-1`-th and `i`-th sentence.
+  /// * For `i = N`: the gap between the last sentence and the end of
+  ///   text.
+  ///
+  /// @param sentenceIdx Index of the gap, in the range `[0, numSentences()]`.
+  string_view gap(size_t sentenceIdx) const;
+
  /// Returns a ByteRange representing wordIdx in sentenceIdx
  ByteRange wordAsByteRange(size_t sentenceIdx, size_t wordIdx) const;

--- a/src/translator/batch_translator.cpp
+++ b/src/translator/batch_translator.cpp
@ -4,6 +4,7 @@
 #include "data/corpus.h"
 #include "data/text_input.h"
 #include "translator/beam_search.h"
+#include "byte_array_util.h"

 namespace marian {
 namespace bergamot {
@ -18,11 +19,11 @@ BatchTranslator::BatchTranslator(DeviceId const device,

 void BatchTranslator::initialize() {
  // Initializes the graph.
+  bool check = options_->get<bool>("check-bytearray",false); // Flag holds whether validate the bytearray (model and shortlist)
  if (options_->hasAndNotEmpty("shortlist")) {
    int srcIdx = 0, trgIdx = 1;
    bool shared_vcb = vocabs_->front() == vocabs_->back();
    if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) {
-      bool check = options_->get<bool>("check-bytearray",true);
      slgen_ = New<data::BinaryShortlistGenerator>(shortlistMemory_->begin(), shortlistMemory_->size(),
                                                     vocabs_->front(), vocabs_->back(),
                                                     srcIdx, trgIdx, shared_vcb, check);
@ -45,6 +46,10 @@ void BatchTranslator::initialize() {
  if (modelMemory_->size() > 0 && modelMemory_->begin() != nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file
    ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0,
             "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
+    if (check) {
+      ABORT_IF(!validateBinaryModel(*modelMemory_, modelMemory_->size()),
+               "The binary file is invalid. Incomplete or corrupted download?");
+    }
    const std::vector<const void *> container = {modelMemory_->begin()}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding.
    scorers_ = createScorers(options_, container);
  } else {
--- a/src/translator/byte_array_util.cpp
+++ b/src/translator/byte_array_util.cpp
@ -1,12 +1,12 @@
 #include "byte_array_util.h"
 #include <stdlib.h>
 #include <iostream>
+#include <memory>

 namespace marian {
 namespace bergamot {

 namespace {
-
 // This is a basic validator that checks if the file has not been truncated
 // it basically loads up the header and checks

@ -26,9 +26,10 @@ const T* get(const void*& current, uint64_t num = 1) {
  current = (const T*)current + num;
  return ptr;
 }
+} // Anonymous namespace

-bool validateBinaryModel(AlignedMemory& model, uint64_t fileSize) {
-  const void * current = &model[0];
+bool validateBinaryModel(const AlignedMemory& model, uint64_t fileSize) {
+  const void * current = model.begin();
  uint64_t memoryNeeded = sizeof(uint64_t)*2; // We keep track of how much memory we would need if we have a complete file
  uint64_t numHeaders;
  if (fileSize >= memoryNeeded) { // We have enough filesize to fetch the headers.
@ -76,8 +77,6 @@ bool validateBinaryModel(AlignedMemory& model, uint64_t fileSize) {
  }
 }

-} // Anonymous namespace
-
 AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
  uint64_t fileSize = filesystem::fileSize(path);
  io::InputFileStream in(path);
@ -89,13 +88,12 @@ AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
 }

 AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options){
-    auto models = options->get<std::vector<std::string>>("models");
-    ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
-    marian::filesystem::Path modelPath(models[0]);
-    ABORT_IF(modelPath.extension() != marian::filesystem::Path(".bin"), "The file of binary model should end with .bin");
-    AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
-    ABORT_IF(!validateBinaryModel(alignedMemory, alignedMemory.size()), "The binary file is invalid. Incomplete or corrupted download?");
-    return alignedMemory;
+  auto models = options->get<std::vector<std::string>>("models");
+  ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
+  marian::filesystem::Path modelPath(models[0]);
+  ABORT_IF(modelPath.extension() != marian::filesystem::Path(".bin"), "The file of binary model should end with .bin");
+  AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
+  return alignedMemory;
 }

 AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options){
@ -104,5 +102,20 @@ AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options)
  return loadFileToMemory(shortlist[0], 64);
 }

+void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
+                               std::vector<std::shared_ptr<AlignedMemory>>& vocabMemories){
+  auto vfiles = options->get<std::vector<std::string>>("vocabs");
+  ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
+  vocabMemories.resize(vfiles.size());
+  std::unordered_map<std::string, std::shared_ptr<AlignedMemory>> vocabMap;
+  for (size_t i = 0; i < vfiles.size(); ++i) {
+    auto m = vocabMap.emplace(std::make_pair(vfiles[i], std::shared_ptr<AlignedMemory>()));
+    if (m.second) {
+      m.first->second = std::make_shared<AlignedMemory>(loadFileToMemory(vfiles[i], 64));
+    }
+    vocabMemories[i] = m.first->second;
+  }
+}
+
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/byte_array_util.h
+++ b/src/translator/byte_array_util.h
@ -7,6 +7,8 @@ namespace bergamot {
 AlignedMemory loadFileToMemory(const std::string& path, size_t alignment);
 AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
 AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options);
-
+void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
+                               std::vector<std::shared_ptr<AlignedMemory>>& vocabMemories);
+bool validateBinaryModel(const AlignedMemory& model, uint64_t fileSize);
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/definitions.h
+++ b/src/translator/definitions.h
@ -11,16 +11,6 @@ namespace bergamot {

 typedef marian::Words Segment;
 typedef std::vector<Segment> Segments;
-typedef std::vector<marian::string_view> TokenRanges;
-typedef std::vector<TokenRanges> SentenceTokenRanges;
-
-/** @brief Creates unique_ptr any type, passes all arguments to any available
- *  * constructor */
-template <class T, typename... Args> UPtr<T> UNew(Args &&... args) {
-  return UPtr<T>(new T(std::forward<Args>(args)...));
-}
-
-template <class T> UPtr<T> UNew(UPtr<T> p) { return UPtr<T>(p); }

 /// Shortcut to AlignedVector<char> for byte arrays
 typedef AlignedVector<char> AlignedMemory;
--- a/src/translator/pcqueue.h
+++ b/src/translator/pcqueue.h
@ -113,17 +113,15 @@ class Semaphore {


    void wait() {
-      while (true) {
-        switch (WaitForSingleObject(sem_, 0L)) {
-          case WAIT_OBJECT_0:
-            return;
-          case WAIT_ABANDONED:
-            ABORT("A semaphore can't be abandoned, confused by Windows");
-          case WAIT_TIMEOUT:
-            continue;
-          case WAIT_FAILED:
-            ABORT("Waiting on Semaphore failed {}", GetLastError());
-        }
+      switch (WaitForSingleObject(sem_, INFINITE)) {
+        case WAIT_OBJECT_0:
+          return;
+        case WAIT_ABANDONED:
+          ABORT("A semaphore can't be abandoned, confused by Windows");
+        case WAIT_TIMEOUT:
+          ABORT("Timeout on an infinite wait?");
+        case WAIT_FAILED:
+          ABORT("Waiting on Semaphore failed {}", GetLastError());
      }
    }

--- a/src/translator/request.cpp
+++ b/src/translator/request.cpp
@ -1,7 +1,7 @@
 #include "request.h"
 #include "definitions.h"
 #include "response.h"
-#include "sentence_ranges.h"
+#include "annotation.h"

 #include "common/logging.h"

--- a/src/translator/request.h
+++ b/src/translator/request.h
@ -4,7 +4,7 @@
 #include "definitions.h"
 #include "response.h"
 #include "response_builder.h"
-#include "sentence_ranges.h"
+#include "annotation.h"

 #include "common/logging.h"
 #include "data/types.h"
--- a/src/translator/response.h
+++ b/src/translator/response.h
@ -4,7 +4,7 @@
 #include "data/alignment.h"
 #include "data/types.h"
 #include "definitions.h"
-#include "sentence_ranges.h"
+#include "annotation.h"
 #include "translator/beam_search.h"

 #include <cassert>
--- a/src/translator/response_builder.cpp
+++ b/src/translator/response_builder.cpp
@ -1,4 +1,5 @@
 #include "response_builder.h"
+#include "response_options.h"

 namespace marian {
 namespace bergamot {
@ -56,11 +57,10 @@ void ResponseBuilder::buildTranslatedText(Histories &histories,
  // thing to do to avoid reallocations.
  response.target.text.reserve(response.source.text.size());

-  size_t offset{0};
-  bool first{true};
-
-  for (auto &history : histories) {
+  for (size_t sentenceIdx = 0; sentenceIdx < histories.size(); sentenceIdx++) {
    // TODO(jerin): Change hardcode of nBest = 1
+
+    auto &history = histories[sentenceIdx];
    NBestList onebest = history->nBest(1);

    Result result = onebest[0]; // Expecting only one result;
@ -71,15 +71,33 @@ void ResponseBuilder::buildTranslatedText(Histories &histories,
    std::vector<string_view> targetSentenceMappings;
    targetVocab->decodeWithByteRanges(words, decoded, targetSentenceMappings);

-    // delimiter can be used to fill in the blanks from source as well.
-    std::string delimiter;
-    if (first) {
-      first = false;
-    } else {
-      delimiter = " ";
+    switch (responseOptions_.concatStrategy) {
+    case ConcatStrategy::FAITHFUL: {
+      // For each sentence, prepend the filler text between the corresponding
+      // source-sentence and the source-sentence before.
+      string_view pre = response.source.gap(sentenceIdx);
+      response.target.appendSentence(std::string(pre.data(), pre.size()),
+                                     decoded, targetSentenceMappings);
+
+      // If this is the last history to be decoded and translated-text
+      // constructed, append the text till the end, which could be spaces or
+      // empty.
+      if (sentenceIdx + 1 == histories.size()) {
+        string_view post = response.source.gap(sentenceIdx + 1);
+        response.target.text += std::string(post.data(), post.size());
+      }
+      break;
+    }
+    case ConcatStrategy::SPACE: {
+      std::string delimiter = (sentenceIdx == 0) ? "" : " ";
+      response.target.appendSentence(delimiter, decoded,
+                                     targetSentenceMappings);
+      break;
    }

-    response.target.appendSentence(delimiter, decoded, targetSentenceMappings);
+    default:
+      ABORT("Unknown concat-strategy");
+    }
  }
 }

--- a/src/translator/service.cpp
+++ b/src/translator/service.cpp
@ -6,21 +6,34 @@
 #include <utility>

 inline std::vector<marian::Ptr<const marian::Vocab>>
-loadVocabularies(marian::Ptr<marian::Options> options) {
+loadVocabularies(marian::Ptr<marian::Options> options,
+                 std::vector<std::shared_ptr<marian::bergamot::AlignedMemory>>&& vocabMemories) {
  // @TODO: parallelize vocab loading for faster startup
-  auto vfiles = options->get<std::vector<std::string>>("vocabs");
-  // with the current setup, we need at least two vocabs: src and trg
-  ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
-  std::vector<marian::Ptr<marian::Vocab const>> vocabs(vfiles.size());
-  std::unordered_map<std::string, marian::Ptr<marian::Vocab>> vmap;
-  for (size_t i = 0; i < vocabs.size(); ++i) {
-    auto m =
-        vmap.emplace(std::make_pair(vfiles[i], marian::Ptr<marian::Vocab>()));
-    if (m.second) { // new: load the vocab
-      m.first->second = marian::New<marian::Vocab>(options, i);
-      m.first->second->load(vfiles[i]);
+  std::vector<marian::Ptr<marian::Vocab const>> vocabs;
+  if(!vocabMemories.empty()){
+    // load vocabs from buffer
+    ABORT_IF(vocabMemories.size() < 2, "Insufficient number of vocabularies.");
+    vocabs.resize(vocabMemories.size());
+    for (size_t i = 0; i < vocabs.size(); i++) {
+      marian::Ptr<marian::Vocab> vocab = marian::New<marian::Vocab>(options, i);
+      vocab->loadFromSerialized(absl::string_view(vocabMemories[i]->begin(), vocabMemories[i]->size()));
+      vocabs[i] = vocab;
+    }
+  } else {
+    // load vocabs from file
+    auto vfiles = options->get<std::vector<std::string>>("vocabs");
+    // with the current setup, we need at least two vocabs: src and trg
+    ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
+    vocabs.resize(vfiles.size());
+    std::unordered_map<std::string, marian::Ptr<marian::Vocab>> vmap;
+    for (size_t i = 0; i < vocabs.size(); ++i) {
+      auto m = vmap.emplace(std::make_pair(vfiles[i], marian::Ptr<marian::Vocab>()));
+      if (m.second) { // new: load the vocab
+        m.first->second = marian::New<marian::Vocab>(options, i);
+        m.first->second->load(vfiles[i]);
+      }
+      vocabs[i] = m.first->second;
    }
-    vocabs[i] = m.first->second;
  }
  return vocabs;
 }
@ -28,11 +41,14 @@ loadVocabularies(marian::Ptr<marian::Options> options) {
 namespace marian {
 namespace bergamot {

-Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory) 
-    : requestId_(0), options_(options), vocabs_(std::move(loadVocabularies(options))),
+Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory,
+                 std::vector<std::shared_ptr<AlignedMemory>> vocabMemories)
+    : requestId_(0), options_(options),
+      vocabs_(std::move(loadVocabularies(options, std::move(vocabMemories)))),
      text_processor_(vocabs_, options), batcher_(options),
      numWorkers_(options->get<int>("cpu-threads")),
-      modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))
+      modelMemory_(std::move(modelMemory)),
+      shortlistMemory_(std::move(shortlistMemory))
 #ifndef WASM_COMPATIBLE_SOURCE
      // 0 elements in PCQueue is illegal and can lead to failures. Adding a
      // guard to have at least one entry allocated. In the single-threaded
--- a/src/translator/service.h
+++ b/src/translator/service.h
@ -64,10 +64,12 @@ class Service {
 public:
  /// @param options Marian options object
  /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
-  /// of a model.bin. Optional, defaults to nullptr when not used
+  /// of a model.bin.
  /// @param shortlistMemory byte array of shortlist (aligned to 64)
+  /// @param vocabMemories vector of vocabulary memories (aligned to 64)
  explicit Service(Ptr<Options> options, AlignedMemory modelMemory,
-                   AlignedMemory shortlistMemory);
+                   AlignedMemory shortlistMemory,
+                   std::vector<std::shared_ptr<AlignedMemory>> vocabMemories);

  /// Construct Service purely from Options. This expects options which
  /// marian-decoder expects to be set for loading model shortlist and
@ -76,24 +78,30 @@ public:
  ///
  /// This is equivalent to a call to:
  /// ```cpp
-  ///    Service(options, AlignedMemory(),  AlignedMemory())
+  ///    Service(options, AlignedMemory(), AlignedMemory(), {})
  /// ```
  /// wherein empty memory is passed and internal flow defaults to file-based
-  /// model, shortlist loading.
+  /// model, shortlist loading. AlignedMemory() corresponds to empty memory
  explicit Service(Ptr<Options> options)
-      : Service(options, AlignedMemory(), AlignedMemory()) {}
+      : Service(options, AlignedMemory(), AlignedMemory(), {}) {}

  /// Construct Service from a string configuration.
  /// @param [in] config string parsable as YAML expected to adhere with marian
  /// config
-  /// @param [in] model_memory byte array (aligned to 256!!!) that contains the
-  /// bytes of a model.bin. Optional.
-  /// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
+  /// @param [in] modelMemory byte array (aligned to 256!!!) that contains the
+  /// bytes of a model.bin. Optional. AlignedMemory() corresponds to empty memory.
+  /// @param [in] shortlistMemory byte array of shortlist (aligned to 64). Optional.
+  /// @param [in] vocabsMemories vector of vocabulary memories (aligned to 64). Optional.
+  /// If two vocabularies are the same (based on the filenames), two entries (shared
+  /// pointers) will be generated which share the same AlignedMemory object.
  explicit Service(const std::string &config,
                   AlignedMemory modelMemory = AlignedMemory(),
-                   AlignedMemory shortlistMemory = AlignedMemory())
+                   AlignedMemory shortlistMemory = AlignedMemory(),
+                   std::vector<std::shared_ptr<AlignedMemory>> vocabsMemories = {})
      : Service(parseOptions(config, /*validate=*/false),
-                std::move(modelMemory), std::move(shortlistMemory)) {}
+                std::move(modelMemory),
+                std::move(shortlistMemory),
+                std::move(vocabsMemories)) {}

  /// Explicit destructor to clean up after any threads initialized in
  /// asynchronous operation mode.
@ -187,7 +195,6 @@ private:
  /// ordering among requests and logging/book-keeping.

  size_t requestId_;
-
  /// Store vocabs representing source and target.
  std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY (text_processor_)

--- a/src/translator/text_processor.cpp
+++ b/src/translator/text_processor.cpp
@ -1,7 +1,7 @@
 #include "text_processor.h"
 #include "data/types.h"
 #include "definitions.h"
-#include "sentence_ranges.h"
+#include "annotation.h"

 #include "common/options.h"
 #include "data/vocab.h"
--- a/src/translator/text_processor.h
+++ b/src/translator/text_processor.h
@ -4,7 +4,7 @@
 #include "data/types.h"
 #include "data/vocab.h"
 #include "definitions.h"
-#include "sentence_ranges.h"
+#include "annotation.h"

 #include "sentence_splitter.h"

--- a/wasm/CMakeLists.txt
+++ b/wasm/CMakeLists.txt
@ -14,7 +14,7 @@ target_include_directories(bergamot-translator-worker
 target_compile_definitions(bergamot-translator-worker PRIVATE WASM_BINDINGS)
 target_compile_options(bergamot-translator-worker PRIVATE ${WASM_COMPILE_FLAGS})

-set(LINKER_FLAGS "--bind -s ASSERTIONS=0 -s DISABLE_EXCEPTION_CATCHING=1 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -s NO_DYNAMIC_EXECUTION=1")
+set(LINKER_FLAGS "-g2 --bind -s ASSERTIONS=0 -s DISABLE_EXCEPTION_CATCHING=1 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -s NO_DYNAMIC_EXECUTION=1 -s EXPORTED_RUNTIME_METHODS=[addOnPreMain]")
 if (NOT PACKAGE_DIR STREQUAL "")
  get_filename_component(REALPATH_PACKAGE_DIR ${PACKAGE_DIR} REALPATH BASE_DIR ${CMAKE_BINARY_DIR})
  set(LINKER_FLAGS "${LINKER_FLAGS} --preload-file ${REALPATH_PACKAGE_DIR}@/")
--- a/wasm/bindings/TranslationModelBindings.cpp
+++ b/wasm/bindings/TranslationModelBindings.cpp
@ -13,23 +13,49 @@ using namespace emscripten;

 typedef marian::bergamot::Service TranslationModel;
 typedef marian::bergamot::Response TranslationResult;
+typedef marian::bergamot::AlignedMemory AlignedMemory;

-val getByteArrayView(marian::bergamot::AlignedMemory& alignedMemory) {
+val getByteArrayView(AlignedMemory& alignedMemory) {
  return val(typed_memory_view(alignedMemory.size(), alignedMemory.as<char>()));
 }

 EMSCRIPTEN_BINDINGS(aligned_memory) {
-  class_<marian::bergamot::AlignedMemory>("AlignedMemory")
+  class_<AlignedMemory>("AlignedMemory")
    .constructor<std::size_t, std::size_t>()
-    .function("size", &marian::bergamot::AlignedMemory::size)
+    .function("size", &AlignedMemory::size)
 	  .function("getByteArrayView", &getByteArrayView)
    ;
+
+    register_vector<AlignedMemory*>("AlignedMemoryList");
+}
+
+/// Builds the (source, target) vocabulary memory list handed to the TranslationModel constructor.
+///
+/// When the source and target vocab files are the same, JS passes only one memory object to avoid
+/// allocating memory twice for the same file. The TranslationModel constructor still expects two
+/// entries in that case, so both entries share ownership of the same AlignedMemory object.
+///
+/// @param vocabsMemories Unique AlignedMemory objects passed from JS. The objects that are used
+///                       are moved from.
+/// @return Two shared_ptr entries (source first, then target), or an empty list when
+///         vocabsMemories is empty.
+std::vector<std::shared_ptr<AlignedMemory>> prepareVocabsSmartMemories(std::vector<AlignedMemory*>& vocabsMemories) {
+  std::vector<std::shared_ptr<AlignedMemory>> vocabsSmartMemories;
+  if (vocabsMemories.empty()) {
+    // Indexing an empty list would be undefined behaviour, so hand back an empty list instead.
+    // NOTE(review): presumably the TranslationModel constructor decides how to treat missing
+    // vocabulary memories -- confirm.
+    return vocabsSmartMemories;
+  }
+  vocabsSmartMemories.reserve(2);
+  auto sourceVocabMemory = std::make_shared<AlignedMemory>(std::move(*(vocabsMemories[0])));
+  if (vocabsMemories.size() == 2) {
+    // Distinct source and target vocabularies: each entry owns its own memory.
+    vocabsSmartMemories.push_back(std::move(sourceVocabMemory));
+    vocabsSmartMemories.push_back(std::make_shared<AlignedMemory>(std::move(*(vocabsMemories[1]))));
+  }
+  else {
+    // Shared vocabulary: both entries co-own the same memory.
+    vocabsSmartMemories.push_back(sourceVocabMemory);
+    vocabsSmartMemories.push_back(std::move(sourceVocabMemory));
+  }
+  return vocabsSmartMemories;
 }

 TranslationModel* TranslationModelFactory(const std::string &config,
-                                          marian::bergamot::AlignedMemory* modelMemory,
-                                          marian::bergamot::AlignedMemory* shortlistMemory) {
-  return new TranslationModel(config, std::move(*modelMemory), std::move(*shortlistMemory));
+                                          AlignedMemory* modelMemory,
+                                          AlignedMemory* shortlistMemory,
+                                          std::vector<AlignedMemory*> uniqueVocabsMemories) {
+  return new TranslationModel(config,
+                              std::move(*modelMemory),
+                              std::move(*shortlistMemory),
+                              std::move(prepareVocabsSmartMemories(uniqueVocabsMemories)));
 }

 EMSCRIPTEN_BINDINGS(translation_model) {
--- a/wasm/test_page/bergamot.html
+++ b/wasm/test_page/bergamot.html
@ -113,10 +113,7 @@ shortlist:
 `;
 */

-const modelConfigWithoutModelAndShortList = `vocabs:
-  - /${languagePair}/vocab.${vocabLanguagePair}.spm
-  - /${languagePair}/vocab.${vocabLanguagePair}.spm
-beam-size: 1
+const modelConfig = `beam-size: 1
 normalize: 1.0
 word-penalty: 0
 max-length-break: 128
@ -136,9 +133,15 @@ gemm-precision: int8shift
 // gemm-precision: int8shiftAlphaAll

    const modelFile = `models/${languagePair}/model.${languagePair}.intgemm.alphas.bin`;
-    console.debug("modelFile: ", modelFile);
    const shortlistFile = `models/${languagePair}/lex.50.50.${languagePair}.s2t.bin`;
+    const vocabFiles = [`models/${languagePair}/vocab.${vocabLanguagePair}.spm`,
+                        `models/${languagePair}/vocab.${vocabLanguagePair}.spm`];
+
+    const uniqueVocabFiles = new Set(vocabFiles);
+    console.debug("modelFile: ", modelFile);
    console.debug("shortlistFile: ", shortlistFile);
+    console.debug("No. of unique vocabs: ", uniqueVocabFiles.size);
+    uniqueVocabFiles.forEach(item => console.debug("unique vocabFile: ", item));

    try {
      // Download the files as buffers from the given urls
@ -146,16 +149,23 @@ gemm-precision: int8shift
      const downloadedBuffers = await Promise.all([downloadAsArrayBuffer(modelFile), downloadAsArrayBuffer(shortlistFile)]);
      const modelBuffer = downloadedBuffers[0];
      const shortListBuffer = downloadedBuffers[1];
+
+      const downloadedVocabBuffers = [];
+      for (let item of uniqueVocabFiles.values()) {
+        downloadedVocabBuffers.push(await downloadAsArrayBuffer(item));
+      }
      log(`${languagePair} file download took ${(Date.now() - start) / 1000} secs`);

      // Construct AlignedMemory objects with downloaded buffers
      var alignedModelMemory = constructAlignedMemoryFromBuffer(modelBuffer, 256);
      var alignedShortlistMemory = constructAlignedMemoryFromBuffer(shortListBuffer, 64);
+      var alignedVocabsMemoryList = new Module.AlignedMemoryList;
+      downloadedVocabBuffers.forEach(item => alignedVocabsMemoryList.push_back(constructAlignedMemoryFromBuffer(item, 64)));

      // Instantiate the TranslationModel
      if (translationModel) translationModel.delete();
-      console.debug("Creating TranslationModel with config:", modelConfigWithoutModelAndShortList);
-      translationModel = new Module.TranslationModel(modelConfigWithoutModelAndShortList, alignedModelMemory, alignedShortlistMemory);
+      console.debug("Creating TranslationModel with config:", modelConfig);
+      translationModel = new Module.TranslationModel(modelConfig, alignedModelMemory, alignedShortlistMemory, alignedVocabsMemoryList);
    } catch (error) {
      log(error);
    }