From 7099b9e9ad4b525f32f255a3aaefcc26a49e4709 Mon Sep 17 00:00:00 2001
From: Jerin Philip <jerinphilip@live.in>
Date: Wed, 19 Jan 2022 16:36:48 +0000
Subject: [PATCH] Streamline memory-bundle loads (#307)

Provides an additional TranslationModel constructor that takes only the
configuration and loads the MemoryBundle itself, inside this library,
instead of requiring a client such as translateLocally or the Python
bindings to build the bundle. Once the config file has been read, all
the information required to construct the MemoryBundle is available, so
the constructor delegates to getMemoryBundleFromConfig.

 - The command-line application supplied from here, app/bergamot is
   configured to use the fast-load path now.
 - Changes to binary loading additionally revealed a bug in the
   example-run script (used in the docs and tied to CI): the path passed
   as --ssplit-prefix-file was wrong. The fix is included.
 - Shortlist is made optional in the memory bundle:
   getShortlistMemoryFromConfig now returns an empty AlignedMemory
   instead of aborting when no shortlist path is configured.

Fixes #304.
Fixes #306.
See also: XapaJIaMnu/translateLocally#82.
---
 app/bergamot.cpp                     |  3 +--
 examples/run-native.sh               |  2 +-
 src/translator/byte_array_util.cpp   | 10 ++++----
 src/translator/service.h             |  5 ++--
 src/translator/translation_model.cpp | 35 +++++++++++++++-------------
 src/translator/translation_model.h   |  6 ++++-
 6 files changed, 34 insertions(+), 27 deletions(-)
diff --git a/app/bergamot.cpp b/app/bergamot.cpp
index 5629f91..195e167 100644
--- a/app/bergamot.cpp
+++ b/app/bergamot.cpp
@@ -16,8 +16,7 @@ int main(int argc, char *argv[]) {
   // Construct a model.
   auto options = parseOptionsFromFilePath(config.modelConfigPaths.front());
 
-  MemoryBundle memoryBundle;
-  std::shared_ptr<TranslationModel> model = service.createCompatibleModel(options, std::move(memoryBundle));
+  std::shared_ptr<TranslationModel> model = service.createCompatibleModel(options);
 
   ResponseOptions responseOptions;
   std::string input = readFromStdin();
diff --git a/examples/run-native.sh b/examples/run-native.sh
index 81d8625..b02968a 100644
--- a/examples/run-native.sh
+++ b/examples/run-native.sh
@@ -9,7 +9,7 @@ wget --quiet --continue --directory models/ \
 # Patch the config-files generated from marian for use in bergamot.
 python3 bergamot-translator-tests/tools/patch-marian-for-bergamot.py \
     --config-path models/ende.student.tiny11/config.intgemm8bitalpha.yml \
-    --ssplit-prefix-file 3rd-party/ssplit-cpp/split-cpp/nonbreaking_prefixes/nonbreaking_prefix.en
+    --ssplit-prefix-file $(realpath 3rd_party/ssplit-cpp/nonbreaking_prefixes/nonbreaking_prefix.en)
 
 # Patched config file will be available with .bergamot.yml suffix.
 CONFIG=models/ende.student.tiny11/config.intgemm8bitalpha.yml.bergamot.yml
diff --git a/src/translator/byte_array_util.cpp b/src/translator/byte_array_util.cpp
index 83d06ac..d0fddee 100644
--- a/src/translator/byte_array_util.cpp
+++ b/src/translator/byte_array_util.cpp
@@ -101,10 +101,12 @@ AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {
 
 AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options) {
   auto shortlist = options->get<std::vector<std::string>>("shortlist");
-  ABORT_IF(shortlist.empty(), "No path to shortlist file is given.");
-  ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]),
-           "Loading non-binary shortlist file into memory is not supported");
-  return loadFileToMemory(shortlist[0], 64);
+  if (!shortlist.empty()) {
+    ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]),
+             "Loading non-binary shortlist file into memory is not supported");
+    return loadFileToMemory(shortlist[0], 64);
+  }
+  return AlignedMemory();
 }
 
 void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
diff --git a/src/translator/service.h b/src/translator/service.h
index 4798d0b..9a54a7a 100644
--- a/src/translator/service.h
+++ b/src/translator/service.h
@@ -127,10 +127,9 @@ class AsyncService {
 
   /// Create a TranslationModel compatible with this instance of Service. Internally assigns how many replicas of
   /// backend needed based on worker threads set. See TranslationModel for documentation on other params.
-  template <class ConfigType>
-  Ptr<TranslationModel> createCompatibleModel(const ConfigType &config, MemoryBundle &&memory = MemoryBundle{}) {
+  Ptr<TranslationModel> createCompatibleModel(const TranslationModel::Config &config) {
     // @TODO: Remove this remove this dependency/coupling.
-    return New<TranslationModel>(config, std::move(memory), /*replicas=*/config_.numWorkers);
+    return New<TranslationModel>(config, /*replicas=*/config_.numWorkers);
   }
 
   /// With the supplied TranslationModel, translate an input. A Response is constructed with optional items set/unset
diff --git a/src/translator/translation_model.cpp b/src/translator/translation_model.cpp
index f2ad39b..09c935b 100644
--- a/src/translator/translation_model.cpp
+++ b/src/translator/translation_model.cpp
@@ -27,22 +27,25 @@ TranslationModel::TranslationModel(const Config &options, MemoryBundle &&memory
   ABORT_IF(replicas == 0, "At least one replica needs to be created.");
   backend_.resize(replicas);
 
-  if (options_->hasAndNotEmpty("shortlist")) {
-    int srcIdx = 0, trgIdx = 1;
-    bool shared_vcb =
-        vocabs_.sources().front() ==
-        vocabs_.target();  // vocabs_->sources().front() is invoked as we currently only support one source vocab
-    if (memory_.shortlist.size() > 0 && memory_.shortlist.begin() != nullptr) {
-      bool check = options_->get<bool>("check-bytearray", false);
-      shortlistGenerator_ = New<data::BinaryShortlistGenerator>(memory_.shortlist.begin(), memory_.shortlist.size(),
-                                                                vocabs_.sources().front(), vocabs_.target(), srcIdx,
-                                                                trgIdx, shared_vcb, check);
-    } else {
-      // Changed to BinaryShortlistGenerator to enable loading binary shortlist file
-      // This class also supports text shortlist file
-      shortlistGenerator_ = New<data::BinaryShortlistGenerator>(options_, vocabs_.sources().front(), vocabs_.target(),
-                                                                srcIdx, trgIdx, shared_vcb);
-    }
+  // Try to load shortlist from memory-bundle. If not available, try to load from options_;
+
+  int srcIdx = 0, trgIdx = 1;
+  // vocabs_->sources().front() is invoked as we currently only support one source vocab
+  bool shared_vcb = (vocabs_.sources().front() == vocabs_.target());
+
+  if (memory_.shortlist.size() > 0 && memory_.shortlist.begin() != nullptr) {
+    bool check = options_->get<bool>("check-bytearray", false);
+    shortlistGenerator_ = New<data::BinaryShortlistGenerator>(memory_.shortlist.begin(), memory_.shortlist.size(),
+                                                              vocabs_.sources().front(), vocabs_.target(), srcIdx,
+                                                              trgIdx, shared_vcb, check);
+  } else if (options_->hasAndNotEmpty("shortlist")) {
+    // Changed to BinaryShortlistGenerator to enable loading binary shortlist file
+    // This class also supports text shortlist file
+    shortlistGenerator_ = New<data::BinaryShortlistGenerator>(options_, vocabs_.sources().front(), vocabs_.target(),
+                                                              srcIdx, trgIdx, shared_vcb);
+  } else {
+    // In this case, the loadpath does not load shortlist.
+    shortlistGenerator_ = nullptr;
   }
 }
 
diff --git a/src/translator/translation_model.h b/src/translator/translation_model.h
index 8519ad4..eac7b5a 100644
--- a/src/translator/translation_model.h
+++ b/src/translator/translation_model.h
@@ -6,6 +6,7 @@
 
 #include "batch.h"
 #include "batching_pool.h"
+#include "byte_array_util.h"
 #include "cache.h"
 #include "common/utils.h"
 #include "data/shortlist.h"
@@ -56,7 +57,10 @@ class TranslationModel {
   /// @param [in] options: Marian options object.
   /// @param [in] memory: MemoryBundle object holding memory buffers containing parameters to build MarianBackend,
   /// ShortlistGenerator, Vocabs and SentenceSplitter.
-  TranslationModel(const Config& options, MemoryBundle&& memory = MemoryBundle{}, size_t replicas = 1);
+  TranslationModel(const Config& options, MemoryBundle&& memory, size_t replicas = 1);
+
+  TranslationModel(const Config& options, size_t replicas = 1)
+      : TranslationModel(options, getMemoryBundleFromConfig(options), replicas) {}
 
   /// Make a Request to be translated by this TranslationModel instance.
   /// @param [in] requestId: Unique identifier associated with this request, available from Service.