Enable vocabs pass as byte arrays (#122)

* first attempt to enable vocabs pass as byte arrays * pass vocabs bytes as AlignedMemory * add vocabIndices to avoid double loading * small fix on parameter names and documentation * fix windows build plus tiny update on documentation * update marian-dev submodule * move validate model bytearray in BatchTranslator * small refactors on validateBinaryModel() * switch vocab memories to std::vector<marian::Ptr<AlignedMemory>> * update marian-dev submodule * replace marian::Ptr to std::shared_ptr for vocab memories * add note for vocab memories
2024-08-15 08:30:46 +03:00 · 2021-05-07 14:54:48 +01:00 · 2021-05-07 14:54:48 +01:00 · 5b02008a97
commit 5b02008a97
parent b86c76b004
7 changed files with 88 additions and 43 deletions
--- a/3rd_party/marian-dev
+++ b/3rd_party/marian-dev
@ -1 +1 @@
-Subproject commit 94aeaa4616a0fb01ac95a23f0e74a214a94e7609
+Subproject commit ca15d61c87ef2f8f2c290b75a5da6236eb9833d2
--- a/app/service-cli.cpp
+++ b/app/service-cli.cpp
@ -18,15 +18,17 @@ int main(int argc, char *argv[]) {

  // Prepare memories for model and shortlist
  marian::bergamot::AlignedMemory modelBytes, shortlistBytes;
+  std::vector<std::shared_ptr<marian::bergamot::AlignedMemory>> vocabsBytes;

  if (options->get<bool>("check-bytearray")) {
    // Load legit values into bytearrays.
    modelBytes = marian::bergamot::getModelMemoryFromConfig(options);
    shortlistBytes = marian::bergamot::getShortlistMemoryFromConfig(options);
+    marian::bergamot::getVocabsMemoryFromConfig(options, vocabsBytes);
  }

  marian::bergamot::Service service(options, std::move(modelBytes),
-                                    std::move(shortlistBytes));
+                                    std::move(shortlistBytes), std::move(vocabsBytes));

  // Read a large input text blob from stdin
  std::ostringstream std_input;
--- a/src/translator/batch_translator.cpp
+++ b/src/translator/batch_translator.cpp
@ -4,6 +4,7 @@
 #include "data/corpus.h"
 #include "data/text_input.h"
 #include "translator/beam_search.h"
+#include "byte_array_util.h"

 namespace marian {
 namespace bergamot {
@ -18,11 +19,11 @@ BatchTranslator::BatchTranslator(DeviceId const device,

 void BatchTranslator::initialize() {
  // Initializes the graph.
+  bool check = options_->get<bool>("check-bytearray",false); // Flag holds whether validate the bytearray (model and shortlist)
  if (options_->hasAndNotEmpty("shortlist")) {
    int srcIdx = 0, trgIdx = 1;
    bool shared_vcb = vocabs_->front() == vocabs_->back();
    if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) {
-      bool check = options_->get<bool>("check-bytearray",true);
      slgen_ = New<data::BinaryShortlistGenerator>(shortlistMemory_->begin(), shortlistMemory_->size(),
                                                     vocabs_->front(), vocabs_->back(),
                                                     srcIdx, trgIdx, shared_vcb, check);
@ -45,6 +46,10 @@ void BatchTranslator::initialize() {
  if (modelMemory_->size() > 0 && modelMemory_->begin() != nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file
    ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0,
             "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
+    if (check) {
+      ABORT_IF(!validateBinaryModel(*modelMemory_, modelMemory_->size()),
+               "The binary file is invalid. Incomplete or corrupted download?");
+    }
    const std::vector<const void *> container = {modelMemory_->begin()}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding.
    scorers_ = createScorers(options_, container);
  } else {
--- a/src/translator/byte_array_util.cpp
+++ b/src/translator/byte_array_util.cpp
@ -1,12 +1,12 @@
 #include "byte_array_util.h"
 #include <stdlib.h>
 #include <iostream>
+#include <memory>

 namespace marian {
 namespace bergamot {

 namespace {
-
 // This is a basic validator that checks if the file has not been truncated
 // it basically loads up the header and checks

@ -26,9 +26,10 @@ const T* get(const void*& current, uint64_t num = 1) {
  current = (const T*)current + num;
  return ptr;
 }
+} // Anonymous namespace

-bool validateBinaryModel(AlignedMemory& model, uint64_t fileSize) {
-  const void * current = &model[0];
+bool validateBinaryModel(const AlignedMemory& model, uint64_t fileSize) {
+  const void * current = model.begin();
  uint64_t memoryNeeded = sizeof(uint64_t)*2; // We keep track of how much memory we would need if we have a complete file
  uint64_t numHeaders;
  if (fileSize >= memoryNeeded) { // We have enough filesize to fetch the headers.
@ -76,8 +77,6 @@ bool validateBinaryModel(AlignedMemory& model, uint64_t fileSize) {
  }
 }

-} // Anonymous namespace
-
 AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
  uint64_t fileSize = filesystem::fileSize(path);
  io::InputFileStream in(path);
@ -89,13 +88,12 @@ AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
 }

 AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options){
-    auto models = options->get<std::vector<std::string>>("models");
-    ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
-    marian::filesystem::Path modelPath(models[0]);
-    ABORT_IF(modelPath.extension() != marian::filesystem::Path(".bin"), "The file of binary model should end with .bin");
-    AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
-    ABORT_IF(!validateBinaryModel(alignedMemory, alignedMemory.size()), "The binary file is invalid. Incomplete or corrupted download?");
-    return alignedMemory;
+  auto models = options->get<std::vector<std::string>>("models");
+  ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
+  marian::filesystem::Path modelPath(models[0]);
+  ABORT_IF(modelPath.extension() != marian::filesystem::Path(".bin"), "The file of binary model should end with .bin");
+  AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
+  return alignedMemory;
 }

 AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options){
@ -104,5 +102,20 @@ AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options)
  return loadFileToMemory(shortlist[0], 64);
 }

+void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
+                               std::vector<std::shared_ptr<AlignedMemory>>& vocabMemories){
+  auto vfiles = options->get<std::vector<std::string>>("vocabs");
+  ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
+  vocabMemories.resize(vfiles.size());
+  std::unordered_map<std::string, std::shared_ptr<AlignedMemory>> vocabMap;
+  for (size_t i = 0; i < vfiles.size(); ++i) {
+    auto m = vocabMap.emplace(std::make_pair(vfiles[i], std::shared_ptr<AlignedMemory>()));
+    if (m.second) {
+      m.first->second = std::make_shared<AlignedMemory>(loadFileToMemory(vfiles[i], 64));
+    }
+    vocabMemories[i] = m.first->second;
+  }
+}
+
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/byte_array_util.h
+++ b/src/translator/byte_array_util.h
@ -7,6 +7,8 @@ namespace bergamot {
 AlignedMemory loadFileToMemory(const std::string& path, size_t alignment);
 AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
 AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options);
-
+void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
+                               std::vector<std::shared_ptr<AlignedMemory>>& vocabMemories);
+bool validateBinaryModel(const AlignedMemory& model, uint64_t fileSize);
 } // namespace bergamot
 } // namespace marian
--- a/src/translator/service.cpp
+++ b/src/translator/service.cpp
@ -6,21 +6,34 @@
 #include <utility>

 inline std::vector<marian::Ptr<const marian::Vocab>>
-loadVocabularies(marian::Ptr<marian::Options> options) {
+loadVocabularies(marian::Ptr<marian::Options> options,
+                 std::vector<std::shared_ptr<marian::bergamot::AlignedMemory>>&& vocabMemories) {
  // @TODO: parallelize vocab loading for faster startup
-  auto vfiles = options->get<std::vector<std::string>>("vocabs");
-  // with the current setup, we need at least two vocabs: src and trg
-  ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
-  std::vector<marian::Ptr<marian::Vocab const>> vocabs(vfiles.size());
-  std::unordered_map<std::string, marian::Ptr<marian::Vocab>> vmap;
-  for (size_t i = 0; i < vocabs.size(); ++i) {
-    auto m =
-        vmap.emplace(std::make_pair(vfiles[i], marian::Ptr<marian::Vocab>()));
-    if (m.second) { // new: load the vocab
-      m.first->second = marian::New<marian::Vocab>(options, i);
-      m.first->second->load(vfiles[i]);
+  std::vector<marian::Ptr<marian::Vocab const>> vocabs;
+  if(!vocabMemories.empty()){
+    // load vocabs from buffer
+    ABORT_IF(vocabMemories.size() < 2, "Insufficient number of vocabularies.");
+    vocabs.resize(vocabMemories.size());
+    for (size_t i = 0; i < vocabs.size(); i++) {
+      marian::Ptr<marian::Vocab> vocab = marian::New<marian::Vocab>(options, i);
+      vocab->loadFromSerialized(absl::string_view(vocabMemories[i]->begin(), vocabMemories[i]->size()));
+      vocabs[i] = vocab;
+    }
+  } else {
+    // load vocabs from file
+    auto vfiles = options->get<std::vector<std::string>>("vocabs");
+    // with the current setup, we need at least two vocabs: src and trg
+    ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
+    vocabs.resize(vfiles.size());
+    std::unordered_map<std::string, marian::Ptr<marian::Vocab>> vmap;
+    for (size_t i = 0; i < vocabs.size(); ++i) {
+      auto m = vmap.emplace(std::make_pair(vfiles[i], marian::Ptr<marian::Vocab>()));
+      if (m.second) { // new: load the vocab
+        m.first->second = marian::New<marian::Vocab>(options, i);
+        m.first->second->load(vfiles[i]);
+      }
+      vocabs[i] = m.first->second;
    }
-    vocabs[i] = m.first->second;
  }
  return vocabs;
 }
@ -28,11 +41,14 @@ loadVocabularies(marian::Ptr<marian::Options> options) {
 namespace marian {
 namespace bergamot {

-Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory) 
-    : requestId_(0), options_(options), vocabs_(std::move(loadVocabularies(options))),
+Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory,
+                 std::vector<std::shared_ptr<AlignedMemory>> vocabMemories)
+    : requestId_(0), options_(options),
+      vocabs_(std::move(loadVocabularies(options, std::move(vocabMemories)))),
      text_processor_(vocabs_, options), batcher_(options),
      numWorkers_(options->get<int>("cpu-threads")),
-      modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))
+      modelMemory_(std::move(modelMemory)),
+      shortlistMemory_(std::move(shortlistMemory))
 #ifndef WASM_COMPATIBLE_SOURCE
      // 0 elements in PCQueue is illegal and can lead to failures. Adding a
      // guard to have at least one entry allocated. In the single-threaded
--- a/src/translator/service.h
+++ b/src/translator/service.h
@ -64,10 +64,12 @@ class Service {
 public:
  /// @param options Marian options object
  /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
-  /// of a model.bin. Optional, defaults to nullptr when not used
+  /// of a model.bin.
  /// @param shortlistMemory byte array of shortlist (aligned to 64)
+  /// @param vocabMemories vector of vocabulary memories (aligned to 64)
  explicit Service(Ptr<Options> options, AlignedMemory modelMemory,
-                   AlignedMemory shortlistMemory);
+                   AlignedMemory shortlistMemory,
+                   std::vector<std::shared_ptr<AlignedMemory>> vocabMemories);

  /// Construct Service purely from Options. This expects options which
  /// marian-decoder expects to be set for loading model shortlist and
@ -76,24 +78,30 @@ public:
  ///
  /// This is equivalent to a call to:
  /// ```cpp
-  ///    Service(options, AlignedMemory(),  AlignedMemory())
+  ///    Service(options, AlignedMemory(), AlignedMemory(), {})
  /// ```
  /// wherein empty memory is passed and internal flow defaults to file-based
-  /// model, shortlist loading.
+  /// model, shortlist loading. AlignedMemory() corresponds to empty memory
  explicit Service(Ptr<Options> options)
-      : Service(options, AlignedMemory(), AlignedMemory()) {}
+      : Service(options, AlignedMemory(), AlignedMemory(), {}) {}

  /// Construct Service from a string configuration.
  /// @param [in] config string parsable as YAML expected to adhere with marian
  /// config
-  /// @param [in] model_memory byte array (aligned to 256!!!) that contains the
-  /// bytes of a model.bin. Optional.
-  /// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
+  /// @param [in] modelMemory byte array (aligned to 256!!!) that contains the
+  /// bytes of a model.bin. Optional. AlignedMemory() corresponds to empty memory
+  /// @param [in] shortlistMemory byte array of shortlist (aligned to 64). Optional.
+  /// @param [in] vocabMemories vector of vocabulary memories (aligned to 64). Optional.
+  /// If two vocabularies are the same (based on the filenames), two entries (shared
+  /// pointers) will be generated which share the same AlignedMemory object.
  explicit Service(const std::string &config,
                   AlignedMemory modelMemory = AlignedMemory(),
-                   AlignedMemory shortlistMemory = AlignedMemory())
+                   AlignedMemory shortlistMemory = AlignedMemory(),
+                   std::vector<std::shared_ptr<AlignedMemory>> vocabsMemories = {})
      : Service(parseOptions(config, /*validate=*/false),
-                std::move(modelMemory), std::move(shortlistMemory)) {}
+                std::move(modelMemory),
+                std::move(shortlistMemory),
+                std::move(vocabsMemories)) {}

  /// Explicit destructor to clean up after any threads initialized in
  /// asynchronous operation mode.
@ -187,7 +195,6 @@ private:
  /// ordering among requests and logging/book-keeping.

  size_t requestId_;
-
  /// Store vocabs representing source and target.
  std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY (text_processor_)