Enable binary shortlist loading from bytebuffer (#69)

Contains a "hack" that must be removed promptly by editing TranslationModel; that change will come in a following commit. * add shortlist_memory and update service-cli-bytearray test * update marian-dev * address review comments * fix compilation and test failures and further address review comments * small update on marian-dev (based on browsermt/marian-dev PR#28) * update marian-dev with upstream * code refactoring according to review * fix marian-dev submodule conflicts * switch MemoryGift to AlignedVector * copy aligned.h from kpu/intgemm for AlignedVector * changes based on memory ownership and AlignedVector * fix BatchTranslator inits * small fixes according to review comments * update submodule marian-dev to master * update submodule marian-dev with upstream Co-authored-by: Kenneth Heafield <kpu@users.noreply.github.com>
2024-08-15 08:30:46 +03:00 · 2021-04-01 19:36:07 +01:00 · 2021-04-01 19:36:07 +01:00 · f654ab0f71
commit f654ab0f71
parent 2e5daac978
14 changed files with 211 additions and 95 deletions
--- a/app/bergamot-translator-app-bytearray.cpp
+++ b/app/bergamot-translator-app-bytearray.cpp
@ -9,7 +9,7 @@

 #include "TranslationModel.h"
 #include "translator/parser.h"
-#include "translator/byteArrayExample.h"
+#include "translator/byte_array_util.h"

 int main(int argc, char **argv) {

@ -19,9 +19,11 @@ int main(int argc, char **argv) {
  auto options = configParser.parseOptions(argc, argv, true);
  std::string config = options->asYamlString();

+  // Prepare model byte array
+  marian::bergamot::AlignedMemory modelBytes = marian::bergamot::getModelMemoryFromConfig(options);
+
  // Route the config string to construct marian model through TranslationModel
-  void * model_bytes = bergamot::getBinaryModelFromConfig(options);
-  auto model = std::make_shared<TranslationModel>(config, model_bytes);
+  TranslationModel model(config, modelBytes.begin());

  TranslationRequest translationRequest;
  std::vector<std::string> texts;
@ -42,7 +44,7 @@ int main(int argc, char **argv) {
      "Prague, the University of Sheffield, University of Tartu, and "
      "Mozilla.");

-  auto results = model->translate(std::move(texts), translationRequest);
+  auto results = model.translate(std::move(texts), translationRequest);

  // Resolve the future and get the actual result
  //std::vector<TranslationResult> results = futureResults.get();
@ -61,8 +63,5 @@ int main(int argc, char **argv) {
    std::cout << std::endl;
  }

-  // Clear the memory used for the byte array
-  free(model_bytes); // Ideally, this should be done after the translation model has been gracefully shut down.
-
  return 0;
 }
--- a/app/service-cli-bytearray.cpp
+++ b/app/service-cli-bytearray.cpp
@ -9,14 +9,17 @@
 #include "translator/parser.h"
 #include "translator/response.h"
 #include "translator/service.h"
-#include "translator/byteArrayExample.h"
+#include "translator/byte_array_util.h"

 int main(int argc, char *argv[]) {
  auto cp = marian::bergamot::createConfigParser();
  auto options = cp.parseOptions(argc, argv, true);

-  void * model_bytes = bergamot::getBinaryModelFromConfig(options);
-  marian::bergamot::Service service(options, model_bytes);
+  // Prepare memories for model and shortlist
+  marian::bergamot::AlignedMemory modelBytes = marian::bergamot::getModelMemoryFromConfig(options);
+  marian::bergamot::AlignedMemory shortlistBytes = marian::bergamot::getShortlistMemoryFromConfig(options);
+
+  marian::bergamot::Service service(options, std::move(modelBytes), std::move(shortlistBytes));

  // Read a large input text blob from stdin
  std::ostringstream std_input;
@ -30,8 +33,5 @@ int main(int argc, char *argv[]) {
  Response response = responseFuture.get();
  std::cout << response.target.text << std::endl;

-  // Clear the memory used for the byte array
-  free(model_bytes); // Ideally, this should be done after the translation model has been gracefully shut down.
-
  return 0;
 }
--- a/src/translator/CMakeLists.txt
+++ b/src/translator/CMakeLists.txt
@ -1,7 +1,6 @@
 add_library(bergamot-translator STATIC
    TranslationModel.cpp
-
-    byteArrayExample.cpp
+    byte_array_util.cpp
    text_processor.cpp
    sentence_splitter.cpp
    batch_translator.cpp 
--- a/src/translator/aligned.h
+++ b/src/translator/aligned.h
@ -0,0 +1,71 @@
+#pragma once
+#include <cstdlib>
+#include <new>
+#ifdef _MSC_VER
+#include <malloc.h>
+#endif
+
+// Aligned simple vector.
+
+namespace marian {
+namespace bergamot {
+
+/// Minimal owning, move-only buffer of `size` elements of T whose storage is
+/// aligned to a caller-chosen boundary.
+///
+/// Storage comes from _aligned_malloc (MSVC) or posix_memalign (elsewhere) and
+/// is returned uninitialized; no element constructors or destructors are run.
+template <class T> class AlignedVector {
+public:
+  /// Creates an empty vector that owns no storage.
+  AlignedVector() : mem_(nullptr), size_(0) {}
+
+  /// Allocates uninitialized storage for `size` elements of T.
+  /// @param size number of elements (not bytes) to allocate.
+  /// @param alignment requested alignment of the storage, in bytes.
+  /// @throws std::bad_alloc if the underlying allocator reports failure.
+  explicit AlignedVector(std::size_t size, std::size_t alignment = 64 /* CPU cares about this */)
+          : mem_(nullptr), size_(size) {
+#ifdef _MSC_VER
+    mem_ = static_cast<T*>(_aligned_malloc(size * sizeof(T), alignment));
+    if (!mem_) throw std::bad_alloc();
+#else
+    if (posix_memalign(reinterpret_cast<void **>(&mem_), alignment, size * sizeof(T))) {
+      throw std::bad_alloc();
+    }
+#endif
+  }
+
+  /// Takes over the storage of `from`, leaving `from` empty.
+  AlignedVector(AlignedVector &&from) noexcept : mem_(from.mem_), size_(from.size_) {
+    from.mem_ = nullptr;
+    from.size_ = 0;
+  }
+
+  /// Releases the storage currently owned (if any) and takes over the storage
+  /// of `from`, leaving `from` empty. Self-assignment is a no-op.
+  AlignedVector &operator=(AlignedVector &&from) noexcept {
+    if (this != &from) {
+      // Free our own buffer first; overwriting mem_ directly would leak it.
+      release();
+      mem_ = from.mem_;
+      size_ = from.size_;
+      from.mem_ = nullptr;
+      from.size_ = 0;
+    }
+    return *this;
+  }
+
+  // The buffer has a single owner, so copying is disabled.
+  AlignedVector(const AlignedVector&) = delete;
+  AlignedVector& operator=(const AlignedVector&) = delete;
+
+  ~AlignedVector() { release(); }
+
+  /// Number of elements (not bytes) in the buffer.
+  std::size_t size() const { return size_; }
+
+  /// Unchecked element access.
+  T &operator[](std::size_t offset) { return mem_[offset]; }
+  const T &operator[](std::size_t offset) const { return mem_[offset]; }
+
+  /// Pointer to the first element; nullptr for a default-constructed or
+  /// moved-from vector.
+  T *begin() { return mem_; }
+  const T *begin() const { return mem_; }
+  /// Pointer one past the last element.
+  T *end() { return mem_ + size_; }
+  const T *end() const { return mem_ + size_; }
+
+  /// Reinterprets the start of the buffer as a pointer to ReturnType.
+  template <typename ReturnType>
+  ReturnType *as() { return reinterpret_cast<ReturnType*>(mem_); }
+
+private:
+  /// Frees the owned storage with the deallocator matching the allocator used
+  /// in the constructor. Does not reset mem_ or size_.
+  void release() noexcept {
+#ifdef _MSC_VER
+    _aligned_free(mem_);
+#else
+    std::free(mem_);
+#endif
+  }
+
+  T *mem_;
+  std::size_t size_;
+};
+} // namespace bergamot
+} // namespace marian
--- a/src/translator/batch_translator.cpp
+++ b/src/translator/batch_translator.cpp
@ -11,17 +11,29 @@ namespace bergamot {
 BatchTranslator::BatchTranslator(DeviceId const device,
                                 std::vector<Ptr<Vocab const>> &vocabs,
                                 Ptr<Options> options,
-                                 const void * model_memory)
-    : device_(device), options_(options), vocabs_(&vocabs), model_memory_(model_memory) {}
+                                 const AlignedMemory* modelMemory,
+                                 const AlignedMemory* shortlistMemory)
+    : device_(device), options_(options), vocabs_(&vocabs),
+    modelMemory_(modelMemory), shortlistMemory_(shortlistMemory) {}

 void BatchTranslator::initialize() {
  // Initializes the graph.
  if (options_->hasAndNotEmpty("shortlist")) {
    int srcIdx = 0, trgIdx = 1;
    bool shared_vcb = vocabs_->front() == vocabs_->back();
-    slgen_ = New<data::LexicalShortlistGenerator>(options_, vocabs_->front(),
-                                                  vocabs_->back(), srcIdx,
-                                                  trgIdx, shared_vcb);
+    if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) {
+      bool check = options_->get<bool>("check-bytearray",true);
+      slgen_ = New<data::BinaryShortlistGenerator>(shortlistMemory_->begin(), shortlistMemory_->size(),
+                                                     vocabs_->front(), vocabs_->back(),
+                                                     srcIdx, trgIdx, shared_vcb, check);
+    }
+    else {
+      // Changed to BinaryShortlistGenerator to enable loading binary shortlist file
+      // This class also supports text shortlist file
+      slgen_ = New<data::BinaryShortlistGenerator>(options_, vocabs_->front(),
+                                                    vocabs_->back(), srcIdx,
+                                                    trgIdx, shared_vcb);
+    }
  }

  graph_ = New<ExpressionGraph>(true); // always optimize
@ -30,12 +42,10 @@ void BatchTranslator::initialize() {
  graph_->setDevice(device_);
  graph_->getBackend()->configureDevice(options_);
  graph_->reserveWorkspaceMB(options_->get<size_t>("workspace"));
-  if (model_memory_) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file
-    if ((uintptr_t)model_memory_ % 256 != 0) {
-      std::cerr << "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it." << std::endl;
-      exit(1);
-    }
-    const std::vector<const void *> container = {model_memory_}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding.
+  if (modelMemory_->size() > 0 && modelMemory_->begin() != nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file
+    ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0,
+             "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
+    const std::vector<const void *> container = {modelMemory_->begin()}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding.
    scorers_ = createScorers(options_, container);
  } else {
    scorers_ = createScorers(options_);
--- a/src/translator/batch_translator.h
+++ b/src/translator/batch_translator.h
@ -31,10 +31,11 @@ public:
   * @param device DeviceId that performs translation. Could be CPU or GPU
   * @param vocabs Vector that contains ptrs to two vocabs
   * @param options Marian options object
-   * @param model_memory byte array (aligned to 64!!!) that contains the bytes of a model.bin. Provide a nullptr if not used.
+   * @param modelMemory pointer to the byte array (aligned to 256!!!) that contains the bytes of a model.bin. Must not be nullptr; pass a pointer to an empty AlignedMemory if not used.
+   * @param shortlistMemory byte array of shortlist (aligned to 64)
   */
  explicit BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
-                  Ptr<Options> options, const void * model_memory);
+                  Ptr<Options> options, const AlignedMemory* modelMemory, const AlignedMemory* shortlistMemory);

  // convenience function for logging. TODO(jerin)
  std::string _identifier() { return "worker" + std::to_string(device_.no); }
@ -48,7 +49,8 @@ private:
  Ptr<ExpressionGraph> graph_;
  std::vector<Ptr<Scorer>> scorers_;
  Ptr<data::ShortlistGenerator const> slgen_;
-  const void * model_memory_;
+  const AlignedMemory* modelMemory_{nullptr};
+  const AlignedMemory* shortlistMemory_{nullptr};
 };

 } // namespace bergamot
--- a/src/translator/byteArrayExample.cpp
+++ b/src/translator/byteArrayExample.cpp
@ -1,45 +0,0 @@
-#include "byteArrayExample.h"
-#include <stdlib.h>
-#include <fstream>
-#include <iostream>
-
-namespace bergamot {
-
-void * getBinaryFile(std::string path) {
-    std::ifstream is (path, std::ifstream::binary);
-    uint64_t length = 0; // Determine the length of file in bytes
-    if (is) {
-        is.seekg(0, is.end);
-        length = is.tellg();
-        is.seekg(0, is.beg);
-    } else {
-        std::cerr << "Failed opening file stream: " << path << std::endl;
-        std::exit(1);
-    }
-    void *result;
-    int fail = posix_memalign(&result, 256, length);
-    if (fail) {
-        std::cerr << "Failed to allocate aligned memory." << std::endl;
-        std::exit(1);
-    }
-    is.read(static_cast<char *>(result), length);
-    return result;
-}
-
-void * getBinaryModelFromConfig(marian::Ptr<marian::Options> options) {
-    std::vector<std::string> models = options->get<std::vector<std::string>>("models");
-    if (models.size() != 1) {
-        std::cerr << "Loading multiple binary models is not supported for now as it is not necessary." << std::endl;
-        std::exit(1);
-        marian::filesystem::Path modelPath(models[0]);
-        if (modelPath.extension() != marian::filesystem::Path(".bin")) {
-            std::cerr << "Non binary models cannot be loaded as a byte array." << std::endl;
-            std::exit(1);
-        }
-        return nullptr;
-    } else {
-        return getBinaryFile(models[0]);
-    }
-}
-
-} // namespace bergamot
--- a/src/translator/byteArrayExample.h
+++ b/src/translator/byteArrayExample.h
@ -1,8 +0,0 @@
-#include "marian.h"
-
-namespace bergamot {
-
-void * getBinaryFile(std::string path);
-void * getBinaryModelFromConfig(marian::Ptr<marian::Options> options);
-
-} // namespace bergamot
--- a/src/translator/byte_array_util.cpp
+++ b/src/translator/byte_array_util.cpp
@ -0,0 +1,33 @@
+#include "byte_array_util.h"
+#include <stdlib.h>
+#include <iostream>
+
+namespace marian {
+namespace bergamot {
+
+/// Reads the whole file at `path` into a newly allocated aligned buffer.
+/// @param path file to load.
+/// @param alignment alignment, in bytes, requested for the returned buffer.
+/// @return buffer whose size() equals the size reported by filesystem::fileSize.
+/// Aborts if the stream is bad after opening or if fewer bytes than the
+/// reported file size could be read.
+AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
+  uint64_t fileSize = filesystem::fileSize(path);
+  io::InputFileStream in(path);
+  ABORT_IF(in.bad(), "Failed opening file stream: {}", path);
+  AlignedMemory alignedMemory(fileSize, alignment);
+  in.read(reinterpret_cast<char *>(alignedMemory.begin()), fileSize);
+  // The buffer was sized from fileSize, so comparing alignedMemory.size() with
+  // fileSize can never fail; what must be verified is how much was actually read.
+  ABORT_IF(in.fail() || static_cast<uint64_t>(in.gcount()) != fileSize, "Error reading file {}", path);
+  return alignedMemory;
+}
+
+/// Loads the single binary model named by the "models" option into a buffer
+/// aligned to 256 bytes.
+/// Aborts unless exactly one model path is configured and that path has the
+/// ".bin" extension.
+AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options){
+  const std::vector<std::string> modelPaths = options->get<std::vector<std::string>>("models");
+  ABORT_IF(modelPaths.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
+
+  const std::string& binaryModel = modelPaths.front();
+  const bool notBinary =
+      marian::filesystem::Path(binaryModel).extension() != marian::filesystem::Path(".bin");
+  ABORT_IF(notBinary, "The file of binary model should end with .bin");
+
+  return loadFileToMemory(binaryModel, 256);
+}
+
+/// Loads the first entry of the "shortlist" option into a buffer aligned to
+/// 64 bytes. Aborts if the option holds no entries.
+AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options){
+  const std::vector<std::string> shortlistArgs = options->get<std::vector<std::string>>("shortlist");
+  const bool noPathGiven = shortlistArgs.empty();
+  ABORT_IF(noPathGiven, "No path to shortlist file is given.");
+  return loadFileToMemory(shortlistArgs.front(), 64);
+}
+
+} // namespace bergamot
+} // namespace marian
--- a/src/translator/byte_array_util.h
+++ b/src/translator/byte_array_util.h
@ -0,0 +1,12 @@
+#include "marian.h"
+#include "definitions.h"
+
+namespace marian {
+namespace bergamot {
+
+AlignedMemory loadFileToMemory(const std::string& path, size_t alignment);
+AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
+AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options);
+
+} // namespace bergamot
+} // namespace marian
--- a/src/translator/definitions.h
+++ b/src/translator/definitions.h
@ -3,6 +3,7 @@

 #include "data/types.h"
 #include "data/vocab_base.h"
+#include "aligned.h"
 #include <vector>

 namespace marian {
@ -21,6 +22,9 @@ template <class T, typename... Args> UPtr<T> UNew(Args &&... args) {

 template <class T> UPtr<T> UNew(UPtr<T> p) { return UPtr<T>(p); }

+/// Shortcut to AlignedVector<char> for byte arrays. The element type is a
+/// single byte so that size() and the allocated storage are both measured in
+/// bytes (a pointer element type would allocate sizeof(void*) bytes per byte).
+typedef AlignedVector<char> AlignedMemory;
+
 } // namespace bergamot
 } // namespace marian

--- a/src/translator/parser.h
+++ b/src/translator/parser.h
@ -23,7 +23,11 @@ inline marian::ConfigParser createConfigParser() {
      "--max-length-break", "Bergamot Options",
      "Maximum input tokens to be processed in a single sentence.", 128);

-  return cp;
+  cp.addOption<bool>(
+      "--check-bytearray", "Bergamot Options",
+      "Flag holds whether to check the content of the bytearray (true by default)", true);
+
+    return cp;
 }

 inline std::shared_ptr<marian::Options>
--- a/src/translator/service.cpp
+++ b/src/translator/service.cpp
@ -28,10 +28,11 @@ loadVocabularies(marian::Ptr<marian::Options> options) {
 namespace marian {
 namespace bergamot {

-Service::Service(Ptr<Options> options, const void *model_memory)
+Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory)
    : requestId_(0), vocabs_(std::move(loadVocabularies(options))),
      text_processor_(vocabs_, options), batcher_(options),
-      numWorkers_(options->get<int>("cpu-threads")), model_memory_(model_memory)
+      numWorkers_(options->get<int>("cpu-threads")),
+      modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))
 #ifndef WASM_COMPATIBLE_SOURCE
      // 0 elements in PCQueue is illegal and can lead to failures. Adding a
      // guard to have at least one entry allocated. In the single-threaded
@ -54,7 +55,7 @@ void Service::build_translators(Ptr<Options> options, size_t numTranslators) {
  translators_.reserve(numTranslators);
  for (size_t cpuId = 0; cpuId < numTranslators; cpuId++) {
    marian::DeviceId deviceId(cpuId, DeviceType::cpu);
-    translators_.emplace_back(deviceId, vocabs_, options, model_memory_);
+    translators_.emplace_back(deviceId, vocabs_, options, &modelMemory_, &shortlistMemory_);
  }
 }

--- a/src/translator/service.h
+++ b/src/translator/service.h
@ -18,6 +18,33 @@
 namespace marian {
 namespace bergamot {

+// Hack code to construct AlignedMemory from void*
+/// Temporary shim that copies a model passed as a raw pointer into an owned
+/// AlignedMemory so that TranslationModel keeps working.
+/// @param modelMemory start of the model bytes, or nullptr when no model is
+/// supplied as a byte array.
+/// @return an empty AlignedMemory for nullptr, otherwise a 256-byte-aligned
+/// copy of the first modelMemorySize bytes at modelMemory.
+inline AlignedMemory hackModel(const void* modelMemory) {
+  if(modelMemory == nullptr) {
+    return AlignedMemory();
+  }
+  // Hack: model memory size should be changed to actual model size
+  size_t modelMemorySize = 73837568;
+  // BatchTranslator::initialize aborts unless the model bytes start on a
+  // 256-byte boundary, so the default 64-byte alignment is not sufficient.
+  AlignedMemory alignedMemory(modelMemorySize, 256);
+  memcpy(alignedMemory.begin(), modelMemory, modelMemorySize);
+  return alignedMemory;
+}
+
+/// Temporary shim that copies a binary shortlist passed as a raw pointer into
+/// an owned AlignedMemory so that TranslationModel keeps working.
+/// @param shortlistMemory start of the shortlist bytes, or nullptr when none
+/// is supplied.
+/// @return an empty AlignedMemory for nullptr, otherwise a copy of the blob.
+inline AlignedMemory hackShortLis(const void* shortlistMemory) {
+  if(shortlistMemory == nullptr) {
+    return AlignedMemory();
+  }
+  // Hack: the byte count is not passed in, so it is rebuilt from the 64-bit
+  // words at indices 4 and 5 of the blob; it will be checked during construction.
+  const uint64_t* words = static_cast<const uint64_t*>(shortlistMemory);
+  const size_t wordBytes = sizeof(uint64_t) * (6 + words[4]);
+  const size_t indexBytes = sizeof(uint32_t) * words[5];
+  const size_t byteCount = wordBytes + indexBytes;
+  // Here is a hack to make TranslationModel work
+  AlignedMemory copy(byteCount);
+  memcpy(copy.begin(), shortlistMemory, byteCount);
+  return copy;
+}
+
 /// Service exposes methods to translate an incoming blob of text to the
 /// Consumer of bergamot API.
 ///
@ -38,18 +65,22 @@ class Service {

 public:
  /// @param options Marian options object
-  /// @param model_memory byte array (aligned to 64!!!) that contains the bytes
+  /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
  /// of a model.bin. Optional, defaults to nullptr when not used
-  explicit Service(Ptr<Options> options, const void *model_memory = nullptr);
+  /// @param shortlistMemory byte array of shortlist (aligned to 64)
+  explicit Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory);

-  /// Construct Service from a string configuration.
+  explicit Service(Ptr<Options> options) : Service(options, AlignedMemory(), AlignedMemory()){}
+
+/// Construct Service from a string configuration.
  /// @param [in] config string parsable as YAML expected to adhere with marian
  /// config
-  /// @param [in] model_memory byte array (aligned to 64!!!) that contains the
+  /// @param [in] modelMemory byte array (aligned to 256!!!) that contains the
  /// bytes of a model.bin. Optional, defaults to nullptr when not used
+  /// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
  explicit Service(const std::string &config,
-                   const void *model_memory = nullptr)
-      : Service(parseOptions(config), model_memory) {}
+                   const void* modelMemory = nullptr, const void* shortlistMemory = nullptr)
+      : Service(parseOptions(config), hackModel(modelMemory), hackShortLis(shortlistMemory)) {}

  /// Explicit destructor to clean up after any threads initialized in
  /// asynchronous operation mode.
@ -85,13 +116,16 @@ private:
  void async_translate();

  /// Number of workers to launch.
-  size_t numWorkers_;        // ORDER DEPENDENCY (pcqueue_)
-  const void *model_memory_; /// Model memory to load model passed as bytes.
+  size_t numWorkers_;              // ORDER DEPENDENCY (pcqueue_)
+  /// Model memory to load model passed as bytes.
+  AlignedMemory modelMemory_;      // ORDER DEPENDENCY (translators_)
+  /// Shortlist memory passed as bytes.
+  AlignedMemory shortlistMemory_;  // ORDER DEPENDENCY (translators_)

  /// Holds instances of batch translators, just one in case
  /// of single-threaded application, numWorkers_ in case of multithreaded
  /// setting.
-  std::vector<BatchTranslator> translators_;
+  std::vector<BatchTranslator> translators_;  // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)

  /// Stores requestId of active request. Used to establish
  /// ordering among requests and logging/book-keeping.