diff --git a/app/bergamot-translator-app-bytearray.cpp b/app/bergamot-translator-app-bytearray.cpp index b58c638..215b573 100644 --- a/app/bergamot-translator-app-bytearray.cpp +++ b/app/bergamot-translator-app-bytearray.cpp @@ -9,7 +9,7 @@ #include "TranslationModel.h" #include "translator/parser.h" -#include "translator/byteArrayExample.h" +#include "translator/byte_array_util.h" int main(int argc, char **argv) { @@ -19,9 +19,11 @@ int main(int argc, char **argv) { auto options = configParser.parseOptions(argc, argv, true); std::string config = options->asYamlString(); + // Prepare model byte array + marian::bergamot::AlignedMemory modelBytes = marian::bergamot::getModelMemoryFromConfig(options); + // Route the config string to construct marian model through TranslationModel - void * model_bytes = bergamot::getBinaryModelFromConfig(options); - auto model = std::make_shared(config, model_bytes); + TranslationModel model(config, modelBytes.begin()); TranslationRequest translationRequest; std::vector texts; @@ -42,7 +44,7 @@ int main(int argc, char **argv) { "Prague, the University of Sheffield, University of Tartu, and " "Mozilla."); - auto results = model->translate(std::move(texts), translationRequest); + auto results = model.translate(std::move(texts), translationRequest); // Resolve the future and get the actual result //std::vector results = futureResults.get(); @@ -61,8 +63,5 @@ int main(int argc, char **argv) { std::cout << std::endl; } - // Clear the memory used for the byte array - free(model_bytes); // Ideally, this should be done after the translation model has been gracefully shut down. - return 0; } diff --git a/app/service-cli-bytearray.cpp b/app/service-cli-bytearray.cpp index cb3b17f..6b3948b 100644 --- a/app/service-cli-bytearray.cpp +++ b/app/service-cli-bytearray.cpp @@ -9,14 +9,17 @@ #include "translator/parser.h" #include "translator/response.h" #include "translator/service.h" -#include "translator/byteArrayExample.h" +#include "translator/byte_array_util.h" int main(int argc, char *argv[]) { auto cp = marian::bergamot::createConfigParser(); auto options = cp.parseOptions(argc, argv, true); - void * model_bytes = bergamot::getBinaryModelFromConfig(options); - marian::bergamot::Service service(options, model_bytes); + // Prepare memories for model and shortlist + marian::bergamot::AlignedMemory modelBytes = marian::bergamot::getModelMemoryFromConfig(options); + marian::bergamot::AlignedMemory shortlistBytes = marian::bergamot::getShortlistMemoryFromConfig(options); + + marian::bergamot::Service service(options, std::move(modelBytes), std::move(shortlistBytes)); // Read a large input text blob from stdin std::ostringstream std_input; @@ -30,8 +33,5 @@ int main(int argc, char *argv[]) { Response response = responseFuture.get(); std::cout << response.target.text << std::endl; - // Clear the memory used for the byte array - free(model_bytes); // Ideally, this should be done after the translation model has been gracefully shut down. - return 0; } diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index a341427..3ddfa79 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -1,7 +1,6 @@ add_library(bergamot-translator STATIC TranslationModel.cpp - - byteArrayExample.cpp + byte_array_util.cpp text_processor.cpp sentence_splitter.cpp batch_translator.cpp diff --git a/src/translator/aligned.h b/src/translator/aligned.h new file mode 100644 index 0000000..6edb84e --- /dev/null +++ b/src/translator/aligned.h @@ -0,0 +1,71 @@ +#pragma once +#include +#include +#ifdef _MSC_VER +#include +#endif + +// Aligned simple vector. + +namespace marian { +namespace bergamot { + +template class AlignedVector { +public: + AlignedVector() : mem_(nullptr), size_(0) {} + + explicit AlignedVector(std::size_t size, std::size_t alignment = 64 /* CPU cares about this */) + : size_(size) { +#ifdef _MSC_VER + mem_ = static_cast(_aligned_malloc(size * sizeof(T), alignment)); + if (!mem_) throw std::bad_alloc(); +#else + if (posix_memalign(reinterpret_cast(&mem_), alignment, size * sizeof(T))) { + throw std::bad_alloc(); + } +#endif + } + + AlignedVector(AlignedVector &&from) : mem_(from.mem_), size_(from.size_) { + from.mem_ = nullptr; + from.size_ = 0; + } + + AlignedVector &operator=(AlignedVector &&from) { + mem_ = from.mem_; + size_ = from.size_; + from.mem_ = nullptr; + from.size_ = 0; + return *this; + } + + AlignedVector(const AlignedVector&) = delete; + AlignedVector& operator=(const AlignedVector&) = delete; + + ~AlignedVector() { +#ifdef _MSC_VER + _aligned_free(mem_); +#else + std::free(mem_); +#endif + } + + std::size_t size() const { return size_; } + + T &operator[](std::size_t offset) { return mem_[offset]; } + const T &operator[](std::size_t offset) const { return mem_[offset]; } + + T *begin() { return mem_; } + const T *begin() const { return mem_; } + T *end() { return mem_ + size_; } + const T *end() const { return mem_ + size_; } + + template + ReturnType *as() { return reinterpret_cast(mem_); } + +private: + T *mem_; + std::size_t size_; +}; +} // namespace bergamot +} // namespace marian diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index c83cf8c..8278ca7 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -11,17 +11,29 @@ namespace bergamot { BatchTranslator::BatchTranslator(DeviceId const device, std::vector> &vocabs, Ptr options, - const void * model_memory) - : device_(device), options_(options), vocabs_(&vocabs), model_memory_(model_memory) {} + const AlignedMemory* modelMemory, + const AlignedMemory* shortlistMemory) + : device_(device), options_(options), vocabs_(&vocabs), + modelMemory_(modelMemory), shortlistMemory_(shortlistMemory) {} void BatchTranslator::initialize() { // Initializes the graph. if (options_->hasAndNotEmpty("shortlist")) { int srcIdx = 0, trgIdx = 1; bool shared_vcb = vocabs_->front() == vocabs_->back(); - slgen_ = New(options_, vocabs_->front(), - vocabs_->back(), srcIdx, - trgIdx, shared_vcb); + if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) { + bool check = options_->get("check-bytearray",true); + slgen_ = New(shortlistMemory_->begin(), shortlistMemory_->size(), + vocabs_->front(), vocabs_->back(), + srcIdx, trgIdx, shared_vcb, check); + } + else { + // Changed to BinaryShortlistGenerator to enable loading binary shortlist file + // This class also supports text shortlist file + slgen_ = New(options_, vocabs_->front(), + vocabs_->back(), srcIdx, + trgIdx, shared_vcb); + } } graph_ = New(true); // always optimize @@ -30,12 +42,10 @@ void BatchTranslator::initialize() { graph_->setDevice(device_); graph_->getBackend()->configureDevice(options_); graph_->reserveWorkspaceMB(options_->get("workspace")); - if (model_memory_) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file - if ((uintptr_t)model_memory_ % 256 != 0) { - std::cerr << "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it." << std::endl; - exit(1); - } - const std::vector container = {model_memory_}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding. + if (modelMemory_->size() > 0 && modelMemory_->begin() != nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file + ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0, + "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it."); + const std::vector container = {modelMemory_->begin()}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding. scorers_ = createScorers(options_, container); } else { scorers_ = createScorers(options_); diff --git a/src/translator/batch_translator.h b/src/translator/batch_translator.h index 4e17b65..761a534 100644 --- a/src/translator/batch_translator.h +++ b/src/translator/batch_translator.h @@ -31,10 +31,11 @@ public: * @param device DeviceId that performs translation. Could be CPU or GPU * @param vocabs Vector that contains ptrs to two vocabs * @param options Marian options object - * @param model_memory byte array (aligned to 64!!!) that contains the bytes of a model.bin. Provide a nullptr if not used. + * @param modelMemory byte array (aligned to 256!!!) that contains the bytes of a model.bin. Provide a nullptr if not used. + * @param shortlistMemory byte array of shortlist (aligned to 64) */ explicit BatchTranslator(DeviceId const device, std::vector> &vocabs, - Ptr options, const void * model_memory); + Ptr options, const AlignedMemory* modelMemory, const AlignedMemory* shortlistMemory); // convenience function for logging. TODO(jerin) std::string _identifier() { return "worker" + std::to_string(device_.no); } @@ -48,7 +49,8 @@ private: Ptr graph_; std::vector> scorers_; Ptr slgen_; - const void * model_memory_; + const AlignedMemory* modelMemory_{nullptr}; + const AlignedMemory* shortlistMemory_{nullptr}; }; } // namespace bergamot diff --git a/src/translator/byteArrayExample.cpp b/src/translator/byteArrayExample.cpp deleted file mode 100644 index 28f9d9b..0000000 --- a/src/translator/byteArrayExample.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "byteArrayExample.h" -#include -#include -#include - -namespace bergamot { - -void * getBinaryFile(std::string path) { - std::ifstream is (path, std::ifstream::binary); - uint64_t length = 0; // Determine the length of file in bytes - if (is) { - is.seekg(0, is.end); - length = is.tellg(); - is.seekg(0, is.beg); - } else { - std::cerr << "Failed opening file stream: " << path << std::endl; - std::exit(1); - } - void *result; - int fail = posix_memalign(&result, 256, length); - if (fail) { - std::cerr << "Failed to allocate aligned memory." << std::endl; - std::exit(1); - } - is.read(static_cast(result), length); - return result; -} - -void * getBinaryModelFromConfig(marian::Ptr options) { - std::vector models = options->get>("models"); - if (models.size() != 1) { - std::cerr << "Loading multiple binary models is not supported for now as it is not necessary." << std::endl; - std::exit(1); - marian::filesystem::Path modelPath(models[0]); - if (modelPath.extension() != marian::filesystem::Path(".bin")) { - std::cerr << "Non binary models cannot be loaded as a byte array." << std::endl; - std::exit(1); - } - return nullptr; - } else { - return getBinaryFile(models[0]); - } -} - -} // namespace bergamot diff --git a/src/translator/byteArrayExample.h b/src/translator/byteArrayExample.h deleted file mode 100644 index 321ea5d..0000000 --- a/src/translator/byteArrayExample.h +++ /dev/null @@ -1,8 +0,0 @@ -#include "marian.h" - -namespace bergamot { - -void * getBinaryFile(std::string path); -void * getBinaryModelFromConfig(marian::Ptr options); - -} // namespace bergamot diff --git a/src/translator/byte_array_util.cpp b/src/translator/byte_array_util.cpp new file mode 100644 index 0000000..b464612 --- /dev/null +++ b/src/translator/byte_array_util.cpp @@ -0,0 +1,33 @@ +#include "byte_array_util.h" +#include +#include + +namespace marian { +namespace bergamot { + +AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){ + uint64_t fileSize = filesystem::fileSize(path); + io::InputFileStream in(path); + ABORT_IF(in.bad(), "Failed opening file stream: {}", path); + AlignedMemory alignedMemory(fileSize, alignment); + in.read(reinterpret_cast(alignedMemory.begin()), fileSize); + ABORT_IF(alignedMemory.size() != fileSize, "Error reading file {}", path); + return alignedMemory; +} + +AlignedMemory getModelMemoryFromConfig(marian::Ptr options){ + auto models = options->get>("models"); + ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary."); + marian::filesystem::Path modelPath(models[0]); + ABORT_IF(modelPath.extension() != marian::filesystem::Path(".bin"), "The file of binary model should end with .bin"); + return loadFileToMemory(models[0], 256); +} + +AlignedMemory getShortlistMemoryFromConfig(marian::Ptr options){ + auto shortlist = options->get>("shortlist"); + ABORT_IF(shortlist.empty(), "No path to shortlist file is given."); + return loadFileToMemory(shortlist[0], 64); +} + +} // namespace bergamot +} // namespace marian diff --git a/src/translator/byte_array_util.h b/src/translator/byte_array_util.h new file mode 100644 index 0000000..a8df1cb --- /dev/null +++ b/src/translator/byte_array_util.h @@ -0,0 +1,12 @@ +#include "marian.h" +#include "definitions.h" + +namespace marian { +namespace bergamot { + +AlignedMemory loadFileToMemory(const std::string& path, size_t alignment); +AlignedMemory getModelMemoryFromConfig(marian::Ptr options); +AlignedMemory getShortlistMemoryFromConfig(marian::Ptr options); + +} // namespace bergamot +} // namespace marian diff --git a/src/translator/definitions.h b/src/translator/definitions.h index 35797a2..32998b9 100644 --- a/src/translator/definitions.h +++ b/src/translator/definitions.h @@ -3,6 +3,7 @@ #include "data/types.h" #include "data/vocab_base.h" +#include "aligned.h" #include namespace marian { @@ -21,6 +22,9 @@ template UPtr UNew(Args &&... args) { template UPtr UNew(UPtr p) { return UPtr(p); } +/// Shortcut to AlignedVector for byte arrays +typedef AlignedVector AlignedMemory; + } // namespace bergamot } // namespace marian diff --git a/src/translator/parser.h b/src/translator/parser.h index 4d93e3a..fa4e7bb 100644 --- a/src/translator/parser.h +++ b/src/translator/parser.h @@ -23,7 +23,11 @@ inline marian::ConfigParser createConfigParser() { "--max-length-break", "Bergamot Options", "Maximum input tokens to be processed in a single sentence.", 128); - return cp; + cp.addOption( + "--check-bytearray", "Bergamot Options", + "Flag holds whether to check the content of the bytearray (true by default)", true); + + return cp; } inline std::shared_ptr diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 718bc4d..76bcba2 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -28,10 +28,11 @@ loadVocabularies(marian::Ptr options) { namespace marian { namespace bergamot { -Service::Service(Ptr options, const void *model_memory) +Service::Service(Ptr options, AlignedMemory modelMemory, AlignedMemory shortlistMemory) : requestId_(0), vocabs_(std::move(loadVocabularies(options))), text_processor_(vocabs_, options), batcher_(options), - numWorkers_(options->get("cpu-threads")), model_memory_(model_memory) + numWorkers_(options->get("cpu-threads")), + modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory)) #ifndef WASM_COMPATIBLE_SOURCE // 0 elements in PCQueue is illegal and can lead to failures. Adding a // guard to have at least one entry allocated. In the single-threaded @@ -54,7 +55,7 @@ void Service::build_translators(Ptr options, size_t numTranslators) { translators_.reserve(numTranslators); for (size_t cpuId = 0; cpuId < numTranslators; cpuId++) { marian::DeviceId deviceId(cpuId, DeviceType::cpu); - translators_.emplace_back(deviceId, vocabs_, options, model_memory_); + translators_.emplace_back(deviceId, vocabs_, options, &modelMemory_, &shortlistMemory_); } } diff --git a/src/translator/service.h b/src/translator/service.h index 59f2f4d..8fc1de7 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -18,6 +18,33 @@ namespace marian { namespace bergamot { +// Hack code to construct AlignedMemory* from void* +inline AlignedMemory hackModel(const void* modelMemory) { + if(modelMemory != nullptr){ + // Here is a hack to make TranslationModel works + size_t modelMemorySize = 73837568; // Hack: model memory size should be changed to actual model size + AlignedMemory alignedMemory(modelMemorySize); + memcpy(alignedMemory.begin(), modelMemory, modelMemorySize); + return alignedMemory; + } else { + return AlignedMemory(); + } +} + +inline AlignedMemory hackShortLis(const void* shortlistMemory) { + if(shortlistMemory!= nullptr) { + // Hacks to obtain shortlist memory size as this will be checked during construction + size_t shortlistMemorySize = sizeof(uint64_t) * (6 + *((uint64_t*)shortlistMemory+4)) + + sizeof(uint32_t) * *((uint64_t*)shortlistMemory+5); + // Here is a hack to make TranslationModel works + AlignedMemory alignedMemory(shortlistMemorySize); + memcpy(alignedMemory.begin(), shortlistMemory, shortlistMemorySize); + return alignedMemory; + }else { + return AlignedMemory(); + } +} + /// Service exposes methods to translate an incoming blob of text to the /// Consumer of bergamot API. /// @@ -38,18 +65,22 @@ class Service { public: /// @param options Marian options object - /// @param model_memory byte array (aligned to 64!!!) that contains the bytes + /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes /// of a model.bin. Optional, defaults to nullptr when not used - explicit Service(Ptr options, const void *model_memory = nullptr); + /// @param shortlistMemory byte array of shortlist (aligned to 64) + explicit Service(Ptr options, AlignedMemory modelMemory, AlignedMemory shortlistMemory); - /// Construct Service from a string configuration. + explicit Service(Ptr options) : Service(options, AlignedMemory(), AlignedMemory()){} + +/// Construct Service from a string configuration. /// @param [in] config string parsable as YAML expected to adhere with marian /// config - /// @param [in] model_memory byte array (aligned to 64!!!) that contains the + /// @param [in] model_memory byte array (aligned to 256!!!) that contains the /// bytes of a model.bin. Optional, defaults to nullptr when not used + /// @param [in] shortlistMemory byte array of shortlist (aligned to 64) explicit Service(const std::string &config, - const void *model_memory = nullptr) - : Service(parseOptions(config), model_memory) {} + const void* modelMemory = nullptr, const void* shortlistMemory = nullptr) + : Service(parseOptions(config), hackModel(modelMemory), hackShortLis(shortlistMemory)) {} /// Explicit destructor to clean up after any threads initialized in /// asynchronous operation mode. @@ -85,13 +116,16 @@ private: void async_translate(); /// Number of workers to launch. - size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_) - const void *model_memory_; /// Model memory to load model passed as bytes. + size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_) + /// Model memory to load model passed as bytes. + AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_) + /// Shortlist memory passed as bytes. + AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_) /// Holds instances of batch translators, just one in case /// of single-threaded application, numWorkers_ in case of multithreaded /// setting. - std::vector translators_; + std::vector translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_) /// Stores requestId of active request. Used to establish /// ordering among requests and logging/book-keeping.