Load model as a byte array (#55)

* Switch to wasm branch for this example

* Load marian model from a byte array

* Sanitise executable names

* Change marian branch

* Update marian branch that loads binary models

* Example of loading model as a byte array

* Add the byte array loading files

* Die on misaligned memory

* Remove the unused argument

* Allow loading without a ptr parameter so that we don't break emc workflow
This commit is contained in:
Nikolay Bogoychev 2021-03-22 14:22:56 +00:00 committed by GitHub
parent f89c989b44
commit d75dd85def
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 235 additions and 21 deletions

@ -1 +1 @@
Subproject commit 8ddb73fad1001ae4c1697d2514ac1e5bd43e2ed3
Subproject commit 370fdb5a212cfcd2d1c5fca9fffc041d2787a432

View File

@ -1,10 +1,16 @@
add_executable(bergamot-translator-app main.cpp)
add_executable(bergamot-translator-app bergamot-translator-app.cpp)
target_link_libraries(bergamot-translator-app PRIVATE bergamot-translator)
add_executable(bergamot-translator-app-bytearray bergamot-translator-app-bytearray.cpp)
target_link_libraries(bergamot-translator-app-bytearray PRIVATE bergamot-translator)
if (NOT USE_WASM_COMPATIBLE_SOURCES)
add_executable(service-cli main-mts.cpp)
add_executable(service-cli service-cli.cpp)
target_link_libraries(service-cli PRIVATE bergamot-translator)
add_executable(service-cli-bytearray service-cli-bytearray.cpp)
target_link_libraries(service-cli-bytearray PRIVATE bergamot-translator)
add_executable(marian-decoder-new marian-decoder-new.cpp)
target_link_libraries(marian-decoder-new PRIVATE bergamot-translator)
endif()

View File

@ -0,0 +1,72 @@
/*
* bergamot-translator-app-bytearray.cpp
*
* An example application to demonstrate the use of Bergamot translator.
*
*/
#include <iostream>
#include "AbstractTranslationModel.h"
#include "TranslationRequest.h"
#include "TranslationResult.h"
#include "translator/parser.h"
#include "translator/byteArrayExample.h"
/**
 * Example application: translates two hard-coded English paragraphs using a
 * model that is passed to the translator as an in-memory byte array.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, parsed into the marian options
 * @return 0 on success
 */
int main(int argc, char **argv) {
  // Create a configParser and load command line parameters into a YAML config
  // string.
  auto configParser = marian::bergamot::createConfigParser();
  auto options = configParser.parseOptions(argc, argv, true);
  std::string config = options->asYamlString();

  // Byte array holding the model; this function owns it and releases it with
  // free() once nothing can reference it any more.
  void *model_bytes = bergamot::getBinaryModelFromConfig(options);

  {
    // Everything that may reference model_bytes lives inside this scope, so
    // the model (and the results) are destroyed BEFORE the buffer is freed.

    // Route the config string to construct marian model through
    // AbstractTranslationModel
    std::shared_ptr<AbstractTranslationModel> model =
        AbstractTranslationModel::createInstance(config, model_bytes);

    TranslationRequest translationRequest;
    std::vector<std::string> texts;
    texts.emplace_back(
        "The Bergamot project will add and improve client-side machine "
        "translation in a web browser. Unlike current cloud-based "
        "options, running directly on users machines empowers citizens to "
        "preserve their privacy and increases the uptake of language "
        "technologies in Europe in various sectors that require "
        "confidentiality.");
    texts.emplace_back(
        "Free software integrated with an open-source web "
        "browser, such as Mozilla Firefox, will enable bottom-up adoption "
        "by non-experts, resulting in cost savings for private and public "
        "sector users who would otherwise procure translation or operate "
        "monolingually. Bergamot is a consortium coordinated by the "
        "University of Edinburgh with partners Charles University in "
        "Prague, the University of Sheffield, University of Tartu, and "
        "Mozilla.");

    auto results = model->translate(std::move(texts), translationRequest);

    for (auto &result : results) {
      std::cout << "[original]: " << result.getOriginalText() << std::endl;
      std::cout << "[translated]: " << result.getTranslatedText() << std::endl;
      auto mappings = result.getSentenceMappings();
      for (auto &p : mappings) {
        std::string_view src = p.first;
        std::string_view tgt = p.second;
        std::cout << " [src Sentence]: " << src << std::endl;
        std::cout << " [tgt Sentence]: " << tgt << std::endl;
      }
      std::cout << std::endl;
    }
  }

  // The translation model has been destroyed above; it is now safe to clear
  // the memory used for the byte array.
  free(model_bytes);
  return 0;
}

View File

@ -0,0 +1,40 @@
#include <cstdlib>
#include <future>
#include <iostream>
#include <sstream>
#include "common/definitions.h"
#include "common/utils.h"
#include "marian.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/service.h"
#include "translator/byteArrayExample.h"
/**
 * Command line example: reads a text blob from stdin, translates it with a
 * Service whose model is supplied as an in-memory byte array, and prints the
 * translation to stdout.
 *
 * @param argc number of command line arguments
 * @param argv command line arguments, parsed into the marian options
 * @return 0 on success
 */
int main(int argc, char *argv[]) {
  auto cp = marian::bergamot::createConfigParser();
  auto options = cp.parseOptions(argc, argv, true);

  // Byte array holding the model; this function owns it and releases it with
  // free() once the service is gone.
  void *model_bytes = bergamot::getBinaryModelFromConfig(options);

  {
    // The service lives in its own scope so that its destructor runs BEFORE
    // the model buffer is released below.
    marian::bergamot::Service service(options, model_bytes);

    // Read a large input text blob from stdin
    std::ostringstream std_input;
    std_input << std::cin.rdbuf();
    std::string input = std_input.str();
    using marian::bergamot::Response;

    // get() blocks until the Response is complete, so no separate wait() is
    // needed.
    std::future<Response> responseFuture = service.translate(std::move(input));
    Response response = responseFuture.get();
    std::cout << response.translation() << std::endl;

    // Stop Service.
    service.stop();
  }

  // The service has been stopped and destroyed; it is now safe to clear the
  // memory used for the byte array.
  free(model_bytes);
  return 0;
}

View File

@ -28,8 +28,12 @@ public:
* AbstractTranslationModel. The instance is created using translation model
* configuration provided as yaml-formatted string.
*/
/**
* @param config Marian yml config file in the form of a string
* @param model_memory byte array (must be aligned to 256 bytes) that contains the bytes of a model.bin. Optional, defaults to nullptr when not used
*/
static std::shared_ptr<AbstractTranslationModel>
createInstance(const std::string &config);
createInstance(const std::string &config, const void * model_memory=nullptr);
AbstractTranslationModel() = default;

View File

@ -8,7 +8,11 @@
#include "AbstractTranslationModel.h"
#include "TranslationModel.h"
/**
* @param config Marian yml config file in the form of a string
* @param model_memory byte array (must be aligned to 256 bytes) that contains the bytes of a model.bin. Optional, defaults to nullptr when not used
*/
std::shared_ptr<AbstractTranslationModel>
AbstractTranslationModel::createInstance(const std::string &config) {
return std::make_shared<TranslationModel>(config);
AbstractTranslationModel::createInstance(const std::string &config, const void * model_memory) {
return std::make_shared<TranslationModel>(config, model_memory);
}

View File

@ -7,6 +7,7 @@ add_library(bergamot-translator STATIC
TranslationModel.cpp
# Following files added from browsermt/mts@nuke
byteArrayExample.cpp
text_processor.cpp
sentence_splitter.cpp
batch_translator.cpp

View File

@ -50,9 +50,9 @@ std::shared_ptr<marian::Options> parseOptions(const std::string &config) {
return std::make_shared<marian::Options>(options);
}
TranslationModel::TranslationModel(const std::string &config)
TranslationModel::TranslationModel(const std::string &config, const void * model_memory)
: configOptions_(std::move(parseOptions(config))),
AbstractTranslationModel(), service_(configOptions_) {}
AbstractTranslationModel(), service_(configOptions_, model_memory) {}
TranslationModel::~TranslationModel() {}

View File

@ -27,7 +27,11 @@ public:
/* Construct the model using the model configuration options as yaml-formatted
* string
*/
TranslationModel(const std::string &config);
/**
* @param config Marian yml config file in the form of a string
* @param model_memory optional byte array (must be aligned to 256 bytes) that contains the bytes of a model.bin.
*/
TranslationModel(const std::string &config, const void * model_memory = nullptr);
~TranslationModel();

View File

@ -10,8 +10,9 @@ namespace bergamot {
BatchTranslator::BatchTranslator(DeviceId const device,
std::vector<Ptr<Vocab const>> &vocabs,
Ptr<Options> options)
: device_(device), options_(options), vocabs_(&vocabs) {}
Ptr<Options> options,
const void * model_memory)
: device_(device), options_(options), vocabs_(&vocabs), model_memory_(model_memory) {}
void BatchTranslator::initialize() {
// Initializes the graph.
@ -29,7 +30,16 @@ void BatchTranslator::initialize() {
graph_->setDevice(device_);
graph_->getBackend()->configureDevice(options_);
graph_->reserveWorkspaceMB(options_->get<size_t>("workspace"));
scorers_ = createScorers(options_);
if (model_memory_) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file
if ((uintptr_t)model_memory_ % 256 != 0) {
std::cerr << "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it." << std::endl;
exit(1);
}
const std::vector<const void *> container = {model_memory_}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding.
scorers_ = createScorers(options_, container);
} else {
scorers_ = createScorers(options_);
}
for (auto scorer : scorers_) {
scorer->init(graph_);
if (slgen_) {

View File

@ -26,8 +26,15 @@ class BatchTranslator {
// shut down in Service which calls join() on the threads.
public:
BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
Ptr<Options> options);
/**
* Initialise the marian translator.
* @param device DeviceId that performs translation. Could be CPU or GPU
* @param vocabs Vector that contains ptrs to two vocabs
* @param options Marian options object
* @param model_memory byte array (must be aligned to 256 bytes) that contains the bytes of a model.bin. Provide a nullptr if not used.
*/
explicit BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
Ptr<Options> options, const void * model_memory);
// convenience function for logging. TODO(jerin)
std::string _identifier() { return "worker" + std::to_string(device_.no); }
@ -41,6 +48,7 @@ private:
Ptr<ExpressionGraph> graph_;
std::vector<Ptr<Scorer>> scorers_;
Ptr<data::ShortlistGenerator const> slgen_;
const void * model_memory_;
};
} // namespace bergamot

View File

@ -0,0 +1,45 @@
#include "byteArrayExample.h"
#include <stdlib.h>
#include <fstream>
#include <iostream>
namespace bergamot {
/**
 * Reads a whole file into a freshly allocated buffer aligned to 256 bytes.
 *
 * Prints a message to stderr and terminates the process with exit status 1 if
 * the file cannot be opened, its size cannot be determined, the aligned
 * allocation fails, or fewer bytes than expected could be read.
 *
 * @param path path of the file to load
 * @return pointer to the buffer holding the file contents. The buffer is
 *         allocated with posix_memalign and must be released by the caller
 *         with free().
 */
void * getBinaryFile(std::string path) {
  std::ifstream is(path, std::ifstream::binary);
  if (!is) {
    std::cerr << "Failed opening file stream: " << path << std::endl;
    std::exit(1);
  }

  // Determine the length of the file in bytes. tellg() reports failure as -1,
  // which must not be turned into a huge unsigned allocation size.
  is.seekg(0, is.end);
  const std::streamoff end = is.tellg();
  is.seekg(0, is.beg);
  if (end < 0 || !is) {
    std::cerr << "Failed determining the size of file: " << path << std::endl;
    std::exit(1);
  }
  const uint64_t length = static_cast<uint64_t>(end);

  void *result = nullptr;
  const int fail = posix_memalign(&result, 256, length);
  if (fail) {
    std::cerr << "Failed to allocate aligned memory." << std::endl;
    std::exit(1);
  }

  // A short read would leave part of the buffer uninitialised, so verify that
  // the whole file really arrived.
  is.read(static_cast<char *>(result), static_cast<std::streamsize>(length));
  if (static_cast<uint64_t>(is.gcount()) != length) {
    free(result);
    std::cerr << "Failed reading file: " << path << std::endl;
    std::exit(1);
  }
  return result;
}
/**
 * Loads the single binary model named by the "models" option into memory.
 *
 * Prints a message to stderr and terminates the process with exit status 1
 * unless "models" contains exactly one path and that path has the ".bin"
 * extension.
 *
 * @param options marian options object; its "models" entry is read
 * @return the buffer returned by getBinaryFile() for the model path
 */
void * getBinaryModelFromConfig(marian::Ptr<marian::Options> options) {
  const std::vector<std::string> models = options->get<std::vector<std::string>>("models");
  if (models.size() != 1) {
    std::cerr << "Loading multiple binary models is not supported for now as it is not necessary." << std::endl;
    std::exit(1);
  }

  // This check used to sit behind the std::exit above and was therefore never
  // executed; it must apply to the one model that is actually loaded.
  const marian::filesystem::Path modelPath(models[0]);
  if (modelPath.extension() != marian::filesystem::Path(".bin")) {
    std::cerr << "Non binary models cannot be loaded as a byte array." << std::endl;
    std::exit(1);
  }

  return getBinaryFile(models[0]);
}
} // namespace bergamot

View File

@ -0,0 +1,8 @@
#pragma once

#include <string>

#include "marian.h"

namespace bergamot {

/**
 * Reads a whole file into a newly allocated buffer aligned to 256 bytes.
 * Terminates the process with exit status 1 if the file cannot be opened or
 * the memory cannot be allocated.
 *
 * @param path path of the file to load
 * @return pointer to the buffer with the file contents; the caller releases
 *         it with free()
 */
void * getBinaryFile(std::string path);

/**
 * Loads the model listed in the "models" option via getBinaryFile().
 * Terminates the process with exit status 1 if "models" does not contain
 * exactly one entry.
 *
 * @param options marian options object; its "models" entry is read
 * @return pointer to the buffer with the model bytes; the caller releases it
 *         with free()
 */
void * getBinaryModelFromConfig(marian::Ptr<marian::Options> options);

} // namespace bergamot

View File

@ -8,9 +8,9 @@
namespace marian {
namespace bergamot {
Service::Service(Ptr<Options> options)
Service::Service(Ptr<Options> options, const void * model_memory)
: ServiceBase(options), numWorkers_(options->get<int>("cpu-threads")),
pcqueue_(numWorkers_) {
pcqueue_(numWorkers_), model_memory_{model_memory} {
if (numWorkers_ == 0) {
ABORT("Fatal: Attempt to create multithreaded instance with --cpu-threads "
"0. ");
@ -21,7 +21,7 @@ Service::Service(Ptr<Options> options)
for (size_t cpuId = 0; cpuId < numWorkers_; cpuId++) {
marian::DeviceId deviceId(cpuId, DeviceType::cpu);
translators_.emplace_back(deviceId, vocabs_, options);
translators_.emplace_back(deviceId, vocabs_, options, model_memory_);
auto &translator = translators_.back();
workers_.emplace_back([&translator, this] {

View File

@ -31,7 +31,11 @@ class Service : public ServiceBase {
// Response result = response.get();
public:
explicit Service(Ptr<Options> options);
/**
* @param options Marian options object
* @param model_memory byte array (must be aligned to 256 bytes) that contains the bytes of a model.bin. Optional, defaults to nullptr when not used
*/
explicit Service(Ptr<Options> options, const void * model_memory=nullptr);
// Implements enqueue and top through blocking methods.
void stop() override;
~Service();
@ -46,6 +50,7 @@ private:
size_t numWorkers_; // ORDER DEPENDENCY
PCQueue<Batch> pcqueue_; // ORDER DEPENDENCY
const void * model_memory_;
std::vector<std::thread> workers_;
std::vector<BatchTranslator> translators_;
};

View File

@ -24,9 +24,9 @@ std::future<Response> ServiceBase::translate(std::string &&input) {
return future;
}
NonThreadedService::NonThreadedService(Ptr<Options> options)
NonThreadedService::NonThreadedService(Ptr<Options> options, const void * model_memory)
: ServiceBase(options),
translator_(DeviceId(0, DeviceType::cpu), vocabs_, options) {
translator_(DeviceId(0, DeviceType::cpu), vocabs_, options, model_memory) {
translator_.initialize();
}

View File

@ -16,6 +16,9 @@ namespace bergamot {
class ServiceBase {
public:
/**
* @param options Marian options object
*/
explicit ServiceBase(Ptr<Options> options);
// Transfers ownership of input string to Service, returns a future containing
@ -44,7 +47,11 @@ protected:
class NonThreadedService : public ServiceBase {
public:
explicit NonThreadedService(Ptr<Options> options);
/**
* @param options Marian options object
* @param model_memory byte array (must be aligned to 256 bytes) that contains the bytes of a model.bin. Provide a nullptr if not used.
*/
explicit NonThreadedService(Ptr<Options> options, const void * model_memory);
void stop() override{};
private: