mirror of
https://github.com/browsermt/bergamot-translator.git
synced 2024-08-15 08:30:46 +03:00
Enable vocabs pass as byte arrays (#122)
* first attempt to enable vocabs pass as byte arrays
* pass vocabs bytes as AlignedMemory
* add vocabIndices to avoid double loading
* small fix on parameter names and documentation
* fix windows build plus tiny update on documentation
* update marian-dev submodule
* move validate model bytearray in BatchTranslator
* small refactors on validateBinaryModel()
* switch vocab memories to std::vector<marian::Ptr<AlignedMemory>>
* update marian-dev submodule
* replace marian::Ptr to std::shared_ptr for vocab memories
* add note for vocab memories
This commit is contained in:
parent
b86c76b004
commit
5b02008a97
2
3rd_party/marian-dev
vendored
2
3rd_party/marian-dev
vendored
@ -1 +1 @@
|
||||
Subproject commit 94aeaa4616a0fb01ac95a23f0e74a214a94e7609
|
||||
Subproject commit ca15d61c87ef2f8f2c290b75a5da6236eb9833d2
|
@ -18,15 +18,17 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
// Prepare memories for model and shortlist
|
||||
marian::bergamot::AlignedMemory modelBytes, shortlistBytes;
|
||||
std::vector<std::shared_ptr<marian::bergamot::AlignedMemory>> vocabsBytes;
|
||||
|
||||
if (options->get<bool>("check-bytearray")) {
|
||||
// Load legit values into bytearrays.
|
||||
modelBytes = marian::bergamot::getModelMemoryFromConfig(options);
|
||||
shortlistBytes = marian::bergamot::getShortlistMemoryFromConfig(options);
|
||||
marian::bergamot::getVocabsMemoryFromConfig(options, vocabsBytes);
|
||||
}
|
||||
|
||||
marian::bergamot::Service service(options, std::move(modelBytes),
|
||||
std::move(shortlistBytes));
|
||||
std::move(shortlistBytes), std::move(vocabsBytes));
|
||||
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "data/corpus.h"
|
||||
#include "data/text_input.h"
|
||||
#include "translator/beam_search.h"
|
||||
#include "byte_array_util.h"
|
||||
|
||||
namespace marian {
|
||||
namespace bergamot {
|
||||
@ -18,11 +19,11 @@ BatchTranslator::BatchTranslator(DeviceId const device,
|
||||
|
||||
void BatchTranslator::initialize() {
|
||||
// Initializes the graph.
|
||||
bool check = options_->get<bool>("check-bytearray",false); // Flag holds whether validate the bytearray (model and shortlist)
|
||||
if (options_->hasAndNotEmpty("shortlist")) {
|
||||
int srcIdx = 0, trgIdx = 1;
|
||||
bool shared_vcb = vocabs_->front() == vocabs_->back();
|
||||
if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) {
|
||||
bool check = options_->get<bool>("check-bytearray",true);
|
||||
slgen_ = New<data::BinaryShortlistGenerator>(shortlistMemory_->begin(), shortlistMemory_->size(),
|
||||
vocabs_->front(), vocabs_->back(),
|
||||
srcIdx, trgIdx, shared_vcb, check);
|
||||
@ -45,6 +46,10 @@ void BatchTranslator::initialize() {
|
||||
if (modelMemory_->size() > 0 && modelMemory_->begin() != nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the model from there, as opposed to from reading in the config file
|
||||
ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0,
|
||||
"The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
|
||||
if (check) {
|
||||
ABORT_IF(!validateBinaryModel(*modelMemory_, modelMemory_->size()),
|
||||
"The binary file is invalid. Incomplete or corrupted download?");
|
||||
}
|
||||
const std::vector<const void *> container = {modelMemory_->begin()}; // Marian supports multiple models initialised in this manner hence std::vector. However we will only ever use 1 during decoding.
|
||||
scorers_ = createScorers(options_, container);
|
||||
} else {
|
||||
|
@ -1,12 +1,12 @@
|
||||
#include "byte_array_util.h"
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
namespace marian {
|
||||
namespace bergamot {
|
||||
|
||||
namespace {
|
||||
|
||||
// This is a basic validator that checks if the file has not been truncated
|
||||
// it basically loads up the header and checks
|
||||
|
||||
@ -26,9 +26,10 @@ const T* get(const void*& current, uint64_t num = 1) {
|
||||
current = (const T*)current + num;
|
||||
return ptr;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
bool validateBinaryModel(AlignedMemory& model, uint64_t fileSize) {
|
||||
const void * current = &model[0];
|
||||
bool validateBinaryModel(const AlignedMemory& model, uint64_t fileSize) {
|
||||
const void * current = model.begin();
|
||||
uint64_t memoryNeeded = sizeof(uint64_t)*2; // We keep track of how much memory we would need if we have a complete file
|
||||
uint64_t numHeaders;
|
||||
if (fileSize >= memoryNeeded) { // We have enough filesize to fetch the headers.
|
||||
@ -76,8 +77,6 @@ bool validateBinaryModel(AlignedMemory& model, uint64_t fileSize) {
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
|
||||
uint64_t fileSize = filesystem::fileSize(path);
|
||||
io::InputFileStream in(path);
|
||||
@ -89,13 +88,12 @@ AlignedMemory loadFileToMemory(const std::string& path, size_t alignment){
|
||||
}
|
||||
|
||||
AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options){
|
||||
auto models = options->get<std::vector<std::string>>("models");
|
||||
ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
|
||||
marian::filesystem::Path modelPath(models[0]);
|
||||
ABORT_IF(modelPath.extension() != marian::filesystem::Path(".bin"), "The file of binary model should end with .bin");
|
||||
AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
|
||||
ABORT_IF(!validateBinaryModel(alignedMemory, alignedMemory.size()), "The binary file is invalid. Incomplete or corrupted download?");
|
||||
return alignedMemory;
|
||||
auto models = options->get<std::vector<std::string>>("models");
|
||||
ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
|
||||
marian::filesystem::Path modelPath(models[0]);
|
||||
ABORT_IF(modelPath.extension() != marian::filesystem::Path(".bin"), "The file of binary model should end with .bin");
|
||||
AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
|
||||
return alignedMemory;
|
||||
}
|
||||
|
||||
AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options){
|
||||
@ -104,5 +102,20 @@ AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options)
|
||||
return loadFileToMemory(shortlist[0], 64);
|
||||
}
|
||||
|
||||
/// Loads every vocabulary file listed under the "vocabs" option into memory.
///
/// @param options        Options object; the "vocabs" entry is read as a list
///                       of file paths.
/// @param vocabMemories  Output vector. It is resized to the number of listed
///                       paths and entry i receives the memory loaded for
///                       path i.
///
/// Aborts when fewer than two vocabulary paths are configured. A path that
/// appears more than once is read from disk only once; the corresponding
/// output entries then share one AlignedMemory object through shared_ptr.
void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
|
||||
std::vector<std::shared_ptr<AlignedMemory>>& vocabMemories){
|
||||
auto vfiles = options->get<std::vector<std::string>>("vocabs");
|
||||
ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
|
||||
vocabMemories.resize(vfiles.size());
|
||||
// Maps a vocabulary path to the memory already loaded for it, so that a
// repeated path reuses the first load instead of reading the file again.
std::unordered_map<std::string, std::shared_ptr<AlignedMemory>> vocabMap;
|
||||
for (size_t i = 0; i < vfiles.size(); ++i) {
|
||||
// emplace() inserts an empty placeholder only when the path is new;
// m.second reports whether that insertion took place.
auto m = vocabMap.emplace(std::make_pair(vfiles[i], std::shared_ptr<AlignedMemory>()));
|
||||
if (m.second) {
|
||||
// First occurrence of this path: load the file, passing 64 as the
// alignment argument of loadFileToMemory.
m.first->second = std::make_shared<AlignedMemory>(loadFileToMemory(vfiles[i], 64));
|
||||
}
|
||||
// New or repeated, slot i points at the (possibly shared) memory.
vocabMemories[i] = m.first->second;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace bergamot
|
||||
} // namespace marian
|
||||
|
@ -7,6 +7,8 @@ namespace bergamot {
|
||||
AlignedMemory loadFileToMemory(const std::string& path, size_t alignment);
|
||||
AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
|
||||
AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options);
|
||||
|
||||
void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
|
||||
std::vector<std::shared_ptr<AlignedMemory>>& vocabMemories);
|
||||
bool validateBinaryModel(const AlignedMemory& model, uint64_t fileSize);
|
||||
} // namespace bergamot
|
||||
} // namespace marian
|
||||
|
@ -6,21 +6,34 @@
|
||||
#include <utility>
|
||||
|
||||
inline std::vector<marian::Ptr<const marian::Vocab>>
|
||||
loadVocabularies(marian::Ptr<marian::Options> options) {
|
||||
loadVocabularies(marian::Ptr<marian::Options> options,
|
||||
std::vector<std::shared_ptr<marian::bergamot::AlignedMemory>>&& vocabMemories) {
|
||||
// @TODO: parallelize vocab loading for faster startup
|
||||
auto vfiles = options->get<std::vector<std::string>>("vocabs");
|
||||
// with the current setup, we need at least two vocabs: src and trg
|
||||
ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
|
||||
std::vector<marian::Ptr<marian::Vocab const>> vocabs(vfiles.size());
|
||||
std::unordered_map<std::string, marian::Ptr<marian::Vocab>> vmap;
|
||||
for (size_t i = 0; i < vocabs.size(); ++i) {
|
||||
auto m =
|
||||
vmap.emplace(std::make_pair(vfiles[i], marian::Ptr<marian::Vocab>()));
|
||||
if (m.second) { // new: load the vocab
|
||||
m.first->second = marian::New<marian::Vocab>(options, i);
|
||||
m.first->second->load(vfiles[i]);
|
||||
std::vector<marian::Ptr<marian::Vocab const>> vocabs;
|
||||
if(!vocabMemories.empty()){
|
||||
// load vocabs from buffer
|
||||
ABORT_IF(vocabMemories.size() < 2, "Insufficient number of vocabularies.");
|
||||
vocabs.resize(vocabMemories.size());
|
||||
for (size_t i = 0; i < vocabs.size(); i++) {
|
||||
marian::Ptr<marian::Vocab> vocab = marian::New<marian::Vocab>(options, i);
|
||||
vocab->loadFromSerialized(absl::string_view(vocabMemories[i]->begin(), vocabMemories[i]->size()));
|
||||
vocabs[i] = vocab;
|
||||
}
|
||||
} else {
|
||||
// load vocabs from file
|
||||
auto vfiles = options->get<std::vector<std::string>>("vocabs");
|
||||
// with the current setup, we need at least two vocabs: src and trg
|
||||
ABORT_IF(vfiles.size() < 2, "Insufficient number of vocabularies.");
|
||||
vocabs.resize(vfiles.size());
|
||||
std::unordered_map<std::string, marian::Ptr<marian::Vocab>> vmap;
|
||||
for (size_t i = 0; i < vocabs.size(); ++i) {
|
||||
auto m = vmap.emplace(std::make_pair(vfiles[i], marian::Ptr<marian::Vocab>()));
|
||||
if (m.second) { // new: load the vocab
|
||||
m.first->second = marian::New<marian::Vocab>(options, i);
|
||||
m.first->second->load(vfiles[i]);
|
||||
}
|
||||
vocabs[i] = m.first->second;
|
||||
}
|
||||
vocabs[i] = m.first->second;
|
||||
}
|
||||
return vocabs;
|
||||
}
|
||||
@ -28,11 +41,14 @@ loadVocabularies(marian::Ptr<marian::Options> options) {
|
||||
namespace marian {
|
||||
namespace bergamot {
|
||||
|
||||
Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory)
|
||||
: requestId_(0), options_(options), vocabs_(std::move(loadVocabularies(options))),
|
||||
Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory,
|
||||
std::vector<std::shared_ptr<AlignedMemory>> vocabMemories)
|
||||
: requestId_(0), options_(options),
|
||||
vocabs_(std::move(loadVocabularies(options, std::move(vocabMemories)))),
|
||||
text_processor_(vocabs_, options), batcher_(options),
|
||||
numWorkers_(options->get<int>("cpu-threads")),
|
||||
modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))
|
||||
modelMemory_(std::move(modelMemory)),
|
||||
shortlistMemory_(std::move(shortlistMemory))
|
||||
#ifndef WASM_COMPATIBLE_SOURCE
|
||||
// 0 elements in PCQueue is illegal and can lead to failures. Adding a
|
||||
// guard to have at least one entry allocated. In the single-threaded
|
||||
|
@ -64,10 +64,12 @@ class Service {
|
||||
public:
|
||||
/// @param options Marian options object
|
||||
/// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
|
||||
/// of a model.bin. Optional, defaults to nullptr when not used
|
||||
/// of a model.bin.
|
||||
/// @param shortlistMemory byte array of shortlist (aligned to 64)
|
||||
/// @param vocabMemories vector of vocabulary memories (aligned to 64)
|
||||
explicit Service(Ptr<Options> options, AlignedMemory modelMemory,
|
||||
AlignedMemory shortlistMemory);
|
||||
AlignedMemory shortlistMemory,
|
||||
std::vector<std::shared_ptr<AlignedMemory>> vocabMemories);
|
||||
|
||||
/// Construct Service purely from Options. This expects options which
|
||||
/// marian-decoder expects to be set for loading model shortlist and
|
||||
@ -76,24 +78,30 @@ public:
|
||||
///
|
||||
/// This is equivalent to a call to:
|
||||
/// ```cpp
|
||||
/// Service(options, AlignedMemory(), AlignedMemory())
|
||||
/// Service(options, AlignedMemory(), AlignedMemory(), {})
|
||||
/// ```
|
||||
/// wherein empty memory is passed and internal flow defaults to file-based
|
||||
/// model, shortlist loading.
|
||||
/// model, shortlist and vocabulary loading. AlignedMemory() corresponds to empty memory.
|
||||
explicit Service(Ptr<Options> options)
|
||||
: Service(options, AlignedMemory(), AlignedMemory()) {}
|
||||
: Service(options, AlignedMemory(), AlignedMemory(), {}) {}
|
||||
|
||||
/// Construct Service from a string configuration.
|
||||
/// @param [in] config string parsable as YAML expected to adhere with marian
|
||||
/// config
|
||||
/// @param [in] model_memory byte array (aligned to 256!!!) that contains the
|
||||
/// bytes of a model.bin. Optional.
|
||||
/// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
|
||||
/// @param [in] modelMemory byte array (aligned to 256!!!) that contains the
|
||||
/// bytes of a model.bin. Optional. AlignedMemory() corresponds to empty memory
|
||||
/// @param [in] shortlistMemory byte array of shortlist (aligned to 64). Optional.
|
||||
/// @param [in] vocabMemories vector of vocabulary memories (aligned to 64). Optional.
|
||||
/// If two vocabularies are the same (based on the filenames), two entries (shared
|
||||
/// pointers) will be generated which share the same AlignedMemory object.
|
||||
explicit Service(const std::string &config,
|
||||
AlignedMemory modelMemory = AlignedMemory(),
|
||||
AlignedMemory shortlistMemory = AlignedMemory())
|
||||
AlignedMemory shortlistMemory = AlignedMemory(),
|
||||
std::vector<std::shared_ptr<AlignedMemory>> vocabsMemories = {})
|
||||
: Service(parseOptions(config, /*validate=*/false),
|
||||
std::move(modelMemory), std::move(shortlistMemory)) {}
|
||||
std::move(modelMemory),
|
||||
std::move(shortlistMemory),
|
||||
std::move(vocabsMemories)) {}
|
||||
|
||||
/// Explicit destructor to clean up after any threads initialized in
|
||||
/// asynchronous operation mode.
|
||||
@ -187,7 +195,6 @@ private:
|
||||
/// ordering among requests and logging/book-keeping.
|
||||
|
||||
size_t requestId_;
|
||||
|
||||
/// Store vocabs representing source and target.
|
||||
std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY (text_processor_)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user