Enables model ensembles (#450)

* Enables model ensembles

Adds the ability to use ensembles of models. This supports ensembles of
binary- or npz-format models, as well as mixtures of both.

When all models in the ensemble are in binary format, the load-from-memory
path is used. Otherwise (if any model is in npz format), all models are
loaded via the file system. Set the log level to debug to see output
related to this.

* Fix formatting

* Fix WASM bindings for MemoryBundle

For now, the WASM bindings do not support ensembles.

* Remove shared_ptr wrapping the AlignedMemory of models.

* Fix formatting
This commit is contained in:
Graeme Nail 2023-08-01 19:35:11 +01:00 committed by GitHub
parent 8011f9c849
commit 4b0da8d434
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 45 additions and 31 deletions

View File

@ -91,21 +91,24 @@ AlignedMemory loadFileToMemory(const std::string& path, size_t alignment) {
return alignedMemory;
}
AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {
std::vector<AlignedMemory> getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {
auto models = options->get<std::vector<std::string>>("models");
ABORT_IF(models.size() != 1, "Loading multiple binary models is not supported for now as it is not necessary.");
// If binary model we load into aligned memory. If .npz we leave it be to
// return empty aligned memory, thus allowing traditional file system loads.
if (marian::io::isBin(models[0])) {
AlignedMemory alignedMemory = loadFileToMemory(models[0], 256);
return alignedMemory;
} else if (marian::io::isNpz(models[0])) {
return AlignedMemory();
} else {
ABORT("Unknown extension for model: {}, should be one of `.bin` or `.npz`", models[0]);
std::vector<AlignedMemory> modelMemories(models.size());
for (size_t i = 0; i < models.size(); ++i) {
const auto model = models[i];
if (marian::io::isBin(model)) {
modelMemories[i] = loadFileToMemory(model, 256);
} else if (marian::io::isNpz(model)) {
// if any of the models are npz format, we revert to loading from file for all models.
LOG(debug, "Encountered an npz file {}; will use file loading for {} models", model, models.size());
return {};
} else {
ABORT("Unknown extension for model: {}, should be one of `.bin` or `.npz`", model);
}
}
return AlignedMemory();
return modelMemories;
}
AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options) {
@ -153,7 +156,7 @@ AlignedMemory getQualityEstimatorModel(MemoryBundle& memoryBundle, const marian:
MemoryBundle getMemoryBundleFromConfig(marian::Ptr<marian::Options> options) {
MemoryBundle memoryBundle;
memoryBundle.model = getModelMemoryFromConfig(options);
memoryBundle.models = getModelMemoryFromConfig(options);
memoryBundle.shortlist = getShortlistMemoryFromConfig(options);
getVocabsMemoryFromConfig(options, memoryBundle.vocabs);
memoryBundle.ssplitPrefixFile = getSsplitPrefixFileMemoryFromConfig(options);

View File

@ -5,7 +5,7 @@ namespace marian {
namespace bergamot {
AlignedMemory loadFileToMemory(const std::string& path, size_t alignment);
AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
std::vector<AlignedMemory> getModelMemoryFromConfig(marian::Ptr<marian::Options> options);
AlignedMemory getQualityEstimatorModel(const marian::Ptr<marian::Options>& options);
AlignedMemory getQualityEstimatorModel(MemoryBundle& memoryBundle, const marian::Ptr<marian::Options>& options);
AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options);

View File

@ -19,8 +19,8 @@ typedef AlignedVector<char> AlignedMemory;
/// Memory bundle for all byte-arrays.
/// Can be a set/subset of model, shortlist, vocabs and ssplitPrefixFile bytes.
struct MemoryBundle {
AlignedMemory model{}; ///< Byte-array of model (aligned to 256)
AlignedMemory shortlist{}; ///< Byte-array of shortlist (aligned to 64)
std::vector<AlignedMemory> models{}; ///< Byte-array of model (each element is aligned to 256)
AlignedMemory shortlist{}; ///< Byte-array of shortlist (aligned to 64)
/// Vector of vocabulary memories (aligned to 64).
/// If two vocabularies are the same (based on the filenames), two entries (shared

View File

@ -61,24 +61,35 @@ void TranslationModel::loadBackend(size_t idx) {
graph->getBackend()->configureDevice(options_);
graph->reserveWorkspaceMB(options_->get<size_t>("workspace"));
// Marian Model: Load from memoryBundle or shortList
if (memory_.model.size() > 0 &&
memory_.model.begin() !=
nullptr) { // If we have provided a byte array that contains the model memory, we can initialise the
// model from there, as opposed to from reading in the config file
ABORT_IF((uintptr_t)memory_.model.begin() % 256 != 0,
"The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
if (options_->get<bool>("check-bytearray", false)) {
ABORT_IF(!validateBinaryModel(memory_.model, memory_.model.size()),
"The binary file is invalid. Incomplete or corrupted download?");
}
const std::vector<const void *> container = {
memory_.model.begin()}; // Marian supports multiple models initialised in this manner hence std::vector.
// However we will only ever use 1 during decoding.
// if memory_.models is populated, then all models were of binary format
if (memory_.models.size() >= 1) {
const std::vector<const void *> container = std::invoke([&]() {
std::vector<const void *> model_ptrs(memory_.models.size());
for (size_t i = 0; i < memory_.models.size(); ++i) {
const AlignedMemory &model = memory_.models[i];
ABORT_IF(model.size() == 0 || model.begin() == nullptr, "The provided memory is empty. Cannot load the model.");
ABORT_IF(
(uintptr_t)model.begin() % 256 != 0,
"The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
if (options_->get<bool>("check-bytearray", false)) {
ABORT_IF(!validateBinaryModel(model, model.size()),
"The binary file is invalid. Incomplete or corrupted download?");
}
model_ptrs[i] = model.begin();
LOG(debug, "Loaded model {} of {} from memory", (i + 1), model_ptrs.size());
}
return model_ptrs;
});
scorerEnsemble = createScorers(options_, container);
} else {
// load npz format models, or a mixture of binary/npz formats
scorerEnsemble = createScorers(options_);
LOG(debug, "Loaded {} model(s) from file", scorerEnsemble.size());
}
for (auto scorer : scorerEnsemble) {
scorer->init(graph);
if (shortlistGenerator_) {

View File

@ -48,7 +48,7 @@ MemoryBundle prepareMemoryBundle(AlignedMemory* modelMemory, AlignedMemory* shor
std::vector<AlignedMemory*> uniqueVocabsMemories,
AlignedMemory* qualityEstimatorMemory) {
MemoryBundle memoryBundle;
memoryBundle.model = std::move(*modelMemory);
memoryBundle.models.emplace_back(std::move(*modelMemory));
memoryBundle.shortlist = std::move(*shortlistMemory);
memoryBundle.vocabs = std::move(prepareVocabsSmartMemories(uniqueVocabsMemories));
if (qualityEstimatorMemory != nullptr) {