Streamline memory-bundle loads (#307)

Provides an additional constructor that performs the memory-bundle
loading inside this library when a configuration file is supplied by a
client such as translateLocally or the Python bindings. Once the config
file has been read, all the information required to construct the
MemoryBundle is available.

 - The command-line application shipped with this repository,
   app/bergamot, is now configured to use the fast-load path.
 - The changes to binary loading additionally revealed a bug in the
   example-run script, which is used in the docs and tied to CI; the
   fix is included.
 - The shortlist is made optional in the memory bundle, which required
   changes to getShortlistMemoryFromConfig.

Fixes #304.
Fixes #306.
See also: XapaJIaMnu/translateLocally#82.
This commit is contained in:
Jerin Philip 2022-01-19 16:36:48 +00:00 committed by GitHub
parent acbc46d816
commit 7099b9e9ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 34 additions and 27 deletions

View File

@ -16,8 +16,7 @@ int main(int argc, char *argv[]) {
// Construct a model.
auto options = parseOptionsFromFilePath(config.modelConfigPaths.front());
MemoryBundle memoryBundle;
std::shared_ptr<TranslationModel> model = service.createCompatibleModel(options, std::move(memoryBundle));
std::shared_ptr<TranslationModel> model = service.createCompatibleModel(options);
ResponseOptions responseOptions;
std::string input = readFromStdin();

View File

@ -9,7 +9,7 @@ wget --quiet --continue --directory models/ \
# Patch the config-files generated from marian for use in bergamot.
python3 bergamot-translator-tests/tools/patch-marian-for-bergamot.py \
--config-path models/ende.student.tiny11/config.intgemm8bitalpha.yml \
--ssplit-prefix-file 3rd-party/ssplit-cpp/split-cpp/nonbreaking_prefixes/nonbreaking_prefix.en
--ssplit-prefix-file $(realpath 3rd_party/ssplit-cpp/nonbreaking_prefixes/nonbreaking_prefix.en)
# Patched config file will be available with .bergamot.yml suffix.
CONFIG=models/ende.student.tiny11/config.intgemm8bitalpha.yml.bergamot.yml

View File

@ -101,10 +101,12 @@ AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {
AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options) {
auto shortlist = options->get<std::vector<std::string>>("shortlist");
ABORT_IF(shortlist.empty(), "No path to shortlist file is given.");
ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]),
"Loading non-binary shortlist file into memory is not supported");
return loadFileToMemory(shortlist[0], 64);
if (!shortlist.empty()) {
ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]),
"Loading non-binary shortlist file into memory is not supported");
return loadFileToMemory(shortlist[0], 64);
}
return AlignedMemory();
}
void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,

View File

@ -127,10 +127,9 @@ class AsyncService {
/// Create a TranslationModel compatible with this instance of Service. Internally assigns how many replicas of
/// backend needed based on worker threads set. See TranslationModel for documentation on other params.
template <class ConfigType>
Ptr<TranslationModel> createCompatibleModel(const ConfigType &config, MemoryBundle &&memory = MemoryBundle{}) {
Ptr<TranslationModel> createCompatibleModel(const TranslationModel::Config &config) {
// @TODO: Remove this dependency/coupling.
return New<TranslationModel>(config, std::move(memory), /*replicas=*/config_.numWorkers);
return New<TranslationModel>(config, /*replicas=*/config_.numWorkers);
}
/// With the supplied TranslationModel, translate an input. A Response is constructed with optional items set/unset

View File

@ -27,22 +27,25 @@ TranslationModel::TranslationModel(const Config &options, MemoryBundle &&memory
ABORT_IF(replicas == 0, "At least one replica needs to be created.");
backend_.resize(replicas);
if (options_->hasAndNotEmpty("shortlist")) {
int srcIdx = 0, trgIdx = 1;
bool shared_vcb =
vocabs_.sources().front() ==
vocabs_.target(); // vocabs_->sources().front() is invoked as we currently only support one source vocab
if (memory_.shortlist.size() > 0 && memory_.shortlist.begin() != nullptr) {
bool check = options_->get<bool>("check-bytearray", false);
shortlistGenerator_ = New<data::BinaryShortlistGenerator>(memory_.shortlist.begin(), memory_.shortlist.size(),
vocabs_.sources().front(), vocabs_.target(), srcIdx,
trgIdx, shared_vcb, check);
} else {
// Changed to BinaryShortlistGenerator to enable loading binary shortlist file
// This class also supports text shortlist file
shortlistGenerator_ = New<data::BinaryShortlistGenerator>(options_, vocabs_.sources().front(), vocabs_.target(),
srcIdx, trgIdx, shared_vcb);
}
// Try to load shortlist from memory-bundle. If not available, try to load from options_;
int srcIdx = 0, trgIdx = 1;
// vocabs_.sources().front() is invoked as we currently only support one source vocab
bool shared_vcb = (vocabs_.sources().front() == vocabs_.target());
if (memory_.shortlist.size() > 0 && memory_.shortlist.begin() != nullptr) {
bool check = options_->get<bool>("check-bytearray", false);
shortlistGenerator_ = New<data::BinaryShortlistGenerator>(memory_.shortlist.begin(), memory_.shortlist.size(),
vocabs_.sources().front(), vocabs_.target(), srcIdx,
trgIdx, shared_vcb, check);
} else if (options_->hasAndNotEmpty("shortlist")) {
// Changed to BinaryShortlistGenerator to enable loading binary shortlist file
// This class also supports text shortlist file
shortlistGenerator_ = New<data::BinaryShortlistGenerator>(options_, vocabs_.sources().front(), vocabs_.target(),
srcIdx, trgIdx, shared_vcb);
} else {
// In this case, the loadpath does not load shortlist.
shortlistGenerator_ = nullptr;
}
}

View File

@ -6,6 +6,7 @@
#include "batch.h"
#include "batching_pool.h"
#include "byte_array_util.h"
#include "cache.h"
#include "common/utils.h"
#include "data/shortlist.h"
@ -56,7 +57,10 @@ class TranslationModel {
/// @param [in] options: Marian options object.
/// @param [in] memory: MemoryBundle object holding memory buffers containing parameters to build MarianBackend,
/// ShortlistGenerator, Vocabs and SentenceSplitter.
TranslationModel(const Config& options, MemoryBundle&& memory = MemoryBundle{}, size_t replicas = 1);
TranslationModel(const Config& options, MemoryBundle&& memory, size_t replicas = 1);
TranslationModel(const Config& options, size_t replicas = 1)
: TranslationModel(options, getMemoryBundleFromConfig(options), replicas) {}
/// Make a Request to be translated by this TranslationModel instance.
/// @param [in] requestId: Unique identifier associated with this request, available from Service.