From 8bec1b7b6ba30c522c102621e55b8cbe4bd051da Mon Sep 17 00:00:00 2001 From: Qianqian Zhu Date: Tue, 25 May 2021 12:05:16 +0100 Subject: [PATCH] Fix failures when loading text shortlist (#154) --- app/service-cli.cpp | 2 +- bergamot-translator-tests | 2 +- src/translator/batch_translator.cpp | 6 ++---- src/translator/byte_array_util.cpp | 10 +++++++--- src/translator/parser.h | 5 ++++- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/app/service-cli.cpp b/app/service-cli.cpp index fbf0131..a29e71b 100644 --- a/app/service-cli.cpp +++ b/app/service-cli.cpp @@ -19,7 +19,7 @@ int main(int argc, char *argv[]) { // Prepare memories for bytearrays (including model, shortlist and vocabs) marian::bergamot::MemoryBundle memoryBundle; - if (options->get("check-bytearray")) { + if (options->get("bytearray")) { // Load legit values into bytearrays. memoryBundle = marian::bergamot::getMemoryBundleFromConfig(options); } diff --git a/bergamot-translator-tests b/bergamot-translator-tests index 636af01..1b20a62 160000 --- a/bergamot-translator-tests +++ b/bergamot-translator-tests @@ -1 +1 @@ -Subproject commit 636af01c63f2f080a9e59e99b15ac4bfdaec76e1 +Subproject commit 1b20a62f6614db371f59b97ff83262b8ebd235de diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index c27edc1..889ff00 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -20,8 +20,6 @@ BatchTranslator::BatchTranslator(DeviceId const device, Vocabs &vocabs, Ptrget("check-bytearray", false); // Flag holds whether validate the bytearray (model and shortlist) if (options_->hasAndNotEmpty("shortlist")) { int srcIdx = 0, trgIdx = 1; bool shared_vcb = @@ -30,7 +28,7 @@ void BatchTranslator::initialize() { if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) { slgen_ = New(shortlistMemory_->begin(), shortlistMemory_->size(), vocabs_.sources().front(), vocabs_.target(), srcIdx, trgIdx, - shared_vcb, check); + shared_vcb, options_->get("check-bytearray")); } else { // Changed to BinaryShortlistGenerator to enable loading binary shortlist file // This class also supports text shortlist file @@ -51,7 +49,7 @@ void BatchTranslator::initialize() { // from there, as opposed to from reading in the config file ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0, "The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it."); - if (check) { + if (options_->get("check-bytearray")) { ABORT_IF(!validateBinaryModel(*modelMemory_, modelMemory_->size()), "The binary file is invalid. Incomplete or corrupted download?"); } diff --git a/src/translator/byte_array_util.cpp b/src/translator/byte_array_util.cpp index 247d164..8c3d608 100644 --- a/src/translator/byte_array_util.cpp +++ b/src/translator/byte_array_util.cpp @@ -1,10 +1,10 @@ #include "byte_array_util.h" -#include - -#include +#include #include +#include "data/shortlist.h" + namespace marian { namespace bergamot { @@ -102,6 +102,8 @@ AlignedMemory getModelMemoryFromConfig(marian::Ptr options) { AlignedMemory getShortlistMemoryFromConfig(marian::Ptr options) { auto shortlist = options->get>("shortlist"); ABORT_IF(shortlist.empty(), "No path to shortlist file is given."); + ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]), + "Loading non-binary shortlist file into memory is not supported"); return loadFileToMemory(shortlist[0], 64); } @@ -112,6 +114,8 @@ void getVocabsMemoryFromConfig(marian::Ptr options, vocabMemories.resize(vfiles.size()); std::unordered_map> vocabMap; for (size_t i = 0; i < vfiles.size(); ++i) { + ABORT_IF(marian::filesystem::Path(vfiles[i]).extension() != marian::filesystem::Path(".spm"), + "Loading non-SentencePiece vocab files into memory is not supported"); auto m = vocabMap.emplace(std::make_pair(vfiles[i], std::shared_ptr())); if (m.second) { m.first->second = std::make_shared(loadFileToMemory(vfiles[i], 64)); diff --git a/src/translator/parser.h b/src/translator/parser.h index b2b0a80..790717c 100644 --- a/src/translator/parser.h +++ b/src/translator/parser.h @@ -20,8 +20,11 @@ inline marian::ConfigParser createConfigParser() { cp.addOption("--max-length-break", "Bergamot Options", "Maximum input tokens to be processed in a single sentence.", 128); + cp.addOption("--bytearray", "Bergamot Options", + "Flag holds whether to construct service from bytearrays, only for testing purpose", false); + cp.addOption("--check-bytearray", "Bergamot Options", - "Flag holds whether to check the content of the bytearray (true by default)", true); + "Flag holds whether to check the content of the bytearrays (true by default)", true); return cp; }