mirror of
https://github.com/browsermt/bergamot-translator.git
synced 2024-08-15 16:40:26 +03:00
Fix failures when loading text shortlist (#154)
This commit is contained in:
parent
576afae6b3
commit
8bec1b7b6b
@ -19,7 +19,7 @@ int main(int argc, char *argv[]) {
|
||||
// Prepare memories for bytearrays (including model, shortlist and vocabs)
|
||||
marian::bergamot::MemoryBundle memoryBundle;
|
||||
|
||||
if (options->get<bool>("check-bytearray")) {
|
||||
if (options->get<bool>("bytearray")) {
|
||||
// Load legit values into bytearrays.
|
||||
memoryBundle = marian::bergamot::getMemoryBundleFromConfig(options);
|
||||
}
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 636af01c63f2f080a9e59e99b15ac4bfdaec76e1
|
||||
Subproject commit 1b20a62f6614db371f59b97ff83262b8ebd235de
|
@ -20,8 +20,6 @@ BatchTranslator::BatchTranslator(DeviceId const device, Vocabs &vocabs, Ptr<Opti
|
||||
|
||||
void BatchTranslator::initialize() {
|
||||
// Initializes the graph.
|
||||
bool check =
|
||||
options_->get<bool>("check-bytearray", false); // Flag holds whether validate the bytearray (model and shortlist)
|
||||
if (options_->hasAndNotEmpty("shortlist")) {
|
||||
int srcIdx = 0, trgIdx = 1;
|
||||
bool shared_vcb =
|
||||
@ -30,7 +28,7 @@ void BatchTranslator::initialize() {
|
||||
if (shortlistMemory_->size() > 0 && shortlistMemory_->begin() != nullptr) {
|
||||
slgen_ = New<data::BinaryShortlistGenerator>(shortlistMemory_->begin(), shortlistMemory_->size(),
|
||||
vocabs_.sources().front(), vocabs_.target(), srcIdx, trgIdx,
|
||||
shared_vcb, check);
|
||||
shared_vcb, options_->get<bool>("check-bytearray"));
|
||||
} else {
|
||||
// Changed to BinaryShortlistGenerator to enable loading binary shortlist file
|
||||
// This class also supports text shortlist file
|
||||
@ -51,7 +49,7 @@ void BatchTranslator::initialize() {
|
||||
// from there, as opposed to from reading in the config file
|
||||
ABORT_IF((uintptr_t)modelMemory_->begin() % 256 != 0,
|
||||
"The provided memory is not aligned to 256 bytes and will crash when vector instructions are used on it.");
|
||||
if (check) {
|
||||
if (options_->get<bool>("check-bytearray")) {
|
||||
ABORT_IF(!validateBinaryModel(*modelMemory_, modelMemory_->size()),
|
||||
"The binary file is invalid. Incomplete or corrupted download?");
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
#include "byte_array_util.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
|
||||
#include "data/shortlist.h"
|
||||
|
||||
namespace marian {
|
||||
namespace bergamot {
|
||||
|
||||
@ -102,6 +102,8 @@ AlignedMemory getModelMemoryFromConfig(marian::Ptr<marian::Options> options) {
|
||||
AlignedMemory getShortlistMemoryFromConfig(marian::Ptr<marian::Options> options) {
|
||||
auto shortlist = options->get<std::vector<std::string>>("shortlist");
|
||||
ABORT_IF(shortlist.empty(), "No path to shortlist file is given.");
|
||||
ABORT_IF(!marian::data::isBinaryShortlist(shortlist[0]),
|
||||
"Loading non-binary shortlist file into memory is not supported");
|
||||
return loadFileToMemory(shortlist[0], 64);
|
||||
}
|
||||
|
||||
@ -112,6 +114,8 @@ void getVocabsMemoryFromConfig(marian::Ptr<marian::Options> options,
|
||||
vocabMemories.resize(vfiles.size());
|
||||
std::unordered_map<std::string, std::shared_ptr<AlignedMemory>> vocabMap;
|
||||
for (size_t i = 0; i < vfiles.size(); ++i) {
|
||||
ABORT_IF(marian::filesystem::Path(vfiles[i]).extension() != marian::filesystem::Path(".spm"),
|
||||
"Loading non-SentencePiece vocab files into memory is not supported");
|
||||
auto m = vocabMap.emplace(std::make_pair(vfiles[i], std::shared_ptr<AlignedMemory>()));
|
||||
if (m.second) {
|
||||
m.first->second = std::make_shared<AlignedMemory>(loadFileToMemory(vfiles[i], 64));
|
||||
|
@ -20,8 +20,11 @@ inline marian::ConfigParser createConfigParser() {
|
||||
cp.addOption<int>("--max-length-break", "Bergamot Options",
|
||||
"Maximum input tokens to be processed in a single sentence.", 128);
|
||||
|
||||
cp.addOption<bool>("--bytearray", "Bergamot Options",
|
||||
"Flag holds whether to construct service from bytearrays, only for testing purpose", false);
|
||||
|
||||
cp.addOption<bool>("--check-bytearray", "Bergamot Options",
|
||||
"Flag holds whether to check the content of the bytearray (true by default)", true);
|
||||
"Flag holds whether to check the content of the bytearrays (true by default)", true);
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user