mirror of
https://github.com/browsermt/bergamot-translator.git
synced 2024-08-15 16:40:26 +03:00
Nick hates state, let's try this one
This commit is contained in:
parent
32258934e7
commit
e56db9f474
305
app/cli.h
305
app/cli.h
@ -4,6 +4,7 @@
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/definitions.h"
|
||||
#include "common/timer.h"
|
||||
@ -25,176 +26,172 @@ namespace app {
|
||||
/// Interface for command-line applications. All applications are expected to use the Options based parsing until
|
||||
/// someone builds a suitable non-marian based yet complete replacement.
|
||||
|
||||
class CLIAppInterface {
|
||||
public:
|
||||
virtual void run(Ptr<Options> options) = 0; // Make pure virtual, attempt to compile at MSVC
|
||||
};
|
||||
typedef std::function<void(Ptr<Options> options)> CLI;
|
||||
|
||||
/// Previously bergamot-translator-app. Provides a command-line app on native which executes the code-path used by Web
|
||||
/// Assembly. Expected to be maintained consistent with how the browser (Mozilla through WebAssembly) dictates its API
|
||||
/// and tests be intact. Also used in [bergamot-evaluation](https://github.com/mozilla/bergamot-evaluation).
|
||||
/// @param [cmdline]: Options to translate passed down to marian through Options.
|
||||
/// @param [stdin] sentences as lines of text.
|
||||
/// @param [stdout] translations for the sentences supplied in corresponding lines
|
||||
class WASM : public CLIAppInterface {
|
||||
public:
|
||||
void run(Ptr<Options> options) {
|
||||
// Here, we take the command-line interface which is uniform across all apps. This is parsed into Ptr<Options> by
|
||||
// marian. However, mozilla does not allow a Ptr<Options> constructor and demands an std::string constructor since
|
||||
// std::string isn't marian internal unlike Ptr<Options>. Since this std::string path needs to be tested for mozilla
|
||||
// and since this class/CLI is intended at testing mozilla's path, we go from:
|
||||
//
|
||||
// cmdline -> Ptr<Options> -> std::string -> Service(std::string)
|
||||
//
|
||||
// Overkill, yes.
|
||||
static const std::unordered_map<std::string, CLI> ftable {
|
||||
/// Previously bergamot-translator-app. Provides a command-line app on native which executes the code-path used by
|
||||
/// Web Assembly. Expected to be maintained consistent with how the browser (Mozilla through WebAssembly) dictates
|
||||
/// its API and tests be intact. Also used in [bergamot-evaluation](https://github.com/mozilla/bergamot-evaluation).
|
||||
/// @param [cmdline]: Options to translate passed down to marian through Options.
|
||||
/// @param [stdin] sentences as lines of text.
|
||||
/// @param [stdout] translations for the sentences supplied in corresponding lines
|
||||
|
||||
std::string config = options->asYamlString();
|
||||
Service model(config);
|
||||
{
|
||||
"wasm", ([](Ptr<Options> options) {
|
||||
// Here, we take the command-line interface which is uniform across all apps. This is parsed into Ptr<Options> by
|
||||
// marian. However, mozilla does not allow a Ptr<Options> constructor and demands an std::string constructor since
|
||||
// std::string isn't marian internal unlike Ptr<Options>. Since this std::string path needs to be tested for
|
||||
// mozilla and since this class/CLI is intended at testing mozilla's path, we go from:
|
||||
//
|
||||
// cmdline -> Ptr<Options> -> std::string -> Service(std::string)
|
||||
//
|
||||
// Overkill, yes.
|
||||
|
||||
ResponseOptions responseOptions;
|
||||
std::vector<std::string> texts;
|
||||
std::string config = options->asYamlString();
|
||||
Service model(config);
|
||||
|
||||
for (std::string line; std::getline(std::cin, line);) {
|
||||
texts.emplace_back(line);
|
||||
}
|
||||
ResponseOptions responseOptions;
|
||||
std::vector<std::string> texts;
|
||||
|
||||
auto results = model.translateMultiple(std::move(texts), responseOptions);
|
||||
|
||||
for (auto &result : results) {
|
||||
std::cout << result.getTranslatedText() << std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Application used to benchmark with marian-decoder from time-to-time. The implementation in this repository follows a
|
||||
/// different route than marian-decoder and routinely needs to be checked that the speeds while operating similar to
|
||||
/// marian-decoder are not affected during the course of development.
|
||||
///
|
||||
/// Example usage:
|
||||
/// [brt/speed-tests/test_wngt20_perf.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/speed-tests/test_wngt20_perf.sh).
|
||||
///
|
||||
/// Expected to be compatible with Translator[1] and marian-decoder[2].
|
||||
///
|
||||
/// - [1]
|
||||
/// [marian-dev/../src/translator/translator.h](https://github.com/marian-nmt/marian-dev/blob/master/src/translator/translator.h)
|
||||
/// - [2]
|
||||
/// [marian-dev/../src/command/marian_decoder.cpp](https://github.com/marian-nmt/marian/blob/master/src/command/marian_decoder.cpp)
|
||||
///
|
||||
/// @param [cmdline] options constructed from command-line supplied arguments
|
||||
/// @param [stdin] lines containing sentences, same as marian-decoder.
|
||||
/// @param [stdout] translations of the sentences supplied via stdin in corresponding lines
|
||||
|
||||
class Decoder : public CLIAppInterface {
|
||||
public:
|
||||
void run(Ptr<Options> options) {
|
||||
marian::timer::Timer decoderTimer;
|
||||
Service service(options);
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
std_input << std::cin.rdbuf();
|
||||
std::string input = std_input.str();
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input));
|
||||
responseFuture.wait();
|
||||
const Response &response = responseFuture.get();
|
||||
|
||||
for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
|
||||
std::cout << response.target.sentence(sentenceIdx) << "\n";
|
||||
}
|
||||
LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed());
|
||||
}
|
||||
};
|
||||
|
||||
/// Command line interface to the test the features being developed as part of bergamot C++ library on native platform.
|
||||
///
|
||||
/// @param [cmdline]: options to build translator
|
||||
/// @param [stdin]: Blob of text, read as a whole ; sentence-splitting etc handled internally.
|
||||
/// @param [stdout]: Translation of the source text and additional information like sentences, alignments between source
|
||||
/// and target tokens and quality scores.
|
||||
class Native : public CLIAppInterface {
|
||||
public:
|
||||
void run(Ptr<Options> options) {
|
||||
// Prepare memories for bytearrays (including model, shortlist and vocabs)
|
||||
MemoryBundle memoryBundle;
|
||||
|
||||
if (options->get<bool>("bytearray")) {
|
||||
// Load legit values into bytearrays.
|
||||
memoryBundle = getMemoryBundleFromConfig(options);
|
||||
}
|
||||
|
||||
Service service(options, std::move(memoryBundle));
|
||||
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
std_input << std::cin.rdbuf();
|
||||
std::string input = std_input.str();
|
||||
|
||||
ResponseOptions responseOptions;
|
||||
responseOptions.qualityScores = true;
|
||||
responseOptions.alignment = true;
|
||||
responseOptions.alignmentThreshold = 0.2f;
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
|
||||
responseFuture.wait();
|
||||
Response response = responseFuture.get();
|
||||
|
||||
std::cout << "[original]: " << response.source.text << '\n';
|
||||
std::cout << "[translated]: " << response.target.text << '\n';
|
||||
for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
|
||||
std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx) << '\n';
|
||||
std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx) << '\n';
|
||||
std::cout << "Alignments" << '\n';
|
||||
typedef std::pair<size_t, float> Point;
|
||||
|
||||
// Initialize a point vector.
|
||||
std::vector<std::vector<Point>> aggregate(response.source.numWords(sentenceIdx));
|
||||
|
||||
// Handle alignments
|
||||
auto &alignments = response.alignments[sentenceIdx];
|
||||
for (auto &p : alignments) {
|
||||
aggregate[p.src].emplace_back(p.tgt, p.prob);
|
||||
for (std::string line; std::getline(std::cin, line);) {
|
||||
texts.emplace_back(line);
|
||||
}
|
||||
|
||||
for (size_t src = 0; src < aggregate.size(); src++) {
|
||||
std::cout << response.source.word(sentenceIdx, src) << ": ";
|
||||
for (auto &p : aggregate[src]) {
|
||||
std::cout << response.target.word(sentenceIdx, p.first) << "(" << p.second << ") ";
|
||||
auto results = model.translateMultiple(std::move(texts), responseOptions);
|
||||
|
||||
for (auto &result : results) {
|
||||
std::cout << result.getTranslatedText() << std::endl;
|
||||
}
|
||||
});)
|
||||
}
|
||||
,
|
||||
|
||||
/// Application used to benchmark with marian-decoder from time-to-time. The implementation in this repository follows
|
||||
/// a different route than marian-decoder and routinely needs to be checked that the speeds while operating similar
|
||||
/// to marian-decoder are not affected during the course of development.
|
||||
///
|
||||
/// Example usage:
|
||||
/// [brt/speed-tests/test_wngt20_perf.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/speed-tests/test_wngt20_perf.sh).
|
||||
///
|
||||
/// Expected to be compatible with Translator[1] and marian-decoder[2].
|
||||
///
|
||||
/// - [1]
|
||||
/// [marian-dev/../src/translator/translator.h](https://github.com/marian-nmt/marian-dev/blob/master/src/translator/translator.h)
|
||||
/// - [2]
|
||||
/// [marian-dev/../src/command/marian_decoder.cpp](https://github.com/marian-nmt/marian/blob/master/src/command/marian_decoder.cpp)
|
||||
///
|
||||
/// @param [cmdline] options constructed from command-line supplied arguments
|
||||
/// @param [stdin] lines containing sentences, same as marian-decoder.
|
||||
/// @param [stdout] translations of the sentences supplied via stdin in corresponding lines
|
||||
|
||||
{
|
||||
"decoder", ([](Ptr<Options> options) {
|
||||
marian::timer::Timer decoderTimer;
|
||||
Service service(options);
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
std_input << std::cin.rdbuf();
|
||||
std::string input = std_input.str();
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input));
|
||||
responseFuture.wait();
|
||||
const Response &response = responseFuture.get();
|
||||
|
||||
for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
|
||||
std::cout << response.target.sentence(sentenceIdx) << "\n";
|
||||
}
|
||||
LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed());
|
||||
});)
|
||||
}
|
||||
,
|
||||
|
||||
/// Command line interface to the test the features being developed as part of bergamot C++ library on native
|
||||
/// platform.
|
||||
///
|
||||
/// @param [cmdline]: options to build translator
|
||||
/// @param [stdin]: Blob of text, read as a whole ; sentence-splitting etc handled internally.
|
||||
/// @param [stdout]: Translation of the source text and additional information like sentences, alignments between
|
||||
/// source and target tokens and quality scores.
|
||||
{
|
||||
"native", ([](Ptr<Options> options) {
|
||||
// Prepare memories for bytearrays (including model, shortlist and vocabs)
|
||||
MemoryBundle memoryBundle;
|
||||
|
||||
if (options->get<bool>("bytearray")) {
|
||||
// Load legit values into bytearrays.
|
||||
memoryBundle = getMemoryBundleFromConfig(options);
|
||||
}
|
||||
|
||||
Service service(options, std::move(memoryBundle));
|
||||
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
std_input << std::cin.rdbuf();
|
||||
std::string input = std_input.str();
|
||||
|
||||
ResponseOptions responseOptions;
|
||||
responseOptions.qualityScores = true;
|
||||
responseOptions.alignment = true;
|
||||
responseOptions.alignmentThreshold = 0.2f;
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
|
||||
responseFuture.wait();
|
||||
Response response = responseFuture.get();
|
||||
|
||||
std::cout << "[original]: " << response.source.text << '\n';
|
||||
std::cout << "[translated]: " << response.target.text << '\n';
|
||||
for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
|
||||
std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx) << '\n';
|
||||
std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx) << '\n';
|
||||
std::cout << "Alignments" << '\n';
|
||||
typedef std::pair<size_t, float> Point;
|
||||
|
||||
// Initialize a point vector.
|
||||
std::vector<std::vector<Point>> aggregate(response.source.numWords(sentenceIdx));
|
||||
|
||||
// Handle alignments
|
||||
auto &alignments = response.alignments[sentenceIdx];
|
||||
for (auto &p : alignments) {
|
||||
aggregate[p.src].emplace_back(p.tgt, p.prob);
|
||||
}
|
||||
|
||||
for (size_t src = 0; src < aggregate.size(); src++) {
|
||||
std::cout << response.source.word(sentenceIdx, src) << ": ";
|
||||
for (auto &p : aggregate[src]) {
|
||||
std::cout << response.target.word(sentenceIdx, p.first) << "(" << p.second << ") ";
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
|
||||
// Handle quality.
|
||||
auto &quality = response.qualityScores[sentenceIdx];
|
||||
std::cout << "Quality: whole(" << quality.sequence << "), tokens below:" << '\n';
|
||||
size_t wordIdx = 0;
|
||||
bool first = true;
|
||||
for (auto &p : quality.word) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
std::cout << " ";
|
||||
}
|
||||
std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p << ")";
|
||||
wordIdx++;
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
|
||||
// Handle quality.
|
||||
auto &quality = response.qualityScores[sentenceIdx];
|
||||
std::cout << "Quality: whole(" << quality.sequence << "), tokens below:" << '\n';
|
||||
size_t wordIdx = 0;
|
||||
bool first = true;
|
||||
for (auto &p : quality.word) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
std::cout << " ";
|
||||
}
|
||||
std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p << ")";
|
||||
wordIdx++;
|
||||
}
|
||||
std::cout << "--------------------------\n";
|
||||
std::cout << '\n';
|
||||
}
|
||||
std::cout << "--------------------------\n";
|
||||
std::cout << '\n';
|
||||
});)
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace app
|
||||
|
||||
void execute(const std::string &mode, Ptr<Options> options) {
|
||||
if (mode == "wasm") {
|
||||
app::WASM().run(options);
|
||||
} else if (mode == "native") {
|
||||
app::Native().run(options);
|
||||
} else if (mode == "decoder") {
|
||||
app::Decoder().run(options);
|
||||
if (app::ftable.find(mode) != app::ftable.end()) {
|
||||
app.ftable[mode](options);
|
||||
} else {
|
||||
ABORT("Unknown --mode {}. Use one of: wasm,native,decoder", mode);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user