mirror of
https://github.com/browsermt/bergamot-translator.git
synced 2024-08-15 16:40:26 +03:00
Single executable (#175)
* Collapsing executables
* Adding new test executable
* Deleting old executable sources
* Updating brt to operate with modes
* cli-framework -> cli
* Updating workflows to check for bergamot instead of bergamot-translator-app
* Adding documentation
* Making fn pure virtual
* Shuffling apps into app namespace, alongside class documentation
* Include app folder in documentation
* BRT update service-cli -> native
* parser.h: service-cli -> native
* Updates to marian-integration.md
* Cleanup: Remove templates, interface proper
* change 4 to 2 cores for build instructions
* service-cli -> native
* Commenting the string constructor explanation
* Not doing halfway interface / inheritance
* Nick hates state, let's try this one
* Revert "Nick hates state, let's try this one"
This reverts commit e56db9f474
.
* class -> struct before trying std::function stuff
* oop -> functional?
* Hints on what is happening
* app::ftable -> app::REGISTRY
* We have if-else and functions now.
And we won't have test apps.
* Doc linking to usage examples in brt
* Remove unordered_map
* Documentation updates
* Fix warning
This commit is contained in:
parent
eb579ed26f
commit
5d3ec9c0a9
2
.github/workflows/native-mac.yml
vendored
2
.github/workflows/native-mac.yml
vendored
@ -91,7 +91,7 @@ jobs:
|
||||
- name: Print versions
|
||||
working-directory: build
|
||||
run: |
|
||||
./app/bergamot-translator-app --version
|
||||
./app/bergamot --version
|
||||
|
||||
- name: Install regression-test framework (BRT)
|
||||
working-directory: bergamot-translator-tests
|
||||
|
2
.github/workflows/native-ubuntu.yml
vendored
2
.github/workflows/native-ubuntu.yml
vendored
@ -180,7 +180,7 @@ jobs:
|
||||
- name: Print versions
|
||||
working-directory: build
|
||||
run: |
|
||||
./app/bergamot-translator-app --version
|
||||
./app/bergamot --version
|
||||
|
||||
|
||||
- name: Install regression-test framework (BRT)
|
||||
|
@ -1,10 +1,2 @@
|
||||
add_executable(bergamot-translator-app bergamot-translator-app.cpp)
|
||||
target_link_libraries(bergamot-translator-app PRIVATE bergamot-translator)
|
||||
|
||||
if (NOT USE_WASM_COMPATIBLE_SOURCE)
|
||||
add_executable(service-cli service-cli.cpp)
|
||||
target_link_libraries(service-cli PRIVATE bergamot-translator)
|
||||
|
||||
add_executable(marian-decoder-new marian-decoder-new.cpp)
|
||||
target_link_libraries(marian-decoder-new PRIVATE bergamot-translator)
|
||||
endif()
|
||||
add_executable(bergamot bergamot.cpp)
|
||||
target_link_libraries(bergamot PRIVATE bergamot-translator)
|
||||
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
* main.cpp
|
||||
*
|
||||
* An application which accepts line separated texts in stdin and returns
|
||||
* translated ones in stdout. It is convenient for batch processing and can be
|
||||
* used with tools like SacreBLEU.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "translator/parser.h"
|
||||
#include "translator/service.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
// Create a configParser and load command line parameters into a YAML config
|
||||
// string.
|
||||
auto configParser = marian::bergamot::createConfigParser();
|
||||
auto options = configParser.parseOptions(argc, argv, true);
|
||||
std::string config = options->asYamlString();
|
||||
|
||||
// Route the config string to construct marian model through TranslationModel
|
||||
marian::bergamot::Service model(config);
|
||||
|
||||
marian::bergamot::ResponseOptions responseOptions;
|
||||
std::vector<std::string> texts;
|
||||
|
||||
for (std::string line; std::getline(std::cin, line);) {
|
||||
texts.emplace_back(line);
|
||||
}
|
||||
|
||||
auto results = model.translateMultiple(std::move(texts), responseOptions);
|
||||
|
||||
for (auto &result : results) {
|
||||
std::cout << result.getTranslatedText() << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
18
app/bergamot.cpp
Normal file
18
app/bergamot.cpp
Normal file
@ -0,0 +1,18 @@
|
||||
#include "cli.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
auto cp = marian::bergamot::createConfigParser();
|
||||
auto options = cp.parseOptions(argc, argv, true);
|
||||
const std::string mode = options->get<std::string>("bergamot-mode");
|
||||
using namespace marian::bergamot;
|
||||
if (mode == "wasm") {
|
||||
app::wasm(options);
|
||||
} else if (mode == "native") {
|
||||
app::native(options);
|
||||
} else if (mode == "decoder") {
|
||||
app::decoder(options);
|
||||
} else {
|
||||
ABORT("Unknown --mode {}. Use one of: {wasm,native,decoder}", mode);
|
||||
}
|
||||
return 0;
|
||||
}
|
186
app/cli.h
Normal file
186
app/cli.h
Normal file
@ -0,0 +1,186 @@
|
||||
#ifndef BERGAMOT_APP_CLI_H
|
||||
#define BERGAMOT_APP_CLI_H
|
||||
#include <cstdlib>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "common/definitions.h"
|
||||
#include "common/timer.h"
|
||||
#include "common/utils.h"
|
||||
#include "marian.h"
|
||||
#include "translator/byte_array_util.h"
|
||||
#include "translator/parser.h"
|
||||
#include "translator/response.h"
|
||||
#include "translator/response_options.h"
|
||||
#include "translator/service.h"
|
||||
|
||||
namespace marian {
|
||||
namespace bergamot {
|
||||
|
||||
// Living inside marian::bergamot:: makes life easier: names need not be prefixed everywhere, and these apps make plentiful use of its constructs.
|
||||
|
||||
namespace app {
|
||||
|
||||
/// Previously bergamot-translator-app. Provides a command-line app on native which executes the code-path used by Web
|
||||
/// Assembly. Expected to be maintained consistent with how the browser (Mozilla through WebAssembly) dictates its API
|
||||
/// and tests be intact. Also used in [bergamot-evaluation](https://github.com/mozilla/bergamot-evaluation).
|
||||
///
|
||||
/// Usage example:
|
||||
/// [brt/tests/basic/test_bergamot_translator_app_intgemm_8bit.cpu-threads.0.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/tests/basic/test_bergamot_translator_app_intgemm_8bit.cpu-threads.0.sh)
|
||||
///
|
||||
/// * Input : read from stdin as sentences as lines of text.
|
||||
/// * Output: written to stdout as translations for the sentences supplied in corresponding lines
|
||||
///
|
||||
/// @param [options]: Options to translate passed down to marian through Options.
|
||||
void wasm(Ptr<Options> options) {
|
||||
// Here, we take the command-line interface which is uniform across all apps. This is parsed into Ptr<Options> by
|
||||
// marian. However, mozilla does not allow a Ptr<Options> constructor and demands an std::string constructor since
|
||||
// std::string isn't marian internal unlike Ptr<Options>. Since this std::string path needs to be tested for mozilla
|
||||
// and since this class/CLI is intended at testing mozilla's path, we go from:
|
||||
//
|
||||
// cmdline -> Ptr<Options> -> std::string -> Service(std::string)
|
||||
//
|
||||
// Overkill, yes.
|
||||
|
||||
std::string config = options->asYamlString();
|
||||
Service model(config);
|
||||
|
||||
ResponseOptions responseOptions;
|
||||
std::vector<std::string> texts;
|
||||
|
||||
for (std::string line; std::getline(std::cin, line);) {
|
||||
texts.emplace_back(line);
|
||||
}
|
||||
|
||||
auto results = model.translateMultiple(std::move(texts), responseOptions);
|
||||
|
||||
for (auto &result : results) {
|
||||
std::cout << result.getTranslatedText() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/// Application used to benchmark with marian-decoder from time-to-time. The implementation in this repository follows a
|
||||
/// different route than marian-decoder and routinely needs to be checked that the speeds while operating similar to
|
||||
/// marian-decoder are not affected during the course of development.
|
||||
///
|
||||
/// Example usage:
|
||||
/// [brt/speed-tests/test_wngt20_perf.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/speed-tests/test_wngt20_perf.sh).
|
||||
///
|
||||
/// Expected to be compatible with Translator[1] and marian-decoder[2].
|
||||
///
|
||||
/// - [1]
|
||||
/// [marian-dev/../src/translator/translator.h](https://github.com/marian-nmt/marian-dev/blob/master/src/translator/translator.h)
|
||||
/// - [2]
|
||||
/// [marian-dev/../src/command/marian_decoder.cpp](https://github.com/marian-nmt/marian/blob/master/src/command/marian_decoder.cpp)
|
||||
///
|
||||
/// * Input: stdin, lines containing sentences, same as marian-decoder.
|
||||
/// * Output: to stdout, translations of the sentences supplied via stdin in corresponding lines
|
||||
///
|
||||
/// @param [in] options: constructed from command-line supplied arguments
|
||||
void decoder(Ptr<Options> options) {
|
||||
marian::timer::Timer decoderTimer;
|
||||
Service service(options);
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
std_input << std::cin.rdbuf();
|
||||
std::string input = std_input.str();
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input));
|
||||
responseFuture.wait();
|
||||
const Response &response = responseFuture.get();
|
||||
|
||||
for (size_t sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
|
||||
std::cout << response.target.sentence(sentenceIdx) << "\n";
|
||||
}
|
||||
LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed());
|
||||
}
|
||||
|
||||
/// Command line interface to the test the features being developed as part of bergamot C++ library on native platform.
|
||||
///
|
||||
/// Usage example:
|
||||
/// [brt/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh](https://github.com/browsermt/bergamot-translator-tests/blob/main/tests/basic/test_service-cli_intgemm_8bit.cpu-threads.4.sh)
|
||||
///
|
||||
/// * Input: reads from stdin, blob of text, read as a whole ; sentence-splitting etc handled internally.
|
||||
/// * Output: to stdout, translation of the source text and additional information like sentences, alignments between
|
||||
/// source and target tokens and quality scores.
|
||||
///
|
||||
/// @param [in] options: options to build translator
|
||||
void native(Ptr<Options> options) {
|
||||
// Prepare memories for bytearrays (including model, shortlist and vocabs)
|
||||
MemoryBundle memoryBundle;
|
||||
|
||||
if (options->get<bool>("bytearray")) {
|
||||
// Load legit values into bytearrays.
|
||||
memoryBundle = getMemoryBundleFromConfig(options);
|
||||
}
|
||||
|
||||
Service service(options, std::move(memoryBundle));
|
||||
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
std_input << std::cin.rdbuf();
|
||||
std::string input = std_input.str();
|
||||
|
||||
ResponseOptions responseOptions;
|
||||
responseOptions.qualityScores = true;
|
||||
responseOptions.alignment = true;
|
||||
responseOptions.alignmentThreshold = 0.2f;
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input), responseOptions);
|
||||
responseFuture.wait();
|
||||
Response response = responseFuture.get();
|
||||
|
||||
std::cout << "[original]: " << response.source.text << '\n';
|
||||
std::cout << "[translated]: " << response.target.text << '\n';
|
||||
for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
|
||||
std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx) << '\n';
|
||||
std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx) << '\n';
|
||||
std::cout << "Alignments" << '\n';
|
||||
typedef std::pair<size_t, float> Point;
|
||||
|
||||
// Initialize a point vector.
|
||||
std::vector<std::vector<Point>> aggregate(response.source.numWords(sentenceIdx));
|
||||
|
||||
// Handle alignments
|
||||
auto &alignments = response.alignments[sentenceIdx];
|
||||
for (auto &p : alignments) {
|
||||
aggregate[p.src].emplace_back(p.tgt, p.prob);
|
||||
}
|
||||
|
||||
for (size_t src = 0; src < aggregate.size(); src++) {
|
||||
std::cout << response.source.word(sentenceIdx, src) << ": ";
|
||||
for (auto &p : aggregate[src]) {
|
||||
std::cout << response.target.word(sentenceIdx, p.first) << "(" << p.second << ") ";
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
|
||||
// Handle quality.
|
||||
auto &quality = response.qualityScores[sentenceIdx];
|
||||
std::cout << "Quality: whole(" << quality.sequence << "), tokens below:" << '\n';
|
||||
size_t wordIdx = 0;
|
||||
bool first = true;
|
||||
for (auto &p : quality.word) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
std::cout << " ";
|
||||
}
|
||||
std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p << ")";
|
||||
wordIdx++;
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
std::cout << "--------------------------\n";
|
||||
std::cout << '\n';
|
||||
}
|
||||
|
||||
} // namespace app
|
||||
|
||||
} // namespace bergamot
|
||||
} // namespace marian
|
||||
|
||||
#endif // BERGAMOT_APP_CLI_H
|
@ -1,46 +0,0 @@
|
||||
#include <cstdlib>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "common/definitions.h"
|
||||
#include "common/timer.h"
|
||||
#include "common/utils.h"
|
||||
#include "marian.h"
|
||||
#include "translator/history.h"
|
||||
#include "translator/output_collector.h"
|
||||
#include "translator/output_printer.h"
|
||||
#include "translator/parser.h"
|
||||
#include "translator/response.h"
|
||||
#include "translator/service.h"
|
||||
|
||||
// Prints every translated sentence of `response` to stdout, one per line. `options` is accepted but not used here.
void marian_decoder_minimal(const marian::bergamot::Response &response,
                            marian::Ptr<marian::Options> options) {
  // We are no longer marian-decoder compatible. Server ideas are on hold.
  size_t sentenceIdx = 0;
  while (sentenceIdx < response.size()) {
    std::cout << response.target.sentence(sentenceIdx) << "\n";
    ++sentenceIdx;
  }
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
auto cp = marian::bergamot::createConfigParser();
|
||||
auto options = cp.parseOptions(argc, argv, true);
|
||||
marian::timer::Timer decoderTimer;
|
||||
|
||||
marian::bergamot::Service service(options);
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
std_input << std::cin.rdbuf();
|
||||
std::string input = std_input.str();
|
||||
using marian::bergamot::Response;
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture = service.translate(std::move(input));
|
||||
responseFuture.wait();
|
||||
const Response &response = responseFuture.get();
|
||||
|
||||
marian_decoder_minimal(response, options);
|
||||
|
||||
LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed());
|
||||
return 0;
|
||||
}
|
@ -1,97 +0,0 @@
|
||||
#include <cstdlib>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "common/definitions.h"
|
||||
#include "common/utils.h"
|
||||
#include "marian.h"
|
||||
#include "translator/byte_array_util.h"
|
||||
#include "translator/parser.h"
|
||||
#include "translator/response.h"
|
||||
#include "translator/response_options.h"
|
||||
#include "translator/service.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
auto cp = marian::bergamot::createConfigParser();
|
||||
auto options = cp.parseOptions(argc, argv, true);
|
||||
|
||||
// Prepare memories for bytearrays (including model, shortlist and vocabs)
|
||||
marian::bergamot::MemoryBundle memoryBundle;
|
||||
|
||||
if (options->get<bool>("bytearray")) {
|
||||
// Load legit values into bytearrays.
|
||||
memoryBundle = marian::bergamot::getMemoryBundleFromConfig(options);
|
||||
}
|
||||
|
||||
marian::bergamot::Service service(options, std::move(memoryBundle));
|
||||
|
||||
// Read a large input text blob from stdin
|
||||
std::ostringstream std_input;
|
||||
std_input << std::cin.rdbuf();
|
||||
std::string input = std_input.str();
|
||||
using marian::bergamot::Response;
|
||||
|
||||
marian::bergamot::ResponseOptions responseOptions;
|
||||
responseOptions.qualityScores = true;
|
||||
responseOptions.alignment = true;
|
||||
responseOptions.alignmentThreshold = 0.2f;
|
||||
|
||||
// Wait on future until Response is complete
|
||||
std::future<Response> responseFuture =
|
||||
service.translate(std::move(input), responseOptions);
|
||||
responseFuture.wait();
|
||||
Response response = responseFuture.get();
|
||||
|
||||
std::cout << "[original]: " << response.source.text << '\n';
|
||||
std::cout << "[translated]: " << response.target.text << '\n';
|
||||
for (int sentenceIdx = 0; sentenceIdx < response.size(); sentenceIdx++) {
|
||||
std::cout << " [src Sentence]: " << response.source.sentence(sentenceIdx)
|
||||
<< '\n';
|
||||
std::cout << " [tgt Sentence]: " << response.target.sentence(sentenceIdx)
|
||||
<< '\n';
|
||||
std::cout << "Alignments" << '\n';
|
||||
typedef std::pair<size_t, float> Point;
|
||||
|
||||
// Initialize a point vector.
|
||||
std::vector<std::vector<Point>> aggregate(
|
||||
response.source.numWords(sentenceIdx));
|
||||
|
||||
// Handle alignments
|
||||
auto &alignments = response.alignments[sentenceIdx];
|
||||
for (auto &p : alignments) {
|
||||
aggregate[p.src].emplace_back(p.tgt, p.prob);
|
||||
}
|
||||
|
||||
for (size_t src = 0; src < aggregate.size(); src++) {
|
||||
std::cout << response.source.word(sentenceIdx, src) << ": ";
|
||||
for (auto &p : aggregate[src]) {
|
||||
std::cout << response.target.word(sentenceIdx, p.first) << "("
|
||||
<< p.second << ") ";
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
|
||||
// Handle quality.
|
||||
auto &quality = response.qualityScores[sentenceIdx];
|
||||
std::cout << "Quality: whole(" << quality.sequence
|
||||
<< "), tokens below:" << '\n';
|
||||
size_t wordIdx = 0;
|
||||
bool first = true;
|
||||
for (auto &p : quality.word) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
std::cout << " ";
|
||||
}
|
||||
std::cout << response.target.word(sentenceIdx, wordIdx) << "(" << p
|
||||
<< ")";
|
||||
wordIdx++;
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
std::cout << "--------------------------\n";
|
||||
std::cout << '\n';
|
||||
|
||||
return 0;
|
||||
}
|
@ -1 +1 @@
|
||||
Subproject commit 1b20a62f6614db371f59b97ff83262b8ebd235de
|
||||
Subproject commit 020135af1b620caa27929c1403c50ec3299e5bff
|
@ -82,7 +82,7 @@ breathe_projects = { 'bergamot-translator': './doxygen/xml' }
|
||||
breathe_default_project = 'bergamot-translator'
|
||||
|
||||
doxygen_config = """
|
||||
INPUT = ../src
|
||||
INPUT = ../src ../app
|
||||
EXCLUDE += ../3rd_party
|
||||
EXCLUDE += ../src/tests
|
||||
EXCLUDE_PATTERNS = *.md *.txt
|
||||
|
@ -1,11 +1,23 @@
|
||||
# Building marian code for bergamot
|
||||
# Bergamot C++ Library
|
||||
|
||||
This document summarizes the minimal build instructions to develop for the
|
||||
marian machine translation toolkit powering bergamot-translator.
|
||||
This document contains instructions for developing modifications on top of the
|
||||
marian machine translation toolkit powering bergamot-translator. The library is
|
||||
optimized towards fast and efficient translation of a given input.
|
||||
|
||||
## Build Instructions
|
||||
|
||||
Marian CPU version requires Intel MKL or OpenBLAS. Both are free, but MKL is not open-sourced. Intel MKL is strongly recommended as it is faster. On Ubuntu 16.04 and newer it can be installed from the APT repositories.
|
||||
Note: You are strongly advised to refer to the continuous integration on this
|
||||
repository, which builds bergamot-translator and associated applications from
|
||||
scratch. Examples to run these command line-applications are available in the
|
||||
[bergamot-translator-tests](https://github.com/browsermt/bergamot-translator-tests)
|
||||
repository. Builds take about 30 mins on a consumer grade machine, so using a
|
||||
tool like ccache is highly recommended.
|
||||
|
||||
### Dependencies
|
||||
|
||||
Marian CPU version requires Intel MKL or OpenBLAS. Both are free, but MKL is
|
||||
not open-sourced. Intel MKL is strongly recommended as it is faster. On Ubuntu
|
||||
16.04 and newer it can be installed from the APT repositories.
|
||||
|
||||
```bash
|
||||
wget -qO- 'https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB' | sudo apt-key add -
|
||||
@ -15,32 +27,47 @@ sudo apt-get install intel-mkl-64bit-2020.0-088
|
||||
```
|
||||
On macOS, Apple's Accelerate framework will be used instead of MKL/OpenBLAS.
|
||||
|
||||
|
||||
### Building bergamot-translator
|
||||
|
||||
Web Assembly (WASM) reduces building to only using a subset of functionalities
|
||||
of marian, the translation library powering bergamot-translator. When
|
||||
developing bergamot-translator it is important that the sources added be
|
||||
compatible with marian. Therefore, it is required to set
|
||||
`-DUSE_WASM_COMPATIBLE_SOURCE=on`.
|
||||
|
||||
```
|
||||
$ git clone https://github.com/browsermt/bergamot-translator
|
||||
$ cd bergamot-translator
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ cmake .. -DUSE_WASM_COMPATIBLE_SOURCE=off -DCMAKE_BUILD_TYPE=Release
|
||||
$ make -j
|
||||
$ make -j2
|
||||
```
|
||||
|
||||
|
||||
The build will generate the library that can be linked to any project. All the
|
||||
public header files are specified in `src` folder.
|
||||
|
||||
## Command line apps
|
||||
|
||||
The following executables are created by the build:
|
||||
Bergamot-translator is intended to be used as a library. However, we provide a
|
||||
command-line application which is capable of translating text provided on
|
||||
standard-input. During development this application is used to perform
|
||||
regression-tests.
|
||||
|
||||
1. `app/service-cli`: Extends marian with the capability to work with string_views.
|
||||
`service-cli` exists to check if the underlying code, without the
|
||||
integration works or not.
|
||||
2. `app/bergamot-translator-app`: App which integrates service-cli's
|
||||
functionality into the translator agnostic API specified as part of the
|
||||
project. Integration failures are detected if same arguments work with
|
||||
`service-cli` and does not with `bergamot-translator-app`.
|
||||
3. `app/marian-decoder-new`: Helper executable to conveniently benchmark new
|
||||
implementation with the optimized upstream marian-decoder.
|
||||
There are effectively multiple CLIs, implemented as functions in one namespace, all
|
||||
provided in `app/cli.h`. These are packed into a single executable named
|
||||
`bergamot` by means of a `--bergamot-mode BERGAMOT_MODE` switch.
|
||||
|
||||
The following modes are available:
|
||||
|
||||
* `--bergamot-mode native`
|
||||
* `--bergamot-mode wasm`
|
||||
* `--bergamot-mode decoder`
|
||||
|
||||
Find documentation on these modes with the API documentation for apps [here](./api/namespace_marian__bergamot__app.html#functions).
|
||||
|
||||
## Example command line run
|
||||
|
||||
The models required to run the command-line are available at
|
||||
[data.statmt.org/bergamot/models/](http://data.statmt.org/bergamot/models/).
|
||||
@ -49,13 +76,11 @@ at:
|
||||
|
||||
* [data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz](http://data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz)
|
||||
|
||||
<details>
|
||||
<summary> Example run of commandline: Click to expand </summary>
|
||||
<p>
|
||||
|
||||
```bash
|
||||
MODEL_DIR=... # path to where the model-files are.
|
||||
BERGAMOT_MODE='native'
|
||||
ARGS=(
|
||||
--bergamot-mode $BERGAMOT_MODE
|
||||
-m $MODEL_DIR/model.intgemm.alphas.bin # Path to model file.
|
||||
--vocabs
|
||||
$MODEL_DIR/vocab.deen.spm # source-vocabulary
|
||||
@ -84,14 +109,10 @@ ARGS=(
|
||||
--ssplit-mode paragraph
|
||||
)
|
||||
|
||||
./app/service-cli "${ARGS[@]}" < path-to-input-file
|
||||
./app/bergamot-translator-app "${ARGS[@]}" < path-to-input-file
|
||||
./app/bergamot "${ARGS[@]}" < path-to-input-file
|
||||
|
||||
```
|
||||
</p>
|
||||
|
||||
</summary>
|
||||
</details>
|
||||
|
||||
## Coding Style
|
||||
|
||||
@ -108,4 +129,3 @@ used to also check for the coding style in the CI.
|
||||
```bash
|
||||
python3 run-clang-format.py -i --style file -r src wasm
|
||||
```
|
||||
|
||||
|
@ -26,6 +26,9 @@ inline marian::ConfigParser createConfigParser() {
|
||||
cp.addOption<bool>("--check-bytearray", "Bergamot Options",
|
||||
"Flag holds whether to check the content of the bytearrays (true by default)", true);
|
||||
|
||||
cp.addOption<std::string>("--bergamot-mode", "Bergamot Options",
|
||||
"Operating mode for bergamot: [wasm, native, decoder]", "native");
|
||||
|
||||
return cp;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user