Merge pull request #38 from browsermt/wasm-integration

wasm-integration -> integration
abhi-agg 2021-02-23 16:34:22 +01:00, committed by GitHub
commit c28687fffb
47 changed files with 1801 additions and 457 deletions

.gitignore (vendored, 4 lines changed)

@ -16,3 +16,7 @@ CTestTestfile.cmake
_deps
wasm/test_page/node_modules
build-*
models
wasm/test_page/bergamot-translator-worker.*

.gitmodules (vendored, 2 lines changed)

@ -1,6 +1,6 @@
[submodule "3rd_party/ssplit-cpp"]
path = 3rd_party/ssplit-cpp
url = https://github.com/ugermann/ssplit-cpp
url = https://github.com/abhi-agg/ssplit-cpp
[submodule "3rd_party/marian-dev"]
path = 3rd_party/marian-dev
url = https://github.com/browsermt/marian-dev


@ -1,4 +1,10 @@
add_subdirectory(marian-dev)
if(COMPILE_WASM)
# This is a bad way of adding compilation flags. Will be improved soon.
add_compile_options(${WASM_COMPILE_FLAGS})
endif(COMPILE_WASM)
add_subdirectory(ssplit-cpp)
# Add include directories for 3rd party targets to be able to use it anywhere in the

@ -1 +1 @@
Subproject commit 2f65280459737c37c270e4ad0b6d41de215d11e0
Subproject commit 467c43a292a68b7913af2a00d353de97c1740f92

@ -1 +1 @@
Subproject commit 01e71b4964fdc351f932a7a23cab4cb80b9698e8
Subproject commit 432208826ee27e7b3984b53774b1a16d74256d77


@ -8,19 +8,69 @@ project(bergamot_translator CXX C)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")
# Custom CMake options to compile marian (a 3rd party submodule) for this project
option(COMPILE_CUDA "Compile GPU version" OFF)
option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON)
option(USE_STATIC_LIBS "Link statically against non-system libs" ON)
option(USE_MKL "Compile with MKL support" ON)
include(CMakeDependentOption)
execute_process(COMMAND git submodule update --init --recursive --no-fetch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
# Project specific cmake options
option(COMPILE_WASM "Compile for WASM" OFF)
option(USE_WASM_COMPATIBLE_MARIAN "Use wasm compatible marian backend" ON)
CMAKE_DEPENDENT_OPTION(COMPILE_THREAD_VARIANT "Compile the project with thread support" OFF
"USE_WASM_COMPATIBLE_MARIAN" ON)
SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be packaged (pre-loaded) in wasm builds")
# Set marian (3rd party submodule) cmake options to compile for this project
SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
if (USE_WASM_COMPATIBLE_MARIAN)
# If using wasm compatible marian then set following flags
SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only")
SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support")
SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds")
SET(COMPILE_WITHOUT_EXCEPTIONS ON CACHE BOOL "Compile without exceptions")
if(COMPILE_WASM)
# Set WORMHOLE to ON for marian whenever compiling for wasm platform
SET(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")
endif()
endif()
# Set ssplit (3rd party submodule) cmake options to compile for this project
SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
# Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html
# Ensures the submodules are set correctly during a build.
find_package(Git QUIET)
if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
# Update submodules as needed
option(GIT_SUBMODULE "Check submodules during build" ON)
if(GIT_SUBMODULE)
message(STATUS "Submodule update")
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE GIT_SUBMOD_RESULT)
if(NOT GIT_SUBMOD_RESULT EQUAL "0")
message(FATAL_ERROR "git submodule update --init failed with ${GIT_SUBMOD_RESULT}, please checkout submodules")
endif()
endif()
endif()
if(NOT COMPILE_WASM)
# Set BUILD_ARCH to native only while compiling for non wasm platform
set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")
endif()
if(COMPILE_WASM)
list(APPEND WASM_COMPILE_FLAGS -pthread -O3 -g2 -fPIC -mssse3 -msimd128)
list(APPEND WASM_COMPILE_FLAGS "SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=0" "SHELL:-s DISABLE_EXCEPTION_CATCHING=1" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1")
list(APPEND WASM_COMPILE_FLAGS -Wno-error=pthreads-mem-growth)
endif(COMPILE_WASM)
add_subdirectory(3rd_party)
add_subdirectory(src)
add_subdirectory(app)
if(NOT COMPILE_WASM)
add_subdirectory(app)
endif()
if(COMPILE_WASM)
add_subdirectory(wasm)
endif(COMPILE_WASM)

README.md (136 lines changed)

@ -3,58 +3,92 @@
Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.github.io/) framework based) neural machine translation functionality in accordance with the [Bergamot](https://browser.mt/) project that focuses on improving client-side machine translation in a web browser.
## Build Instructions
```
$ git clone https://github.com/browsermt/bergamot-translator
$ cd bergamot-translator
$ mkdir build
$ cd build
$ cmake ../
$ make -j
```
## Usage
### Bergamot Translator
The build will generate the library that can be linked to any project. All the public header files are specified in the `src` folder.
### `service-cli`
An executable `service-cli` is generated by the build in the `app` folder and
provides a command line interface to the underlying translator. The models
required to run the command-line are available at
[data.statmt.org/bergamot/models/](http://data.statmt.org/bergamot/models/).
The following example uses an English to German tiny11 student model, available
at:
* [data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz](http://data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz)
### Build Natively
```bash
MODEL_DIR=... # path to where the model-files are.
ARGS=(
-m $MODEL_DIR/model.intgemm.alphas.bin # Path to model file.
--vocabs
$MODEL_DIR/vocab.deen.spm # source-vocabulary
$MODEL_DIR/vocab.deen.spm # target-vocabulary
# The following increases speed through one-best-decoding, shortlist and quantization.
--beam-size 1 --skip-cost --shortlist $MODEL_DIR/lex.s2t.gz 50 50 --int8shiftAlphaAll
# Number of CPU threads (workers to launch). Parallelizes over cores and improves speed.
--cpu-threads 4
# Hyperparameters of how many tokens to be accounted for in a batch and maximum tokens in a sentence.
--max-input-sentence-tokens 1024 --max-input-tokens 1024
# Three modes are supported
# - sentence: One sentence per line
# - paragraph: One paragraph per line.
# - wrapped text: Paragraphs are separated by empty line.
--ssplit-mode paragraph
)
./app/service-cli "${ARGS[@]}" < path-to-input-file
git clone --recursive https://github.com/browsermt/bergamot-translator
cd bergamot-translator
mkdir build
cd build
cmake ../
make -j
```
### Build WASM
#### Compiling for the first time
1. Download and install Emscripten using the following instructions
* Get the latest sdk: `git clone https://github.com/emscripten-core/emsdk.git`
* Enter the cloned directory: `cd emsdk`
* Install the latest sdk tools: `./emsdk install latest`
* Activate the latest sdk tools: `./emsdk activate latest`
* Activate path variables: `source ./emsdk_env.sh`
2. Clone the repository and checkout the appropriate branch using these instructions:
```bash
git clone https://github.com/browsermt/bergamot-translator
cd bergamot-translator
git checkout -b wasm-integration origin/wasm-integration
git submodule update --init --recursive
```
3. Download files (only required if you want to package files into the wasm binary)
This step is only required if you want to package files (e.g. models, vocabularies etc.) into the wasm binary; otherwise skip it.
The build preloads these files into Emscripten's virtual file system.
If you want to package the bergamot project specific models, follow these instructions:
```bash
mkdir models
git clone https://github.com/mozilla-applied-ml/bergamot-models
cp -rf bergamot-models/* models
gunzip models/*/*
```
4. Compile
1. Create a folder where you want to build all the artefacts (`build-wasm` in this case)
```bash
mkdir build-wasm
cd build-wasm
```
2. Compile the artefacts
* If you want to package files into the wasm binary, execute the following commands (replace `FILES_TO_PACKAGE` with the path of the directory containing the files to be packaged into the wasm binary)
```bash
emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR=FILES_TO_PACKAGE ../
emmake make -j
```
e.g. if you want to package the bergamot project specific models (downloaded in step 3 above), replace `FILES_TO_PACKAGE` with `../models`
* If you don't want to package any files into the wasm binary, execute the following commands:
```bash
emcmake cmake -DCOMPILE_WASM=on ../
emmake make -j
```
The artefacts (.js and .wasm files) will be available in the `wasm` folder of the build directory (`build-wasm` in this case).
#### Recompiling
As long as you don't update any submodule, just follow the steps in `4.ii` to recompile.\
If you update a submodule, execute the following command before running the steps in `4.ii` to recompile.
```bash
git submodule update --init --recursive
```
## How to use
### Using Native version
The build generates a library that can be integrated into any project. All the public header files are specified in the `src` folder.\
A short example of how to use the APIs is provided in the `app/main.cpp` file and sketched below.
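For quick reference, here is a condensed sketch of that flow, modeled on `app/main.cpp` from this change (error handling omitted; the model, vocabulary and shortlist flags are assumed to be passed on the command line, and the `parseOptions` call follows Marian's usual `ConfigParser` API):
```cpp
#include <future>
#include <iostream>
#include <string>

#include "translator/parser.h"
#include "translator/response.h"
#include "translator/service.h"

int main(int argc, char *argv[]) {
  // Parse Bergamot/Marian options (model, vocabs, shortlist, ssplit-mode, ...).
  auto configParser = marian::bergamot::createConfigParser();
  auto options = configParser.parseOptions(argc, argv, /*doValidate=*/true);
  marian::bergamot::Service service(options);

  std::string input = "Hello World.";
  // translate() is asynchronous: wait on the future until the Response is ready.
  std::future<marian::bergamot::Response> responseFuture =
      service.translate(std::move(input));
  responseFuture.wait();
  marian::bergamot::Response response = responseFuture.get();
  std::cout << response.translation() << std::endl;

  service.stop();
  return 0;
}
```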
### Using WASM version
Please follow the `README` inside the `wasm` folder of this repository that demonstrates how to use the translator in JavaScript.


@ -7,8 +7,8 @@
#include "common/utils.h"
#include "marian.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/service.h"
#include "translator/translation_result.h"
int main(int argc, char *argv[]) {
auto cp = marian::bergamot::createConfigParser();
@ -19,14 +19,13 @@ int main(int argc, char *argv[]) {
std::ostringstream std_input;
std_input << std::cin.rdbuf();
std::string input = std_input.str();
using marian::bergamot::TranslationResult;
using marian::bergamot::Response;
// Wait on future until TranslationResult is complete
std::future<TranslationResult> translation_result_future =
service.translate(std::move(input));
translation_result_future.wait();
const TranslationResult &translation_result = translation_result_future.get();
std::cout << translation_result.getTranslatedText() << std::endl;
// Wait on future until Response is complete
std::future<Response> responseFuture = service.translate(std::move(input));
responseFuture.wait();
Response response = responseFuture.get();
std::cout << response.translation() << std::endl;
// Stop Service.
service.stop();


@ -44,10 +44,10 @@ int main(int argc, char **argv) {
"Prague, the University of Sheffield, University of Tartu, and "
"Mozilla.");
auto futureResults = model->translate(std::move(texts), translationRequest);
auto results = model->translate(std::move(texts), translationRequest);
// Resolve the future and get the actual result
std::vector<TranslationResult> results = futureResults.get();
//std::vector<TranslationResult> results = futureResults.get();
for (auto &result : results) {
std::cout << "[original]: " << result.getOriginalText() << std::endl;


@ -11,8 +11,8 @@
#include "translator/output_collector.h"
#include "translator/output_printer.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/service.h"
#include "translator/translation_result.h"
void marian_decoder_minimal(const marian::Histories &histories,
marian::Ptr<marian::Vocab const> targetVocab,
@ -46,16 +46,14 @@ int main(int argc, char *argv[]) {
std::ostringstream std_input;
std_input << std::cin.rdbuf();
std::string input = std_input.str();
using marian::bergamot::TranslationResult;
using marian::bergamot::Response;
// Wait on future until TranslationResult is complete
std::future<TranslationResult> translation_result_future =
service.translate(std::move(input));
translation_result_future.wait();
const TranslationResult &translation_result = translation_result_future.get();
// Wait on future until Response is complete
std::future<Response> responseFuture = service.translate(std::move(input));
responseFuture.wait();
const Response &response = responseFuture.get();
marian_decoder_minimal(translation_result.getHistories(),
service.targetVocab(), options);
marian_decoder_minimal(response.histories(), service.targetVocab(), options);
LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed());
service.stop();

doc/marian-integration.md (new file, 85 lines)

@ -0,0 +1,85 @@
# Marian Integration
This document summarizes the minimal build instructions for developing the marian code powering bergamot-translator.
## Build Instructions
```
$ git clone https://github.com/browsermt/bergamot-translator
$ cd bergamot-translator
$ mkdir build
$ cd build
$ cmake .. -DUSE_WASM_COMPATIBLE_MARIAN=off -DCMAKE_BUILD_TYPE=Release
$ make -j
```
The build will generate the library that can be linked to any project. All the
public header files are specified in the `src` folder.
## Command line apps
The following executables are created by the build:
1. `app/service-cli`: Extends marian with the capability to work with string_views.
`service-cli` exists to check whether the underlying code works without the integration.
2. `app/bergamot-translator-app`: App which integrates service-cli's
functionality into the translator-agnostic API specified as part of the
project. Integration failures are detected when the same arguments work with
`service-cli` but not with `bergamot-translator-app`.
3. `app/marian-decoder-new`: Helper executable to conveniently benchmark the
new implementation against the optimized upstream marian-decoder.
The models required to run the command-line are available at
[data.statmt.org/bergamot/models/](http://data.statmt.org/bergamot/models/).
The following example uses an English to German tiny11 student model, available
at:
* [data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz](http://data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz)
<details>
<summary> Example run of commandline: Click to expand </summary>
<p>
```bash
MODEL_DIR=... # path to where the model-files are.
ARGS=(
-m $MODEL_DIR/model.intgemm.alphas.bin # Path to model file.
--vocabs
$MODEL_DIR/vocab.deen.spm # source-vocabulary
$MODEL_DIR/vocab.deen.spm # target-vocabulary
# The following increases speed through one-best-decoding, shortlist and quantization.
--beam-size 1 --skip-cost --shortlist $MODEL_DIR/lex.s2t.gz 50 50 --int8shiftAlphaAll
# Number of CPU threads (workers to launch). Parallelizes over cores and improves speed.
# A value of 0 uses a single-threaded path with no worker thread launches.
--cpu-threads 4
# Maximum size of a sentence allowed. If a sentence is above this length,
# it's broken into pieces of less than or equal to this size.
--max-length-break 1024
# Maximum number of tokens that can fit in a batch. The optimal value
# for the parameter is dependent on hardware and can be obtained by running
# with variations and benchmarking.
--mini-batch-words 1024
# Three modes are supported
# - sentence: One sentence per line
# - paragraph: One paragraph per line.
# - wrapped_text: Paragraphs are separated by empty line.
--ssplit-mode paragraph
)
./app/service-cli "${ARGS[@]}" < path-to-input-file
./app/bergamot-translator-app "${ARGS[@]}" < path-to-input-file
```
</p>
</details>


@ -57,7 +57,7 @@ public:
* entry of texts list will be moved to its corresponding TranslationResult
* object).
*/
virtual std::future<std::vector<TranslationResult>>
virtual std::vector<TranslationResult>
translate(std::vector<std::string> &&texts, TranslationRequest request) = 0;
/* Check if the model can provide alignment information b/w original and

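To illustrate the change above (the unified API's `translate` now returns results directly rather than a `std::future`), here is a hedged sketch of a caller, modeled on the `bergamot-translator-app` usage in this diff; the yaml config contents, the default-constructed `TranslationRequest`, and the `getTranslatedText()` accessor are assumptions not shown in these hunks:
```cpp
#include <iostream>
#include <string>
#include <vector>

#include "TranslationModel.h"

int main() {
  // Yaml-formatted configuration string; real model/vocab paths go here.
  std::string config = "...";
  TranslationModel model(config);

  TranslationRequest request; // assumed to be default-constructible
  std::vector<std::string> texts;
  texts.emplace_back("Hello World.");

  // translate() is now synchronous and returns the results directly.
  std::vector<TranslationResult> results =
      model.translate(std::move(texts), request);
  for (auto &result : results) {
    std::cout << "[original]:   " << result.getOriginalText() << std::endl;
    // A matching getTranslatedText() accessor is assumed here.
    std::cout << "[translated]: " << result.getTranslatedText() << std::endl;
  }
  return 0;
}
```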

@ -20,7 +20,11 @@ class TranslationResult {
public:
typedef std::vector<std::pair<std::string_view, std::string_view>>
SentenceMappings;
#ifdef WASM_BINDINGS
TranslationResult(const std::string &original, const std::string &translation)
: originalText(original), translatedText(translation),
sentenceMappings() {}
#endif
TranslationResult(const std::string &original, const std::string &translation,
SentenceMappings &sentenceMappings)
: originalText(original), translatedText(translation),
@ -31,13 +35,29 @@ public:
translatedText(std::move(other.translatedText)),
sentenceMappings(std::move(other.sentenceMappings)) {}
#ifdef WASM_BINDINGS
TranslationResult(const TranslationResult &other)
: originalText(other.originalText),
translatedText(other.translatedText),
sentenceMappings(other.sentenceMappings) {}
#endif
TranslationResult(std::string &&original, std::string &&translation,
SentenceMappings &&sentenceMappings)
: originalText(std::move(original)),
translatedText(std::move(translation)),
sentenceMappings(std::move(sentenceMappings)) {}
#ifndef WASM_BINDINGS
TranslationResult &operator=(const TranslationResult &) = delete;
#else
TranslationResult &operator=(const TranslationResult &result) {
originalText = result.originalText;
translatedText = result.translatedText;
sentenceMappings = result.sentenceMappings;
return *this;
}
#endif
/* Return the original text. */
const std::string &getOriginalText() const { return originalText; }


@ -10,8 +10,26 @@ add_library(bergamot-translator STATIC
request.cpp
service.cpp
batcher.cpp
translation_result.cpp
response.cpp
batch.cpp
sentence_ranges.cpp
)
if (COMPILE_DECODER_ONLY)
# A dirty hack because of marian's bad cmake practices
target_compile_definitions(bergamot-translator PUBLIC DECODER_ONLY)
endif()
if(COMPILE_WASM)
# A dirty hack because of marian's bad cmake practices
target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM)
# Enable code that is required for generating JS bindings
target_compile_definitions(bergamot-translator PRIVATE WASM_BINDINGS)
target_compile_options(bergamot-translator PRIVATE ${WASM_COMPILE_FLAGS})
endif(COMPILE_WASM)
if (COMPILE_THREAD_VARIANT)
target_compile_definitions(bergamot-translator PRIVATE WITH_PTHREADS)
endif()
target_link_libraries(bergamot-translator marian ssplit)


@ -14,6 +14,7 @@
// All local project includes
#include "TranslationModel.h"
#include "translator/parser.h"
#include "translator/service.h"
std::shared_ptr<marian::Options> parseOptions(const std::string &config) {
@ -34,7 +35,7 @@ std::shared_ptr<marian::Options> parseOptions(const std::string &config) {
// Error: Aborted from void unhandledException() in
// 3rd_party/marian-dev/src/common/logging.cpp:113
marian::ConfigParser configParser(marian::cli::mode::translation);
marian::ConfigParser configParser = marian::bergamot::createConfigParser();
const YAML::Node &defaultConfig = configParser.getConfig();
options.merge(defaultConfig);
@ -55,7 +56,7 @@ TranslationModel::TranslationModel(const std::string &config)
TranslationModel::~TranslationModel() {}
std::future<std::vector<TranslationResult>>
std::vector<TranslationResult>
TranslationModel::translate(std::vector<std::string> &&texts,
TranslationRequest request) {
// Implementing a non-async version first. Unpleasant, but should work.
@ -68,24 +69,30 @@ TranslationModel::translate(std::vector<std::string> &&texts,
// Collect future as marian::bergamot::TranslationResult
auto intermediate = service_.translate(std::move(text));
intermediate.wait();
auto mTranslationResult(std::move(intermediate.get()));
auto marianResponse(std::move(intermediate.get()));
// This mess because marian::string_view != std::string_view
std::string source, translation;
marian::bergamot::Response::SentenceMappings mSentenceMappings;
marianResponse.move(source, translation, mSentenceMappings);
// Convert to UnifiedAPI::TranslationResult
TranslationResult::SentenceMappings sentenceMappings;
for (auto &p : mTranslationResult.getSentenceMappings()) {
for (auto &p : mSentenceMappings) {
std::string_view src(p.first.data(), p.first.size()),
tgt(p.second.data(), p.second.size());
sentenceMappings.emplace_back(src, tgt);
}
// In place construction.
translationResults.emplace_back(std::move(mTranslationResult.source_),
std::move(mTranslationResult.translation_),
std::move(sentenceMappings));
translationResults.emplace_back(
std::move(source), // &&marianResponse.source_
std::move(translation), // &&marianResponse.translation_
std::move(sentenceMappings) // &&sentenceMappings
);
}
promise.set_value(std::move(translationResults));
return future;
return translationResults;
}
bool TranslationModel::isAlignmentSupported() const { return false; }


@ -24,7 +24,8 @@
*/
class TranslationModel : public AbstractTranslationModel {
public:
/* Construct the model using the model configuration options as yaml-formatted string
/* Construct the model using the model configuration options as yaml-formatted
* string
*/
TranslationModel(const std::string &config);
@ -54,7 +55,7 @@ public:
* entry of texts list will be moved to its corresponding TranslationResult
* object).
*/
std::future<std::vector<TranslationResult>>
std::vector<TranslationResult>
translate(std::vector<std::string> &&texts,
TranslationRequest request) override;

src/translator/batch.cpp (new file, 28 lines)

@ -0,0 +1,28 @@
#include "batch.h"
#include "request.h"
namespace marian {
namespace bergamot {
void Batch::log() {
size_t numTokens{0}, maxLength{0};
for (auto &sentence : sentences_) {
numTokens += sentence.numTokens();
maxLength = std::max(maxLength, static_cast<size_t>(sentence.numTokens()));
}
LOG(info, "Batch(tokens={}, max-length={}, sentences_={})", numTokens,
maxLength, sentences_.size());
}
void Batch::add(const RequestSentence &sentence) {
sentences_.push_back(sentence);
}
void Batch::completeBatch(const Histories &histories) {
for (size_t i = 0; i < sentences_.size(); i++) {
sentences_[i].completeSentence(histories[i]);
}
}
} // namespace bergamot
} // namespace marian

src/translator/batch.h (new file, 52 lines)

@ -0,0 +1,52 @@
#ifndef SRC_BERGAMOT_BATCH_H
#define SRC_BERGAMOT_BATCH_H
#include "request.h"
#include "translator/beam_search.h"
namespace marian {
namespace bergamot {
class Batch {
public:
Batch() {}
void clear() { sentences_.clear(); }
// Methods to construct and determine poison.
static Batch poison() {
Batch batch;
batch.poison_ = true;
return batch;
}
bool isPoison() const { return poison_; }
size_t size() const { return sentences_.size(); }
void add(const RequestSentence &sentence);
// Accessors to read from a Batch. For use in BatchTranslator (consumer on a
// PCQueue holding batches).
//
// sentences() are used to access sentences to construct marian internal
// batch.
const RequestSentences &sentences() { return sentences_; }
// On obtaining Histories after translating a batch, completeBatch can be
// called with the Histories, which forwards the call to Request through
// RequestSentence and triggers completion by setting the promised value to
// the future given to the client.
void completeBatch(const Histories &histories);
// Convenience function to log batch-statistics. numTokens, max-length.
void log();
private:
bool poison_{false};
RequestSentences sentences_;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_BATCH_H_


@ -1,4 +1,5 @@
#include "batch_translator.h"
#include "batch.h"
#include "common/logging.h"
#include "data/corpus.h"
#include "data/text_input.h"
@ -8,15 +9,12 @@ namespace marian {
namespace bergamot {
BatchTranslator::BatchTranslator(DeviceId const device,
PCQueue<PCItem> &pcqueue,
std::vector<Ptr<Vocab const>> &vocabs,
Ptr<Options> options)
: device_(device), options_(options), pcqueue_(&pcqueue), vocabs_(&vocabs) {
: device_(device), options_(options), vocabs_(&vocabs) {}
thread_ = std::thread([this] { this->mainloop(); });
}
void BatchTranslator::initGraph() {
void BatchTranslator::initialize() {
// Initializes the graph.
if (options_->hasAndNotEmpty("shortlist")) {
int srcIdx = 0, trgIdx = 1;
bool shared_vcb = vocabs_->front() == vocabs_->back();
@ -38,15 +36,14 @@ void BatchTranslator::initGraph() {
scorer->setShortlistGenerator(slgen_);
}
}
graph_->forward();
}
void BatchTranslator::translate(RequestSentences &requestSentences,
Histories &histories) {
void BatchTranslator::translate(Batch &batch) {
std::vector<data::SentenceTuple> batchVector;
for (auto &sentence : requestSentences) {
auto &sentences = batch.sentences();
for (auto &sentence : sentences) {
data::SentenceTuple sentence_tuple(sentence.lineNumber());
Segment segment = sentence.getUnderlyingSegment();
sentence_tuple.push_back(segment);
@ -89,35 +86,32 @@ void BatchTranslator::translate(RequestSentences &requestSentences,
for (size_t j = 0; j < maxDims.size(); ++j)
subBatches[j]->setWords(words[j]);
auto batch = Ptr<CorpusBatch>(new CorpusBatch(subBatches));
batch->setSentenceIds(sentenceIds);
auto corpus_batch = Ptr<CorpusBatch>(new CorpusBatch(subBatches));
corpus_batch->setSentenceIds(sentenceIds);
auto trgVocab = vocabs_->back();
auto search = New<BeamSearch>(options_, scorers_, trgVocab);
histories = std::move(search->search(graph_, batch));
auto histories = std::move(search->search(graph_, corpus_batch));
batch.completeBatch(histories);
}
void BatchTranslator::mainloop() {
initGraph();
#ifdef WITH_PTHREADS
PCItem pcitem;
void BatchTranslator::consumeFrom(PCQueue<Batch> &pcqueue) {
Batch batch;
Histories histories;
while (true) {
pcqueue_->ConsumeSwap(pcitem);
if (pcitem.isPoison()) {
pcqueue.ConsumeSwap(batch);
if (batch.isPoison()) {
return;
} else {
translate(pcitem.sentences, histories);
for (int i = 0; i < pcitem.sentences.size(); i++) {
pcitem.sentences[i].completeSentence(histories[i]);
}
translate(batch);
}
}
}
void BatchTranslator::join() { thread_.join(); }
#endif
} // namespace bergamot
} // namespace marian


@ -4,14 +4,18 @@
#include <string>
#include <vector>
#include "batch.h"
#include "common/utils.h"
#include "data/shortlist.h"
#include "definitions.h"
#include "pcqueue.h"
#include "request.h"
#include "translator/history.h"
#include "translator/scorers.h"
#ifdef WITH_PTHREADS
#include "pcqueue.h"
#endif
namespace marian {
namespace bergamot {
@ -22,29 +26,27 @@ class BatchTranslator {
// shut down in Service which calls join() on the threads.
public:
BatchTranslator(DeviceId const device, PCQueue<PCItem> &pcqueue,
std::vector<Ptr<Vocab const>> &vocabs, Ptr<Options> options);
void join();
BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
Ptr<Options> options);
// convenience function for logging. TODO(jerin)
std::string _identifier() { return "worker" + std::to_string(device_.no); }
void translate(Batch &batch);
void initialize();
#ifdef WITH_PTHREADS
void consumeFrom(PCQueue<Batch> &pcqueue);
#endif
private:
void initGraph();
void translate(RequestSentences &requestSentences, Histories &histories);
void mainloop();
Ptr<Options> options_;
DeviceId device_;
std::vector<Ptr<Vocab const>> *vocabs_;
Ptr<ExpressionGraph> graph_;
std::vector<Ptr<Scorer>> scorers_;
Ptr<data::ShortlistGenerator const> slgen_;
PCQueue<PCItem> *pcqueue_;
std::thread thread_;
};
} // namespace bergamot
} // namespace marian


@ -1,4 +1,5 @@
#include "batcher.h"
#include "batch.h"
#include "common/logging.h"
#include <cassert>
@ -6,49 +7,64 @@ namespace marian {
namespace bergamot {
Batcher::Batcher(Ptr<Options> options) {
max_input_tokens_ = options->get<int>("max-input-tokens");
bucket_.resize(options->get<int>("max-input-sentence-tokens") + 1);
ABORT_IF(
max_input_tokens_ < bucket_.size() - 1,
"max-input-tokens cannot be less than than max-input-sentence-tokens, "
"batcher fail");
miniBatchWords = options->get<int>("mini-batch-words");
bucket_.resize(options->get<int>("max-length-break") + 1);
ABORT_IF(bucket_.size() - 1 > miniBatchWords,
"Fatal: max-length-break > mini-batch-words will lead to sentences "
"longer than what can fit in a batch.");
}
void Batcher::addSentenceWithPriority(RequestSentence &sentence) {
int bucket_id = sentence.numTokens();
size_t bucket_id = sentence.numTokens();
assert(bucket_id < bucket_.size());
bucket_[bucket_id].insert(sentence);
}
void Batcher::cleaveBatch(RequestSentences &sentences) {
bool Batcher::operator>>(Batch &batch) { return cleaveBatch(batch); }
bool Batcher::cleaveBatch(Batch &batch) {
// For now simply iterates on buckets and converts batches greedily. This
// has to be enhanced with optimizing over priority. The baseline
// implementation should at least be as fast as marian's maxi-batch with full
// corpus size as maxi-batch size.
batch.clear();
size_t paddedBatchSize = 0;
int segments_added = 0;
int current_input_tokens = 0;
int padded_batch_size = 0;
int prev_padded_batch_size;
for (int i = 0; i < bucket_.size(); i++) {
auto p = bucket_[i].begin();
while (p != bucket_[i].end()) {
padded_batch_size = (segments_added + 1) * i;
if (padded_batch_size <= max_input_tokens_) {
auto q = p;
++p;
current_input_tokens += i;
sentences.push_back(*q);
++segments_added;
bucket_[i].erase(q);
prev_padded_batch_size = padded_batch_size;
for (size_t length = 0; length < bucket_.size(); length++) {
auto p = bucket_[length].begin();
while (p != bucket_[length].end()) {
paddedBatchSize = (batch.size() + 1) * length;
if (paddedBatchSize <= miniBatchWords) {
auto q = p++;
batch.add(*q);
bucket_[length].erase(q);
} else {
return;
// Check if elements exist
assert(batch.size() > 0);
return true;
}
}
}
bool isValidBatch = batch.size() > 0;
return isValidBatch;
}
void Batcher::addWholeRequest(Ptr<Request> request) {
for (size_t i = 0; i < request->numSegments(); i++) {
RequestSentence requestSentence(i, request);
addSentenceWithPriority(requestSentence);
}
}
#ifdef WITH_PTHREADS
void Batcher::produceTo(PCQueue<Batch> &pcqueue) {
Batch batch;
while (cleaveBatch(batch)) {
pcqueue.ProduceSwap(batch);
}
}
#endif
} // namespace bergamot
} // namespace marian


@ -1,11 +1,16 @@
#ifndef SRC_BERGAMOT_BATCHER_H_
#define SRC_BERGAMOT_BATCHER_H_
#include "batch.h"
#include "common/options.h"
#include "data/corpus_base.h"
#include "definitions.h"
#include "request.h"
#ifdef WITH_PTHREADS
#include "pcqueue.h"
#endif
#include <set>
#include <vector>
@ -19,14 +24,20 @@ public:
// sentence. This method inserts the sentence into the internal data-structure
// which maintains priority among sentences from multiple concurrent requests.
void addSentenceWithPriority(RequestSentence &sentence);
void addWholeRequest(Ptr<Request> request);
#ifdef WITH_PTHREADS
void produceTo(PCQueue<Batch> &pcqueue);
#endif
// Loads sentences with sentences compiled from (tentatively) multiple
// requests optimizing for both padding and priority.
void cleaveBatch(RequestSentences &sentences);
bool cleaveBatch(Batch &batch);
bool operator>>(Batch &batch); // alias
private:
unsigned int max_input_tokens_;
size_t miniBatchWords;
std::vector<std::set<RequestSentence>> bucket_;
size_t batchNumber_{0};
};
} // namespace bergamot

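As a usage note, the new `operator>>` alias lets single-threaded code drain the batcher with a simple loop; a minimal sketch (not part of this commit), mirroring what `Service::translate` does later in this diff when `cpu-threads` is 0:
```cpp
#include "batch.h"
#include "batch_translator.h"
#include "batcher.h"

namespace marian {
namespace bergamot {

// Sketch only: drain every batch the Batcher can cleave on the calling thread.
// Assumes the batcher has been fed via addWholeRequest() and that
// translator.initialize() has already been called.
void drainSingleThreaded(Batcher &batcher, BatchTranslator &translator) {
  Batch batch;
  while (batcher >> batch) {     // alias for cleaveBatch(batch)
    translator.translate(batch); // fills Histories and calls completeBatch()
  }
}

} // namespace bergamot
} // namespace marian
```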

@ -5,7 +5,8 @@
namespace marian {
namespace bergamot {
marian::ConfigParser createConfigParser() {
inline marian::ConfigParser createConfigParser() {
marian::ConfigParser cp(marian::cli::mode::translation);
cp.addOption<std::string>(
"--ssplit-prefix-file", "Bergamot Options",
@ -15,14 +16,9 @@ marian::ConfigParser createConfigParser() {
"[paragraph, sentence, wrapped_text]", "paragraph");
cp.addOption<int>(
"--max-input-sentence-tokens", "Bergamot Options",
"--max-length-break", "Bergamot Options",
"Maximum input tokens to be processed in a single sentence.", 128);
cp.addOption<int>("--max-input-tokens", "Bergamot Options",
"Maximum input tokens in a batch. control for"
"Bergamot Queue",
1024);
return cp;
}


@ -1,7 +1,7 @@
#include "request.h"
#include "definitions.h"
#include "translation_result.h"
#include "response.h"
#include "sentence_ranges.h"
#include "common/logging.h"
@ -10,15 +10,15 @@
namespace marian {
namespace bergamot {
Request::Request(unsigned int Id, int lineNumberBegin,
// -----------------------------------------------------------------
Request::Request(size_t Id, size_t lineNumberBegin,
std::vector<Ptr<Vocab const>> &vocabs, std::string &&source,
Segments &&segments,
std::vector<TokenRanges> &&sourceAlignments,
std::promise<TranslationResult> translationResultPromise)
Segments &&segments, SentenceRanges &&sourceRanges,
std::promise<Response> responsePromise)
: Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs),
source_(std::move(source)), segments_(std::move(segments)),
sourceAlignments_(std::move(sourceAlignments)),
response_(std::move(translationResultPromise)) {
sourceRanges_(std::move(sourceRanges)),
response_(std::move(responsePromise)) {
counter_ = segments_.size();
histories_.resize(segments_.size(), nullptr);
@ -47,11 +47,10 @@ void Request::processHistory(size_t index, Ptr<History> history) {
void Request::completeRequest() {
// Request no longer needs to hold the content, can transfer it to
// TranslationResult.
TranslationResult translation_result(std::move(source_),
std::move(sourceAlignments_),
std::move(histories_), *vocabs_);
response_.set_value(std::move(translation_result));
// Response.
Response response(std::move(source_), std::move(sourceRanges_),
std::move(histories_), *vocabs_);
response_.set_value(std::move(response));
}
bool Request::operator<(const Request &b) const {
@ -59,6 +58,8 @@ bool Request::operator<(const Request &b) const {
return Id_ < b.Id_;
}
// ------------------------------------------------------------------
RequestSentence::RequestSentence(size_t index, Ptr<Request> request)
: index_(index), request_(request) {}
@ -88,5 +89,7 @@ bool operator<(const RequestSentence &a, const RequestSentence &b) {
return a.request_ < b.request_;
}
// ----------------------------------------------------------------------
} // namespace bergamot
} // namespace marian


@ -3,30 +3,30 @@
//
// Request: holds the input blob of a text, Segments (vector<Words>) which are
// to go to the batching mechanism and alignments between the processed
// segments and the input blob (sourceAlignments). In addition, Request takes
// segments and the input blob (sourceTokenRanges). In addition, Request takes
// care of the barrier which fires when all the Segments in a request are done
// translating by the workers (BatchTranslator). Request is to be extended with
// notions of Priority (sequence, user-given).
// translating by the workers (BatchTranslator).
// TODO(jerinphilip): Extend Request with notions of Priority (sequence,
// user-given).
//
// RequestSentence: is a tuple of (index, Request*). This provides the
// RequestSentence: is a tuple of (index, Ptr<Request>). This provides the
// batching mechanism access to the segment within the request. The backref to
// Request allows event triggering the barrier upon completion of the last
// sentence by a worker.
//
// PCItem: is a vector of RequestSentences and a batchNumber, which is what the
// PCQueue holds. The batches are constructed from segments returned by a
// RequestSentence. Can be enhanced with paddingSize, countTokens eventually for
// logging.
#ifndef SRC_BERGAMOT_REQUEST_H_
#define SRC_BERGAMOT_REQUEST_H_
#include "definitions.h"
#include "translation_result.h"
#include "response.h"
#include "sentence_ranges.h"
#include "common/logging.h"
#include "data/types.h"
#include "translator/beam_search.h"
#include <cassert>
#include <future>
#include <vector>
@ -34,24 +34,11 @@ namespace marian {
namespace bergamot {
class Request {
private:
unsigned int Id_;
int lineNumberBegin_;
std::string source_;
std::atomic<int> counter_;
std::vector<Ptr<Vocab const>> *vocabs_;
Segments segments_;
std::vector<TokenRanges> sourceAlignments_;
std::vector<Ptr<History>> histories_;
std::promise<TranslationResult> response_;
public:
Request(unsigned int Id, int lineNumberBegin,
Request(size_t Id, size_t lineNumberBegin,
std::vector<Ptr<Vocab const>> &vocabs_, std::string &&source,
Segments &&segments, std::vector<TokenRanges> &&sourceAlignments,
std::promise<TranslationResult> translationResultPromise);
Segments &&segments, SentenceRanges &&sourceTokenRanges,
std::promise<Response> responsePromise);
// Obtain the count of tokens in the segment corresponding to index. Used to
// insert sentence from multiple requests into the corresponding size bucket.
@ -65,7 +52,8 @@ public:
// several requests.
Segment getSegment(size_t index) const;
// For notions of priority among requests (used to enable <set> in Batcher).
// For notions of priority among requests, used to enable std::set in
// Batcher.
bool operator<(const Request &request) const;
// Processes a history obtained after translating in a heterogeneous batch
@ -74,40 +62,64 @@ public:
// On completion of last segment, sets value of the promise.
void completeRequest();
private:
size_t Id_;
size_t lineNumberBegin_;
// Multiple translation-workers can concurrently access the same Request. The
// following atomic atomically operates on the variable holding sentences
// remaining to be translated.
std::atomic<int> counter_;
// source_ holds the source string to be translated. segments_ hold the
// sentences generated from source_ in vector<Words>. sourceRanges_ are
// string_views of the text corresponding to these words, pointing to
// sequences in source_. histories_ is a buffer which eventually stores the
// translations of each segment in the corresponding index.
std::string source_;
Segments segments_;
SentenceRanges sourceRanges_;
std::vector<Ptr<History>> histories_;
// Members above are moved into newly constructed Response on completion
// of translation of all segments. The promise below is set to this Response
// value. future to this promise is made available to the user through
// Service.
std::promise<Response> response_;
// Constructing Response requires the vocabs_ used to generate Request.
std::vector<Ptr<Vocab const>> *vocabs_;
};
class RequestSentence {
private:
size_t index_;
Ptr<Request> request_;
// A RequestSentence provides a view to a sentence within a Request. Existence
// of this class allows the sentences and associated information to be kept
// within Request.
public:
RequestSentence(size_t, Ptr<Request>);
size_t numTokens() const;
// lineNumber in Request, used for matching marian-decoder. SentenceTuple
// requires lineNumber to be set for Corpus based batches.
size_t lineNumber() const;
// Accessor to the segment represented by the RequestSentence.
Segment getUnderlyingSegment() const;
// Forwards call to Request, checking for completion.
void completeSentence(Ptr<History> history);
friend bool operator<(const RequestSentence &a, const RequestSentence &b);
private:
size_t index_;
Ptr<Request> request_;
};
typedef std::vector<RequestSentence> RequestSentences;
struct PCItem {
int batchNumber;
RequestSentences sentences;
// PCItem should be default constructible for PCQueue. Default constructed
// element is poison.
PCItem() : batchNumber(-1) {}
// PCItem constructor to construct a legit PCItem.
explicit PCItem(int batchNumber, RequestSentences &&sentences)
: batchNumber(batchNumber), sentences(std::move(sentences)) {}
// Convenience function to determine poison.
bool isPoison() { return (batchNumber == -1); }
};
} // namespace bergamot
} // namespace marian


@ -0,0 +1,98 @@
#include "response.h"
#include "sentence_ranges.h"
#include "common/logging.h"
#include "data/alignment.h"
#include <utility>
namespace marian {
namespace bergamot {
Response::Response(std::string &&source, SentenceRanges &&sourceRanges,
Histories &&histories, std::vector<Ptr<Vocab const>> &vocabs)
: source_(std::move(source)), sourceRanges_(std::move(sourceRanges)),
histories_(std::move(histories)), vocabs_(&vocabs) {}
void Response::move(std::string &source, std::string &translation,
SentenceMappings &sentenceMappings) {
// Construct required stuff first.
constructTranslation();
constructSentenceMappings(sentenceMappings);
// Move content out.
source = std::move(source_);
translation = std::move(translation_);
// The above assignments expect source and translation to be moved out,
// which invalidates the ranges below; hence they are cleared.
sourceRanges_.clear();
targetRanges_.clear();
histories_.clear();
}
void Response::constructTranslation() {
if (translationConstructed_) {
return;
}
// Reserving length at least as much as source_ seems like a reasonable thing
// to do to avoid reallocations.
translation_.reserve(source_.size());
// In a first step, the decoded units (individual sentences) are compiled
// into a huge string. This is done by computing indices first and appending
// to the string as each sentence is decoded.
std::vector<std::pair<size_t, size_t>> translationRanges;
size_t offset{0};
bool first{true};
for (auto &history : histories_) {
// TODO(jerin): Change hardcode of nBest = 1
NBestList onebest = history->nBest(1);
Result result = onebest[0]; // Expecting only one result;
Words words = std::get<0>(result);
auto targetVocab = vocabs_->back();
std::string decoded = targetVocab->decode(words);
if (first) {
first = false;
} else {
translation_ += " ";
++offset;
}
translation_ += decoded;
translationRanges.emplace_back(offset, decoded.size());
offset += decoded.size();
}
// Once the entire string is constructed, there is no further possibility of
// reallocation in the string's storage, so the indices are converted into
// string_views.
for (auto &range : translationRanges) {
// TODO(@jerinphilip): Currently considers target tokens as whole text.
// Needs to be further enhanced in marian-dev to extract alignments.
std::vector<string_view> targetMappings;
const char *begin = &translation_[range.first];
targetMappings.emplace_back(begin, range.second);
targetRanges_.addSentence(targetMappings);
}
translationConstructed_ = true;
}
void Response::constructSentenceMappings(
Response::SentenceMappings &sentenceMappings) {
for (size_t i = 0; i < sourceRanges_.numSentences(); i++) {
string_view src = sourceRanges_.sentence(i);
string_view tgt = targetRanges_.sentence(i);
sentenceMappings.emplace_back(src, tgt);
}
}
} // namespace bergamot
} // namespace marian

src/translator/response.h (new file, 99 lines)

@ -0,0 +1,99 @@
#ifndef SRC_BERGAMOT_RESPONSE_H_
#define SRC_BERGAMOT_RESPONSE_H_
#include "sentence_ranges.h"
#include "data/types.h"
#include "definitions.h"
#include "translator/beam_search.h"
#include <cassert>
#include <string>
#include <vector>
namespace marian {
namespace bergamot {
class Response {
// Response is a marian internal class (not a bergamot-translator class)
// holding source blob of text, vector of TokenRanges corresponding to each
// sentence in the source text blob and histories obtained from translating
// these sentences.
//
// This class provides an API at a higher level in comparison to History to
// access translations and additionally use string_view manipulations to
// recover structure in translation from source-text's structure known through
// reference string and string_view. As many of these computations are not
// required until invoked, they are computed as required and stored in data
// members where it makes sense to do so (translation,translationTokenRanges).
//
// Examples of such use-cases are:
// translation()
// translationInSourceStructure() TODO(@jerinphilip)
// alignment(idx) TODO(@jerinphilip)
// sentenceMappings (for bergamot-translator)
public:
Response(std::string &&source, SentenceRanges &&sourceRanges,
Histories &&histories,
// Required for constructing translation and TokenRanges within
// translation lazily.
std::vector<Ptr<Vocab const>> &vocabs);
// Move constructor.
Response(Response &&other)
: source_(std::move(other.source_)),
translation_(std::move(other.translation_)),
sourceRanges_(std::move(other.sourceRanges_)),
targetRanges_(std::move(other.targetRanges_)),
histories_(std::move(other.histories_)),
vocabs_(std::move(other.vocabs_)){};
// Prevents CopyConstruction and CopyAssignment. sourceRanges_ is constituted
// by string_view and copying invalidates the data member.
Response(const Response &) = delete;
Response &operator=(const Response &) = delete;
typedef std::vector<std::pair<const string_view, const string_view>>
SentenceMappings;
// Moves source sentence into source, translated text into translation.
// Pairs of string_views to corresponding sentences in
// source and translation are loaded into sentenceMappings. These string_views
// reference the new source and translation.
//
// Calling move() invalidates the Response object as ownership is transferred.
// Exists for moving strc
void move(std::string &source, std::string &translation,
SentenceMappings &sentenceMappings);
const Histories &histories() const { return histories_; }
const std::string &source() const { return source_; }
const std::string &translation() {
constructTranslation();
return translation_;
}
// A convenience function provided to return translated text placed within
// source's structure. This is useful when the source text is a multi-line
// paragraph or string_views extracted from structured text like HTML and it's
// desirable to place the individual sentences in the locations of the source
// sentences.
// const std::string translationInSourceStructure();
// const PendingAlignmentType alignment(size_t idx);
private:
void constructTranslation();
void constructSentenceMappings(SentenceMappings &);
std::string source_;
SentenceRanges sourceRanges_;
Histories histories_;
std::vector<Ptr<Vocab const>> *vocabs_;
bool translationConstructed_{false};
std::string translation_;
SentenceRanges targetRanges_;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_RESPONSE_H_

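For orientation, a small sketch (not part of this commit) of draining a completed `Response` through `move()`, which is the path `TranslationModel::translate` takes earlier in this diff:
```cpp
#include <iostream>
#include <string>

#include "response.h"

namespace marian {
namespace bergamot {

// Sketch only: take ownership of the text held by a finished Response.
// After move(), `source` and `translation` own the strings, and the
// string_view pairs in sentenceMappings point into those new strings.
void drain(Response &response) {
  std::string source, translation;
  Response::SentenceMappings sentenceMappings;
  response.move(source, translation, sentenceMappings);

  for (auto &sentencePair : sentenceMappings) {
    std::cout << std::string(sentencePair.first.data(), sentencePair.first.size())
              << " -> "
              << std::string(sentencePair.second.data(), sentencePair.second.size())
              << std::endl;
  }
}

} // namespace bergamot
} // namespace marian
```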

@ -0,0 +1,46 @@
#include "sentence_ranges.h"
#include <cassert>
#include <iostream>
namespace marian {
namespace bergamot {
void SentenceRanges::addSentence(std::vector<string_view> &wordRanges) {
addSentence(std::begin(wordRanges), std::end(wordRanges));
}
void SentenceRanges::addSentence(WordIterator begin, WordIterator end) {
size_t size = flatByteRanges_.size();
flatByteRanges_.insert(std::end(flatByteRanges_), begin, end);
sentenceBeginIds_.push_back(size);
}
string_view SentenceRanges::sentence(size_t index) const {
size_t bos_id;
string_view eos, bos;
bos_id = sentenceBeginIds_[index];
bos = flatByteRanges_[bos_id];
if (index + 1 == numSentences()) {
eos = flatByteRanges_.back();
} else {
assert(index < numSentences());
size_t eos_id = sentenceBeginIds_[index + 1];
--eos_id;
eos = flatByteRanges_[eos_id];
}
return sentenceBetween(bos, eos);
}
string_view SentenceRanges::sentenceBetween(string_view firstWord,
string_view lastWord) const {
const char *data = firstWord.data();
size_t size = lastWord.data() + lastWord.size() - firstWord.data();
return string_view(data, size);
}
} // namespace bergamot
} // namespace marian


@ -0,0 +1,52 @@
#ifndef BERGAMOT_SENTENCE_RANGES_H_
#define BERGAMOT_SENTENCE_RANGES_H_
#include "data/types.h"
#include <cassert>
#include <vector>
namespace marian {
namespace bergamot {
class SentenceRanges {
// SentenceRanges stores string_views into a source text, with additional
// annotations to mark sentence boundaries.
//
// Given the availability of annotations, this container provides the
// capability to add sentences and access individual sentences.
public:
typedef std::vector<string_view>::iterator WordIterator;
void addSentence(std::vector<string_view> &wordRanges);
void addSentence(WordIterator begin, WordIterator end);
void clear() {
flatByteRanges_.clear();
sentenceBeginIds_.clear();
}
size_t numSentences() const { return sentenceBeginIds_.size(); }
// Returns a string_view into the ith sentence.
string_view sentence(size_t index) const;
private:
// A flat storage for string_views. Can be words or sentences.
std::vector<string_view> flatByteRanges_;
// The container grows dynamically with addSentence. size_t marking index is
// used to ensure the sentence boundaries stay same while underlying storage
// might be changed during reallocation.
std::vector<size_t> sentenceBeginIds_;
// Utility function to extract the string starting at firstWord and ending at
// lastWord as a single string-view.
string_view sentenceBetween(string_view firstWord,
string_view lastWord) const;
};
} // namespace bergamot
} // namespace marian
#endif // BERGAMOT_SENTENCE_RANGES_H_

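As a small, assumed standalone illustration of the container above (assuming the marian headers pulled in by `sentence_ranges.h` are on the include path): word-level `string_view`s added via `addSentence` can later be recovered as a single sentence-level view.
```cpp
#include <iostream>
#include <string>
#include <vector>

#include "sentence_ranges.h"

int main() {
  std::string blob = "Hello world. How are you?";
  // Word-level views into the first sentence of `blob`.
  std::vector<marian::string_view> words = {
      marian::string_view(blob.data(), 5),       // "Hello"
      marian::string_view(blob.data() + 6, 6)};  // "world."

  marian::bergamot::SentenceRanges ranges;
  ranges.addSentence(words);

  // sentence(0) spans from the start of the first word to the end of the last.
  marian::string_view sentence = ranges.sentence(0);
  std::cout << std::string(sentence.data(), sentence.size()) << std::endl;
  return 0;
}
```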

@ -1,7 +1,7 @@
#include "sentence_splitter.h"
#include "common/cli_helper.h"
#include "common/logging.h"
#include "common/options.h"
#include "sentence_splitter.h"
#include <string>
namespace marian {
@ -30,8 +30,9 @@ SentenceSplitter::SentenceSplitter(marian::Ptr<marian::Options> options)
ug::ssplit::SentenceStream
SentenceSplitter::createSentenceStream(const string_view &input) {
return std::move(ug::ssplit::SentenceStream(input.data(), input.size(),
this->ssplit_, mode_));
std::string_view input_converted(input.data(), input.size());
return std::move(
ug::ssplit::SentenceStream(input_converted, this->ssplit_, mode_));
}
ug::ssplit::SentenceStream::splitmode


@ -1,4 +1,5 @@
#include "service.h"
#include "batch.h"
#include "definitions.h"
#include <string>
@ -8,26 +9,53 @@ namespace marian {
namespace bergamot {
Service::Service(Ptr<Options> options)
: requestId_(0), batchNumber_(0),
numWorkers_(options->get<int>("cpu-threads")),
: requestId_(0), numWorkers_(options->get<int>("cpu-threads")),
vocabs_(std::move(loadVocabularies(options))),
text_processor_(vocabs_, options), batcher_(options),
pcqueue_(2 * options->get<int>("cpu-threads")) {
text_processor_(vocabs_, options), batcher_(options)
#ifdef WITH_PTHREADS
,
pcqueue_(2 * options->get<int>("cpu-threads"))
#endif // WITH_PTHREADS
{
workers_.reserve(numWorkers_);
if (numWorkers_ == 0) {
// In case workers are 0, a single-translator is created and initialized
// in the main thread.
marian::DeviceId deviceId(/*cpuId=*/0, DeviceType::cpu);
translators_.emplace_back(deviceId, vocabs_, options);
translators_.back().initialize();
} else {
#ifdef WITH_PTHREADS
// If the number of workers specified is greater than 0, translators_ are
// populated with uninitialized instances. These are then initialized inside
// individual threads and set to consume from the producer-consumer queue.
workers_.reserve(numWorkers_);
translators_.reserve(numWorkers_);
for (size_t cpuId = 0; cpuId < numWorkers_; cpuId++) {
marian::DeviceId deviceId(cpuId, DeviceType::cpu);
translators_.emplace_back(deviceId, vocabs_, options);
for (int i = 0; i < numWorkers_; i++) {
marian::DeviceId deviceId(i, DeviceType::cpu);
workers_.emplace_back(deviceId, pcqueue_, vocabs_, options);
auto &translator = translators_.back();
workers_.emplace_back([&translator, this] {
translator.initialize();
translator.consumeFrom(pcqueue_);
});
}
#else // WITH_PTHREADS
ABORT(
"Fatal: Service started requesting multiple threads while compiled with "
"COMPILE_THREAD_VARIANT=off. Please check your cmake build "
"configuration");
#endif
}
}
std::future<TranslationResult> Service::translateWithCopy(std::string input) {
std::future<Response> Service::translateWithCopy(std::string input) {
return translate(std::move(input));
}
std::future<TranslationResult> Service::translate(std::string &&input) {
// Takes in a blob of text. Segments and std::vector<TokenRanges> are
std::future<Response> Service::translate(std::string &&input) {
// Takes in a blob of text. Segments and SentenceRanges are
// extracted from the input (blob of text) and used to construct a Request
// along with a promise. promise value is set by the worker completing a
// request.
@ -40,56 +68,46 @@ std::future<TranslationResult> Service::translate(std::string &&input) {
// returns future corresponding to the promise.
Segments segments;
std::vector<TokenRanges> sourceAlignments;
text_processor_.process(input, segments, sourceAlignments);
SentenceRanges sourceRanges;
text_processor_.process(input, segments, sourceRanges);
std::promise<TranslationResult> translationResultPromise;
auto future = translationResultPromise.get_future();
std::promise<Response> responsePromise;
auto future = responsePromise.get_future();
Ptr<Request> request = New<Request>(
requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(input),
std::move(segments), std::move(sourceAlignments),
std::move(translationResultPromise));
std::move(segments), std::move(sourceRanges), std::move(responsePromise));
for (int i = 0; i < request->numSegments(); i++) {
RequestSentence requestSentence(i, request);
batcher_.addSentenceWithPriority(requestSentence);
batcher_.addWholeRequest(request);
if (numWorkers_ > 0) {
#ifdef WITH_PTHREADS
batcher_.produceTo(pcqueue_);
#endif
} else {
// Queue single-threaded
Batch batch;
while (batcher_ >> batch) {
translators_[0].translate(batch);
}
}
int numSentences;
do {
RequestSentences batchSentences;
batcher_.cleaveBatch(batchSentences);
numSentences = batchSentences.size();
if (numSentences > 0) {
PCItem pcitem(batchNumber_++, std::move(batchSentences));
pcqueue_.ProduceSwap(pcitem);
}
if (batchNumber_ % 500 == 0) {
LOG(info, "Queuing batch {}", batchNumber_);
}
} while (numSentences > 0);
return future;
}
void Service::stop() {
int counter = 0;
#ifdef WITH_PTHREADS
for (auto &worker : workers_) {
PCItem pcitem;
pcqueue_.ProduceSwap(pcitem);
++counter;
Batch poison = Batch::poison();
pcqueue_.ProduceSwap(poison);
}
counter = 0;
for (auto &worker : workers_) {
worker.join();
++counter;
}
workers_.clear(); // Takes care of idempotency.
#endif
}
Service::~Service() { stop(); }


@ -3,15 +3,18 @@
#include "batch_translator.h"
#include "batcher.h"
#include "pcqueue.h"
#include "response.h"
#include "text_processor.h"
#include "translation_result.h"
#include <queue>
#include <vector>
#include "data/types.h"
#ifdef WITH_PTHREADS
#include "pcqueue.h"
#endif
namespace marian {
namespace bergamot {
@ -25,17 +28,17 @@ class Service {
// options = ...;
// service = Service(options);
// std::string input_blob = "Hello World";
// std::future<TranslationResult>
// std::future<Response>
// response = service.translate(std::move(input_blob));
// response.wait();
// TranslationResult result = response.get();
// Response result = response.get();
public:
explicit Service(Ptr<Options> options);
// Constructs new string copying, calls translate internally.
std::future<TranslationResult> translateWithCopy(std::string input);
std::future<TranslationResult> translate(std::string &&input);
std::future<Response> translateWithCopy(std::string input);
std::future<Response> translate(std::string &&input);
void stop();
@ -45,12 +48,11 @@ public:
~Service();
private:
unsigned int requestId_;
unsigned int batchNumber_;
int numWorkers_;
size_t requestId_;
size_t numWorkers_;
// vocabs are used to construct a Request, which later uses it to construct
// TranslationResult (decode from words to string).
// Response (decode from words to string).
std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY
// Consists of:
@ -68,8 +70,12 @@ private:
TextProcessor text_processor_; // ORDER DEPENDENCY
Batcher batcher_;
PCQueue<PCItem> pcqueue_;
std::vector<BatchTranslator> workers_;
std::vector<BatchTranslator> translators_;
#ifdef WITH_PTHREADS
PCQueue<Batch> pcqueue_;
std::vector<std::thread> workers_;
#endif
};
std::vector<Ptr<const Vocab>> loadVocabularies(Ptr<Options> options);


@ -1,6 +1,7 @@
#include "text_processor.h"
#include "data/types.h"
#include "definitions.h"
#include "sentence_ranges.h"
#include "common/options.h"
#include "data/vocab.h"
@ -10,23 +11,22 @@ namespace marian {
namespace bergamot {
Segment TextProcessor::tokenize(const string_view &segment,
TokenRanges &tokenRanges) {
std::vector<string_view> &wordRanges) {
return vocabs_->front()->encodeWithByteRanges(
segment, tokenRanges, /*addEOS=*/false, /*inference=*/true);
segment, wordRanges, /*addEOS=*/false, /*inference=*/true);
}
TextProcessor::TextProcessor(std::vector<Ptr<Vocab const>> &vocabs,
Ptr<Options> options)
: vocabs_(&vocabs), sentence_splitter_(options) {
max_input_sentence_tokens_ = options->get<int>("max-input-sentence-tokens");
max_input_sentence_tokens_ = max_input_sentence_tokens_ - 1;
ABORT_IF(max_input_sentence_tokens_ < 0,
"max-input-sentence-tokens cannot be < 0");
max_length_break_ = options->get<int>("max-length-break");
max_length_break_ = max_length_break_ - 1;
ABORT_IF(max_length_break_ < 0, "max-length-break cannot be < 0");
}
void TextProcessor::process(const string_view &query, Segments &segments,
std::vector<TokenRanges> &sourceRanges) {
SentenceRanges &sourceRanges) {
auto sentenceStream = sentence_splitter_.createSentenceStream(query);
std::string_view sentenceStringPiece;
@ -34,33 +34,34 @@ void TextProcessor::process(const string_view &query, Segments &segments,
while (sentenceStream >> sentenceStringPiece) {
marian::string_view sentence(sentenceStringPiece.data(),
sentenceStringPiece.size());
TokenRanges tokenRanges;
Segment segment = tokenize(sentence, tokenRanges);
std::vector<string_view> wordRanges;
Segment segment = tokenize(sentence, wordRanges);
// There are some cases where SentencePiece or vocab returns no words
// after normalization. 0 prevents any empty entries from being added.
if (segment.size() > 0) {
// Truncate the segment into chunks of at most max_length_break_ tokens.
truncate(segment, tokenRanges, segments, sourceRanges);
truncate(segment, wordRanges, segments, sourceRanges);
}
}
}
void TextProcessor::truncate(Segment &segment, TokenRanges &tokenRanges,
Segments &segments,
std::vector<TokenRanges> &sourceRanges) {
for (int offset = 0; offset < segment.size();
offset += max_input_sentence_tokens_) {
void TextProcessor::truncate(Segment &segment,
std::vector<string_view> &wordRanges,
Segments &segments, SentenceRanges &sourceRanges) {
for (size_t offset = 0; offset < segment.size();
offset += max_length_break_) {
auto start = segment.begin() + offset;
unsigned int left = segment.size() - offset;
unsigned int diff = std::min(max_input_sentence_tokens_, left);
size_t left = segment.size() - offset;
size_t diff = std::min(max_length_break_, left);
segments.emplace_back(start, start + diff);
segments.back().push_back(sourceEosId());
auto astart = tokenRanges.begin() + offset;
sourceRanges.emplace_back(astart, astart + diff);
auto astart = wordRanges.begin() + offset;
sourceRanges.addSentence(astart, astart + diff);
}
}

View File

@ -4,6 +4,7 @@
#include "data/types.h"
#include "data/vocab.h"
#include "definitions.h"
#include "sentence_ranges.h"
#include "sentence_splitter.h"
@ -23,23 +24,24 @@ public:
explicit TextProcessor(std::vector<Ptr<Vocab const>> &vocabs, Ptr<Options>);
void process(const string_view &query, Segments &segments,
std::vector<TokenRanges> &sourceRanges);
SentenceRanges &sourceRanges);
private:
// Tokenizes an input string and returns the corresponding Words. Loads the
// corresponding byte-ranges into tokenRanges.
Segment tokenize(const string_view &input, TokenRanges &tokenRanges);
Segment tokenize(const string_view &input,
std::vector<string_view> &tokenRanges);
// Truncates a sentence into chunks of at most max_length_break_ tokens.
void truncate(Segment &sentence, TokenRanges &tokenRanges, Segments &segments,
std::vector<TokenRanges> &sourceRanges);
void truncate(Segment &sentence, std::vector<string_view> &tokenRanges,
Segments &segments, SentenceRanges &sourceRanges);
// shorthand, used only in truncate()
const Word sourceEosId() const { return vocabs_->front()->getEosId(); }
std::vector<Ptr<Vocab const>> *vocabs_;
SentenceSplitter sentence_splitter_;
unsigned int max_input_sentence_tokens_;
size_t max_length_break_;
};
} // namespace bergamot

View File

@ -1,72 +0,0 @@
#include "translation_result.h"
#include "common/logging.h"
#include "data/alignment.h"
#include <utility>
namespace marian {
namespace bergamot {
TranslationResult::TranslationResult(std::string &&source,
std::vector<TokenRanges> &&sourceRanges,
Histories &&histories,
std::vector<Ptr<Vocab const>> &vocabs)
: source_(std::move(source)), sourceRanges_(std::move(sourceRanges)),
histories_(std::move(histories)) {
std::vector<string_view> sourceMappings;
std::vector<string_view> targetMappings;
// Process sourceRanges_ into sourceMappings.
sourceMappings.reserve(sourceRanges_.size());
for (int i = 0; i < sourceRanges_.size(); i++) {
string_view first = sourceRanges_[i].front();
string_view last = sourceRanges_[i].back();
sourceMappings.emplace_back(first.data(), last.end() - first.begin());
}
// Compiles translations into a single std::string translation_
// Current implementation uses += on std::string, multiple resizes.
// Stores ByteRanges as indices first, followed by conversion into
// string_views.
// TODO(jerin): Add token level string_views here as well.
std::vector<std::pair<int, int>> translationRanges;
size_t offset{0};
bool first{true};
for (auto &history : histories_) {
// TODO(jerin): Change hardcode of nBest = 1
NBestList onebest = history->nBest(1);
Result result = onebest[0]; // Expecting only one result;
Words words = std::get<0>(result);
std::string decoded = (vocabs.back())->decode(words);
if (first) {
first = false;
} else {
translation_ += " ";
++offset;
}
translation_ += decoded;
translationRanges.emplace_back(offset, decoded.size());
offset += decoded.size();
}
// Convert the ByteRanges stored as indices into string_views.
targetMappings.reserve(translationRanges.size());
for (auto &range : translationRanges) {
const char *begin = &translation_[range.first];
targetMappings.emplace_back(begin, range.second);
}
// Finally, build sentenceMappings_ by pairing source and target sentence views.
for (auto src = sourceMappings.begin(), tgt = targetMappings.begin();
src != sourceMappings.end() && tgt != targetMappings.end();
++src, ++tgt) {
sentenceMappings_.emplace_back(*src, *tgt);
auto &t = sentenceMappings_.back();
}
}
} // namespace bergamot
} // namespace marian

View File

@ -1,76 +0,0 @@
#ifndef SRC_BERGAMOT_TRANSLATION_RESULT_H_
#define SRC_BERGAMOT_TRANSLATION_RESULT_H_
#include "data/types.h"
#include "definitions.h"
#include "translator/beam_search.h"
#include <cassert>
#include <string>
#include <vector>
namespace marian {
namespace bergamot {
class TranslationResult {
public:
TranslationResult(std::string &&source,
std::vector<TokenRanges> &&sourceRanges,
Histories &&histories,
std::vector<Ptr<Vocab const>> &vocabs);
TranslationResult(TranslationResult &&other)
: source_(std::move(other.source_)),
translation_(std::move(other.translation_)),
sourceRanges_(std::move(other.sourceRanges_)),
sentenceMappings_(std::move(other.sentenceMappings_)),
histories_(std::move(other.histories_)){};
TranslationResult(const TranslationResult &) = delete;
TranslationResult &operator=(const TranslationResult &) = delete;
// Returns const references to source and translated texts, for external
// consumption.
const std::string &getOriginalText() const { return source_; }
const std::string &getTranslatedText() const { return translation_; }
// A mapping of string_views in the source_ and translation_ is provided as a
// pair for external consumption. Each entry corresponds
// to a (source-sentence, target-sentence).
typedef std::vector<std::pair<const string_view, const string_view>>
SentenceMappings;
const SentenceMappings &getSentenceMappings() const {
return sentenceMappings_;
}
// Return the Quality scores of the translated text.
// Not implemented currently, commenting out.
// const QualityScore &getQualityScore() const { return qualityScore; }
// For development use to benchmark with marian-decoder.
const Histories &getHistories() const { return histories_; }
// @jerinphilip: Why are these members no longer private? For move-semantics
// with consistent string_views for bergamot-translator.
std::string source_;
std::string translation_;
// Added to complete the bergamot-translator spec; redundant while
// sourceMappings_ and targetMappings_ exist, or vice-versa.
SentenceMappings sentenceMappings_;
private:
// Histories are currently required for interoperability with OutputPrinter
// and OutputCollector and hence comparisons with marian-decoder.
// Future hook to gain alignments.
Histories histories_;
// string_views at the token level.
std::vector<TokenRanges> sourceRanges_;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_TRANSLATION_RESULT_H_

28
wasm/CMakeLists.txt Normal file
View File

@ -0,0 +1,28 @@
add_executable(bergamot-translator-worker
bindings/TranslationModelBindings.cpp
bindings/TranslationRequestBindings.cpp
bindings/TranslationResultBindings.cpp
)
# This header inclusion needs to go away later, as the path to the public headers of
# bergamot-translator should be directly available from the "bergamot-translator" target
target_include_directories(bergamot-translator-worker
PRIVATE ${CMAKE_SOURCE_DIR}/src/translator
PRIVATE ${CMAKE_SOURCE_DIR}
)
# This compile definition is required for generating binding code properly
target_compile_definitions(bergamot-translator-worker PRIVATE WASM_BINDINGS)
target_compile_options(bergamot-translator-worker PRIVATE ${WASM_COMPILE_FLAGS})
set(LINKER_FLAGS "--bind -s ASSERTIONS=0 -s DISABLE_EXCEPTION_CATCHING=1 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -s NO_DYNAMIC_EXECUTION=1")
if (NOT PACKAGE_DIR STREQUAL "")
get_filename_component(REALPATH_PACKAGE_DIR ${PACKAGE_DIR} REALPATH BASE_DIR ${CMAKE_BINARY_DIR})
set(LINKER_FLAGS "${LINKER_FLAGS} --preload-file ${REALPATH_PACKAGE_DIR}@/")
endif()
set_target_properties(bergamot-translator-worker PROPERTIES
SUFFIX ".js"
LINK_FLAGS ${LINKER_FLAGS}
)
target_link_libraries(bergamot-translator-worker bergamot-translator)

65
wasm/README.md Normal file
View File

@ -0,0 +1,65 @@
## Using Bergamot Translator in JavaScript
The example file `bergamot.html` in the folder `test_page` demonstrates how to use the bergamot translator in JavaScript via a `<script>` tag.
Please note that everything below assumes that the [bergamot project-specific model files](https://github.com/mozilla-applied-ml/bergamot-models) were packaged into the wasm binary (using the compile instructions given in the top-level README).
### Using JS APIs
```js
// The model configuration as YAML formatted string. For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
// This example captures the most relevant options: model file, vocabulary files and shortlist file
const modelConfig = "{\"models\":[\"/esen/model.esen.npz\"],\"vocabs\":[\"/esen/vocab.esen.spm\",\"/esen/vocab.esen.spm\"],\"shortlist\":[\"/esen/lex.esen.s2t\"],\"beam-size\":1}";
// Instantiate the TranslationModel
const model = new Module.TranslationModel(modelConfig);
// Instantiate the arguments of translate() API i.e. TranslationRequest and input (vector<string>)
const request = new Module.TranslationRequest();
const input = new Module.VectorString;
// Initialize the input
input.push_back("Hola"); input.push_back("Mundo");
// translate the input; the result is a vector<TranslationResult>
const result = model.translate(input, request);
// Print original and translated text from each entry of vector<TranslationResult>
for (let i = 0; i < result.size(); i++) {
console.log(' original=' + result.get(i).getOriginalText() + ', translation=' + result.get(i).getTranslatedText());
}
// Don't forget to clean up the instances
model.delete();
request.delete();
input.delete();
```
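
The snippet above assumes that the generated `bergamot-translator-worker.js` has already been loaded and the wasm runtime has finished initializing. Below is a minimal sketch of that setup, following the same pattern as `test_page/bergamot.html`; the hook body and logging are illustrative only:

```js
// Define Module before loading the generated script so Emscripten picks up the hook.
var Module = {
  onRuntimeInitialized: function() {
    // From this point on, Module.TranslationModel, Module.TranslationRequest and
    // Module.VectorString (used above) are available.
    console.log("Bergamot translator wasm runtime initialized");
  }
};
```

The generated script is then loaded afterwards, e.g. `<script src="bergamot-translator-worker.js"></script>`, as done in the test page.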
### Demo (see everything in action)
* Start the test webserver (ensure you have the latest nodejs installed)
```bash
cd test_page
bash start_server.sh
```
* Open any of the browsers below
* Firefox Nightly 87+: make sure the following prefs are on (about:config)
```
dom.postMessage.sharedArrayBuffer.bypassCOOP_COEP.insecure.enabled = true
javascript.options.wasm_simd = true
javascript.options.wasm_simd_wormhole = true
```
* Chrome Canary 90+: start with the following argument
```
--js-flags="--experimental-wasm-simd"
```
* Browse to the following page:
```
http://localhost:8000/bergamot.html
```
* Run some translations:
* Choose a model and press `Load Model`
* Type a sentence to be translated in the `From` textbox and press `Translate`
* See the results in the `To` and `Log` textboxes

View File

@ -0,0 +1,23 @@
/*
* TranslationModelBindings.cpp
*
* Bindings for TranslationModel class
*/
#include <emscripten/bind.h>
#include "TranslationModel.h"
using namespace emscripten;
// Binding code
EMSCRIPTEN_BINDINGS(translation_model) {
class_<TranslationModel>("TranslationModel")
.constructor<std::string>()
.function("translate", &TranslationModel::translate)
.function("isAlignmentSupported", &TranslationModel::isAlignmentSupported)
;
register_vector<std::string>("VectorString");
register_vector<TranslationResult>("VectorTranslationResult");
}

View File

@ -0,0 +1,17 @@
/*
* Bindings for TranslationRequest class
*
*/
#include <emscripten/bind.h>
#include "TranslationRequest.h"
using namespace emscripten;
// Binding code
EMSCRIPTEN_BINDINGS(translation_request) {
class_<TranslationRequest>("TranslationRequest")
.constructor<>()
;
}

View File

@ -0,0 +1,20 @@
/*
* Bindings for TranslationResult class
*
*/
#include <emscripten/bind.h>
#include <vector>
#include "TranslationResult.h"
using namespace emscripten;
// Binding code
EMSCRIPTEN_BINDINGS(translation_result) {
class_<TranslationResult>("TranslationResult")
.constructor<std::string, std::string, TranslationResult::SentenceMappings>()
.function("getOriginalText", &TranslationResult::getOriginalText)
.function("getTranslatedText", &TranslationResult::getTranslatedText)
;
}

View File

@ -0,0 +1,35 @@
require(__dirname + '/helper.js');
var http = require('http');
var express = require('express');
var app = express();
var server = http.createServer(app);
var fs = require('fs');
var url = require('url');
const nocache = require('nocache');
const cors = require('cors');
app.use(cors())
app.use(nocache());
app.get('/*.*' , cors(), function(req, res) {
var options = url.parse(req.url, true);
var mime = Helper.getMime(options);
serveFile(res, options.pathname, mime);
});
function serveFile(res, pathName, mime) {
mime = mime || 'text/html';
fs.readFile(__dirname + '/' + pathName, function (err, data) {
if (err) {
res.writeHead(500, {"Content-Type": "text/plain"});
return res.end('Error loading ' + pathName + " with Error: " + err);
}
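// Cross-origin isolation headers: required by browsers to enable SharedArrayBuffer (used by threaded wasm builds).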
res.header('Cross-Origin-Embedder-Policy','require-corp');
res.header('Cross-Origin-Opener-Policy','same-origin');
res.writeHead(200, {"Content-Type": mime});
res.end(data);
});
}
server.listen(8000);
console.log('HTTP server started on port 8000');

View File

@ -0,0 +1,199 @@
<!doctype html>
<html>
<head>
<link rel="icon" href="data:,">
<meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1">
</head>
<style>
body, html, div {
margin-left: 1%;
margin-right: 1%;
margin-bottom: 1%;
margin-top: 1%;
padding-left: 1%;
padding-right: 1%;
padding-bottom: 1%;
padding-top: 1%;
}
textarea, #to, #from {
width: 100%;
max-width: 100%;
}
div {
float: left;
width: 80%;
}
</style>
<body>
<div id="divradios">
<label>Choose the model to use</label>
<input type="radio" name="modellang" value="enes"/><label>English to Spanish</label>
<input type="radio" name="modellang" value="esen" checked/><label>Spanish to English</label>
<input type="button" id="load" value="Load Model"/>
</div>
<div id="divtranslation">
<label for="from">From</label>
<textarea id="from" name="from">
Una estrategia republicana para obstaculizar la reelección de Obama. Los dirigentes republicanos justificaron su política por la necesidad de luchar contra el fraude electoral.
Ahora bien, el Centro Brennan considera esto último un mito y afirma que el fraude electoral es menos frecuente en los Estados Unidos que el número de personas que mueren a causa de la caída de un rayo. De hecho, los abogados republicanos no han encontrado más que 300 casos de fraude electoral en los Estados Unidos en diez años. Una cosa es cierta: esas nuevas disposiciones afectarán negativamente a la tasa de participación.
En ese sentido, estas medidas minarán en parte el sistema democrático americano.
Al contrario de lo que ocurre en Canadá, los estados americanos son responsables de la organización de las elecciones federales en los Estados Unidos. Y en esa misma línea una mayoría de los gobiernos americanos promulgaron, a partir de 2009, nuevas leyes que dificultaban el proceso de inscripción o de votación.
Este fenómeno se ha extendido tras las elecciones de noviembre de 2010, que vieron el aumento de 675 nuevos representantes republicanos en 26 estados. En consecuencia, durante el año 2011 se introdujeron 180 proyectos de ley que restringían el ejercicio del derecho de voto en 41 estados.
</textarea>
<br><br>
<label for="to">To</label>
<textarea id="to" name="to" readonly></textarea>
<br><br>
<input type="button" id="translate" value="Translate"/>
</div>
<div id="divlog">
<label for="log">Log:</label><br>
<textarea id="log" name="log" rows="50" cols="75"></textarea>
</div>
<script>
var model, request, input = undefined;
const loadModel = (from, to) => {
const languagePair = `${from}${to}`;
// Vocab files are re-used in both translation directions
const vocabLanguagePair = from === "en" ? `${to}${from}` : languagePair;
// Set the Model Configuration as YAML formatted string.
// For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/
const modelConfig = `models:
- /${languagePair}/model.${languagePair}.npz
vocabs:
- /${vocabLanguagePair}/vocab.${vocabLanguagePair}.spm
- /${vocabLanguagePair}/vocab.${vocabLanguagePair}.spm
beam-size: 1
normalize: 1.0
word-penalty: 0
max-length-break: 128
mini-batch-words: 1024
workspace: 128
max-length-factor: 2.0
skip-cost: true
cpu-threads: 0
quiet: true
quiet-translation: true
shortlist:
- /${languagePair}/lex.${languagePair}.s2t
- 50
- 50
`;
/*
This config is not valid anymore in new APIs
mini-batch: 32
maxi-batch: 100
maxi-batch-sort: src
*/
// TODO: Use in model config when wormhole is enabled:
// gemm-precision: int8shift
// TODO: Use in model config when loading of binary models is supported and we use model.intgemm.alphas.bin:
// gemm-precision: int8shiftAlphaAll
console.debug("modelConfig: ", modelConfig);
// Instantiate the TranslationModel
if (model) model.delete();
model = new Module.TranslationModel(modelConfig);
}
const translate = (paragraphs) => {
// Instantiate the arguments of translate() API i.e. TranslationRequest and input (vector<string>)
var request = new Module.TranslationRequest();
let input = new Module.VectorString;
// Initialize the input
paragraphs.forEach(paragraph => {
// prevent empty paragraph - it breaks the translation
if (paragraph.trim() === "") {
return;
}
input.push_back(paragraph.trim())
})
// Access input (just for debugging)
console.log('Input size=', input.size());
/*
for (let i = 0; i < input.size(); i++) {
console.log(' val:' + input.get(i));
}
*/
// Translate the input; the result is a vector<TranslationResult>
let result = model.translate(input, request);
// Access original and translated text from each entry of vector<TranslationResult>
//console.log('Result size=', result.size(), ' - TimeDiff - ', (Date.now() - start)/1000);
const translatedParagraphs = [];
for (let i = 0; i < result.size(); i++) {
translatedParagraphs.push(result.get(i).getTranslatedText());
}
console.log({ translatedParagraphs });
request.delete();
input.delete();
return translatedParagraphs;
}
document.querySelector("#load").addEventListener("click", () => {
const lang = document.querySelector('input[name="modellang"]:checked').value;
const from = lang.substring(0, 2);
const to = lang.substring(2, 4);
let start = Date.now();
loadModel(from, to)
log(`model ${from}${to} loaded in ${(Date.now() - start) / 1000} secs`);
//log('Model Alignment:', model.isAlignmentSupported());
});
const translateCall = () => {
const text = document.querySelector('#from').value;
const paragraphs = text.split("\n");
let wordCount = 0;
paragraphs.forEach(sentence => {
wordCount += sentence.trim().split(" ").filter(word => word.trim() !== "").length;
})
const start = Date.now();
const translatedParagraphs = translate(paragraphs);
const secs = (Date.now() - start) / 1000;
log(`Translation of (${wordCount}) words took ${secs} secs (${Math.round(wordCount / secs)} words per second)`);
document.querySelector('#to').value = translatedParagraphs.join("\n");
}
document.querySelector("#translate").addEventListener("click", () => {
translateCall();
});
document.querySelector("#from").addEventListener('keyup', function(event) {
if (event.keyCode === 13) {
translateCall();
}
});
const log = (message) => {
document.querySelector("#log").value += message + "\n";
}
const start = Date.now();
let moduleLoadStart;
var Module = {
preRun: [function() {
log(`Time until Module.preRun: ${(Date.now() - start) / 1000} secs`);
moduleLoadStart = Date.now();
}],
onRuntimeInitialized: function() {
log(`Wasm Runtime initialized (preRun -> onRuntimeInitialized) in ${(Date.now() - moduleLoadStart) / 1000} secs`);
}
};
</script>
<script src="bergamot-translator-worker.js"></script>
</body>
</html>

40
wasm/test_page/helper.js Normal file
View File

@ -0,0 +1,40 @@
/*
* @author - Based on a file from the Gist here: https://gist.github.com/1757658
*
* @modified - Mike Newell - it was on Gist so I figure I can use it
*
* @Description - Added support for a few more mime types including the new
* .ogv, .webm, and .mp4 file types for HTML5 video.
*
*/
/*
* @modified - Andre Natal - removed unused types for the purpose of this use case
*/
Helper = {
types: {
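// "application/wasm" lets browsers stream-compile the .wasm file (WebAssembly.instantiateStreaming).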
"wasm" : "application/wasm"
, "js" : "application/javascript"
, "html" : "text/html"
, "htm" : "text/html"
, "ico" : "image/vnd.microsoft.icon",
},
getMime: function(u) {
var ext = this.getExt(u.pathname).replace('.', '');
return this.types[ext.toLowerCase()] || 'application/octet-stream';
},
getExt: function(path) {
var i = path.lastIndexOf('.');
return (i < 0) ? '' : path.substr(i);
}
};

391
wasm/test_page/package-lock.json generated Normal file
View File

@ -0,0 +1,391 @@
{
"requires": true,
"lockfileVersion": 1,
"dependencies": {
"accepts": {
"version": "1.3.7",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz",
"integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==",
"requires": {
"mime-types": "~2.1.24",
"negotiator": "0.6.2"
}
},
"array-flatten": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
"integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI="
},
"body-parser": {
"version": "1.19.0",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz",
"integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==",
"requires": {
"bytes": "3.1.0",
"content-type": "~1.0.4",
"debug": "2.6.9",
"depd": "~1.1.2",
"http-errors": "1.7.2",
"iconv-lite": "0.4.24",
"on-finished": "~2.3.0",
"qs": "6.7.0",
"raw-body": "2.4.0",
"type-is": "~1.6.17"
}
},
"bytes": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz",
"integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg=="
},
"content-disposition": {
"version": "0.5.3",
"resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz",
"integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==",
"requires": {
"safe-buffer": "5.1.2"
}
},
"content-type": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz",
"integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA=="
},
"cookie": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz",
"integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg=="
},
"cookie-signature": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
"integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw="
},
"cors": {
"version": "2.8.5",
"resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz",
"integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==",
"requires": {
"object-assign": "^4",
"vary": "^1"
}
},
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
}
},
"depd": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz",
"integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak="
},
"destroy": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz",
"integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA="
},
"ee-first": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
"integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
},
"encodeurl": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
"integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k="
},
"escape-html": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
"integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg="
},
"etag": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
"integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc="
},
"express": {
"version": "4.17.1",
"resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz",
"integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==",
"requires": {
"accepts": "~1.3.7",
"array-flatten": "1.1.1",
"body-parser": "1.19.0",
"content-disposition": "0.5.3",
"content-type": "~1.0.4",
"cookie": "0.4.0",
"cookie-signature": "1.0.6",
"debug": "2.6.9",
"depd": "~1.1.2",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
"finalhandler": "~1.1.2",
"fresh": "0.5.2",
"merge-descriptors": "1.0.1",
"methods": "~1.1.2",
"on-finished": "~2.3.0",
"parseurl": "~1.3.3",
"path-to-regexp": "0.1.7",
"proxy-addr": "~2.0.5",
"qs": "6.7.0",
"range-parser": "~1.2.1",
"safe-buffer": "5.1.2",
"send": "0.17.1",
"serve-static": "1.14.1",
"setprototypeof": "1.1.1",
"statuses": "~1.5.0",
"type-is": "~1.6.18",
"utils-merge": "1.0.1",
"vary": "~1.1.2"
}
},
"finalhandler": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz",
"integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==",
"requires": {
"debug": "2.6.9",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"on-finished": "~2.3.0",
"parseurl": "~1.3.3",
"statuses": "~1.5.0",
"unpipe": "~1.0.0"
}
},
"forwarded": {
"version": "0.1.2",
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz",
"integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ="
},
"fresh": {
"version": "0.5.2",
"resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
"integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac="
},
"http-errors": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz",
"integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==",
"requires": {
"depd": "~1.1.2",
"inherits": "2.0.3",
"setprototypeof": "1.1.1",
"statuses": ">= 1.5.0 < 2",
"toidentifier": "1.0.0"
}
},
"iconv-lite": {
"version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
"integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
"requires": {
"safer-buffer": ">= 2.1.2 < 3"
}
},
"inherits": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz",
"integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4="
},
"ipaddr.js": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
"integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="
},
"media-typer": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
"integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g="
},
"merge-descriptors": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
"integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E="
},
"methods": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
"integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4="
},
"mime": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
"integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="
},
"mime-db": {
"version": "1.45.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.45.0.tgz",
"integrity": "sha512-CkqLUxUk15hofLoLyljJSrukZi8mAtgd+yE5uO4tqRZsdsAJKv0O+rFMhVDRJgozy+yG6md5KwuXhD4ocIoP+w=="
},
"mime-types": {
"version": "2.1.28",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.28.tgz",
"integrity": "sha512-0TO2yJ5YHYr7M2zzT7gDU1tbwHxEUWBCLt0lscSNpcdAfFyJOVEpRYNS7EXVcTLNj/25QO8gulHC5JtTzSE2UQ==",
"requires": {
"mime-db": "1.45.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g="
},
"negotiator": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz",
"integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw=="
},
"nocache": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/nocache/-/nocache-2.1.0.tgz",
"integrity": "sha512-0L9FvHG3nfnnmaEQPjT9xhfN4ISk0A8/2j4M37Np4mcDesJjHgEUfgPhdCyZuFI954tjokaIj/A3NdpFNdEh4Q=="
},
"object-assign": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
"integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM="
},
"on-finished": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz",
"integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=",
"requires": {
"ee-first": "1.1.1"
}
},
"parseurl": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
"integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="
},
"path-to-regexp": {
"version": "0.1.7",
"resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
"integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w="
},
"proxy-addr": {
"version": "2.0.6",
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz",
"integrity": "sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==",
"requires": {
"forwarded": "~0.1.2",
"ipaddr.js": "1.9.1"
}
},
"qs": {
"version": "6.7.0",
"resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz",
"integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ=="
},
"range-parser": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
"integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="
},
"raw-body": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz",
"integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==",
"requires": {
"bytes": "3.1.0",
"http-errors": "1.7.2",
"iconv-lite": "0.4.24",
"unpipe": "1.0.0"
}
},
"safe-buffer": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
},
"safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
},
"send": {
"version": "0.17.1",
"resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz",
"integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==",
"requires": {
"debug": "2.6.9",
"depd": "~1.1.2",
"destroy": "~1.0.4",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
"fresh": "0.5.2",
"http-errors": "~1.7.2",
"mime": "1.6.0",
"ms": "2.1.1",
"on-finished": "~2.3.0",
"range-parser": "~1.2.1",
"statuses": "~1.5.0"
},
"dependencies": {
"ms": {
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz",
"integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg=="
}
}
},
"serve-static": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz",
"integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==",
"requires": {
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"parseurl": "~1.3.3",
"send": "0.17.1"
}
},
"setprototypeof": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz",
"integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw=="
},
"statuses": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz",
"integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow="
},
"toidentifier": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz",
"integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw=="
},
"type-is": {
"version": "1.6.18",
"resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
"integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
"requires": {
"media-typer": "0.3.0",
"mime-types": "~2.1.24"
}
},
"unpipe": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
"integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw="
},
"utils-merge": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
"integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM="
},
"vary": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
"integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw="
}
}
}

View File

@ -0,0 +1,7 @@
{
"dependencies": {
"cors": "^2.8.5",
"express": "^4.17.1",
"nocache": "^2.1.0"
}
}

View File

@ -0,0 +1,8 @@
#!/bin/bash
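# Copy the wasm build artifacts next to the test page so the local server can serve them.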
cp ../../build-wasm/wasm/bergamot-translator-worker.data .
cp ../../build-wasm/wasm/bergamot-translator-worker.js .
cp ../../build-wasm/wasm/bergamot-translator-worker.wasm .
cp ../../build-wasm/wasm/bergamot-translator-worker.worker.js .
npm install
node bergamot-httpserver.js