WASM Bindings collapse (#87)

* Safe transfer of bindings through typedefs

* Removing Translation* files and bringing in counterparts

* Remove previously commented out code

* Removing commented out include

* Absorb Translation* documentation

Co-authored-by: abhi-agg <66322306+abhi-agg@users.noreply.github.com>
This commit is contained in:
Jerin Philip 2021-05-03 13:41:37 +01:00 committed by GitHub
parent 4908e4019e
commit 36b3c7291a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 68 additions and 277 deletions

View File

@ -7,9 +7,9 @@
#include <iostream>
#include "TranslationModel.h"
#include "translator/parser.h"
#include "translator/byte_array_util.h"
#include "translator/parser.h"
#include "translator/service.h"
int main(int argc, char **argv) {
@ -20,19 +20,17 @@ int main(int argc, char **argv) {
std::string config = options->asYamlString();
// Route the config string to construct marian model through TranslationModel
TranslationModel model(config, marian::bergamot::getModelMemoryFromConfig(options));
marian::bergamot::Service model(
config, marian::bergamot::getModelMemoryFromConfig(options));
TranslationRequest translationRequest;
std::vector<std::string> texts;
for (std::string line; std::getline(std::cin, line);) {
texts.emplace_back(line);
texts.emplace_back(line);
}
auto results = model.translate(std::move(texts), translationRequest);
// Resolve the future and get the actual result
//std::vector<TranslationResult> results = futureResults.get();
auto results = model.translateMultiple(std::move(texts), translationRequest);
for (auto &result : results) {
std::cout << result.getTranslatedText() << std::endl;

View File

@ -1,16 +1,17 @@
/*
* main.cpp
*
* An application which accepts line separated texts in stdin and returns translated ones in stdout.
* It is convenient for batch processing and can be used with tools like SacreBLEU.
* An application which accepts line separated texts in stdin and returns
* translated ones in stdout. It is convenient for batch processing and can be
* used with tools like SacreBLEU.
*
*/
#include <iostream>
#include <string>
#include "TranslationModel.h"
#include "translator/parser.h"
#include "translator/service.h"
int main(int argc, char **argv) {
@ -21,19 +22,16 @@ int main(int argc, char **argv) {
std::string config = options->asYamlString();
// Route the config string to construct marian model through TranslationModel
TranslationModel model(config);
marian::bergamot::Service model(config);
TranslationRequest translationRequest;
std::vector<std::string> texts;
for (std::string line; std::getline(std::cin, line);) {
texts.emplace_back(line);
texts.emplace_back(line);
}
auto results = model.translate(std::move(texts), translationRequest);
// Resolve the future and get the actual result
//std::vector<TranslationResult> results = futureResults.get();
auto results = model.translateMultiple(std::move(texts), translationRequest);
for (auto &result : results) {
std::cout << result.getTranslatedText() << std::endl;

View File

@ -1,80 +0,0 @@
/*
* TranslationModel.h
*
* Main interface for translation API.
*/
#ifndef SRC_TRANSLATOR_TRANSLATIONMODEL_H_
#define SRC_TRANSLATOR_TRANSLATIONMODEL_H_
#include <future>
#include <string>
#include <vector>
// All 3rd party includes
#include "3rd_party/marian-dev/src/common/options.h"
// All local project includes
#include "TranslationRequest.h"
#include "TranslationResult.h"
#include "translator/definitions.h"
#include "translator/service.h"
/* A Translation model that translates a plain (without any markups and emojis)
* UTF-8 encoded text. This implementation supports translation from 1 source
* language to 1 target language.
*/
class TranslationModel {
public:
/**
* Construct the model using the model configuration options as a
* yaml-formatted string.
*
* @param config Marian yml config file in the form of a string
* @param modelMemory optional byte array (aligned to 64!!!) that contains
* the bytes of a model.bin. Defaults to an empty AlignedMemory.
* @param shortlistMemory optional AlignedMemory, defaults to empty.
* Presumably the bytes of a lexical shortlist -- TODO confirm against Service.
*/
TranslationModel(const std::string &config,
marian::bergamot::AlignedMemory modelMemory = marian::bergamot::AlignedMemory(),
marian::bergamot::AlignedMemory shortlistMemory = marian::bergamot::AlignedMemory());
~TranslationModel();
/* This method performs translation on a list of UTF-8 encoded plain text
* (without any markups or emojis) and returns a list of results in the same
* order. The model supports translation from 1 source language to 1 target
* language.
*
* Each text entry can either be a word, a phrase, a sentence or a list of
* sentences. Additional information related to the translated text can be
* requested via TranslationRequest which is applied equally to each text
* entry. The translated text corresponding to each text entry and the
* additional information (as specified in the TranslationRequest) is
* encapsulated and returned in TranslationResult.
*
* The API splits each text entry into sentences internally, which are then
* translated independent of each other. The translated sentences are then
* joined back together and returned in TranslationResult.
*
* Please refer to the TranslationRequest class to find out what additional
* information can be requested. The alignment information can only be
* requested if the model supports it (check isAlignmentSupported() API).
*
* The texts argument will become empty after the execution of this API (each
* entry of texts list will be moved to its corresponding TranslationResult
* object).
*
* @param texts list of texts to translate; consumed by the call.
* @param request options applied equally to every entry of texts.
* @return one TranslationResult per entry of texts, in the same order.
*/
std::vector<TranslationResult> translate(std::vector<std::string> &&texts,
TranslationRequest request);
/* Check if the model can provide alignment information b/w original and
* translated text. */
bool isAlignmentSupported() const;
private:
// Model configuration options.
// NOTE(review): the constructor appears to initialize only service_, which
// would leave this pointer null -- confirm whether the member is still needed.
std::shared_ptr<marian::Options> configOptions_; // ORDER DEPENDENCY
// Translation service that translate() forwards to.
marian::bergamot::Service service_; // ORDER DEPENDENCY
};
#endif /* SRC_TRANSLATOR_TRANSLATIONMODEL_H_ */

View File

@ -1,108 +0,0 @@
/*
* TranslationResult.h
*
* The class that represents the result of TranslationModel::translate()
* API for each of its text entry and TranslationRequest.
*/
#ifndef SRC_TRANSLATOR_TRANSLATIONRESULT_H_
#define SRC_TRANSLATOR_TRANSLATIONRESULT_H_
#include <string>
#include <vector>
#include "QualityScore.h"
/* This class represents the result of TranslationModel::translate() API
* for each of its text entry and TranslationRequest.
*/
/* Holds the original text, its translation and the optional extra
 * information (quality score, sentence mappings) for one translated entry.
 */
class TranslationResult {
public:
// List of (original sentence, translated sentence) pairs. The pairs are
// non-owning views; they do not keep the referenced characters alive.
typedef std::vector<std::pair<std::string_view, std::string_view>>
SentenceMappings;
#ifdef WASM_BINDINGS
// WASM-only convenience constructor: copies both texts and leaves the
// sentence mappings empty.
TranslationResult(const std::string &original, const std::string &translation)
: originalText(original), translatedText(translation),
sentenceMappings() {}
#endif
// Copies both texts and the list of sentence mappings.
// NOTE(review): copying a string_view is shallow, so the stored mappings keep
// referring to whatever storage the caller's views pointed at, not to the
// string copies made here -- confirm the caller guarantees that lifetime.
TranslationResult(const std::string &original, const std::string &translation,
SentenceMappings &sentenceMappings)
: originalText(original), translatedText(translation),
sentenceMappings(sentenceMappings) {}
// Move constructor: takes over the texts and the mappings of `other`.
// NOTE(review): qualityScore is not part of this initializer list, so it is
// default-initialized rather than taken from `other`.
TranslationResult(TranslationResult &&other)
: originalText(std::move(other.originalText)),
translatedText(std::move(other.translatedText)),
sentenceMappings(std::move(other.sentenceMappings)) {}
#ifdef WASM_BINDINGS
// Copy constructor, only declared for WASM builds.
// NOTE(review): qualityScore is not copied, and the copied views still refer
// to the storage the views in `other` pointed at -- confirm this is intended.
TranslationResult(const TranslationResult &other)
: originalText(other.originalText),
translatedText(other.translatedText),
sentenceMappings(other.sentenceMappings) {}
#endif
// Takes ownership of the texts and the mappings by moving from the arguments.
TranslationResult(std::string &&original, std::string &&translation,
SentenceMappings &&sentenceMappings)
: originalText(std::move(original)),
translatedText(std::move(translation)),
sentenceMappings(std::move(sentenceMappings)) {}
// Copy assignment is deleted in regular builds and provided member-wise only
// when WASM_BINDINGS is defined.
#ifndef WASM_BINDINGS
TranslationResult &operator=(const TranslationResult &) = delete;
#else
// NOTE(review): assigns the texts and mappings only; qualityScore is left
// untouched.
TranslationResult &operator=(const TranslationResult &result) {
originalText = result.originalText;
translatedText = result.translatedText;
sentenceMappings = result.sentenceMappings;
return *this;
}
#endif
/* Return the original text. */
const std::string &getOriginalText() const { return originalText; }
/* Return the translated text. */
const std::string &getTranslatedText() const { return translatedText; }
/* Return the Quality scores of the translated text. */
const QualityScore &getQualityScore() const { return qualityScore; }
/* Return the Sentence mappings (information regarding how individual
* sentences of originalText map to corresponding translated sentences in
* translatedText).
*/
const SentenceMappings &getSentenceMappings() const {
return sentenceMappings;
}
private:
// Original text (in UTF-8 encoded format) that was supposed to be translated
std::string originalText;
// Translation (in UTF-8 encoded format) of the originalText
std::string translatedText;
// Quality score of the translated text at the granularity specified in
// TranslationRequest. It is an optional result (it will have no information
// if not requested in TranslationRequest)
QualityScore qualityScore;
// Information regarding how individual sentences of originalText map to
// corresponding translated sentences in the joined translated text
// (translatedText). An example of sentence mapping:
//   originalText (contains 2 sentences) =
//       "What is your name? My name is Abc."
//   translatedText (contains 2 translated sentences) =
//       "Was ist dein Name? Mein Name ist Abc."
//   sentenceMappings = [
//       {"What is your name?", "Was ist dein Name?"},
//           // Pair(originalText[0], translatedText[0])
//       {"My name is Abc", "Mein Name ist Abc."}
//           // Pair(originalText[1], translatedText[1])
//   ]
//
// It is an optional result (it will be empty if not requested in
// TranslationRequest).
SentenceMappings sentenceMappings;
};
#endif /* SRC_TRANSLATOR_TRANSLATIONRESULT_H_ */

View File

@ -1,5 +1,4 @@
add_library(bergamot-translator STATIC
TranslationModel.cpp
byte_array_util.cpp
text_processor.cpp
sentence_splitter.cpp

View File

@ -1,50 +0,0 @@
/*
* TranslationModel.cpp
*
*/
#include <future>
#include <vector>
// All local project includes
#include "TranslationModel.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/service.h"
// Builds the wrapped service from the yaml configuration string and hands both
// aligned-memory buffers over to it by move.
TranslationModel::TranslationModel(
    const std::string &config, marian::bergamot::AlignedMemory modelMemory,
    marian::bergamot::AlignedMemory shortlistMemory)
    : service_(config, std::move(modelMemory), std::move(shortlistMemory)) {}
// Nothing to release by hand; the members clean up after themselves.
TranslationModel::~TranslationModel() = default;
std::vector<TranslationResult>
TranslationModel::translate(std::vector<std::string> &&texts,
TranslationRequest request) {
// This code, move into async?
std::vector<TranslationResult> translationResults;
std::vector<marian::bergamot::Response> responses =
service_.translateMultiple(std::move(texts), request);
for (auto &response : responses) {
TranslationResult::SentenceMappings sentenceMappings;
for (size_t idx = 0; idx < response.size(); idx++) {
marian::string_view src = response.source.sentence(idx);
marian::string_view tgt = response.target.sentence(idx);
sentenceMappings.emplace_back(std::string_view(src.data(), src.size()),
std::string_view(tgt.data(), tgt.size()));
}
// In place construction.
translationResults.emplace_back(
std::move(response.source.text), // &&response.source_
std::move(response.target.text), // &&response.translation_
std::move(sentenceMappings) // &&sentenceMappings
);
}
return translationResults;
}
// Always reports false: this implementation never advertises alignment
// support, whatever configuration the model was built with.
bool TranslationModel::isAlignmentSupported() const { return false; }

View File

@ -64,6 +64,10 @@ struct Response {
/// sparse matrix representation with indices corresponding
/// to (sub-)words accessible through Annotation.
std::vector<Alignment> alignments;
/// Returns a reference to the text held by `source`.
const std::string &getOriginalText() const { return source.text; }
/// Returns a reference to the text held by `target`.
const std::string &getTranslatedText() const { return target.text; }
};
} // namespace bergamot
} // namespace marian

View File

@ -28,8 +28,8 @@ loadVocabularies(marian::Ptr<marian::Options> options) {
namespace marian {
namespace bergamot {
Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory)
: requestId_(0), vocabs_(std::move(loadVocabularies(options))),
Service::Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory)
: requestId_(0), options_(options), vocabs_(std::move(loadVocabularies(options))),
text_processor_(vocabs_, options), batcher_(options),
numWorkers_(options->get<int>("cpu-threads")),
modelMemory_(std::move(modelMemory)), shortlistMemory_(std::move(shortlistMemory))

View File

@ -20,15 +20,22 @@
namespace marian {
namespace bergamot {
/// Service offers methods create an asynchronous translation service. This is
/// intended to be similar to the ones provided for training or decoding in ML
/// pipelines with the following additional capabilities:
/// Service offers methods to create an asynchronous translation service that
/// translates plain (without any markup or emojis) UTF-8 encoded text.
/// This implementation supports translation from 1 source language to 1 target
/// language.
///
/// This is intended to be similar to the ones provided for training or
/// decoding in ML pipelines with the following additional capabilities:
///
/// 1. Provision of a request -> response based translation flow, unlike the
/// usual line-based translation or decoding provided in most ML frameworks.
/// 2. Internal handling of normalization etc which changes source text to
/// provide to client translation meta-information like alignments consistent
/// with the unnormalized input text.
/// 3. The API splits each text entry into sentences internally, which are then
/// translated independent of each other. The translated sentences are then
/// joined back together and returned in Response.
///
/// Service exposes methods to instantiate the service from a string
/// configuration (which can cover most translators) and to translate an
@ -48,9 +55,10 @@ namespace bergamot {
/// // Do things with response.
/// ```
///
/// Optionally Service can be initialized by also passing model_memory for
/// Optionally Service can be initialized by also passing model memory for
/// purposes of efficiency (which defaults to nullpointer and then reads from
/// file supplied through config).
///
class Service {
public:
@ -84,8 +92,8 @@ public:
explicit Service(const std::string &config,
AlignedMemory modelMemory = AlignedMemory(),
AlignedMemory shortlistMemory = AlignedMemory())
: Service(parseOptions(config, /*validate=*/false), std::move(modelMemory),
std::move(shortlistMemory)) {}
: Service(parseOptions(config, /*validate=*/false),
std::move(modelMemory), std::move(shortlistMemory)) {}
/// Explicit destructor to clean up after any threads initialized in
/// asynchronous operation mode.
@ -108,12 +116,18 @@ public:
std::future<Response> translate(std::string &&source,
ResponseOptions options);
/// Translate an input, providing TranslationRequest across all texts to
/// construct Response. Provides the browser with the ability to break texts
/// into multiple Request keeping gains from efficiently batching internally.
/// Also useful when one has to set/unset alignments or quality in the
/// Response to save compute spent in constructing these objects.
/// Translate multiple text-blobs in a single *blocking* API call, providing
/// TranslationRequest which applies across all text-blobs dictating how to
/// construct Response. TranslationRequest can be used to enable/disable
/// additional information like quality-scores, alignments etc.
///
/// All texts are combined to efficiently construct batches together providing
/// speedups compared to calling translate() independently on individual
/// text-blob. Note that there will be minor differences in output when
/// text-blobs are individually translated due to approximations but similar
/// quality nonetheless. If you have async/multithread capabilities, it is
/// recommended to work with futures and translate() API.
///
/// @param [in] source: rvalue reference of the string to be translated
/// @param [in] translationRequest: TranslationRequest (Unified API)
/// indicating whether or not to include some member in the Response, also
@ -123,6 +137,11 @@ public:
translateMultiple(std::vector<std::string> &&source,
TranslationRequest translationRequest);
/// Returns whether the model is alignment capable.
///
/// The answer is whatever `hasAndNotEmpty("alignment")` reports for the
/// options object held in `options_`.
bool isAlignmentSupported() const {
return options_->hasAndNotEmpty("alignment");
}
private:
/// Queue an input for translation.
std::future<Response> queueRequest(std::string &&input,
@ -149,6 +168,10 @@ private:
/// Number of workers to launch.
size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_)
/// Options object holding the options Service was instantiated with.
Ptr<Options> options_;
/// Model memory to load model passed as bytes.
AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_)
/// Shortlist memory passed as bytes.

View File

@ -6,10 +6,14 @@
#include <emscripten/bind.h>
#include "TranslationModel.h"
#include "response.h"
#include "service.h"
using namespace emscripten;
typedef marian::bergamot::Service TranslationModel;
typedef marian::bergamot::Response TranslationResult;
// Wraps the bytes of `alignedMemory` in a typed memory view and returns it as
// an emscripten `val`.
val getByteArrayView(marian::bergamot::AlignedMemory& alignedMemory) {
  auto byteView =
      typed_memory_view(alignedMemory.size(), alignedMemory.as<char>());
  return val(byteView);
}
@ -31,9 +35,11 @@ TranslationModel* TranslationModelFactory(const std::string &config,
EMSCRIPTEN_BINDINGS(translation_model) {
class_<TranslationModel>("TranslationModel")
.constructor(&TranslationModelFactory, allow_raw_pointers())
.function("translate", &TranslationModel::translate)
.function("translate", &TranslationModel::translateMultiple)
.function("isAlignmentSupported", &TranslationModel::isAlignmentSupported)
;
// ^ We redirect Service::translateMultiple to WASMBound::translate instead. Sane API is
// translate. If and when async comes, we can be done with this inconsistency.
register_vector<std::string>("VectorString");
register_vector<TranslationResult>("VectorTranslationResult");

View File

@ -6,15 +6,16 @@
#include <emscripten/bind.h>
#include <vector>
#include "TranslationResult.h"
#include "response.h"
typedef marian::bergamot::Response TranslationResult;
using namespace emscripten;
// Binding code
EMSCRIPTEN_BINDINGS(translation_result) {
class_<TranslationResult>("TranslationResult")
.constructor<std::string, std::string, TranslationResult::SentenceMappings>()
.function("getOriginalText", &TranslationResult::getOriginalText)
.function("getTranslatedText", &TranslationResult::getTranslatedText)
;
.constructor<>()
.function("getOriginalText", &TranslationResult::getOriginalText)
.function("getTranslatedText", &TranslationResult::getTranslatedText);
}