Cleanup API: Refactor request on-complete transition (#80)

This commit is contained in:
Jerin Philip 2021-04-27 15:56:39 +01:00 committed by GitHub
parent fdf9e66cef
commit fa2003e70d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 492 additions and 260 deletions

View File

@ -27,8 +27,14 @@ int main(int argc, char *argv[]) {
std::string input = std_input.str();
using marian::bergamot::Response;
marian::bergamot::ResponseOptions responseOptions;
responseOptions.qualityScores = true;
responseOptions.alignment = true;
responseOptions.alignmentThreshold = 0.2f;
// Wait on future until Response is complete
std::future<Response> responseFuture = service.translate(std::move(input));
std::future<Response> responseFuture =
service.translate(std::move(input), responseOptions);
responseFuture.wait();
Response response = responseFuture.get();

View File

@ -8,6 +8,7 @@
#include "marian.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/response_options.h"
#include "translator/service.h"
int main(int argc, char *argv[]) {
@ -21,8 +22,14 @@ int main(int argc, char *argv[]) {
std::string input = std_input.str();
using marian::bergamot::Response;
marian::bergamot::ResponseOptions responseOptions;
responseOptions.qualityScores = true;
responseOptions.alignment = true;
responseOptions.alignmentThreshold = 0.2f;
// Wait on future until Response is complete
std::future<Response> responseFuture = service.translate(std::move(input));
std::future<Response> responseFuture =
service.translate(std::move(input), responseOptions);
responseFuture.wait();
Response response = responseFuture.get();

View File

@ -6,7 +6,7 @@ add_library(bergamot-translator STATIC
batch_translator.cpp
request.cpp
batcher.cpp
response.cpp
response_builder.cpp
batch.cpp
sentence_ranges.cpp
service.cpp

View File

@ -9,6 +9,7 @@
// All local project includes
#include "TranslationModel.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/service.h"
TranslationModel::TranslationModel(const std::string &config,
@ -21,31 +22,25 @@ TranslationModel::~TranslationModel() {}
std::vector<TranslationResult>
TranslationModel::translate(std::vector<std::string> &&texts,
TranslationRequest request) {
// Implementing a non-async version first. Unpleasant, but should work.
std::promise<std::vector<TranslationResult>> promise;
auto future = promise.get_future();
// This code, move into async?
std::vector<TranslationResult> translationResults;
for (auto &text : texts) {
// Collect future as marian::bergamot::TranslationResult
auto intermediate = service_.translate(std::move(text));
intermediate.wait();
auto marianResponse(std::move(intermediate.get()));
std::vector<marian::bergamot::Response> responses =
service_.translateMultiple(std::move(texts), request);
for (auto &response : responses) {
TranslationResult::SentenceMappings sentenceMappings;
for (size_t idx = 0; idx < marianResponse.size(); idx++) {
marian::string_view src = marianResponse.source.sentence(idx);
marian::string_view tgt = marianResponse.target.sentence(idx);
for (size_t idx = 0; idx < response.size(); idx++) {
marian::string_view src = response.source.sentence(idx);
marian::string_view tgt = response.target.sentence(idx);
sentenceMappings.emplace_back(std::string_view(src.data(), src.size()),
std::string_view(tgt.data(), tgt.size()));
}
// In place construction.
translationResults.emplace_back(
std::move(marianResponse.source.text), // &&marianResponse.source_
std::move(marianResponse.target.text), // &&marianResponse.translation_
std::move(sentenceMappings) // &&sentenceMappings
std::move(response.source.text), // &&response.source_
std::move(response.target.text), // &&response.translation_
std::move(sentenceMappings) // &&sentenceMappings
);
}

View File

@ -63,11 +63,14 @@ void BatchTranslator::translate(Batch &batch) {
std::vector<data::SentenceTuple> batchVector;
auto &sentences = batch.sentences();
size_t batchSequenceNumber{0};
for (auto &sentence : sentences) {
data::SentenceTuple sentence_tuple(sentence.lineNumber());
data::SentenceTuple sentence_tuple(batchSequenceNumber);
Segment segment = sentence.getUnderlyingSegment();
sentence_tuple.push_back(segment);
batchVector.push_back(sentence_tuple);
++batchSequenceNumber;
}
size_t batchSize = batchVector.size();

View File

@ -11,18 +11,17 @@ namespace marian {
namespace bergamot {
// -----------------------------------------------------------------
Request::Request(size_t Id, size_t lineNumberBegin,
std::vector<Ptr<Vocab const>> &vocabs, AnnotatedText &&source,
Segments &&segments, std::promise<Response> responsePromise)
: Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs),
source_(std::move(source)), segments_(std::move(segments)),
response_(std::move(responsePromise)) {
Request::Request(size_t Id, Segments &&segments,
ResponseBuilder &&responseBuilder)
: Id_(Id), segments_(std::move(segments)),
responseBuilder_(std::move(responseBuilder))
{
counter_ = segments_.size();
histories_.resize(segments_.size(), nullptr);
}
size_t Request::lineNumberBegin() const { return lineNumberBegin_; }
size_t Request::numSegments() const { return segments_.size(); }
size_t Request::segmentTokens(size_t index) const {
@ -39,17 +38,10 @@ void Request::processHistory(size_t index, Ptr<History> history) {
// In case this is last request in, completeRequest is called, which sets the
// value of the promise.
if (--counter_ == 0) {
completeRequest();
responseBuilder_(std::move(histories_));
}
}
// Builds the Response for this Request and fulfils the promise with it.
// source_ and histories_ are moved into the Response, so they are left in a
// moved-from state afterwards; vocabs_ is dereferenced and passed by
// reference. Called from processHistory() when the pending-sentence counter
// reaches zero.
void Request::completeRequest() {
// Request no longer needs to hold the content, can transfer it to
// Response.
Response response(std::move(source_), std::move(histories_), *vocabs_);
// Publishing the value is what makes the client-side future ready.
response_.set_value(std::move(response));
}
bool Request::operator<(const Request &b) const {
// Among Requests, only sequence id is used for obtaining priority.
return Id_ < b.Id_;
@ -64,10 +56,6 @@ size_t RequestSentence::numTokens() const {
return (request_->segmentTokens(index_));
}
// Returns the line number of this sentence: the owning Request's
// lineNumberBegin() plus this sentence's index within that Request.
size_t RequestSentence::lineNumber() const {
return (request_->lineNumberBegin() + index_);
}
void RequestSentence::completeSentence(Ptr<History> history) {
// Relays completeSentence into request's processHistory, using index
// information.

View File

@ -1,24 +1,9 @@
//
// Defines:
//
// Request: holds the input text of a text, Segments (vector<Words>) which are
// to go to the batching mechanism and alignments between the processed
// segments and the input text (sourceTokenRanges). In addition, Request takes
// care of the barrier which fires when all the Segments in a request are done
// translating by the workers (BatchTranslator).
// TODO(jerinphilip): Extend Request with notions of Priority (sequence,
// user-given).
//
// RequestSentence: is a tuple of (index, Ptr<Request>). This provides the
// batching mechanism access to the segment within the request. The backref to
// Request allows event triggering the barrier upon completion of the last
// sentence by a worker.
#ifndef SRC_BERGAMOT_REQUEST_H_
#define SRC_BERGAMOT_REQUEST_H_
#include "definitions.h"
#include "response.h"
#include "response_builder.h"
#include "sentence_ranges.h"
#include "common/logging.h"
@ -33,80 +18,96 @@
namespace marian {
namespace bergamot {
/// A Request is an internal representation used to represent a request after
/// processed by TextProcessor into sentences constituted by marian::Words.
///
/// The batching mechanism (Batcher) draws from multiple Requests and compiles
/// sentences into a batch. When a batch completes translation (at
/// BatchTranslator, intended in a different thread), backward propagation
/// happens through:
///
/// ```cpp
/// Batch::completeBatch(...)
/// -> RequestSentence::completeSentence(..)
/// -> Request::processHistory(...)
/// ```
///
/// When all sentences in a Request are completed, responseBuilder is
/// triggered with the compiled Histories, to construct the Response
/// corresponding to the Request and set value of the promise which triggers the
/// future at client.
class Request {
public:
Request(size_t Id, size_t lineNumberBegin,
std::vector<Ptr<Vocab const>> &vocabs_, AnnotatedText &&source,
Segments &&segments, std::promise<Response> responsePromise);
/// Constructs an internal representation of the Request identified by Id,
/// processed Segments and accepts a callback (ResponseBuilder) which builds
/// the Response upon completion of the Request.
///
///
/// @param [in] Id: Identifier assigned to Request by Service.
/// @param [in] segments: Each segment is a unit to be translated.
/// @param [in] responseBuilder: Callback function (of ResponseBuilder type)
/// to be triggered upon the completion of translation of all units in a
/// Request.
Request(size_t Id, Segments &&segments, ResponseBuilder &&responseBuilder);
// Obtain the count of tokens in the segment correponding to index. Used to
// insert sentence from multiple requests into the corresponding size bucket.
/// Obtain the count of tokens in the segment corresponding to index. Used to
/// insert sentence from multiple requests into the corresponding size bucket.
size_t segmentTokens(size_t index) const;
// Obtain number of segments in a request.
/// Obtain number of segments in a request.
size_t numSegments() const;
size_t lineNumberBegin() const;
// Obtains segment corresponding to index to create a batch of segments among
// several requests.
/// Obtains segment corresponding to index to create a batch of segments
/// among several requests.
Segment getSegment(size_t index) const;
// For notions of priority among requests, used to enable std::set in
// Batcher.
/// For notions of priority among requests, used to enable std::set in
/// Batcher.
bool operator<(const Request &request) const;
// Processes a history obtained after translating in a heterogenous batch
// compiled from requests.
/// Processes a history obtained after translating in a heterogeneous batch
/// compiled from requests.
void processHistory(size_t index, Ptr<History> history);
// On completion of last segment, sets value of the promise.
void completeRequest();
private:
size_t Id_;
size_t lineNumberBegin_;
// Multiple translation-workers can concurrently access the same Request. The
// following atomic atomically operates on the variable holding sentences
// remaining to be translated.
/// Multiple translation-workers can concurrently access the same Request. The
/// following atomic atomically operates on the variable holding sentences
/// remaining to be translated.
std::atomic<int> counter_;
// source_ holds the source string to be translated. segments_ hold the
// sentences generated from source_ in vector<Words>. sourceRanges_ are
// string_views of the text corresponding to these words, pointing to
// sequences in source_. histories_ is a buffer which eventually stores the
// translations of each segment in the corresponding index.
AnnotatedText source_;
/// segments_ hold the sentences processed into Words which generated from
/// input string.
Segments segments_;
/// histories_ is a buffer which eventually stores the translations of each
/// segment in the corresponding index.
std::vector<Ptr<History>> histories_;
// Members above are moved into newly constructed Response on completion
// of translation of all segments. The promise below is set to this Response
// value. future to this promise is made available to the user through
// Service.
std::promise<Response> response_;
// Constructing Response requires the vocabs_ used to generate Request.
std::vector<Ptr<Vocab const>> *vocabs_;
/// Constructing Response requires the vocabs_ used to generate Request.
/// std::vector<Ptr<Vocab const>> *vocabs_;
ResponseBuilder responseBuilder_;
};
/// A RequestSentence provides a view to a sentence within a Request. Existence
/// of this class allows the sentences and associated information to be kept
/// within Request, while batching mechanism (Batcher) compiles Batch from
/// RequestSentence-s coming from different Requests.
class RequestSentence {
// A RequestSentence provides a view to a sentence within a Request. Existence
// of this class allows the sentences and associated information to be kept
// within Request.
public:
RequestSentence(size_t, Ptr<Request>);
/// Number of tokens in the segment this RequestSentence represents. Used to
/// order by length in batching.
size_t numTokens() const;
// lineNumber in Request, used for matching marian-decoder. SentenceTuple
// requires lineNumber to be set for Corpus based batches.
size_t lineNumber() const;
// Accessor to the segment represented by the RequestSentence.
/// Accessor to the segment represented by the RequestSentence.
Segment getUnderlyingSegment() const;
// Forwards call to Request, checking for completion.
/// Forwards history to Request to set history corresponding to this
/// RequestSentence.
void completeSentence(Ptr<History> history);
friend bool operator<(const RequestSentence &a, const RequestSentence &b);

View File

@ -1,106 +0,0 @@
#include "response.h"
#include "common/logging.h"
#include "data/alignment.h"
#include "sentence_ranges.h"
#include <utility>
namespace marian {
namespace bergamot {
/// Constructs a Response from the annotated source text and the translation
/// histories of its sentences.
///
/// For every history the 1-best hypothesis is decoded with the last vocab in
/// `vocabs`, appended to `target.text` (sentences separated by a single
/// space), and its alignments and quality scores are recorded. Once the
/// target string is complete, the per-word byte offsets collected on the way
/// are turned into views over `target.text` and registered sentence by
/// sentence through `target.addSentence(...)`.
///
/// @param source     annotated source text; moved into `this->source`.
/// @param histories  one History per translated sentence.
/// @param vocabs     vocabularies; `vocabs.back()` is used for decoding.
Response::Response(AnnotatedText &&source, Histories &&histories,
                   std::vector<Ptr<Vocab const>> &vocabs)
    : source(std::move(source)) {
  // Inside this body the unqualified name `source` is the constructor
  // parameter, which has just been moved from. Read the size from the member
  // instead, otherwise the reserve below is based on a moved-from string.
  target.text.reserve(this->source.text.size());

  // In a first step, the decoded units (individual sentences) are compiled
  // into one string. Word positions are stored as (begin, end) offsets rather
  // than views because target.text may reallocate while it grows.
  std::vector<std::pair<size_t, size_t>> translationRanges;
  std::vector<size_t> sentenceBegins;
  sentenceBegins.reserve(histories.size());

  size_t offset{0};
  bool first{true};

  for (auto &history : histories) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList onebest = history->nBest(1);

    Result result = onebest[0]; // Expecting only one result;
    Words words = std::get<0>(result);
    auto targetVocab = vocabs.back();

    std::string decoded;
    std::vector<string_view> targetMappings;
    targetVocab->decodeWithByteRanges(words, decoded, targetMappings);

    if (first) {
      first = false;
    } else {
      target.text += " ";
      ++offset;
    }

    sentenceBegins.push_back(translationRanges.size());
    target.text += decoded;

    // A sentence that decodes to no words contributes no ranges; guarding
    // here avoids calling front() on an empty vector.
    if (!targetMappings.empty()) {
      auto decodedStringBeginMarker = targetMappings.front().begin();
      for (auto &sview : targetMappings) {
        // Take the pointer difference first so that no out-of-range
        // intermediate pointer is ever formed.
        size_t startIdx =
            offset +
            static_cast<size_t>(sview.begin() - decodedStringBeginMarker);
        translationRanges.emplace_back(startIdx, startIdx + sview.size());
      }
    }

    offset += decoded.size();

    // Alignments
    // TODO(jerinphilip): The following double conversion might not be
    // necessary. Hard alignment can directly be exported, but this would mean
    // WASM bindings for a structure deep within marian source.
    auto hyp = std::get<1>(result);
    auto softAlignment = hyp->tracebackAlignment();
    auto hardAlignment = data::ConvertSoftAlignToHardAlign(
        softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a
                                            // configurable parameter.

    Alignment unified_alignment;
    unified_alignment.reserve(hardAlignment.size());
    for (auto &p : hardAlignment) {
      // Standard brace-initialisation instead of a GNU compound literal.
      unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob});
    }

    alignments.push_back(std::move(unified_alignment));

    // Quality scores: Sequence level is obtained as normalized path scores.
    // Word level using hypothesis traceback. These are most-likely logprobs.
    auto normalizedPathScore = std::get<2>(result);
    auto wordQualities = hyp->tracebackWordScores();
    // Drops the trailing score (presumably the end-of-sentence token; confirm
    // against Hypothesis::tracebackWordScores).
    wordQualities.pop_back();
    qualityScores.push_back(Quality{normalizedPathScore, wordQualities});
  }

  // target.text is final now, so views into it stay valid. Rebuild the
  // per-sentence word views from the stored offsets and register them.
  for (size_t i = 1; i <= sentenceBegins.size(); i++) {
    std::vector<string_view> targetMappings;
    size_t begin = sentenceBegins[i - 1];
    size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size()
                                                   : sentenceBegins[i];

    for (size_t idx = begin; idx < safe_end; idx++) {
      auto &p = translationRanges[idx];
      size_t begin_idx = p.first;
      size_t end_idx = p.second;

      const char *data = &target.text[begin_idx];
      size_t size = end_idx - begin_idx;
      targetMappings.emplace_back(data, size);
    }

    target.addSentence(targetMappings);
  }
}
} // namespace bergamot
} // namespace marian

View File

@ -40,34 +40,12 @@ struct Quality {
/// AnnotatedText provides an API to access markings of (sub)-word and
/// sentences boundaries, which are required to interpret Quality and
/// Alignment (s) at the moment.
class Response {
public:
///
Response(AnnotatedText &&source, Histories &&histories,
std::vector<Ptr<Vocab const>> &vocabs);
/// \cond HIDDEN_PUBLIC
// Move constructor.
Response(Response &&other)
: source(std::move(other.source)), target(std::move(other.target)),
alignments(std::move(other.alignments)),
qualityScores(std::move(other.qualityScores)){};
// The following copy bans are not stricitly required anymore since Annotation
// is composed of the ByteRange primitive (which was previously string_view
// and required to be bound to string), but makes movement efficient by
// banning these letting compiler complain about copies.
Response(const Response &) = delete;
Response &operator=(const Response &) = delete;
/// \endcond
/// Number of sentences translated. The processing of a text of into sentences
/// are handled internally, and this information can be used to iterate
/// through meaningful units of translation for which alignment and quality
/// information are available.
struct Response {
/// Convenience function to obtain number of units translated. Same as
/// `.source.numSentences()` and `.target.numSentences().` The processing of a
/// text of into sentences are handled internally, and this information can be
/// used to iterate through meaningful units of translation for which
/// alignment and quality information are available.
const size_t size() const { return source.numSentences(); }
/// source text and annotations of (sub-)words and sentences.

View File

@ -0,0 +1,87 @@
#include "response_builder.h"
namespace marian {
namespace bergamot {
/// Appends one Quality entry per history to `response.qualityScores`.
///
/// For each history the 1-best result is taken; its normalized path score is
/// used as the sequence-level score and the hypothesis' traceback word scores
/// (minus the last entry) as the word-level scores.
///
/// @param histories [in] one History per translated sentence.
/// @param response [out] Response whose `qualityScores` is appended to.
void ResponseBuilder::buildQualityScores(Histories &histories,
                                         Response &response) {
  for (auto &history : histories) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList onebest = history->nBest(1);

    Result result = onebest[0]; // Expecting only one result;
    auto hyp = std::get<1>(result);

    // Quality scores: Sequence level is obtained as normalized path scores.
    // Word level using hypothesis traceback. These are most-likely
    // logprobs.
    auto normalizedPathScore = std::get<2>(result);
    auto wordQualities = hyp->tracebackWordScores();

    // Drop the trailing score (presumably the end-of-sentence token; confirm
    // against Hypothesis::tracebackWordScores). Guarded so an empty score
    // list does not hit pop_back() on an empty container.
    if (!wordQualities.empty()) {
      wordQualities.pop_back();
    }

    // wordQualities is not used afterwards, so hand it over without a copy.
    response.qualityScores.push_back(
        Quality{normalizedPathScore, std::move(wordQualities)});
  }
}
/// Appends one Alignment per history to `response.alignments`.
///
/// The soft alignment of each 1-best hypothesis is converted to a hard
/// alignment using `responseOptions_.alignmentThreshold`, then repackaged as
/// a vector of Point{srcPos, tgtPos, prob}.
///
/// @param histories [in] one History per translated sentence.
/// @param response [out] Response whose `alignments` is appended to.
void ResponseBuilder::buildAlignments(Histories &histories,
                                      Response &response) {
  // The threshold does not change between sentences; read it once.
  const auto threshold = responseOptions_.alignmentThreshold;

  for (auto &history : histories) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList onebest = history->nBest(1);

    Result result = onebest[0]; // Expecting only one result;

    // Alignments
    // TODO(jerinphilip): The following double conversion might not be
    // necessary. Hard alignment can directly be exported, but this would
    // mean WASM bindings for a structure deep within marian source.
    auto hyp = std::get<1>(result);
    auto softAlignment = hyp->tracebackAlignment();
    auto hardAlignment =
        data::ConvertSoftAlignToHardAlign(softAlignment, threshold);

    Alignment unified_alignment;
    unified_alignment.reserve(hardAlignment.size());
    for (auto &p : hardAlignment) {
      unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob});
    }

    response.alignments.push_back(std::move(unified_alignment));
  }
}
/// Decodes the 1-best hypothesis of every history with the last vocab in
/// `vocabs_` and appends it to `response.target`, together with the word
/// views produced by the decoder. Sentences after the first are preceded by
/// a single space.
///
/// @param histories [in] one History per translated sentence.
/// @param response [out] Response whose `target` text/annotation is extended;
/// `response.source` must already be set because its size seeds the reserve.
void ResponseBuilder::buildTranslatedText(Histories &histories,
                                          Response &response) {
  // The translation is assumed to be roughly as long as the source, so
  // reserving that much up front saves reallocations while appending.
  response.target.text.reserve(response.source.text.size());

  bool isFirstSentence = true;
  for (auto &history : histories) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList nbest = history->nBest(1);
    Result best = nbest[0]; // Expecting only one result;
    Words tokens = std::get<0>(best);

    std::string sentenceText;
    std::vector<string_view> subwordViews;
    auto vocab = vocabs_->back();
    vocab->decodeWithByteRanges(tokens, sentenceText, subwordViews);

    // delimiter can be used to fill in the blanks from source as well.
    std::string separator = isFirstSentence ? "" : " ";
    isFirstSentence = false;

    response.target.appendSentence(separator, sentenceText, subwordViews);
  }
}
} // namespace bergamot
} // namespace marian

View File

@ -0,0 +1,93 @@
#ifndef SRC_BERGAMOT_RESPONSE_BUILDER_H_
#define SRC_BERGAMOT_RESPONSE_BUILDER_H_
#include "data/types.h"
#include "response.h"
#include "response_options.h"
// For now we will work with this, to avoid complaints another structure is hard
// to operate with.
namespace marian {
namespace bergamot {
/// ResponseBuilder is a callback functor. It is expected to be bound to a
/// Request after giving it the context of options, source text, vocabs and
/// the promise to set. It constructs the Response and its members based on
/// options. At present only `qualityScores` and `alignment` are consulted;
/// the remaining ResponseOptions fields are carried but not yet acted upon.
class ResponseBuilder {
public:
  /// @param [in] responseOptions: ResponseOptions, indicating what to include
  /// or not in the response and any additional configurable parameters.
  /// @param [in] source: annotated source text; moved into the Response when
  /// the callback fires.
  /// @param [in] vocabs: marian vocab object (used in decoding). Only the
  /// address is stored, so the vector must outlive this builder.
  /// @param [in] promise: promise to set with the constructed Response.
  ResponseBuilder(ResponseOptions responseOptions, AnnotatedText &&source,
                  std::vector<Ptr<Vocab const>> &vocabs,
                  std::promise<Response> &&promise)
      // Initializers are listed in member declaration order, which is the
      // order the compiler actually uses (avoids -Wreorder).
      : responseOptions_(responseOptions), vocabs_(&vocabs),
        promise_(std::move(promise)), source_(std::move(source)) {}

  /// Constructs and sets the promise of a Response object from obtained
  /// histories after translating. `source_` is moved out and the promise is
  /// fulfilled here, so this is meant to be invoked once per builder.
  /// @param [in] histories: Histories obtained after translating the Request
  /// from which this functor is called.
  void operator()(Histories &&histories) {
    // Every source sentence must have produced exactly one history.
    ABORT_IF(source_.numSentences() != histories.size(),
             "Mismatch in source and translated sentences");
    Response response;

    // Move source_ into response.
    response.source = std::move(source_);

    // Should be after source is set
    buildTranslatedText(histories, response);

    // Should always be after buildTranslatedText
    if (responseOptions_.qualityScores) {
      buildQualityScores(histories, response);
    }

    if (responseOptions_.alignment) {
      buildAlignments(histories, response);
    }

    // Once complete, set promise.
    promise_.set_value(std::move(response));
  }

private:
  /// Builds qualityScores from histories and writes to response. expects
  /// buildTranslatedText to be run before to be able to obtain target text and
  /// subword information.
  /// @param histories [in]
  /// @param response [out]
  void buildQualityScores(Histories &histories, Response &response);

  /// Builds alignments from histories and writes onto response.
  /// @param histories [in]
  /// @param response [out]
  void buildAlignments(Histories &histories, Response &response);

  /// Builds translated text and subword annotations and writes onto response.
  /// @param histories [in]
  /// @param response [out]
  void buildTranslatedText(Histories &histories, Response &response);

  // Data members are context/curried args for the functor.

  ResponseOptions responseOptions_;
  std::vector<Ptr<Vocab const>> *vocabs_; // vocabs are required for decoding
                                          // and any source validation checks.
  std::promise<Response> promise_; // To be set when callback triggered and
                                   // after Response constructed.
  AnnotatedText source_;           // Moved into the Response on completion.
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_RESPONSE_BUILDER_H_

View File

@ -0,0 +1,50 @@
#ifndef SRC_BERGAMOT_RESPONSE_OPTIONS_H_
#define SRC_BERGAMOT_RESPONSE_OPTIONS_H_
#include <string>
namespace marian {
namespace bergamot {
/// How translated sentences are stitched together into the target text.
enum ConcatStrategy {
  /// Target text is constructed faithful to the source-text structure.
  FAITHFUL = 0,

  /// Target text is concatenated by a space.
  SPACE = 1
};

/// Which kind of quality score to attach to a Response.
enum QualityScoreType {
  /// Provide a free quality-score that comes with the machine-translation
  /// model itself.
  FREE = 0,

  /// An expensive quality-score that runs additional computations to
  /// determine quality of an output.
  EXPENSIVE = 1
};

/// ResponseOptions dictate how to construct a Response for an input string of
/// text to be translated. Every field has a default, so a default-constructed
/// instance is a valid set of options.
struct ResponseOptions {
  /// Include quality-scores or not.
  bool qualityScores = false;

  /// Include alignments or not.
  bool alignment = false;

  /// Whether to include sentenceMappings or not. Alignments require
  /// sentenceMappings and are available irrespective of this option if
  /// `alignment=true`.
  bool sentenceMappings = false;

  /// Threshold between `[0.0f, 1.0f]` to filter alignments into a sparse
  /// matrix. Higher value implies stronger filtering leading to provision of
  /// higher-confidence matches. `1.0f` gives argmax (not the full-dense
  /// matrix).
  float alignmentThreshold = 0.2f;

  /// Kind of quality score requested; defaults to the free, model-provided
  /// one.
  QualityScoreType qualityScoreType = QualityScoreType::FREE;

  /// Sentence concatenation strategy; defaults to source-faithful.
  ConcatStrategy concatStrategy = ConcatStrategy::FAITHFUL;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_RESPONSE_OPTIONS_H_

View File

@ -32,11 +32,11 @@ ByteRange Annotation::sentence(size_t sentenceIdx) const {
// the flatByteRange and non-empty sentence before this happened and
// construct empty string-view equivalent ByteRange.
ByteRange eos = flatByteRanges_[eosId - 1];
sentenceByteRange = (ByteRange){eos.end, eos.end};
sentenceByteRange = ByteRange{eos.end, eos.end};
} else {
ByteRange bos = flatByteRanges_[bosId];
ByteRange eos = flatByteRanges_[eosId - 1];
sentenceByteRange = (ByteRange){bos.begin, eos.end};
sentenceByteRange = ByteRange{bos.begin, eos.end};
}
return sentenceByteRange;
}
@ -56,6 +56,20 @@ string_view AnnotatedText::sentence(size_t sentenceIdx) const {
return asStringView(sentenceAsByteRange);
}
void AnnotatedText::appendSentence(std::string prefix, std::string &reference,
std::vector<string_view> &wordRanges) {
text += prefix;
size_t offset = text.size(); // Get size before to do ByteRange arithmetic
text += reference; // Append reference to text
std::vector<ByteRange> sentence;
for (auto &wordView : wordRanges) {
size_t thisWordBegin = offset + wordView.data() - &reference[0];
sentence.push_back(
ByteRange{thisWordBegin, thisWordBegin + wordView.size()});
}
annotation.addSentence(sentence);
}
/// Convenience overload: registers a sentence made of every view in
/// `wordRanges` by forwarding the full [begin, end) range to the iterator
/// overload of addSentence.
void AnnotatedText::addSentence(std::vector<string_view> &wordRanges) {
addSentence(std::begin(wordRanges), std::end(wordRanges));
};
@ -65,7 +79,7 @@ void AnnotatedText::addSentence(std::vector<string_view>::iterator begin,
std::vector<ByteRange> sentence;
for (auto p = begin; p != end; p++) {
size_t begin_offset = p->data() - &text[0];
sentence.push_back((ByteRange){begin_offset, begin_offset + p->size()});
sentence.push_back(ByteRange{begin_offset, begin_offset + p->size()});
}
annotation.addSentence(sentence);
};

View File

@ -64,7 +64,6 @@ public:
sentenceEndIds_.push_back(0);
}
/// Returns the number of sentences annotated in a text.
size_t numSentences() const { return sentenceEndIds_.size() - 1; }
/// Returns number of words in the sentence identified by `sentenceIdx`.
@ -125,10 +124,6 @@ public:
/// constructor is disallowed).
AnnotatedText(std::string &&text) : text(std::move(text)){};
AnnotatedText(AnnotatedText &&annotatedBlob)
: text(std::move(annotatedBlob.text)),
annotation(std::move(annotatedBlob.annotation)) {}
/// Returns the number of sentences in the annotation structure.
const size_t numSentences() const { return annotation.numSentences(); }
@ -137,6 +132,11 @@ public:
return annotation.numWords(sentenceIdx);
}
/// Appends a sentence to the existing text and transparently rebases
/// string_views
void appendSentence(std::string prefix, std::string &reference,
std::vector<string_view> &wordRanges);
/// Adds a sentence, used to load from SentencePiece annotations conveniently.
void addSentence(std::vector<string_view> &wordRanges);

View File

@ -112,6 +112,44 @@ void Service::async_translate() {
#endif // WASM_COMPATIBLE_SOURCE
/// Translates `input` using default-constructed ResponseOptions.
/// Forwards to the two-argument translate overload.
std::future<Response> Service::translate(std::string &&input) {
  // Hardcode responseOptions for now
  return translate(std::move(input), ResponseOptions{});
}
std::vector<Response>
Service::translateMultiple(std::vector<std::string> &&inputs,
TranslationRequest translationRequest) {
ResponseOptions responseOptions;
// TODO(jerinphilip) Set options based on TranslationRequest, if and when it
// becomes non-dummy.
// We queue the individual Requests so they get compiled at batches to be
// efficiently translated.
std::vector<std::future<Response>> responseFutures;
for (auto &input : inputs) {
std::future<Response> inputResponse =
queueRequest(std::move(input), responseOptions);
responseFutures.push_back(std::move(inputResponse));
}
// Dispatch is called once per request so compilation of sentences from
// multiple Requests happen.
dispatchTranslate();
// Now wait for all Requests to complete, the future to fire and return the
// compiled Responses, we can probably return the future, but WASM quirks(?).
std::vector<Response> responses;
for (auto &future : responseFutures) {
future.wait();
responses.push_back(std::move(future.get()));
}
return responses;
}
std::future<Response> Service::queueRequest(std::string &&input,
ResponseOptions responseOptions) {
Segments segments;
AnnotatedText source(std::move(input));
text_processor_.process(source, segments);
@ -119,17 +157,29 @@ std::future<Response> Service::translate(std::string &&input) {
std::promise<Response> responsePromise;
auto future = responsePromise.get_future();
Ptr<Request> request = New<Request>(
requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(source),
std::move(segments), std::move(responsePromise));
ResponseBuilder responseBuilder(responseOptions, std::move(source), vocabs_,
std::move(responsePromise));
Ptr<Request> request = New<Request>(requestId_++, std::move(segments),
std::move(responseBuilder));
batcher_.addWholeRequest(request);
return future;
}
/// Queues `input` as a single Request built with `responseOptions`, triggers
/// translation via dispatchTranslate(), and returns the future obtained from
/// queueRequest().
std::future<Response> Service::translate(std::string &&input,
ResponseOptions responseOptions) {
std::future<Response> future =
queueRequest(std::move(input), responseOptions);
// Dispatch only after the request has been queued.
dispatchTranslate();
return future;
}
void Service::dispatchTranslate() {
if (numWorkers_ == 0) {
blocking_translate();
} else {
async_translate();
}
return future;
}
Service::~Service() {

View File

@ -1,10 +1,12 @@
#ifndef SRC_BERGAMOT_SERVICE_H_
#define SRC_BERGAMOT_SERVICE_H_
#include "TranslationRequest.h"
#include "batch_translator.h"
#include "batcher.h"
#include "data/types.h"
#include "response.h"
#include "response_builder.h"
#include "text_processor.h"
#include "translator/parser.h"
@ -18,18 +20,33 @@
namespace marian {
namespace bergamot {
/// Service exposes methods to translate an incoming blob of text to the
/// Consumer of bergamot API.
/// Service offers methods to create an asynchronous translation service. This is
/// intended to be similar to the ones provided for training or decoding in ML
/// pipelines with the following additional capabilities:
///
/// 1. Provision of a request -> response based translation flow unlike the
/// usual line-based translation or decoding provided in most ML frameworks.
/// 2. Internal handling of normalization etc which changes source text to
/// provide to client translation meta-information like alignments consistent
/// with the unnormalized input text.
///
/// Service exposes methods to instantiate the service from a string
/// configuration (which can cover most translators) and to translate an
/// incoming blob of text.
///
///
/// An example use of this API looks as follows:
///
/// ```cpp
/// options = ...;
/// service = Service(options);
/// std::string input_text = "Hello World";
/// std::future<Response>
/// response = service.translate(std::move(input_text));
/// response.wait();
/// Response result = response.get();
/// responseFuture = service.translate(std::move(input_text));
/// responseFuture.wait(); // Wait until translation has completed.
/// Response response(std::move(responseFuture.get()));
///
/// // Do things with response.
/// ```
///
/// Optionally Service can be initialized by also passing model_memory for
/// purposes of efficiency (which defaults to nullpointer and then reads from
@ -41,9 +58,22 @@ public:
/// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
/// of a model.bin. Optional, defaults to nullptr when not used
/// @param shortlistMemory byte array of shortlist (aligned to 64)
explicit Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory);
explicit Service(Ptr<Options> options, AlignedMemory modelMemory,
AlignedMemory shortlistMemory);
/// Construct Service purely from Options. This expects options which
/// marian-decoder expects to be set for loading model shortlist and
/// vocabularies from files in addition to parameters that set or unset desired
/// features (e.g: alignments, quality-scores).
///
/// This is equivalent to a call to:
/// ```cpp
/// Service(options, AlignedMemory(), AlignedMemory())
/// ```
/// wherein empty memory is passed and internal flow defaults to file-based
/// model, shortlist loading.
explicit Service(Ptr<Options> options)
    : Service(options, AlignedMemory(), AlignedMemory()) {}
/// Construct Service from a string configuration.
/// @param [in] config string parsable as YAML expected to adhere with marian
@ -52,20 +82,55 @@ public:
/// bytes of a model.bin. Optional.
/// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
explicit Service(const std::string &config,
                 AlignedMemory modelMemory = AlignedMemory(),
                 AlignedMemory shortlistMemory = AlignedMemory())
    // Delegates to the Options-based constructor: the config string is parsed
    // with parseOptions() and both memories are moved through rather than
    // copied.
    : Service(parseOptions(config), std::move(modelMemory),
              std::move(shortlistMemory)) {}
/// Explicit destructor to clean up after any threads initialized in
/// asynchronous operation mode.
~Service();
/// To stay efficient and to refer to the string for alignments, expects
/// ownership be moved through std::move(..)
/// ownership be moved through `std::move(..)`
///
/// @param [in] rvalue reference of string to be translated.
std::future<Response> translate(std::string &&input);
/// @param [in] source: rvalue reference of string to be translated.
std::future<Response> translate(std::string &&source);
/// Translate an input, providing Options to construct Response. This is
/// useful when one has to set/unset alignments or quality in the Response to
/// save compute spent in constructing these objects.
///
/// @param [in] source: rvalue reference of the string to be translated
/// @param [in] options: Options indicating whether or not to include
/// some member in the Response, also specify any additional configurable
/// parameters.
std::future<Response> translate(std::string &&source,
ResponseOptions options);
/// Translate an input, providing TranslationRequest across all texts to
/// construct Response. Provides the browser with the ability to break texts
/// into multiple Request keeping gains from efficiently batching internally.
/// Also useful when one has to set/unset alignments or quality in the
/// Response to save compute spent in constructing these objects.
/// @param [in] source: rvalue reference of the strings to be translated
/// @param [in] translationRequest: TranslationRequest (Unified API)
/// indicating whether or not to include some member in the Response, also
/// specify any additional configurable parameters.
std::vector<Response>
translateMultiple(std::vector<std::string> &&source,
TranslationRequest translationRequest);
private:
/// Queue an input for translation.
std::future<Response> queueRequest(std::string &&input,
ResponseOptions responseOptions);
/// Dispatch call to translate after inserting in queue
void dispatchTranslate();
/// Build numTranslators number of translators with options from options
void build_translators(Ptr<Options> options, size_t numTranslators);
/// Initializes a blocking translator without using std::thread
@ -83,16 +148,17 @@ private:
void async_translate();
/// Number of workers to launch.
size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_)
size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_)
/// Model memory to load model passed as bytes.
AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_)
AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_)
/// Shortlist memory passed as bytes.
AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_)
AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_)
/// Holds instances of batch translators, just one in case
/// of single-threaded application, numWorkers_ in case of multithreaded
/// setting.
std::vector<BatchTranslator> translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
std::vector<BatchTranslator>
translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
/// Stores requestId of active request. Used to establish
/// ordering among requests and logging/book-keeping.