diff --git a/app/service-cli-bytearray.cpp b/app/service-cli-bytearray.cpp index f868d4d..d8c7059 100644 --- a/app/service-cli-bytearray.cpp +++ b/app/service-cli-bytearray.cpp @@ -27,8 +27,14 @@ int main(int argc, char *argv[]) { std::string input = std_input.str(); using marian::bergamot::Response; + marian::bergamot::ResponseOptions responseOptions; + responseOptions.qualityScores = true; + responseOptions.alignment = true; + responseOptions.alignmentThreshold = 0.2f; + // Wait on future until Response is complete - std::future responseFuture = service.translate(std::move(input)); + std::future responseFuture = + service.translate(std::move(input), responseOptions); responseFuture.wait(); Response response = responseFuture.get(); diff --git a/app/service-cli.cpp b/app/service-cli.cpp index 6ed4d81..d7c72e6 100644 --- a/app/service-cli.cpp +++ b/app/service-cli.cpp @@ -8,6 +8,7 @@ #include "marian.h" #include "translator/parser.h" #include "translator/response.h" +#include "translator/response_options.h" #include "translator/service.h" int main(int argc, char *argv[]) { @@ -21,8 +22,14 @@ int main(int argc, char *argv[]) { std::string input = std_input.str(); using marian::bergamot::Response; + marian::bergamot::ResponseOptions responseOptions; + responseOptions.qualityScores = true; + responseOptions.alignment = true; + responseOptions.alignmentThreshold = 0.2f; + // Wait on future until Response is complete - std::future responseFuture = service.translate(std::move(input)); + std::future responseFuture = + service.translate(std::move(input), responseOptions); responseFuture.wait(); Response response = responseFuture.get(); diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index cbb8369..d7c8e3c 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -6,7 +6,7 @@ add_library(bergamot-translator STATIC batch_translator.cpp request.cpp batcher.cpp - response.cpp + response_builder.cpp batch.cpp sentence_ranges.cpp 
service.cpp diff --git a/src/translator/TranslationModel.cpp b/src/translator/TranslationModel.cpp index 06b04eb..026a126 100644 --- a/src/translator/TranslationModel.cpp +++ b/src/translator/TranslationModel.cpp @@ -9,6 +9,7 @@ // All local project includes #include "TranslationModel.h" #include "translator/parser.h" +#include "translator/response.h" #include "translator/service.h" TranslationModel::TranslationModel(const std::string &config, @@ -21,31 +22,25 @@ TranslationModel::~TranslationModel() {} std::vector TranslationModel::translate(std::vector &&texts, TranslationRequest request) { - // Implementing a non-async version first. Unpleasant, but should work. - std::promise> promise; - auto future = promise.get_future(); // This code, move into async? std::vector translationResults; - for (auto &text : texts) { - // Collect future as marian::bergamot::TranslationResult - auto intermediate = service_.translate(std::move(text)); - intermediate.wait(); - auto marianResponse(std::move(intermediate.get())); - + std::vector responses = + service_.translateMultiple(std::move(texts), request); + for (auto &response : responses) { TranslationResult::SentenceMappings sentenceMappings; - for (size_t idx = 0; idx < marianResponse.size(); idx++) { - marian::string_view src = marianResponse.source.sentence(idx); - marian::string_view tgt = marianResponse.target.sentence(idx); + for (size_t idx = 0; idx < response.size(); idx++) { + marian::string_view src = response.source.sentence(idx); + marian::string_view tgt = response.target.sentence(idx); sentenceMappings.emplace_back(std::string_view(src.data(), src.size()), std::string_view(tgt.data(), tgt.size())); } // In place construction. 
translationResults.emplace_back( - std::move(marianResponse.source.text), // &&marianResponse.source_ - std::move(marianResponse.target.text), // &&marianResponse.translation_ - std::move(sentenceMappings) // &&sentenceMappings + std::move(response.source.text), // &&response.source_ + std::move(response.target.text), // &&response.translation_ + std::move(sentenceMappings) // &&sentenceMappings ); } diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index 19cbaf9..6b2425d 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -63,11 +63,14 @@ void BatchTranslator::translate(Batch &batch) { std::vector batchVector; auto &sentences = batch.sentences(); + size_t batchSequenceNumber{0}; for (auto &sentence : sentences) { - data::SentenceTuple sentence_tuple(sentence.lineNumber()); + data::SentenceTuple sentence_tuple(batchSequenceNumber); Segment segment = sentence.getUnderlyingSegment(); sentence_tuple.push_back(segment); batchVector.push_back(sentence_tuple); + + ++batchSequenceNumber; } size_t batchSize = batchVector.size(); diff --git a/src/translator/request.cpp b/src/translator/request.cpp index b6d2438..7e9b739 100644 --- a/src/translator/request.cpp +++ b/src/translator/request.cpp @@ -11,18 +11,17 @@ namespace marian { namespace bergamot { // ----------------------------------------------------------------- -Request::Request(size_t Id, size_t lineNumberBegin, - std::vector> &vocabs, AnnotatedText &&source, - Segments &&segments, std::promise responsePromise) - : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs), - source_(std::move(source)), segments_(std::move(segments)), - response_(std::move(responsePromise)) { +Request::Request(size_t Id, Segments &&segments, + ResponseBuilder &&responseBuilder) + : Id_(Id), segments_(std::move(segments)), + responseBuilder_(std::move(responseBuilder)) + +{ counter_ = segments_.size(); histories_.resize(segments_.size(), nullptr); } 
-size_t Request::lineNumberBegin() const { return lineNumberBegin_; } size_t Request::numSegments() const { return segments_.size(); } size_t Request::segmentTokens(size_t index) const { @@ -39,17 +38,10 @@ void Request::processHistory(size_t index, Ptr history) { // In case this is last request in, completeRequest is called, which sets the // value of the promise. if (--counter_ == 0) { - completeRequest(); + responseBuilder_(std::move(histories_)); } } -void Request::completeRequest() { - // Request no longer needs to hold the content, can transfer it to - // Response. - Response response(std::move(source_), std::move(histories_), *vocabs_); - response_.set_value(std::move(response)); -} - bool Request::operator<(const Request &b) const { // Among Requests, only sequence id is used for obtaining priority. return Id_ < b.Id_; @@ -64,10 +56,6 @@ size_t RequestSentence::numTokens() const { return (request_->segmentTokens(index_)); } -size_t RequestSentence::lineNumber() const { - return (request_->lineNumberBegin() + index_); -} - void RequestSentence::completeSentence(Ptr history) { // Relays completeSentence into request's processHistory, using index // information. diff --git a/src/translator/request.h b/src/translator/request.h index 605dea7..e2188cd 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -1,24 +1,9 @@ -// -// Defines: -// -// Request: holds the input text of a text, Segments (vector) which are -// to go to the batching mechanism and alignments between the processed -// segments and the input text (sourceTokenRanges). In addition, Request takes -// care of the barrier which fires when all the Segments in a request are done -// translating by the workers (BatchTranslator). -// TODO(jerinphilip): Extend Request with notions of Priority (sequence, -// user-given). -// -// RequestSentence: is a tuple of (index, Ptr). This provides the -// batching mechanism access to the segment within the request. 
The backref to -// Request allows event triggering the barrier upon completion of the last -// sentence by a worker. - #ifndef SRC_BERGAMOT_REQUEST_H_ #define SRC_BERGAMOT_REQUEST_H_ #include "definitions.h" #include "response.h" +#include "response_builder.h" #include "sentence_ranges.h" #include "common/logging.h" @@ -33,80 +18,96 @@ namespace marian { namespace bergamot { +/// A Request is an internal representation used to represent a request after +/// processed by TextProcessor into sentences constituted by marian::Words. +/// +/// The batching mechanism (Batcher) draws from multiple Requests and compiles +/// sentences into a batch. When a batch completes translation (at +/// BatchTranslator, intended in a different thread), backward propogation +/// happens through: +/// +/// ```cpp +/// Batch::completeBatch(...) +/// -> RequestSentence::completeSentence(..) +/// -> Request::processHistory(...) +/// ``` +/// +/// When all sentences in a Request are completed, responseBuilder is +/// triggered with the compiled Histories, to construct the Response +/// corresponding to the Request and set value of the promise which triggers the +/// future at client. class Request { public: - Request(size_t Id, size_t lineNumberBegin, - std::vector> &vocabs_, AnnotatedText &&source, - Segments &&segments, std::promise responsePromise); + /// Constructs an internal representation of the Request identified by Id, + /// processed Segments and accepts a callback (ResponseBuilder) which builds + /// the Response upon completion of the Request. + /// + /// + /// @param [in] Id: Identifier assigned to Request by Service. + /// @param [in] segments: Each segment is a unit to be translated. + /// @param [in] responseBuilder: Callback function (of ResponseBuilder type) + /// to be triggered upon the completion of translation of all units in a + /// Request. 
+ Request(size_t Id, Segments &&segments, ResponseBuilder &&responseBuilder); - // Obtain the count of tokens in the segment correponding to index. Used to - // insert sentence from multiple requests into the corresponding size bucket. + /// Obtain the count of tokens in the segment correponding to index. Used to + /// insert sentence from multiple requests into the corresponding size bucket. size_t segmentTokens(size_t index) const; - // Obtain number of segments in a request. + /// Obtain number of segments in a request. size_t numSegments() const; - size_t lineNumberBegin() const; - // Obtains segment corresponding to index to create a batch of segments among - // several requests. + /// Obtains segment corresponding to index to create a batch of segments + /// among several requests. Segment getSegment(size_t index) const; - // For notions of priority among requests, used to enable std::set in - // Batcher. + /// For notions of priority among requests, used to enable std::set in + /// Batcher. bool operator<(const Request &request) const; - // Processes a history obtained after translating in a heterogenous batch - // compiled from requests. + /// Processes a history obtained after translating in a heterogenous batch + /// compiled from requests. void processHistory(size_t index, Ptr history); - // On completion of last segment, sets value of the promise. - void completeRequest(); - private: size_t Id_; - size_t lineNumberBegin_; - // Multiple translation-workers can concurrently access the same Request. The - // following atomic atomically operates on the variable holding sentences - // remaining to be translated. + /// Multiple translation-workers can concurrently access the same Request. The + /// following atomic atomically operates on the variable holding sentences + /// remaining to be translated. std::atomic counter_; - // source_ holds the source string to be translated. segments_ hold the - // sentences generated from source_ in vector. 
sourceRanges_ are - // string_views of the text corresponding to these words, pointing to - // sequences in source_. histories_ is a buffer which eventually stores the - // translations of each segment in the corresponding index. - AnnotatedText source_; + /// segments_ hold the sentences processed into Words which generated from + /// input string. Segments segments_; + + /// histories_ is a buffer which eventually stores the translations of each + /// segment in the corresponding index. std::vector> histories_; - // Members above are moved into newly constructed Response on completion - // of translation of all segments. The promise below is set to this Response - // value. future to this promise is made available to the user through - // Service. - std::promise response_; - - // Constructing Response requires the vocabs_ used to generate Request. - std::vector> *vocabs_; + /// Constructing Response requires the vocabs_ used to generate Request. + /// std::vector> *vocabs_; + ResponseBuilder responseBuilder_; }; +/// A RequestSentence provides a view to a sentence within a Request. Existence +/// of this class allows the sentences and associated information to be kept +/// within Request, while batching mechanism (Batcher) compiles Batch from +/// RequestSentence-s coming from different Requests. class RequestSentence { - // A RequestSentence provides a view to a sentence within a Request. Existence - // of this class allows the sentences and associated information to be kept - // within Request. public: RequestSentence(size_t, Ptr); + + /// Number of tokens in the segment this RequestSentence represents. Used to + /// order by length in batching. size_t numTokens() const; - // lineNumber in Request, used for matching marian-decoder. SentenceTuple - // requires lineNumber to be set for Corpus based batches. - size_t lineNumber() const; - - // Accessor to the segment represented by the RequestSentence. 
+ /// Accessor to the segment represented by the RequestSentence. Segment getUnderlyingSegment() const; - // Forwards call to Request, checking for completion. + /// Forwards history to Request to set history corresponding to this + /// RequestSentence. void completeSentence(Ptr history); friend bool operator<(const RequestSentence &a, const RequestSentence &b); diff --git a/src/translator/response.cpp b/src/translator/response.cpp deleted file mode 100644 index e5bc38f..0000000 --- a/src/translator/response.cpp +++ /dev/null @@ -1,106 +0,0 @@ -#include "response.h" -#include "common/logging.h" -#include "data/alignment.h" -#include "sentence_ranges.h" - -#include - -namespace marian { -namespace bergamot { - -Response::Response(AnnotatedText &&source, Histories &&histories, - std::vector> &vocabs) - : source(std::move(source)) { - // Reserving length at least as much as source_ seems like a reasonable thing - // to do to avoid reallocations. - target.text.reserve(source.text.size()); - - // In a first step, the decoded units (individual senteneces) are compiled - // into a huge string. This is done by computing indices first and appending - // to the string as each sentences are decoded. 
- std::vector> translationRanges; - std::vector sentenceBegins; - - size_t offset{0}; - bool first{true}; - - for (auto &history : histories) { - // TODO(jerin): Change hardcode of nBest = 1 - NBestList onebest = history->nBest(1); - - Result result = onebest[0]; // Expecting only one result; - Words words = std::get<0>(result); - auto targetVocab = vocabs.back(); - - std::string decoded; - std::vector targetMappings; - targetVocab->decodeWithByteRanges(words, decoded, targetMappings); - - if (first) { - first = false; - } else { - target.text += " "; - ++offset; - } - - sentenceBegins.push_back(translationRanges.size()); - target.text += decoded; - auto decodedStringBeginMarker = targetMappings.front().begin(); - for (auto &sview : targetMappings) { - size_t startIdx = offset + sview.begin() - decodedStringBeginMarker; - translationRanges.emplace_back(startIdx, startIdx + sview.size()); - } - - offset += decoded.size(); - - // Alignments - // TODO(jerinphilip): The following double conversion might not be - // necessary. Hard alignment can directly be exported, but this would mean - // WASM bindings for a structure deep within marian source. - auto hyp = std::get<1>(result); - auto softAlignment = hyp->tracebackAlignment(); - auto hardAlignment = data::ConvertSoftAlignToHardAlign( - softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a - // configurable parameter. - - Alignment unified_alignment; - for (auto &p : hardAlignment) { - unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob}); - } - - alignments.push_back(std::move(unified_alignment)); - - // Quality scores: Sequence level is obtained as normalized path scores. - // Word level using hypothesis traceback. These are most-likely logprobs. 
- auto normalizedPathScore = std::get<2>(result); - auto wordQualities = hyp->tracebackWordScores(); - wordQualities.pop_back(); - qualityScores.push_back((Quality){normalizedPathScore, wordQualities}); - } - - // Once we have the indices in translation (which might be resized a few - // times) ready, we can prepare and store the string_view as annotations - // instead. This is accomplished by iterating over available sentences using - // sentenceBegin and using addSentence(...) API from Annotation. - - for (size_t i = 1; i <= sentenceBegins.size(); i++) { - std::vector targetMappings; - size_t begin = sentenceBegins[i - 1]; - size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size() - : sentenceBegins[i]; - - for (size_t idx = begin; idx < safe_end; idx++) { - auto &p = translationRanges[idx]; - size_t begin_idx = p.first; - size_t end_idx = p.second; - - const char *data = &target.text[begin_idx]; - size_t size = end_idx - begin_idx; - targetMappings.emplace_back(data, size); - } - - target.addSentence(targetMappings); - } -} -} // namespace bergamot -} // namespace marian diff --git a/src/translator/response.h b/src/translator/response.h index 4f87b8d..3b1f48d 100644 --- a/src/translator/response.h +++ b/src/translator/response.h @@ -40,34 +40,12 @@ struct Quality { /// AnnotatedText provides an API to access markings of (sub)-word and /// sentences boundaries, which are required to interpret Quality and /// Alignment (s) at the moment. -class Response { - -public: - /// - Response(AnnotatedText &&source, Histories &&histories, - std::vector> &vocabs); - - /// \cond HIDDEN_PUBLIC - // Move constructor. 
- Response(Response &&other) - : source(std::move(other.source)), target(std::move(other.target)), - alignments(std::move(other.alignments)), - qualityScores(std::move(other.qualityScores)){}; - - // The following copy bans are not stricitly required anymore since Annotation - // is composed of the ByteRange primitive (which was previously string_view - // and required to be bound to string), but makes movement efficient by - // banning these letting compiler complain about copies. - - Response(const Response &) = delete; - Response &operator=(const Response &) = delete; - - /// \endcond - - /// Number of sentences translated. The processing of a text of into sentences - /// are handled internally, and this information can be used to iterate - /// through meaningful units of translation for which alignment and quality - /// information are available. +struct Response { + /// Convenience function to obtain number of units translated. Same as + /// `.source.numSentences()` and `.target.numSentences().` The processing of a + /// text of into sentences are handled internally, and this information can be + /// used to iterate through meaningful units of translation for which + /// alignment and quality information are available. const size_t size() const { return source.numSentences(); } /// source text and annotations of (sub-)words and sentences. 
diff --git a/src/translator/response_builder.cpp b/src/translator/response_builder.cpp new file mode 100644 index 0000000..c624707 --- /dev/null +++ b/src/translator/response_builder.cpp @@ -0,0 +1,87 @@ +#include "response_builder.h" + +namespace marian { +namespace bergamot { + +void ResponseBuilder::buildQualityScores(Histories &histories, + Response &response) { + std::vector qualityScores; + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + auto hyp = std::get<1>(result); + // Quality scores: Sequence level is obtained as normalized path scores. + // Word level using hypothesis traceback. These are most-likely + // logprobs. + auto normalizedPathScore = std::get<2>(result); + auto wordQualities = hyp->tracebackWordScores(); + wordQualities.pop_back(); + response.qualityScores.push_back( + Quality{normalizedPathScore, wordQualities}); + } +} + +void ResponseBuilder::buildAlignments(Histories &histories, + Response &response) { + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + // Alignments + // TODO(jerinphilip): The following double conversion might not be + // necessary. Hard alignment can directly be exported, but this would + // mean WASM bindings for a structure deep within marian source. 
+ auto hyp = std::get<1>(result); + auto softAlignment = hyp->tracebackAlignment(); + auto threshold = responseOptions_.alignmentThreshold; + auto hardAlignment = + data::ConvertSoftAlignToHardAlign(softAlignment, threshold); + Alignment unified_alignment; + for (auto &p : hardAlignment) { + unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob}); + } + + response.alignments.push_back(std::move(unified_alignment)); + } +} + +void ResponseBuilder::buildTranslatedText(Histories &histories, + Response &response) { + // Reserving length at least as much as source_ seems like a reasonable + // thing to do to avoid reallocations. + response.target.text.reserve(response.source.text.size()); + + size_t offset{0}; + bool first{true}; + + for (auto &history : histories) { + // TODO(jerin): Change hardcode of nBest = 1 + NBestList onebest = history->nBest(1); + + Result result = onebest[0]; // Expecting only one result; + Words words = std::get<0>(result); + auto targetVocab = vocabs_->back(); + + std::string decoded; + std::vector targetSentenceMappings; + targetVocab->decodeWithByteRanges(words, decoded, targetSentenceMappings); + + // delimiter can be used to fill in the blanks from source as well. + std::string delimiter; + if (first) { + first = false; + } else { + delimiter = " "; + } + + response.target.appendSentence(delimiter, decoded, targetSentenceMappings); + } +} + +} // namespace bergamot +} // namespace marian diff --git a/src/translator/response_builder.h b/src/translator/response_builder.h new file mode 100644 index 0000000..85caffb --- /dev/null +++ b/src/translator/response_builder.h @@ -0,0 +1,93 @@ +#ifndef SRC_BERGAMOT_RESPONSE_BUILDER_H_ +#define SRC_BERGAMOT_RESPONSE_BUILDER_H_ + +#include "data/types.h" +#include "response.h" +#include "response_options.h" + +// For now we will work with this, to avoid complaints another structure is hard +// to operate with. 
+ +namespace marian { +namespace bergamot { + +/// ResponseBuilder is a callback functor. It is expected to be bound to a +/// Request after giving it the context of options, vocabs and promise to set. +/// It constructs the Response and it's members based on options +/// (quality=on|off, alignments=on|off, mappings=on|off, splitmode=sentence | +/// paragraph). + +class ResponseBuilder { +public: + /// @param [in] responseOptions: ResponseOptions, indicating what to include + /// or not in the response and any additional configurable parameters. + /// @param [in] vocabs: marian vocab object (used in decoding) + /// @param [in] promise: promise to set with the constructed Response. + ResponseBuilder(ResponseOptions responseOptions, AnnotatedText &&source, + std::vector> &vocabs, + std::promise &&promise) + : responseOptions_(responseOptions), source_(std::move(source)), + vocabs_(&vocabs), promise_(std::move(promise)) {} + + /// Constructs and sets the promise of a Response object from obtained + /// histories after translating. + /// @param [in] histories: Histories obtained after translating the Request + /// from which this functor is called. + void operator()(Histories &&histories) { + // TODO(jerinphilip) load ResponseOptions into options and turn build + // functions on or off. + // responseOptions_ is unused, but we can try something here. + ABORT_IF(source_.numSentences() != histories.size(), + "Mismatch in source and translated sentences"); + Response response; + + // Move source_ into response. + response.source = std::move(source_); + + // Should be after source is set + buildTranslatedText(histories, response); + + // Should always be after buildTranslatedText + if (responseOptions_.qualityScores) { + buildQualityScores(histories, response); + } + + if (responseOptions_.alignment) { + buildAlignments(histories, response); + } + + // Once complete, set promise. 
+ promise_.set_value(std::move(response)); + } + +private: + /// Builds qualityScores from histories and writes to response. expects + /// buildTranslatedText to be run before to be able to obtain target text and + /// subword information. + /// @param histories [in] + /// @param response [out] + void buildQualityScores(Histories &histories, Response &response); + + /// Builds alignments from histories and writes onto response. + /// @param histories [in] + /// @param response [out] + void buildAlignments(Histories &histories, Response &response); + + /// Builds translated text and subword annotations and writes onto response. + /// @param histories [in] + /// @param response [out] + void buildTranslatedText(Histories &histories, Response &response); + + // Data members are context/curried args for the functor. + + ResponseOptions responseOptions_; + std::vector> *vocabs_; // vocabs are required for decoding + // and any source validation checks. + std::promise promise_; // To be set when callback triggered and + // after Response constructed. + AnnotatedText source_; +}; +} // namespace bergamot +} // namespace marian + +#endif // SRC_BERGAMOT_RESPONSE_BUILDER_H_ diff --git a/src/translator/response_options.h b/src/translator/response_options.h new file mode 100644 index 0000000..ed3cce3 --- /dev/null +++ b/src/translator/response_options.h @@ -0,0 +1,50 @@ +#ifndef SRC_BERGAMOT_RESPONSE_OPTIONS_H_ +#define SRC_BERGAMOT_RESPONSE_OPTIONS_H_ +#include + +namespace marian { +namespace bergamot { + +enum ConcatStrategy { + /// Target text is constructed faithful to the source-text structure. + FAITHFUL, + + /// Target text is concatenated by a space. + SPACE +}; + +enum QualityScoreType { + /// Provide a free quality-score that comes with the machine-translation model + /// itself. + FREE, + + /// An expensive quality-score that runs additional computations to determine + /// quality of an output. 
+ EXPENSIVE +}; + +/// ResponseOptions dictate how to construct a Response for an input string of +/// text to be translated. +struct ResponseOptions { + bool qualityScores{false}; ///< Include quality-scores or not. + bool alignment{false}; ///< Include alignments or not. + + /// Whether to include sentenceMappings or not. Alignments require + /// sentenceMappings and are available irrespective of this option if + /// `alignment=true`. + bool sentenceMappings{false}; + + /// Threshold between `[0.0f, 1.0f]` to filter alignments into a sparse + /// matrix. Higher value implies stronger filtering leading to provision of + /// higher-confidence matches. `1.0f` gives argmax (not the full-dense + /// matrix). + float alignmentThreshold{0.2f}; + + QualityScoreType qualityScoreType{QualityScoreType::FREE}; + ConcatStrategy concatStrategy{ConcatStrategy::FAITHFUL}; +}; + +} // namespace bergamot +} // namespace marian + +#endif // SRC_BERGAMOT_RESPONSE_OPTIONS_H_ diff --git a/src/translator/sentence_ranges.cpp b/src/translator/sentence_ranges.cpp index aae9dd3..da9d3ee 100644 --- a/src/translator/sentence_ranges.cpp +++ b/src/translator/sentence_ranges.cpp @@ -32,11 +32,11 @@ ByteRange Annotation::sentence(size_t sentenceIdx) const { // the flatByteRange and non-empty sentence before this happened and // construct empty string-view equivalent ByteRange. 
ByteRange eos = flatByteRanges_[eosId - 1]; - sentenceByteRange = (ByteRange){eos.end, eos.end}; + sentenceByteRange = ByteRange{eos.end, eos.end}; } else { ByteRange bos = flatByteRanges_[bosId]; ByteRange eos = flatByteRanges_[eosId - 1]; - sentenceByteRange = (ByteRange){bos.begin, eos.end}; + sentenceByteRange = ByteRange{bos.begin, eos.end}; } return sentenceByteRange; } @@ -56,6 +56,20 @@ string_view AnnotatedText::sentence(size_t sentenceIdx) const { return asStringView(sentenceAsByteRange); } +void AnnotatedText::appendSentence(std::string prefix, std::string &reference, + std::vector &wordRanges) { + text += prefix; + size_t offset = text.size(); // Get size before to do ByteRange arithmetic + text += reference; // Append reference to text + std::vector sentence; + for (auto &wordView : wordRanges) { + size_t thisWordBegin = offset + wordView.data() - &reference[0]; + sentence.push_back( + ByteRange{thisWordBegin, thisWordBegin + wordView.size()}); + } + annotation.addSentence(sentence); +} + void AnnotatedText::addSentence(std::vector &wordRanges) { addSentence(std::begin(wordRanges), std::end(wordRanges)); }; @@ -65,7 +79,7 @@ void AnnotatedText::addSentence(std::vector::iterator begin, std::vector sentence; for (auto p = begin; p != end; p++) { size_t begin_offset = p->data() - &text[0]; - sentence.push_back((ByteRange){begin_offset, begin_offset + p->size()}); + sentence.push_back(ByteRange{begin_offset, begin_offset + p->size()}); } annotation.addSentence(sentence); }; diff --git a/src/translator/sentence_ranges.h b/src/translator/sentence_ranges.h index b3986e3..f9c881e 100644 --- a/src/translator/sentence_ranges.h +++ b/src/translator/sentence_ranges.h @@ -64,7 +64,6 @@ public: sentenceEndIds_.push_back(0); } - /// Returns the number of sentences annotated in a text. size_t numSentences() const { return sentenceEndIds_.size() - 1; } /// Returns number of words in the sentence identified by `sentenceIdx`. 
@@ -125,10 +124,6 @@ public: /// constructor is disallowed). AnnotatedText(std::string &&text) : text(std::move(text)){}; - AnnotatedText(AnnotatedText &&annotatedBlob) - : text(std::move(annotatedBlob.text)), - annotation(std::move(annotatedBlob.annotation)) {} - /// Returns the number of sentences in the annotation structure. const size_t numSentences() const { return annotation.numSentences(); } @@ -137,6 +132,11 @@ public: return annotation.numWords(sentenceIdx); } + /// Appends a sentence to the existing text and transparently rebases + /// string_views + void appendSentence(std::string prefix, std::string &reference, + std::vector &wordRanges); + /// Adds a sentence, used to load from SentencePiece annotations conveniently. void addSentence(std::vector &wordRanges); diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 76bcba2..f676797 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -112,6 +112,44 @@ void Service::async_translate() { #endif // WASM_COMPATIBLE_SOURCE std::future Service::translate(std::string &&input) { + ResponseOptions responseOptions; // Hardcode responseOptions for now + return translate(std::move(input), responseOptions); +} + +std::vector +Service::translateMultiple(std::vector &&inputs, + TranslationRequest translationRequest) { + ResponseOptions responseOptions; + + // TODO(jerinphilip) Set options based on TranslationRequest, if and when it + // becomes non-dummy. + + // We queue the individual Requests so they get compiled at batches to be + // efficiently translated. + std::vector> responseFutures; + for (auto &input : inputs) { + std::future inputResponse = + queueRequest(std::move(input), responseOptions); + responseFutures.push_back(std::move(inputResponse)); + } + + // Dispatch is called once per request so compilation of sentences from + // multiple Requests happen. 
+ dispatchTranslate(); + + // Now wait for all Requests to complete, the future to fire and return the + // compiled Responses, we can probably return the future, but WASM quirks(?). + std::vector responses; + for (auto &future : responseFutures) { + future.wait(); + responses.push_back(std::move(future.get())); + } + + return responses; +} + +std::future Service::queueRequest(std::string &&input, + ResponseOptions responseOptions) { Segments segments; AnnotatedText source(std::move(input)); text_processor_.process(source, segments); @@ -119,17 +157,29 @@ std::future Service::translate(std::string &&input) { std::promise responsePromise; auto future = responsePromise.get_future(); - Ptr request = New( - requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(source), - std::move(segments), std::move(responsePromise)); + ResponseBuilder responseBuilder(responseOptions, std::move(source), vocabs_, + std::move(responsePromise)); + Ptr request = New(requestId_++, std::move(segments), + std::move(responseBuilder)); batcher_.addWholeRequest(request); + return future; +} + +std::future Service::translate(std::string &&input, + ResponseOptions responseOptions) { + std::future future = + queueRequest(std::move(input), responseOptions); + dispatchTranslate(); + return future; +} + +void Service::dispatchTranslate() { if (numWorkers_ == 0) { blocking_translate(); } else { async_translate(); } - return future; } Service::~Service() { diff --git a/src/translator/service.h b/src/translator/service.h index 72f6d92..476be28 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -1,10 +1,12 @@ #ifndef SRC_BERGAMOT_SERVICE_H_ #define SRC_BERGAMOT_SERVICE_H_ +#include "TranslationRequest.h" #include "batch_translator.h" #include "batcher.h" #include "data/types.h" #include "response.h" +#include "response_builder.h" #include "text_processor.h" #include "translator/parser.h" @@ -18,18 +20,33 @@ namespace marian { namespace bergamot { -/// Service exposes methods 
to translate an incoming blob of text to the -/// Consumer of bergamot API. +/// Service offers methods to create an asynchronous translation service. This is +/// intended to be similar to the ones provided for training or decoding in ML +/// pipelines with the following additional capabilities: +/// +/// 1. Provision of a request -> response based translation flow unlike the +/// usual line-based translation or decoding provided in most ML frameworks. +/// 2. Internal handling of normalization etc which changes source text to +/// provide to client translation meta-information like alignments consistent +/// with the unnormalized input text. +/// +/// Service exposes methods to instantiate the service from a string +/// configuration (which can cover most translators) and to translate an +/// incoming blob of text. +/// +/// /// An example use of this API looks as follows: -/// +/// ```cpp /// options = ...; /// service = Service(options); /// std::string input_text = "Hello World"; /// std::future -/// response = service.translate(std::move(input_text)); -/// response.wait(); -/// Response result = response.get(); +/// responseFuture = service.translate(std::move(input_text)); +/// responseFuture.wait(); // Wait until translation has completed. +/// Response response = responseFuture.get(); +/// +/// // Do things with response. +/// ``` /// /// Optionally Service can be initialized by also passing model_memory for /// purposes of efficiency (which defaults to nullpointer and then reads from @@ -41,9 +58,22 @@ public: /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes /// of a model.bin.
Optional, defaults to nullptr when not used /// @param shortlistMemory byte array of shortlist (aligned to 64) - explicit Service(Ptr options, AlignedMemory modelMemory, AlignedMemory shortlistMemory); + explicit Service(Ptr options, AlignedMemory modelMemory, + AlignedMemory shortlistMemory); - explicit Service(Ptr options) : Service(options, AlignedMemory(), AlignedMemory()){} + /// Construct Service purely from Options. This expects options which + /// marian-decoder expects to be set for loading model, shortlist and + /// vocabularies from files in addition to parameters that set/unset desired + /// features (e.g., alignments, quality-scores). + /// + /// This is equivalent to a call to: + /// ```cpp + /// Service(options, AlignedMemory(), AlignedMemory()) + /// ``` + /// wherein empty memory is passed and internal flow defaults to file-based + /// model, shortlist loading. + explicit Service(Ptr options) + : Service(options, AlignedMemory(), AlignedMemory()) {} /// Construct Service from a string configuration. /// @param [in] config string parsable as YAML expected to adhere with marian @@ -52,20 +82,55 @@ public: /// bytes of a model.bin. Optional. /// @param [in] shortlistMemory byte array of shortlist (aligned to 64) explicit Service(const std::string &config, - AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory()) - : Service(parseOptions(config), std::move(modelMemory), std::move(shortlistMemory)) {} + AlignedMemory modelMemory = AlignedMemory(), + AlignedMemory shortlistMemory = AlignedMemory()) + : Service(parseOptions(config), std::move(modelMemory), + std::move(shortlistMemory)) {} /// Explicit destructor to clean up after any threads initialized in /// asynchronous operation mode. ~Service(); /// To stay efficient and to refer to the string for alignments, expects - /// ownership be moved through std::move(..)
+ /// ownership be moved through `std::move(..)` /// - /// @param [in] rvalue reference of string to be translated. - std::future translate(std::string &&input); + /// @param [in] source: rvalue reference of string to be translated. + std::future translate(std::string &&source); + + /// Translate an input, providing Options to construct Response. This is + /// useful when one has to set/unset alignments or quality in the Response to + /// save compute spent in constructing these objects. + /// + /// @param [in] source: rvalue reference of the string to be translated + /// @param [in] options: Options indicating whether or not to include + /// some member in the Response, also specify any additional configurable + /// parameters. + std::future translate(std::string &&source, + ResponseOptions options); + + /// Translate multiple inputs, providing TranslationRequest across all texts to + /// construct Responses. Provides the browser with the ability to break texts + /// into multiple Requests, keeping gains from efficiently batching internally. + /// Also useful when one has to set/unset alignments or quality in the + /// Response to save compute spent in constructing these objects. + + /// @param [in] source: rvalue reference of the strings to be translated + /// @param [in] translationRequest: TranslationRequest (Unified API) + /// indicating whether or not to include some member in the Response, also + /// specify any additional configurable parameters. + + std::vector + translateMultiple(std::vector &&source, + TranslationRequest translationRequest); private: + /// Queue an input for translation.
+ std::future queueRequest(std::string &&input, + ResponseOptions responseOptions); + + /// Dispatch call to translate after inserting in queue + void dispatchTranslate(); + /// Build numTranslators number of translators with options from options void build_translators(Ptr options, size_t numTranslators); /// Initializes a blocking translator without using std::thread @@ -83,16 +148,17 @@ private: void async_translate(); /// Number of workers to launch. - size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_) + size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_) /// Model memory to load model passed as bytes. - AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_) + AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_) /// Shortlist memory passed as bytes. - AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_) + AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_) /// Holds instances of batch translators, just one in case /// of single-threaded application, numWorkers_ in case of multithreaded /// setting. - std::vector translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_) + std::vector + translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_) /// Stores requestId of active request. Used to establish /// ordering among requests and logging/book-keeping.