Cleanup API: Refactor request on-complete transition (#80)

2024-08-15 08:30:46 +03:00 · 2021-04-27 15:56:39 +01:00 · 2021-04-27 15:56:39 +01:00 · fa2003e70d
commit fa2003e70d
parent fdf9e66cef
16 changed files with 492 additions and 260 deletions
--- a/app/service-cli-bytearray.cpp
+++ b/app/service-cli-bytearray.cpp
@ -27,8 +27,14 @@ int main(int argc, char *argv[]) {
  std::string input = std_input.str();
  using marian::bergamot::Response;

+  marian::bergamot::ResponseOptions responseOptions;
+  responseOptions.qualityScores = true;
+  responseOptions.alignment = true;
+  responseOptions.alignmentThreshold = 0.2f;
+
  // Wait on future until Response is complete
-  std::future<Response> responseFuture = service.translate(std::move(input));
+  std::future<Response> responseFuture =
+      service.translate(std::move(input), responseOptions);
  responseFuture.wait();
  Response response = responseFuture.get();

--- a/app/service-cli.cpp
+++ b/app/service-cli.cpp
@ -8,6 +8,7 @@
 #include "marian.h"
 #include "translator/parser.h"
 #include "translator/response.h"
+#include "translator/response_options.h"
 #include "translator/service.h"

 int main(int argc, char *argv[]) {
@ -21,8 +22,14 @@ int main(int argc, char *argv[]) {
  std::string input = std_input.str();
  using marian::bergamot::Response;

+  marian::bergamot::ResponseOptions responseOptions;
+  responseOptions.qualityScores = true;
+  responseOptions.alignment = true;
+  responseOptions.alignmentThreshold = 0.2f;
+
  // Wait on future until Response is complete
-  std::future<Response> responseFuture = service.translate(std::move(input));
+  std::future<Response> responseFuture =
+      service.translate(std::move(input), responseOptions);
  responseFuture.wait();
  Response response = responseFuture.get();

--- a/src/translator/CMakeLists.txt
+++ b/src/translator/CMakeLists.txt
@ -6,7 +6,7 @@ add_library(bergamot-translator STATIC
    batch_translator.cpp 
    request.cpp 
    batcher.cpp
-    response.cpp
+    response_builder.cpp
    batch.cpp
    sentence_ranges.cpp
    service.cpp
--- a/src/translator/TranslationModel.cpp
+++ b/src/translator/TranslationModel.cpp
@ -9,6 +9,7 @@
 // All local project includes
 #include "TranslationModel.h"
 #include "translator/parser.h"
+#include "translator/response.h"
 #include "translator/service.h"

 TranslationModel::TranslationModel(const std::string &config,
@ -21,31 +22,25 @@ TranslationModel::~TranslationModel() {}
 std::vector<TranslationResult>
 TranslationModel::translate(std::vector<std::string> &&texts,
                            TranslationRequest request) {
-  // Implementing a non-async version first. Unpleasant, but should work.
-  std::promise<std::vector<TranslationResult>> promise;
-  auto future = promise.get_future();

  // This code, move into async?
  std::vector<TranslationResult> translationResults;
-  for (auto &text : texts) {
-    // Collect future as marian::bergamot::TranslationResult
-    auto intermediate = service_.translate(std::move(text));
-    intermediate.wait();
-    auto marianResponse(std::move(intermediate.get()));
-
+  std::vector<marian::bergamot::Response> responses =
+      service_.translateMultiple(std::move(texts), request);
+  for (auto &response : responses) {
    TranslationResult::SentenceMappings sentenceMappings;
-    for (size_t idx = 0; idx < marianResponse.size(); idx++) {
-      marian::string_view src = marianResponse.source.sentence(idx);
-      marian::string_view tgt = marianResponse.target.sentence(idx);
+    for (size_t idx = 0; idx < response.size(); idx++) {
+      marian::string_view src = response.source.sentence(idx);
+      marian::string_view tgt = response.target.sentence(idx);
      sentenceMappings.emplace_back(std::string_view(src.data(), src.size()),
                                    std::string_view(tgt.data(), tgt.size()));
    }

    // In place construction.
    translationResults.emplace_back(
-        std::move(marianResponse.source.text), // &&marianResponse.source_
-        std::move(marianResponse.target.text), // &&marianResponse.translation_
-        std::move(sentenceMappings)            // &&sentenceMappings
+        std::move(response.source.text), // &&response.source_
+        std::move(response.target.text), // &&response.translation_
+        std::move(sentenceMappings)      // &&sentenceMappings
    );
  }

--- a/src/translator/batch_translator.cpp
+++ b/src/translator/batch_translator.cpp
@ -63,11 +63,14 @@ void BatchTranslator::translate(Batch &batch) {
  std::vector<data::SentenceTuple> batchVector;

  auto &sentences = batch.sentences();
+  size_t batchSequenceNumber{0};
  for (auto &sentence : sentences) {
-    data::SentenceTuple sentence_tuple(sentence.lineNumber());
+    data::SentenceTuple sentence_tuple(batchSequenceNumber);
    Segment segment = sentence.getUnderlyingSegment();
    sentence_tuple.push_back(segment);
    batchVector.push_back(sentence_tuple);
+
+    ++batchSequenceNumber;
  }

  size_t batchSize = batchVector.size();
--- a/src/translator/request.cpp
+++ b/src/translator/request.cpp
@ -11,18 +11,17 @@ namespace marian {
 namespace bergamot {

 // -----------------------------------------------------------------
-Request::Request(size_t Id, size_t lineNumberBegin,
-                 std::vector<Ptr<Vocab const>> &vocabs, AnnotatedText &&source,
-                 Segments &&segments, std::promise<Response> responsePromise)
-    : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs),
-      source_(std::move(source)), segments_(std::move(segments)),
-      response_(std::move(responsePromise)) {
+Request::Request(size_t Id, Segments &&segments,
+                 ResponseBuilder &&responseBuilder)
+    : Id_(Id), segments_(std::move(segments)),
+      responseBuilder_(std::move(responseBuilder))
+
+{

  counter_ = segments_.size();
  histories_.resize(segments_.size(), nullptr);
 }

-size_t Request::lineNumberBegin() const { return lineNumberBegin_; }
 size_t Request::numSegments() const { return segments_.size(); }

 size_t Request::segmentTokens(size_t index) const {
@ -39,17 +38,10 @@ void Request::processHistory(size_t index, Ptr<History> history) {
  // In case this is last request in, completeRequest is called, which sets the
  // value of the promise.
  if (--counter_ == 0) {
-    completeRequest();
+    responseBuilder_(std::move(histories_));
  }
 }

-void Request::completeRequest() {
-  // Request no longer needs to hold the content, can transfer it to
-  // Response.
-  Response response(std::move(source_), std::move(histories_), *vocabs_);
-  response_.set_value(std::move(response));
-}
-
 bool Request::operator<(const Request &b) const {
  // Among Requests, only sequence id is used for obtaining priority.
  return Id_ < b.Id_;
@ -64,10 +56,6 @@ size_t RequestSentence::numTokens() const {
  return (request_->segmentTokens(index_));
 }

-size_t RequestSentence::lineNumber() const {
-  return (request_->lineNumberBegin() + index_);
-}
-
 void RequestSentence::completeSentence(Ptr<History> history) {
  // Relays completeSentence into request's processHistory, using index
  // information.
--- a/src/translator/request.h
+++ b/src/translator/request.h
@ -1,24 +1,9 @@
-//
-// Defines:
-//
-// Request: holds the input text of a text, Segments (vector<Words>) which are
-// to go to the batching mechanism and alignments between the processed
-// segments and the input text (sourceTokenRanges). In addition, Request takes
-// care of the barrier which fires when all the Segments in a request are done
-// translating by the workers (BatchTranslator).
-// TODO(jerinphilip):  Extend Request with notions of Priority (sequence,
-// user-given).
-//
-// RequestSentence: is a tuple of (index, Ptr<Request>). This provides the
-// batching mechanism access to the segment within the request. The backref to
-// Request allows event triggering the barrier upon completion of the last
-// sentence by a worker.
-
 #ifndef SRC_BERGAMOT_REQUEST_H_
 #define SRC_BERGAMOT_REQUEST_H_

 #include "definitions.h"
 #include "response.h"
+#include "response_builder.h"
 #include "sentence_ranges.h"

 #include "common/logging.h"
@ -33,80 +18,96 @@
 namespace marian {
 namespace bergamot {

+/// A Request is an internal representation used to represent a request after
+/// processed by TextProcessor into sentences constituted by marian::Words.
+///
+/// The batching mechanism (Batcher) draws from multiple Requests and compiles
+/// sentences into a batch. When a batch completes translation (at
+/// BatchTranslator, intended in a different thread), backward propagation
+/// happens through:
+///
+/// ```cpp
+///   Batch::completeBatch(...)
+///       -> RequestSentence::completeSentence(..)
+///          -> Request::processHistory(...)
+/// ```
+///
+/// When all sentences in a Request are completed, responseBuilder is
+/// triggered with the compiled Histories, to construct the Response
+/// corresponding to the Request and set value of the promise which triggers the
+/// future at client.
 class Request {
 public:
-  Request(size_t Id, size_t lineNumberBegin,
-          std::vector<Ptr<Vocab const>> &vocabs_, AnnotatedText &&source,
-          Segments &&segments, std::promise<Response> responsePromise);
+  /// Constructs an internal representation of the Request identified by Id,
+  /// processed Segments and accepts a callback (ResponseBuilder) which builds
+  /// the Response upon completion of the Request.
+  ///
+  ///
+  /// @param [in] Id: Identifier assigned to Request by Service.
+  /// @param [in] segments: Each segment is a unit to be translated.
+  /// @param [in] responseBuilder: Callback function (of ResponseBuilder type)
+  /// to be triggered upon the completion of translation of all units in a
+  /// Request.
+  Request(size_t Id, Segments &&segments, ResponseBuilder &&responseBuilder);

-  // Obtain the count of tokens in the segment correponding to index. Used to
-  // insert sentence from multiple requests into the corresponding size bucket.
+  /// Obtain the count of tokens in the segment corresponding to index. Used
+  /// to insert sentences from multiple requests into the matching size bucket.
  size_t segmentTokens(size_t index) const;

-  // Obtain number of segments in a request.
+  /// Obtain number of segments in a request.
  size_t numSegments() const;
-  size_t lineNumberBegin() const;

-  // Obtains segment corresponding to index  to create a batch of segments among
-  // several requests.
+  /// Obtains segment corresponding to index  to create a batch of segments
+  /// among several requests.
  Segment getSegment(size_t index) const;

-  // For notions of priority among requests, used to enable std::set in
-  // Batcher.
+  /// For notions of priority among requests, used to enable std::set in
+  /// Batcher.
  bool operator<(const Request &request) const;

-  // Processes a history obtained after translating in a heterogenous batch
-  // compiled from requests.
+  /// Processes a history obtained after translating in a heterogenous batch
+  /// compiled from requests.
  void processHistory(size_t index, Ptr<History> history);

-  // On completion of last segment, sets value of the promise.
-  void completeRequest();
-
 private:
  size_t Id_;
-  size_t lineNumberBegin_;

-  // Multiple translation-workers can concurrently access the same Request. The
-  // following atomic atomically operates on the variable holding sentences
-  // remaining to be translated.
+  /// Multiple translation-workers can concurrently access the same Request. The
+  /// following atomic atomically operates on the variable holding sentences
+  /// remaining to be translated.
  std::atomic<int> counter_;

-  // source_ holds the source string to be translated. segments_ hold the
-  // sentences generated from source_ in vector<Words>. sourceRanges_ are
-  // string_views of the text corresponding to these words, pointing to
-  // sequences in source_. histories_ is a buffer which eventually stores the
-  // translations of each segment in the corresponding index.
-  AnnotatedText source_;
+  /// segments_ holds the sentences of the input string, each processed into
+  /// Words.
  Segments segments_;
+
+  /// histories_ is a buffer which eventually stores the translations of each
+  /// segment in the corresponding index.
  std::vector<Ptr<History>> histories_;

-  // Members above are moved into newly constructed Response on completion
-  // of translation of all segments. The promise below is set to this Response
-  // value. future to this promise is made available to the user through
-  // Service.
-  std::promise<Response> response_;
-
-  // Constructing Response requires the vocabs_ used to generate Request.
-  std::vector<Ptr<Vocab const>> *vocabs_;
+  /// Callback invoked with histories_ once every segment has been translated
+  /// (see processHistory); it builds the Response for this Request.
+  ResponseBuilder responseBuilder_;
 };

+/// A RequestSentence provides a view to a sentence within a Request. Existence
+/// of this class allows the sentences and associated information to be kept
+/// within Request, while batching mechanism (Batcher) compiles Batch from
+/// RequestSentence-s coming from different Requests.
 class RequestSentence {
-  // A RequestSentence provides a view to a sentence within a Request. Existence
-  // of this class allows the sentences and associated information to be kept
-  // within Request.

 public:
  RequestSentence(size_t, Ptr<Request>);
+
+  /// Number of tokens in the segment this RequestSentence represents. Used to
+  /// order by length in batching.
  size_t numTokens() const;

-  // lineNumber in Request, used for matching marian-decoder. SentenceTuple
-  // requires lineNumber to be set for Corpus based batches.
-  size_t lineNumber() const;
-
-  // Accessor to the segment represented by the RequestSentence.
+  /// Accessor to the segment represented by the RequestSentence.
  Segment getUnderlyingSegment() const;

-  // Forwards call to Request, checking for completion.
+  /// Forwards history to Request to set history corresponding to this
+  /// RequestSentence.
  void completeSentence(Ptr<History> history);

  friend bool operator<(const RequestSentence &a, const RequestSentence &b);
--- a/src/translator/response.cpp
+++ b/src/translator/response.cpp
@ -1,106 +0,0 @@
-#include "response.h"
-#include "common/logging.h"
-#include "data/alignment.h"
-#include "sentence_ranges.h"
-
-#include <utility>
-
-namespace marian {
-namespace bergamot {
-
-Response::Response(AnnotatedText &&source, Histories &&histories,
-                   std::vector<Ptr<Vocab const>> &vocabs)
-    : source(std::move(source)) {
-  // Reserving length at least as much as source_ seems like a reasonable thing
-  // to do to avoid reallocations.
-  target.text.reserve(source.text.size());
-
-  // In a first step, the decoded units (individual senteneces) are compiled
-  // into a huge string. This is done by computing indices first and appending
-  // to the string as each sentences are decoded.
-  std::vector<std::pair<size_t, size_t>> translationRanges;
-  std::vector<size_t> sentenceBegins;
-
-  size_t offset{0};
-  bool first{true};
-
-  for (auto &history : histories) {
-    // TODO(jerin): Change hardcode of nBest = 1
-    NBestList onebest = history->nBest(1);
-
-    Result result = onebest[0]; // Expecting only one result;
-    Words words = std::get<0>(result);
-    auto targetVocab = vocabs.back();
-
-    std::string decoded;
-    std::vector<string_view> targetMappings;
-    targetVocab->decodeWithByteRanges(words, decoded, targetMappings);
-
-    if (first) {
-      first = false;
-    } else {
-      target.text += " ";
-      ++offset;
-    }
-
-    sentenceBegins.push_back(translationRanges.size());
-    target.text += decoded;
-    auto decodedStringBeginMarker = targetMappings.front().begin();
-    for (auto &sview : targetMappings) {
-      size_t startIdx = offset + sview.begin() - decodedStringBeginMarker;
-      translationRanges.emplace_back(startIdx, startIdx + sview.size());
-    }
-
-    offset += decoded.size();
-
-    // Alignments
-    // TODO(jerinphilip): The following double conversion might not be
-    // necessary. Hard alignment can directly be exported, but this would mean
-    // WASM bindings for a structure deep within marian source.
-    auto hyp = std::get<1>(result);
-    auto softAlignment = hyp->tracebackAlignment();
-    auto hardAlignment = data::ConvertSoftAlignToHardAlign(
-        softAlignment, /*threshold=*/0.2f); // TODO(jerinphilip): Make this a
-                                            // configurable parameter.
-
-    Alignment unified_alignment;
-    for (auto &p : hardAlignment) {
-      unified_alignment.emplace_back((Point){p.srcPos, p.tgtPos, p.prob});
-    }
-
-    alignments.push_back(std::move(unified_alignment));
-
-    // Quality scores: Sequence level is obtained as normalized path scores.
-    // Word level using hypothesis traceback. These are most-likely logprobs.
-    auto normalizedPathScore = std::get<2>(result);
-    auto wordQualities = hyp->tracebackWordScores();
-    wordQualities.pop_back();
-    qualityScores.push_back((Quality){normalizedPathScore, wordQualities});
-  }
-
-  // Once we have the indices in translation (which might be resized a few
-  // times) ready, we can prepare and store the string_view as annotations
-  // instead. This is accomplished by iterating over available sentences using
-  // sentenceBegin and using addSentence(...) API from Annotation.
-
-  for (size_t i = 1; i <= sentenceBegins.size(); i++) {
-    std::vector<string_view> targetMappings;
-    size_t begin = sentenceBegins[i - 1];
-    size_t safe_end = (i == sentenceBegins.size()) ? translationRanges.size()
-                                                   : sentenceBegins[i];
-
-    for (size_t idx = begin; idx < safe_end; idx++) {
-      auto &p = translationRanges[idx];
-      size_t begin_idx = p.first;
-      size_t end_idx = p.second;
-
-      const char *data = &target.text[begin_idx];
-      size_t size = end_idx - begin_idx;
-      targetMappings.emplace_back(data, size);
-    }
-
-    target.addSentence(targetMappings);
-  }
-}
-} // namespace bergamot
-} // namespace marian
--- a/src/translator/response.h
+++ b/src/translator/response.h
@ -40,34 +40,12 @@ struct Quality {
 /// AnnotatedText provides an API to access markings of (sub)-word and
 /// sentences boundaries, which are required to interpret Quality and
 /// Alignment (s) at the moment.
-class Response {
-
-public:
-  ///
-  Response(AnnotatedText &&source, Histories &&histories,
-           std::vector<Ptr<Vocab const>> &vocabs);
-
-  /// \cond HIDDEN_PUBLIC
-  // Move constructor.
-  Response(Response &&other)
-      : source(std::move(other.source)), target(std::move(other.target)),
-        alignments(std::move(other.alignments)),
-        qualityScores(std::move(other.qualityScores)){};
-
-  // The following copy bans are not stricitly required anymore since Annotation
-  // is composed of the ByteRange primitive (which was previously string_view
-  // and required to be bound to string), but makes movement efficient by
-  // banning these letting compiler complain about copies.
-
-  Response(const Response &) = delete;
-  Response &operator=(const Response &) = delete;
-
-  /// \endcond
-
-  /// Number of sentences translated. The processing of a text of into sentences
-  /// are handled internally, and this information can be used to iterate
-  /// through meaningful units of translation for which alignment and quality
-  /// information are available.
+struct Response {
+  /// Convenience function to obtain number of units translated. Same as
+  /// `.source.numSentences()` and `.target.numSentences()`. The processing of
+  /// a text into sentences is handled internally, and this information can be
+  /// used to iterate through meaningful units of translation for which
+  /// alignment and quality information are available.
  const size_t size() const { return source.numSentences(); }

  /// source text and annotations of (sub-)words and sentences.
--- a/src/translator/response_builder.cpp
+++ b/src/translator/response_builder.cpp
@ -0,0 +1,87 @@
+#include "response_builder.h"
+
+namespace marian {
+namespace bergamot {
+
+// Appends one Quality entry per history to response.qualityScores, taken from
+// the single best hypothesis of that history.
+void ResponseBuilder::buildQualityScores(Histories &histories,
+                                         Response &response) {
+  for (auto &history : histories) {
+    // TODO(jerin): Change hardcode of nBest = 1
+    NBestList onebest = history->nBest(1);
+
+    Result result = onebest[0]; // Expecting only one result;
+    auto hyp = std::get<1>(result);
+    // Quality scores: Sequence level is obtained as normalized path scores.
+    // Word level using hypothesis traceback. These are most-likely
+    // logprobs.
+    auto normalizedPathScore = std::get<2>(result);
+    auto wordQualities = hyp->tracebackWordScores();
+    wordQualities.pop_back(); // NOTE(review): presumably drops EOS - confirm.
+    response.qualityScores.push_back(
+        Quality{normalizedPathScore, wordQualities});
+  }
+}
+
+// Appends one thresholded hard Alignment per history to response.alignments.
+void ResponseBuilder::buildAlignments(Histories &histories,
+                                      Response &response) {
+  const auto threshold = responseOptions_.alignmentThreshold;
+  for (auto &history : histories) {
+    // TODO(jerin): Change hardcode of nBest = 1
+    NBestList onebest = history->nBest(1);
+
+    Result result = onebest[0]; // Expecting only one result;
+    // Alignments
+    // TODO(jerinphilip): The following double conversion might not be
+    // necessary. Hard alignment can directly be exported, but this would
+    // mean WASM bindings for a structure deep within marian source.
+    auto hyp = std::get<1>(result);
+    auto softAlignment = hyp->tracebackAlignment();
+    auto hardAlignment =
+        data::ConvertSoftAlignToHardAlign(softAlignment, threshold);
+    Alignment unified_alignment;
+    for (auto &p : hardAlignment) {
+      unified_alignment.emplace_back(Point{p.srcPos, p.tgtPos, p.prob});
+    }
+
+    response.alignments.push_back(std::move(unified_alignment));
+  }
+}
+
+// Decodes the best hypothesis of every history with the last vocabulary and
+// appends it to response.target, sentence by sentence.
+void ResponseBuilder::buildTranslatedText(Histories &histories,
+                                          Response &response) {
+  // Reserving length at least as much as the source text seems like a
+  // reasonable thing to do to avoid reallocations.
+  response.target.text.reserve(response.source.text.size());
+
+  bool isFirstSentence{true};
+
+  for (auto &history : histories) {
+    // TODO(jerin): Change hardcode of nBest = 1
+    NBestList nbest = history->nBest(1);
+
+    Result best = nbest[0]; // Expecting only one result;
+    Words targetWords = std::get<0>(best);
+
+    // decodedSentence receives the text, unitViews the string_view of each
+    // decoded unit inside it.
+    std::string decodedSentence;
+    std::vector<string_view> unitViews;
+    vocabs_->back()->decodeWithByteRanges(targetWords, decodedSentence,
+                                          unitViews);
+
+    // Every sentence except the first is preceded by a single space.
+    // delimiter can be used to fill in the blanks from source as well.
+    std::string delimiter = isFirstSentence ? "" : " ";
+    isFirstSentence = false;
+
+    response.target.appendSentence(delimiter, decodedSentence, unitViews);
+  }
+}
+
+} // namespace bergamot
+} // namespace marian
--- a/src/translator/response_builder.h
+++ b/src/translator/response_builder.h
@ -0,0 +1,93 @@
+#ifndef SRC_BERGAMOT_RESPONSE_BUILDER_H_
+#define SRC_BERGAMOT_RESPONSE_BUILDER_H_
+
+#include "data/types.h"
+#include "response.h"
+#include "response_options.h"
+
+// For now we will work with this, to avoid complaints another structure is hard
+// to operate with.
+
+namespace marian {
+namespace bergamot {
+
+/// ResponseBuilder is a callback functor. It is expected to be bound to a
+/// Request after giving it the context of options, source, vocabs and the
+/// promise to set. It constructs the Response and its members based on
+/// ResponseOptions: the translated text is always built, while quality scores
+/// and alignments are built only when the corresponding option is enabled.
+class ResponseBuilder {
+public:
+  /// @param [in] responseOptions: ResponseOptions, indicating what to include
+  /// or not in the response and any additional configurable parameters.
+  /// @param [in] source: annotated source text, moved into the Response.
+  /// @param [in] vocabs: marian vocabs (used in decoding); kept by pointer, so
+  /// they must outlive this builder.
+  /// @param [in] promise: promise to set with the constructed Response.
+  ResponseBuilder(ResponseOptions responseOptions, AnnotatedText &&source,
+                  std::vector<Ptr<Vocab const>> &vocabs,
+                  std::promise<Response> &&promise)
+      // Initializers follow the member declaration order.
+      : responseOptions_(responseOptions), vocabs_(&vocabs),
+        promise_(std::move(promise)), source_(std::move(source)) {}
+
+  /// Constructs and sets the promise of a Response object from obtained
+  /// histories after translating.
+  /// @param [in] histories: Histories obtained after translating the Request
+  /// from which this functor is called.
+  void operator()(Histories &&histories) {
+    // Exactly one History is expected per source sentence.
+    ABORT_IF(source_.numSentences() != histories.size(),
+             "Mismatch in source and translated sentences");
+    Response response;
+
+    // Move source_ into response.
+    response.source = std::move(source_);
+
+    // Should be after source is set
+    buildTranslatedText(histories, response);
+
+    // Should always be after buildTranslatedText
+    if (responseOptions_.qualityScores) {
+      buildQualityScores(histories, response);
+    }
+
+    if (responseOptions_.alignment) {
+      buildAlignments(histories, response);
+    }
+
+    // Once complete, set promise.
+    promise_.set_value(std::move(response));
+  }
+
+private:
+  /// Builds qualityScores from histories and writes to response. expects
+  /// buildTranslatedText to be run before to be able to obtain target text and
+  /// subword information.
+  /// @param histories [in]
+  /// @param response [out]
+  void buildQualityScores(Histories &histories, Response &response);
+
+  /// Builds alignments from histories and writes onto response.
+  /// @param histories [in]
+  /// @param response [out]
+  void buildAlignments(Histories &histories, Response &response);
+
+  /// Builds translated text and subword annotations and writes onto response.
+  /// @param histories [in]
+  /// @param response [out]
+  void buildTranslatedText(Histories &histories, Response &response);
+
+  // Data members are context/curried args for the functor.
+
+  ResponseOptions responseOptions_;
+  std::vector<Ptr<Vocab const>> *vocabs_; // vocabs are required for decoding
+                                          // and any source validation checks.
+  std::promise<Response> promise_; //  To be set when callback triggered and
+                                   //  after Response constructed.
+  AnnotatedText source_;
+};
+} // namespace bergamot
+} // namespace marian
+
+#endif //  SRC_BERGAMOT_RESPONSE_BUILDER_H_
--- a/src/translator/response_options.h
+++ b/src/translator/response_options.h
@ -0,0 +1,50 @@
+#ifndef SRC_BERGAMOT_RESPONSE_OPTIONS_H_
+#define SRC_BERGAMOT_RESPONSE_OPTIONS_H_
+#include <string>
+
+namespace marian {
+namespace bergamot {
+
+enum ConcatStrategy {
+  /// Target text is constructed faithful to the source-text structure.
+  FAITHFUL,
+
+  /// Target sentences are concatenated by a space.
+  SPACE
+};
+
+enum QualityScoreType {
+  /// A quality-score that comes for free with the machine-translation model
+  /// itself.
+  FREE,
+
+  /// An expensive quality-score that runs additional computations to determine
+  /// the quality of an output.
+  EXPENSIVE
+};
+
+/// ResponseOptions dictates how to construct a Response for an input string
+/// of text to be translated. Every field carries a default value.
+struct ResponseOptions {
+  bool qualityScores{false}; ///< Include quality-scores or not.
+  bool alignment{false};     ///< Include alignments or not.
+
+  /// Whether to include sentenceMappings or not. Alignments require
+  /// sentenceMappings and are available irrespective of this option if
+  /// `alignment=true`.
+  bool sentenceMappings{false};
+
+  /// Threshold between `[0.0f, 1.0f]` to filter alignments into a sparse
+  /// matrix. Higher value implies stronger filtering leading to provision of
+  /// higher-confidence matches. `1.0f` gives argmax (not the full-dense
+  /// matrix).
+  float alignmentThreshold{0.2f};
+
+  QualityScoreType qualityScoreType{QualityScoreType::FREE}; ///< Score kind.
+  ConcatStrategy concatStrategy{ConcatStrategy::FAITHFUL}; ///< Joining mode.
+};
+
+} // namespace bergamot
+} // namespace marian
+
+#endif //  SRC_BERGAMOT_RESPONSE_OPTIONS_H_
--- a/src/translator/sentence_ranges.cpp
+++ b/src/translator/sentence_ranges.cpp
@ -32,11 +32,11 @@ ByteRange Annotation::sentence(size_t sentenceIdx) const {
    // the flatByteRange and non-empty sentence before this happened and
    // construct empty string-view equivalent ByteRange.
    ByteRange eos = flatByteRanges_[eosId - 1];
-    sentenceByteRange = (ByteRange){eos.end, eos.end};
+    sentenceByteRange = ByteRange{eos.end, eos.end};
  } else {
    ByteRange bos = flatByteRanges_[bosId];
    ByteRange eos = flatByteRanges_[eosId - 1];
-    sentenceByteRange = (ByteRange){bos.begin, eos.end};
+    sentenceByteRange = ByteRange{bos.begin, eos.end};
  }
  return sentenceByteRange;
 }
@ -56,6 +56,20 @@ string_view AnnotatedText::sentence(size_t sentenceIdx) const {
  return asStringView(sentenceAsByteRange);
 }

+void AnnotatedText::appendSentence(std::string prefix, std::string &reference,
+                                   std::vector<string_view> &wordRanges) {
+  text += prefix;
+  size_t offset = text.size(); // Get size before to do ByteRange arithmetic
+  text += reference;           // Append reference to text
+  std::vector<ByteRange> sentence;
+  for (auto &wordView : wordRanges) {
+    // Take the pointer difference inside reference first, then rebase it.
+    size_t wordBegin = offset + (wordView.data() - &reference[0]);
+    sentence.push_back(ByteRange{wordBegin, wordBegin + wordView.size()});
+  }
+  annotation.addSentence(sentence);
+}
+
 void AnnotatedText::addSentence(std::vector<string_view> &wordRanges) {
  addSentence(std::begin(wordRanges), std::end(wordRanges));
 };
@ -65,7 +79,7 @@ void AnnotatedText::addSentence(std::vector<string_view>::iterator begin,
  std::vector<ByteRange> sentence;
  for (auto p = begin; p != end; p++) {
    size_t begin_offset = p->data() - &text[0];
-    sentence.push_back((ByteRange){begin_offset, begin_offset + p->size()});
+    sentence.push_back(ByteRange{begin_offset, begin_offset + p->size()});
  }
  annotation.addSentence(sentence);
 };
--- a/src/translator/sentence_ranges.h
+++ b/src/translator/sentence_ranges.h
@ -64,7 +64,6 @@ public:
    sentenceEndIds_.push_back(0);
  }

-  /// Returns the number of sentences annotated in a text.
  size_t numSentences() const { return sentenceEndIds_.size() - 1; }

  /// Returns number of words in the sentence identified by `sentenceIdx`.
@ -125,10 +124,6 @@ public:
  /// constructor is disallowed).
  AnnotatedText(std::string &&text) : text(std::move(text)){};

-  AnnotatedText(AnnotatedText &&annotatedBlob)
-      : text(std::move(annotatedBlob.text)),
-        annotation(std::move(annotatedBlob.annotation)) {}
-
  /// Returns the number of sentences in the annotation structure.
  const size_t numSentences() const { return annotation.numSentences(); }

@ -137,6 +132,11 @@ public:
    return annotation.numWords(sentenceIdx);
  }

+  /// Appends a sentence to the existing text and transparently rebases
+  /// string_views
+  void appendSentence(std::string prefix, std::string &reference,
+                      std::vector<string_view> &wordRanges);
+
  /// Adds a sentence, used to load from SentencePiece annotations conveniently.
  void addSentence(std::vector<string_view> &wordRanges);

--- a/src/translator/service.cpp
+++ b/src/translator/service.cpp
@ -112,6 +112,44 @@ void Service::async_translate() {
 #endif // WASM_COMPATIBLE_SOURCE

 std::future<Response> Service::translate(std::string &&input) {
+  ResponseOptions defaultOptions;  // Hardcoded until callers can supply them
+  return translate(std::move(input), defaultOptions);
+}
+
+std::vector<Response>
+Service::translateMultiple(std::vector<std::string> &&inputs,
+                           TranslationRequest translationRequest) {
+  // TODO(jerinphilip) Set options based on TranslationRequest, if and when it
+  // becomes non-dummy. Until then default-constructed options are used and
+  // translationRequest is ignored.
+  ResponseOptions responseOptions;
+
+  // Queue every input before dispatching, so that sentences from different
+  // Requests can be compiled into the same batches.
+  std::vector<std::future<Response>> responseFutures;
+  responseFutures.reserve(inputs.size());
+  for (auto &input : inputs) {
+    responseFutures.push_back(queueRequest(std::move(input), responseOptions));
+  }
+
+  // Dispatch exactly once, after all Requests have been queued.
+  dispatchTranslate();
+
+  // Collect the Responses in input order. get() blocks until the Response is
+  // ready, so no separate wait() is needed, and it already returns by value,
+  // so no std::move is needed either. We can probably return the futures
+  // instead, but WASM quirks(?).
+  std::vector<Response> responses;
+  responses.reserve(responseFutures.size());
+  for (auto &future : responseFutures) {
+    responses.push_back(future.get());
+  }
+
+  return responses;
+}
+
+std::future<Response> Service::queueRequest(std::string &&input,
+                                            ResponseOptions responseOptions) {
  Segments segments;
  AnnotatedText source(std::move(input));
  text_processor_.process(source, segments);
@ -119,17 +157,29 @@ std::future<Response> Service::translate(std::string &&input) {
  std::promise<Response> responsePromise;
  auto future = responsePromise.get_future();

-  Ptr<Request> request = New<Request>(
-      requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(source),
-      std::move(segments), std::move(responsePromise));
+  ResponseBuilder responseBuilder(responseOptions, std::move(source), vocabs_,
+                                  std::move(responsePromise));
+  Ptr<Request> request = New<Request>(requestId_++, std::move(segments),
+                                      std::move(responseBuilder));

  batcher_.addWholeRequest(request);
+  return future;
+}
+
+// Queues a single input and then dispatches translation for it right away.
+std::future<Response> Service::translate(std::string &&input,
+                                         ResponseOptions responseOptions) {
+  auto pending = queueRequest(std::move(input), responseOptions);
+  dispatchTranslate();
+  return pending;
+}
+
+void Service::dispatchTranslate() {
  if (numWorkers_ == 0) {
    blocking_translate();
  } else {
    async_translate();
  }
-  return future;
 }

 Service::~Service() {
--- a/src/translator/service.h
+++ b/src/translator/service.h
@ -1,10 +1,12 @@
 #ifndef SRC_BERGAMOT_SERVICE_H_
 #define SRC_BERGAMOT_SERVICE_H_

+#include "TranslationRequest.h"
 #include "batch_translator.h"
 #include "batcher.h"
 #include "data/types.h"
 #include "response.h"
+#include "response_builder.h"
 #include "text_processor.h"
 #include "translator/parser.h"

@ -18,18 +20,33 @@
 namespace marian {
 namespace bergamot {

-/// Service exposes methods to translate an incoming blob of text to the
-/// Consumer of bergamot API.
+/// Service offers methods to create an asynchronous translation service. This is
+/// intended to be similar to the ones provided for training or decoding in ML
+/// pipelines with the following additional capabilities:
+///
+///  1. Provision of a request -> response based translation flow unlike the
+///  usual line-based translation or decoding provided in most ML frameworks.
+///  2. Internal handling of normalization etc., which changes the source
+///  text, while keeping translation meta-information provided to the client
+///  (like alignments) consistent with the unnormalized input text.
+///
+/// Service exposes methods to instantiate the service from a string
+/// configuration (which can cover most translators) and to translate an
+/// incoming blob of text.
+///
 ///
 /// An example use of this API looks as follows:
-///
+/// ```cpp
 ///  options = ...;
 ///  service = Service(options);
 ///  std::string input_text = "Hello World";
 ///  std::future<Response>
-///      response = service.translate(std::move(input_text));
-///  response.wait();
-///  Response result = response.get();
+///      responseFuture = service.translate(std::move(input_text));
+///  responseFuture.wait(); // Wait until translation has completed.
+///  Response response = responseFuture.get();
+///
+/// // Do things with response.
+/// ```
 ///
 /// Optionally Service can be initialized by also passing model_memory for
 /// purposes of efficiency (which defaults to nullpointer and then reads from
@ -41,9 +58,22 @@ public:
  /// @param modelMemory byte array (aligned to 256!!!) that contains the bytes
  /// of a model.bin. Optional, defaults to nullptr when not used
  /// @param shortlistMemory byte array of shortlist (aligned to 64)
-  explicit Service(Ptr<Options> options, AlignedMemory modelMemory, AlignedMemory shortlistMemory);
+  explicit Service(Ptr<Options> options, AlignedMemory modelMemory,
+                   AlignedMemory shortlistMemory);

-  explicit Service(Ptr<Options> options) : Service(options, AlignedMemory(), AlignedMemory()){}
+  /// Construct Service purely from Options. This expects options which
+  /// marian-decoder expects to be set for loading model shortlist and
+  /// vocabularies from files in addition to parameters that set/unset desired
+  /// features (e.g: alignments, quality-scores).
+  ///
+  /// This is equivalent to a call to:
+  /// ```cpp
+  ///    Service(options, AlignedMemory(),  AlignedMemory())
+  /// ```
+  /// wherein empty memory is passed and internal flow defaults to file-based
+  /// model, shortlist loading.
+  explicit Service(Ptr<Options> options)
+      : Service(options, AlignedMemory(), AlignedMemory()) {}

  /// Construct Service from a string configuration.
  /// @param [in] config string parsable as YAML expected to adhere with marian
@ -52,20 +82,55 @@ public:
  /// bytes of a model.bin. Optional.
  /// @param [in] shortlistMemory byte array of shortlist (aligned to 64)
  explicit Service(const std::string &config,
-                   AlignedMemory modelMemory = AlignedMemory(), AlignedMemory shortlistMemory = AlignedMemory())
-      : Service(parseOptions(config), std::move(modelMemory), std::move(shortlistMemory)) {}
+                   AlignedMemory modelMemory = AlignedMemory(),
+                   AlignedMemory shortlistMemory = AlignedMemory())
+      : Service(parseOptions(config), std::move(modelMemory),
+                std::move(shortlistMemory)) {}

  /// Explicit destructor to clean up after any threads initialized in
  /// asynchronous operation mode.
  ~Service();

  /// To stay efficient and to refer to the string for alignments, expects
-  /// ownership be moved through std::move(..)
+  /// ownership be moved through `std::move(..)`
  ///
-  ///  @param [in] rvalue reference of string to be translated.
-  std::future<Response> translate(std::string &&input);
+  ///  @param [in] source: rvalue reference of string to be translated.
+  std::future<Response> translate(std::string &&source);
+
+  /// Translate an input, providing Options to construct Response. This is
+  /// useful when one has to set/unset alignments or quality in the Response to
+  /// save compute spent in constructing these objects.
+  ///
+  /// @param [in] source: rvalue reference of the string to be translated
+  /// @param [in] options: ResponseOptions indicating whether or not to include
+  /// some member in the Response, also specify any additional configurable
+  /// parameters.
+  std::future<Response> translate(std::string &&source,
+                                  ResponseOptions options);
+
+  /// Translate an input, providing TranslationRequest across all texts to
+  /// construct Response. Provides the browser with the ability to break texts
+  /// into multiple Request keeping gains from efficiently batching internally.
+  /// Also useful when one has to set/unset alignments or quality in the
+  /// Response to save compute spent in constructing these objects.
+  ///
+  /// @param [in] source: rvalue reference of the strings to be translated
+  /// @param [in] translationRequest: TranslationRequest (Unified API)
+  /// indicating whether or not to include some member in the Response, also
+  /// specify any additional configurable parameters.
+  /// @returns one Response per input string, in the order of `source`.
+  std::vector<Response>
+  translateMultiple(std::vector<std::string> &&source,
+                    TranslationRequest translationRequest);

 private:
+  /// Queue an input for translation.
+  std::future<Response> queueRequest(std::string &&input,
+                                     ResponseOptions responseOptions);
+
+  /// Dispatch call to translate after inserting in queue
+  void dispatchTranslate();
+
  /// Build numTranslators number of translators with options from options
  void build_translators(Ptr<Options> options, size_t numTranslators);
  /// Initializes a blocking translator without using std::thread
@ -83,16 +148,17 @@ private:
  void async_translate();

  /// Number of workers to launch.
-  size_t numWorkers_;              // ORDER DEPENDENCY (pcqueue_)
+  size_t numWorkers_; // ORDER DEPENDENCY (pcqueue_)
  /// Model memory to load model passed as bytes.
-  AlignedMemory modelMemory_;      // ORDER DEPENDENCY (translators_)
+  AlignedMemory modelMemory_; // ORDER DEPENDENCY (translators_)
  /// Shortlist memory passed as bytes.
-  AlignedMemory shortlistMemory_;  // ORDER DEPENDENCY (translators_)
+  AlignedMemory shortlistMemory_; // ORDER DEPENDENCY (translators_)

  /// Holds instances of batch translators, just one in case
  /// of single-threaded application, numWorkers_ in case of multithreaded
  /// setting.
-  std::vector<BatchTranslator> translators_;  // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)
+  std::vector<BatchTranslator>
+      translators_; // ORDER DEPENDENCY (modelMemory_, shortlistMemory_)

  /// Stores requestId of active request. Used to establish
  /// ordering among requests and logging/book-keeping.