Merge pull request #30 from browsermt/jp/absorb-batch-translator

Sync thread adjustments with wasm-integration
This commit is contained in:
abhi-agg 2021-02-23 16:32:26 +01:00 committed by GitHub
commit c0efc21c6e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 1065 additions and 680 deletions

14
.gitignore vendored
View File

@ -2,6 +2,20 @@
*.swp
*.swo
# CMake
CMakeLists.txt.user
CMakeCache.txt
CMakeFiles
CMakeScripts
Testing
Makefile
cmake_install.cmake
install_manifest.txt
compile_commands.json
CTestTestfile.cmake
_deps
wasm/test_page/node_modules
build-*
models

@ -1 +1 @@
Subproject commit 16864967b7313e76e3b107d11ec39d8d5cedff1e
Subproject commit 432208826ee27e7b3984b53774b1a16d74256d77

View File

@ -9,28 +9,51 @@ project(bergamot_translator CXX C)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
include(CMakeDependentOption)
# Project specific cmake options
option(COMPILE_WASM "Compile for WASM" OFF)
option(COMPILE_THREAD_VARIANT "Compile with thread support" OFF)
option(USE_WASM_COMPATIBLE_MARIAN "Use wasm compatible marian backend" ON)
CMAKE_DEPENDENT_OPTION(COMPILE_THREAD_VARIANT "Compile the project with thread support" OFF
"USE_WASM_COMPATIBLE_MARIAN" ON)
SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be packaged (pre-loaded) in wasm builds")
# Set marian (3rd party submodule) cmake options to compile for this project
SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only")
SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support")
SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds")
SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
SET(COMPILE_WITHOUT_EXCEPTIONS ON CACHE BOOL "Compile without exceptions")
if(COMPILE_WASM)
# Set WORMHOLE to ON for marian whenever compiling for wasm platform
SET(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")
if (USE_WASM_COMPATIBLE_MARIAN)
# If using wasm compatible marian then set following flags
SET(USE_MKL OFF CACHE BOOL "Compile with MKL support")
SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only")
SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support")
SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds")
SET(COMPILE_WITHOUT_EXCEPTIONS ON CACHE BOOL "Compile without exceptions")
if(COMPILE_WASM)
# Set WORMHOLE to ON for marian whenever compiling for wasm platform
SET(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")
endif()
endif()
# Set ssplit (3rd party submodule) cmake options to compile for this project
SET(USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
execute_process(COMMAND git submodule update --init --recursive --no-fetch
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
# Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html
# Ensures the submodules are set correctly during a build.
find_package(Git QUIET)
if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
# Update submodules as needed
option(GIT_SUBMODULE "Check submodules during build" ON)
if(GIT_SUBMODULE)
message(STATUS "Submodule update")
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE GIT_SUBMOD_RESULT)
if(NOT GIT_SUBMOD_RESULT EQUAL "0")
message(FATAL_ERROR "git submodule update --init failed with ${GIT_SUBMOD_RESULT}, please checkout submodules")
endif()
endif()
endif()
if(NOT COMPILE_WASM)
# Set BUILD_ARCH to native only while compiling for non wasm platform

View File

@ -3,3 +3,6 @@ target_link_libraries(bergamot-translator-app PRIVATE bergamot-translator)
add_executable(service-cli main-mts.cpp)
target_link_libraries(service-cli PRIVATE bergamot-translator)
add_executable(marian-decoder-new marian-decoder-new.cpp)
target_link_libraries(marian-decoder-new PRIVATE bergamot-translator)

View File

@ -7,8 +7,8 @@
#include "common/utils.h"
#include "marian.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/service.h"
#include "translator/translation_result.h"
int main(int argc, char *argv[]) {
auto cp = marian::bergamot::createConfigParser();
@ -19,27 +19,13 @@ int main(int argc, char *argv[]) {
std::ostringstream std_input;
std_input << std::cin.rdbuf();
std::string input = std_input.str();
using marian::bergamot::TranslationResult;
using marian::bergamot::Response;
// Wait on future until TranslationResult is complete
std::future<TranslationResult> translation_result_future =
service.translate(std::move(input));
translation_result_future.wait();
const TranslationResult &translation_result = translation_result_future.get();
std::cout << "service-cli [Source text]: ";
std::cout << translation_result.getOriginalText() << std::endl;
std::cout << "service-cli [Translated text]: ";
std::cout << translation_result.getTranslatedText() << std::endl;
// Obtain sentenceMappings and print them as Proof of Concept.
const TranslationResult::SentenceMappings &sentenceMappings =
translation_result.getSentenceMappings();
for (auto &p : sentenceMappings) {
std::cout << "service-cli [src] " << p.first << "\n";
std::cout << "service-cli [tgt] " << p.second << "\n";
}
// Wait on future until Response is complete
std::future<Response> responseFuture = service.translate(std::move(input));
responseFuture.wait();
Response response = responseFuture.get();
std::cout << response.translation() << std::endl;
// Stop Service.
service.stop();

View File

@ -0,0 +1,61 @@
#include <cstdlib>
#include <future>
#include <iostream>
#include <sstream>
#include "common/definitions.h"
#include "common/timer.h"
#include "common/utils.h"
#include "marian.h"
#include "translator/history.h"
#include "translator/output_collector.h"
#include "translator/output_printer.h"
#include "translator/parser.h"
#include "translator/response.h"
#include "translator/service.h"
// Prints translation histories through marian's OutputCollector/OutputPrinter,
// mimicking upstream marian-decoder's output path ((n-)best translations keyed
// by line number).
void marian_decoder_minimal(const marian::Histories &histories,
marian::Ptr<marian::Vocab const> targetVocab,
marian::Ptr<marian::Options> options) {
// Whether to emit the full n-best list rather than only the best hypothesis.
bool doNbest = options->get<bool>("n-best");
auto collector =
marian::New<marian::OutputCollector>(options->get<std::string>("output"));
// There is a dependency of vocabs here.
auto printer = marian::New<marian::OutputPrinter>(options, targetVocab);
if (options->get<bool>("quiet-translation"))
collector->setPrintingStrategy(marian::New<marian::QuietPrinting>());
for (auto &history : histories) {
std::stringstream best1;
std::stringstream bestn;
printer->print(history, best1, bestn);
// Keyed by line number so the collector can restore input order.
collector->Write((long)history->getLineNum(), best1.str(), bestn.str(),
doNbest);
}
}
// Reads the whole of stdin as one blob, translates it via bergamot's Service,
// and prints results through marian_decoder_minimal — intended for
// benchmarking against the upstream marian-decoder.
int main(int argc, char *argv[]) {
auto cp = marian::bergamot::createConfigParser();
auto options = cp.parseOptions(argc, argv, true);
// Timer started before Service construction; the logged wall time includes
// model/graph setup.
marian::timer::Timer decoderTimer;
marian::bergamot::Service service(options);
// Read a large input text blob from stdin
std::ostringstream std_input;
std_input << std::cin.rdbuf();
std::string input = std_input.str();
using marian::bergamot::Response;
// Wait on future until Response is complete
std::future<Response> responseFuture = service.translate(std::move(input));
responseFuture.wait();
const Response &response = responseFuture.get();
marian_decoder_minimal(response.histories(), service.targetVocab(), options);
LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed());
service.stop();
return 0;
}

85
doc/marian-integration.md Normal file
View File

@ -0,0 +1,85 @@
# Marian Integration
This document summarizes the minimal build instructions for developing the
marian code powering bergamot-translator.
## Build Instructions
```
$ git clone https://github.com/browsermt/bergamot-translator
$ cd bergamot-translator
$ mkdir build
$ cd build
$ cmake .. -DUSE_WASM_COMPATIBLE_MARIAN=off -DCMAKE_BUILD_TYPE=Release
$ make -j
```
The build will generate the library that can be linked to any project. All the
public header files are specified in `src` folder.
## Command line apps
The following executables are created by the build:
1. `app/service-cli`: Extends marian with the capability to work with
   string_views. `service-cli` exists to check whether the underlying code
   works without the integration.
2. `app/bergamot-translator-app`: App which integrates service-cli's
   functionality into the translator-agnostic API specified as part of the
   project. Integration failures are detected when the same arguments work with
   `service-cli` but not with `bergamot-translator-app`.
3. `app/marian-decoder-new`: Helper executable to conveniently benchmark new
implementation with the optimized upstream marian-decoder.
The models required to run the command-line are available at
[data.statmt.org/bergamot/models/](http://data.statmt.org/bergamot/models/).
The following example uses an English to German tiny11 student model, available
at:
* [data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz](http://data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz)
<details>
<summary> Example run of commandline: Click to expand </summary>
<p>
```bash
MODEL_DIR=... # path to where the model-files are.
ARGS=(
-m $MODEL_DIR/model.intgemm.alphas.bin # Path to model file.
--vocabs
$MODEL_DIR/vocab.deen.spm # source-vocabulary
$MODEL_DIR/vocab.deen.spm # target-vocabulary
# The following increases speed through one-best-decoding, shortlist and quantization.
--beam-size 1 --skip-cost --shortlist $MODEL_DIR/lex.s2t.gz 50 50 --int8shiftAlphaAll
# Number of CPU threads (workers to launch). Parallelizes over cores and improves speed.
# A value of 0 allows a path with no worker thread-launches and a single-thread.
--cpu-threads 4
# Maximum size of a sentence allowed. If a sentence is above this length,
# it's broken into pieces of less than or equal to this size.
--max-length-break 1024
# Maximum number of tokens that can be fit in a batch. The optimal value
# for the parameter is dependent on hardware and can be obtained by running
# with variations and benchmarking.
--mini-batch-words 1024
# Three modes are supported
# - sentence: One sentence per line
# - paragraph: One paragraph per line.
# - wrapped_text: Paragraphs are separated by empty line.
--ssplit-mode paragraph
)
./app/service-cli "${ARGS[@]}" < path-to-input-file
./app/bergamot-translator-app "${ARGS[@]}" < path-to-input-file
```
</p>
</details>

View File

@ -3,13 +3,16 @@ add_library(bergamot-translator STATIC
TranslationModel.cpp
# Following files added from browsermt/mts@nuke
textops.cpp
text_processor.cpp
sentence_splitter.cpp
batch_translator.cpp
multifactor_priority.cpp
request.cpp
service.cpp
batcher.cpp
translation_result.cpp
response.cpp
batch.cpp
sentence_ranges.cpp
)
if (COMPILE_DECODER_ONLY)
# A dirty hack because of marian's bad cmake practices
@ -34,3 +37,4 @@ target_include_directories(bergamot-translator
PRIVATE ${CMAKE_SOURCE_DIR}
PUBLIC ${CMAKE_SOURCE_DIR}/src)

View File

@ -14,9 +14,8 @@
// All local project includes
#include "TranslationModel.h"
#include "translator/service.h"
#include "translator/parser.h"
#include "translator/service.h"
std::shared_ptr<marian::Options> parseOptions(const std::string &config) {
marian::Options options;
@ -70,20 +69,27 @@ TranslationModel::translate(std::vector<std::string> &&texts,
// Collect future as marian::bergamot::TranslationResult
auto intermediate = service_.translate(std::move(text));
intermediate.wait();
auto mTranslationResult(std::move(intermediate.get()));
auto marianResponse(std::move(intermediate.get()));
// This mess because marian::string_view != std::string_view
std::string source, translation;
marian::bergamot::Response::SentenceMappings mSentenceMappings;
marianResponse.move(source, translation, mSentenceMappings);
// Convert to UnifiedAPI::TranslationResult
TranslationResult::SentenceMappings sentenceMappings;
for (auto &p : mTranslationResult.getSentenceMappings()) {
for (auto &p : mSentenceMappings) {
std::string_view src(p.first.data(), p.first.size()),
tgt(p.second.data(), p.second.size());
sentenceMappings.emplace_back(src, tgt);
}
// In place construction.
translationResults.emplace_back(std::move(mTranslationResult.source_),
std::move(mTranslationResult.translation_),
std::move(sentenceMappings));
translationResults.emplace_back(
std::move(source), // &&marianResponse.source_
std::move(translation), // &&marianResponse.translation_
std::move(sentenceMappings) // &&sentenceMappings
);
}
return translationResults;

View File

@ -24,7 +24,8 @@
*/
class TranslationModel : public AbstractTranslationModel {
public:
/* Construct the model using the model configuration options as yaml-formatted string
/* Construct the model using the model configuration options as yaml-formatted
* string
*/
TranslationModel(const std::string &config);

28
src/translator/batch.cpp Normal file
View File

@ -0,0 +1,28 @@
#include "batch.h"
#include "request.h"
namespace marian {
namespace bergamot {
// Logs summary statistics for this batch at info level: total token count,
// maximum sentence length and number of sentences.
void Batch::log() {
size_t numTokens{0}, maxLength{0};
for (auto &sentence : sentences_) {
numTokens += sentence.numTokens();
maxLength = std::max(maxLength, static_cast<size_t>(sentence.numTokens()));
}
LOG(info, "Batch(tokens={}, max-length={}, sentences_={})", numTokens,
maxLength, sentences_.size());
}
// Appends a sentence (by copy) to this batch.
void Batch::add(const RequestSentence &sentence) {
sentences_.push_back(sentence);
}
// Forwards the i-th history to the i-th sentence, triggering per-request
// completion bookkeeping. Assumes histories is index-aligned with sentences_
// and at least as large — not checked here; TODO(review) confirm at call-site.
void Batch::completeBatch(const Histories &histories) {
for (size_t i = 0; i < sentences_.size(); i++) {
sentences_[i].completeSentence(histories[i]);
}
}
} // namespace bergamot
} // namespace marian

52
src/translator/batch.h Normal file
View File

@ -0,0 +1,52 @@
#ifndef SRC_BERGAMOT_BATCH_H
#define SRC_BERGAMOT_BATCH_H
#include "request.h"
#include "translator/beam_search.h"
namespace marian {
namespace bergamot {
// A Batch groups RequestSentences to be translated together. A Batch
// constructed via poison() carries no sentences and acts as a shutdown
// signal for consumers reading batches from a producer-consumer queue.
class Batch {
public:
Batch() {}
// Drops all sentences; the poison_ flag is left untouched.
void clear() { sentences_.clear(); }
// Methods to construct and determine poison.
static Batch poison() {
Batch batch;
batch.poison_ = true;
return batch;
}
bool isPoison() const { return poison_; }
size_t size() const { return sentences_.size(); }
void add(const RequestSentence &sentence);
// Accessors to read from a Batch. For use in BatchTranslator (consumer on a
// PCQueue holding batches).
//
// sentences() are used to access sentences to construct marian internal
// batch.
const RequestSentences &sentences() { return sentences_; }
// On obtaining Histories after translating a batch, completeBatch can be
// called with Histories, which forwards the call to Request through
// RequestSentence and triggers completion, by setting the promised value to
// the future given to client.
void completeBatch(const Histories &histories);
// Convenience function to log batch-statistics. numTokens, max-length.
void log();
private:
bool poison_{false};
RequestSentences sentences_;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_BATCH_H

View File

@ -1,27 +1,20 @@
#include "batch_translator.h"
#include "batch.h"
#include "common/logging.h"
#include "data/corpus.h"
#include "data/text_input.h"
#include "sanelogging.h"
#include "translator/beam_search.h"
namespace marian {
namespace bergamot {
BatchTranslator::BatchTranslator(DeviceId const device,
PCQueue<PCItem> &pcqueue,
std::vector<Ptr<Vocab const>> &vocabs,
Ptr<Options> options)
: device_(device), options_(options), pcqueue_(&pcqueue), vocabs_(&vocabs) {
: device_(device), options_(options), vocabs_(&vocabs) {}
#ifdef WITH_PTHREADS
thread_ = std::thread([this] { this->mainloop(); });
#else
this->initGraph();
#endif
}
void BatchTranslator::initGraph() {
void BatchTranslator::initialize() {
// Initializes the graph.
if (options_->hasAndNotEmpty("shortlist")) {
int srcIdx = 0, trgIdx = 1;
bool shared_vcb = vocabs_->front() == vocabs_->back();
@ -43,15 +36,14 @@ void BatchTranslator::initGraph() {
scorer->setShortlistGenerator(slgen_);
}
}
graph_->forward();
}
void BatchTranslator::translate(RequestSentences &requestSentences,
Histories &histories) {
void BatchTranslator::translate(Batch &batch) {
std::vector<data::SentenceTuple> batchVector;
for (auto &sentence : requestSentences) {
auto &sentences = batch.sentences();
for (auto &sentence : sentences) {
data::SentenceTuple sentence_tuple(sentence.lineNumber());
Segment segment = sentence.getUnderlyingSegment();
sentence_tuple.push_back(segment);
@ -94,45 +86,32 @@ void BatchTranslator::translate(RequestSentences &requestSentences,
for (size_t j = 0; j < maxDims.size(); ++j)
subBatches[j]->setWords(words[j]);
auto batch = Ptr<CorpusBatch>(new CorpusBatch(subBatches));
batch->setSentenceIds(sentenceIds);
auto corpus_batch = Ptr<CorpusBatch>(new CorpusBatch(subBatches));
corpus_batch->setSentenceIds(sentenceIds);
auto trgVocab = vocabs_->back();
auto search = New<BeamSearch>(options_, scorers_, trgVocab);
histories = std::move(search->search(graph_, batch));
auto histories = std::move(search->search(graph_, corpus_batch));
batch.completeBatch(histories);
}
void BatchTranslator::mainloop() {
#ifdef WITH_PTHREADS
initGraph();
#endif
PCItem pcitem;
void BatchTranslator::consumeFrom(PCQueue<Batch> &pcqueue) {
Batch batch;
Histories histories;
#ifdef WITH_PTHREADS
while (true) {
#endif
pcqueue_->ConsumeSwap(pcitem);
if (pcitem.isPoison()) {
pcqueue.ConsumeSwap(batch);
if (batch.isPoison()) {
return;
} else {
translate(pcitem.sentences, histories);
for (int i = 0; i < pcitem.sentences.size(); i++) {
pcitem.sentences[i].completeSentence(histories[i]);
}
translate(batch);
}
#ifdef WITH_PTHREADS
}
#endif
}
void BatchTranslator::join() {
#ifdef WITH_PTHREADS
thread_.join();
#endif
}
} // namespace bergamot
} // namespace marian

View File

@ -4,14 +4,18 @@
#include <string>
#include <vector>
#include "batch.h"
#include "common/utils.h"
#include "data/shortlist.h"
#include "definitions.h"
#include "pcqueue.h"
#include "request.h"
#include "translator/history.h"
#include "translator/scorers.h"
#ifdef WITH_PTHREADS
#include "pcqueue.h"
#endif
namespace marian {
namespace bergamot {
@ -22,37 +26,27 @@ class BatchTranslator {
// shut down in Service which calls join() on the threads.
public:
BatchTranslator(DeviceId const device, PCQueue<PCItem> &pcqueue,
std::vector<Ptr<Vocab const>> &vocabs, Ptr<Options> options);
void join();
BatchTranslator(DeviceId const device, std::vector<Ptr<Vocab const>> &vocabs,
Ptr<Options> options);
// convenience function for logging. TODO(jerin)
std::string _identifier() { return "worker" + std::to_string(device_.no); }
void translate(Batch &batch);
void initialize();
#ifndef WITH_PTHREADS
void mainloop();
#ifdef WITH_PTHREADS
void consumeFrom(PCQueue<Batch> &pcqueue);
#endif
private:
void initGraph();
void translate(RequestSentences &requestSentences, Histories &histories);
#ifdef WITH_PTHREADS
void mainloop();
#endif
Ptr<Options> options_;
DeviceId device_;
std::vector<Ptr<Vocab const>> *vocabs_;
Ptr<ExpressionGraph> graph_;
std::vector<Ptr<Scorer>> scorers_;
Ptr<data::ShortlistGenerator const> slgen_;
PCQueue<PCItem> *pcqueue_;
#ifdef WITH_PTHREADS
std::thread thread_;
#endif
};
} // namespace bergamot
} // namespace marian

View File

@ -1,55 +1,70 @@
#include "batcher.h"
#include "batch.h"
#include "common/logging.h"
#include "sanelogging.h"
#include <cassert>
namespace marian {
namespace bergamot {
Batcher::Batcher(Ptr<Options> options) {
max_input_tokens_ = options->get<int>("max-input-tokens");
bucket_.resize(options->get<int>("max-input-sentence-tokens") + 1);
ABORT_IF(
max_input_tokens_ < bucket_.size() - 1,
"max-input-tokens cannot be less than than max-input-sentence-tokens, "
"batcher fail");
miniBatchWords = options->get<int>("mini-batch-words");
bucket_.resize(options->get<int>("max-length-break") + 1);
ABORT_IF(bucket_.size() - 1 > miniBatchWords,
"Fatal: max-length-break > mini-batch-words will lead to sentences "
"longer than what can fit in a batch.");
}
void Batcher::addSentenceWithPriority(RequestSentence &sentence) {
int bucket_id = sentence.numTokens();
size_t bucket_id = sentence.numTokens();
assert(bucket_id < bucket_.size());
bucket_[bucket_id].insert(sentence);
}
void Batcher::cleaveBatch(RequestSentences &sentences) {
bool Batcher::operator>>(Batch &batch) { return cleaveBatch(batch); }
bool Batcher::cleaveBatch(Batch &batch) {
// For now simply iterates on buckets and converts batches greedily. This
// has to be enhanced with optimizing over priority. The baseline
// implementation should at least be as fast as marian's maxi-batch with full
// corpus size as maxi-batch size.
batch.clear();
size_t paddedBatchSize = 0;
int segments_added = 0;
int current_input_tokens = 0;
int padded_batch_size = 0;
int prev_padded_batch_size;
for (int i = 0; i < bucket_.size(); i++) {
auto p = bucket_[i].begin();
while (p != bucket_[i].end()) {
padded_batch_size = (segments_added + 1) * i;
if (padded_batch_size <= max_input_tokens_) {
auto q = p;
++p;
current_input_tokens += i;
sentences.push_back(*q);
++segments_added;
bucket_[i].erase(q);
prev_padded_batch_size = padded_batch_size;
for (size_t length = 0; length < bucket_.size(); length++) {
auto p = bucket_[length].begin();
while (p != bucket_[length].end()) {
paddedBatchSize = (batch.size() + 1) * length;
if (paddedBatchSize <= miniBatchWords) {
auto q = p++;
batch.add(*q);
bucket_[length].erase(q);
} else {
return;
// Check if elements exist
assert(batch.size() > 0);
return true;
}
}
}
bool isValidBatch = batch.size() > 0;
return isValidBatch;
}
void Batcher::addWholeRequest(Ptr<Request> request) {
for (size_t i = 0; i < request->numSegments(); i++) {
RequestSentence requestSentence(i, request);
addSentenceWithPriority(requestSentence);
}
}
#ifdef WITH_PTHREADS
void Batcher::produceTo(PCQueue<Batch> &pcqueue) {
Batch batch;
while (cleaveBatch(batch)) {
pcqueue.ProduceSwap(batch);
}
}
#endif
} // namespace bergamot
} // namespace marian

View File

@ -1,11 +1,16 @@
#ifndef SRC_BERGAMOT_BATCHER_H_
#define SRC_BERGAMOT_BATCHER_H_
#include "batch.h"
#include "common/options.h"
#include "data/corpus_base.h"
#include "definitions.h"
#include "request.h"
#ifdef WITH_PTHREADS
#include "pcqueue.h"
#endif
#include <set>
#include <vector>
@ -19,14 +24,20 @@ public:
// sentence. This method inserts the sentence into the internal data-structure
// which maintains priority among sentences from multiple concurrent requests.
void addSentenceWithPriority(RequestSentence &sentence);
void addWholeRequest(Ptr<Request> request);
#ifdef WITH_PTHREADS
void produceTo(PCQueue<Batch> &pcqueue);
#endif
// Loads sentences with sentences compiled from (tentatively) multiple
// requests optimizing for both padding and priority.
void cleaveBatch(RequestSentences &sentences);
bool cleaveBatch(Batch &batch);
bool operator>>(Batch &batch); // alias
private:
unsigned int max_input_tokens_;
size_t miniBatchWords;
std::vector<std::set<RequestSentence>> bucket_;
size_t batchNumber_{0};
};
} // namespace bergamot

View File

@ -5,7 +5,8 @@
namespace marian {
namespace bergamot {
marian::ConfigParser createConfigParser() {
inline marian::ConfigParser createConfigParser() {
marian::ConfigParser cp(marian::cli::mode::translation);
cp.addOption<std::string>(
"--ssplit-prefix-file", "Bergamot Options",
@ -15,14 +16,9 @@ marian::ConfigParser createConfigParser() {
"[paragraph, sentence, wrapped_text]", "paragraph");
cp.addOption<int>(
"--max-input-sentence-tokens", "Bergamot Options",
"--max-length-break", "Bergamot Options",
"Maximum input tokens to be processed in a single sentence.", 128);
cp.addOption<int>("--max-input-tokens", "Bergamot Options",
"Maximum input tokens in a batch. control for"
"Bergamot Queue",
1024);
return cp;
}

View File

@ -9,7 +9,6 @@
#include <memory>
#include <mutex>
#ifdef WITH_PTHREADS
#ifdef __APPLE__
#include <mach/mach.h>
#include <mach/mach_traps.h>
@ -20,7 +19,6 @@
#else
#include <boost/interprocess/sync/interprocess_semaphore.hpp>
#endif
#endif // WITH_PTHREADS
#if __GNUC__ >= 3
#define UTIL_UNLIKELY(x) __builtin_expect(!!(x), 0)
@ -31,7 +29,6 @@
namespace marian {
namespace bergamot {
#ifdef WITH_PTHREADS
/* OS X Maverick and Boost interprocess were doing "Function not implemented."
* So this is my own wrapper around the mach kernel APIs.
*/
@ -117,20 +114,6 @@ inline void WaitSemaphore(Semaphore &on) {
}
#endif // Apple
#else // WITH_PTHREADS
// A dummy Semaphore class that does nothing
class Semaphore {
public:
explicit Semaphore(unsigned int value) : count(value) {}
~Semaphore() {}
void wait() {}
void post() {}
private:
unsigned int count;
};
inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); }
#endif // WITH_PTHREADS
/**
* Producer consumer queue safe for multiple producers and multiple consumers.
@ -151,9 +134,7 @@ public:
void Produce(const T &val) {
WaitSemaphore(empty_);
{
#ifdef WITH_PTHREADS
std::lock_guard<std::mutex> produce_lock(produce_at_mutex_);
#endif
try {
*produce_at_ = val;
} catch (...) {
@ -170,9 +151,7 @@ public:
void ProduceSwap(T &val) {
WaitSemaphore(empty_);
{
#ifdef WITH_PTHREADS
std::lock_guard<std::mutex> produce_lock(produce_at_mutex_);
#endif
try {
std::swap(*produce_at_, val);
} catch (...) {
@ -189,9 +168,7 @@ public:
T &Consume(T &out) {
WaitSemaphore(used_);
{
#ifdef WITH_PTHREADS
std::lock_guard<std::mutex> consume_lock(consume_at_mutex_);
#endif
try {
out = *consume_at_;
} catch (...) {
@ -209,9 +186,7 @@ public:
T &ConsumeSwap(T &out) {
WaitSemaphore(used_);
{
#ifdef WITH_PTHREADS
std::lock_guard<std::mutex> consume_lock(consume_at_mutex_);
#endif
try {
std::swap(out, *consume_at_);
} catch (...) {
@ -245,15 +220,11 @@ private:
// Index for next write in storage_.
T *produce_at_;
#ifdef WITH_PTHREADS
std::mutex produce_at_mutex_;
#endif
// Index for next read from storage_.
T *consume_at_;
#ifdef WITH_PTHREADS
std::mutex consume_at_mutex_;
#endif
};
template <class T> struct UnboundedPage {

View File

@ -1,7 +1,7 @@
#include "request.h"
#include "definitions.h"
#include "translation_result.h"
#include "response.h"
#include "sentence_ranges.h"
#include "common/logging.h"
@ -10,15 +10,15 @@
namespace marian {
namespace bergamot {
Request::Request(unsigned int Id, int lineNumberBegin,
// -----------------------------------------------------------------
Request::Request(size_t Id, size_t lineNumberBegin,
std::vector<Ptr<Vocab const>> &vocabs, std::string &&source,
Segments &&segments,
std::vector<TokenRanges> &&sourceAlignments,
std::promise<TranslationResult> translationResultPromise)
Segments &&segments, SentenceRanges &&sourceRanges,
std::promise<Response> responsePromise)
: Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs),
source_(std::move(source)), segments_(std::move(segments)),
sourceAlignments_(std::move(sourceAlignments)),
response_(std::move(translationResultPromise)) {
sourceRanges_(std::move(sourceRanges)),
response_(std::move(responsePromise)) {
counter_ = segments_.size();
histories_.resize(segments_.size(), nullptr);
@ -47,11 +47,10 @@ void Request::processHistory(size_t index, Ptr<History> history) {
void Request::completeRequest() {
// Request no longer needs to hold the content, can transfer it to
// TranslationResult.
TranslationResult translation_result(std::move(source_),
std::move(sourceAlignments_),
std::move(histories_), *vocabs_);
response_.set_value(std::move(translation_result));
// Response.
Response response(std::move(source_), std::move(sourceRanges_),
std::move(histories_), *vocabs_);
response_.set_value(std::move(response));
}
bool Request::operator<(const Request &b) const {
@ -59,6 +58,8 @@ bool Request::operator<(const Request &b) const {
return Id_ < b.Id_;
}
// ------------------------------------------------------------------
RequestSentence::RequestSentence(size_t index, Ptr<Request> request)
: index_(index), request_(request) {}
@ -88,5 +89,7 @@ bool operator<(const RequestSentence &a, const RequestSentence &b) {
return a.request_ < b.request_;
}
// ----------------------------------------------------------------------
} // namespace bergamot
} // namespace marian

View File

@ -3,30 +3,30 @@
//
// Request: holds the input blob of a text, Segments (vector<Words>) which are
// to go to the batching mechanism and alignments between the processed
// segments and the input blob (sourceAlignments). In addition, Request takes
// segments and the input blob (sourceTokenRanges). In addition, Request takes
// care of the barrier which fires when all the Segments in a request are done
// translating by the workers (BatchTranslator). Request is to be extended with
// notions of Priority (sequence, user-given).
// translating by the workers (BatchTranslator).
// TODO(jerinphilip): Extend Request with notions of Priority (sequence,
// user-given).
//
// RequestSentence: is a tuple of (index, Request*). This provides the
// RequestSentence: is a tuple of (index, Ptr<Request>). This provides the
// batching mechanism access to the segment within the request. The backref to
// Request allows event triggering the barrier upon completion of the last
// sentence by a worker.
//
// PCItem: is a vector of RequestSentences and a batchNumber, which is what the
// PCQueue holds. The batches are constructed from segments returned by a
// RequestSentence. Can be enhanced with paddingSize, countTokens eventually for
// logging.
#ifndef SRC_BERGAMOT_REQUEST_H_
#define SRC_BERGAMOT_REQUEST_H_
#include "definitions.h"
#include "translation_result.h"
#include "response.h"
#include "sentence_ranges.h"
#include "common/logging.h"
#include "data/types.h"
#include "translator/beam_search.h"
#include <cassert>
#include <future>
#include <vector>
@ -34,24 +34,11 @@ namespace marian {
namespace bergamot {
class Request {
private:
unsigned int Id_;
int lineNumberBegin_;
std::string source_;
std::atomic<int> counter_;
std::vector<Ptr<Vocab const>> *vocabs_;
Segments segments_;
std::vector<TokenRanges> sourceAlignments_;
std::vector<Ptr<History>> histories_;
std::promise<TranslationResult> response_;
public:
Request(unsigned int Id, int lineNumberBegin,
Request(size_t Id, size_t lineNumberBegin,
std::vector<Ptr<Vocab const>> &vocabs_, std::string &&source,
Segments &&segments, std::vector<TokenRanges> &&sourceAlignments,
std::promise<TranslationResult> translationResultPromise);
Segments &&segments, SentenceRanges &&sourceTokenRanges,
std::promise<Response> responsePromise);
// Obtain the count of tokens in the segment correponding to index. Used to
// insert sentence from multiple requests into the corresponding size bucket.
@ -65,7 +52,8 @@ public:
// several requests.
Segment getSegment(size_t index) const;
// For notions of priority among requests (used to enable <set> in Batcher).
// For notions of priority among requests, used to enable std::set in
// Batcher.
bool operator<(const Request &request) const;
// Processes a history obtained after translating in a heterogenous batch
@ -74,40 +62,64 @@ public:
// On completion of last segment, sets value of the promise.
void completeRequest();
private:
size_t Id_;
size_t lineNumberBegin_;
// Multiple translation-workers can concurrently access the same Request. The
// following atomic atomically operates on the variable holding sentences
// remaining to be translated.
std::atomic<int> counter_;
// source_ holds the source string to be translated. segments_ hold the
// sentences generated from source_ in vector<Words>. sourceRanges_ are
// string_views of the text corresponding to these words, pointing to
// sequences in source_. histories_ is a buffer which eventually stores the
// translations of each segment in the corresponding index.
std::string source_;
Segments segments_;
SentenceRanges sourceRanges_;
std::vector<Ptr<History>> histories_;
// Members above are moved into newly constructed Response on completion
// of translation of all segments. The promise below is set to this Response
// value. future to this promise is made available to the user through
// Service.
std::promise<Response> response_;
// Constructing Response requires the vocabs_ used to generate Request.
std::vector<Ptr<Vocab const>> *vocabs_;
};
// A RequestSentence provides a view to a single sentence (segment) held
// inside a Request, so that sentences from many requests can be batched
// together while their data stays owned by the Request.
// NOTE(review): index_/request_ are declared twice below (before and after
// the public section); this appears to be merged-diff residue — confirm
// against the repository, as duplicate members will not compile.
class RequestSentence {

private:
  size_t index_;
  Ptr<Request> request_;

  // A RequestSentence provides a view to a sentence within a Request. Existence
  // of this class allows the sentences and associated information to be kept
  // within Request.

public:
  RequestSentence(size_t, Ptr<Request>);

  // Number of tokens in the sentence this object views.
  size_t numTokens() const;

  // lineNumber in Request, used for matching marian-decoder. SentenceTuple
  // requires lineNumber to be set for Corpus based batches.
  size_t lineNumber() const;

  // Accessor to the segment represented by the RequestSentence.
  Segment getUnderlyingSegment() const;

  // Forwards call to Request, checking for completion.
  void completeSentence(Ptr<History> history);

  // Ordering delegates to the underlying Request's priority.
  friend bool operator<(const RequestSentence &a, const RequestSentence &b);

private:
  size_t index_;
  Ptr<Request> request_;
};
typedef std::vector<RequestSentence> RequestSentences;
struct PCItem {
  // Batch index assigned by the producer; -1 marks the poison pill that
  // tells a consumer thread to shut down.
  int batchNumber;
  // The sentences making up this batch.
  RequestSentences sentences;

  // PCItem must be default constructible for PCQueue. The default-constructed
  // element doubles as poison.
  PCItem() : batchNumber(-1) {}

  // Constructs a legitimate (non-poison) PCItem, taking ownership of the
  // sentence batch.
  explicit PCItem(int batchNumber, RequestSentences &&sentences)
      : batchNumber(batchNumber), sentences(std::move(sentences)) {}

  // Convenience function to determine poison. Marked const so it can be
  // queried through const references; does not mutate the item.
  bool isPoison() const { return (batchNumber == -1); }
};
} // namespace bergamot
} // namespace marian

View File

@ -0,0 +1,98 @@
#include "response.h"
#include "sentence_ranges.h"
#include "common/logging.h"
#include "data/alignment.h"
#include <utility>
namespace marian {
namespace bergamot {
// Takes ownership of the source text, its per-sentence ranges and the
// translation histories. Stores a pointer to vocabs (not owned) so the
// translation can be decoded lazily later.
Response::Response(std::string &&source, SentenceRanges &&sourceRanges,
                   Histories &&histories, std::vector<Ptr<Vocab const>> &vocabs)
    : source_(std::move(source)), sourceRanges_(std::move(sourceRanges)),
      histories_(std::move(histories)), vocabs_(&vocabs) {}
// Transfers ownership of the source text and the (lazily built) translation
// to the caller, and fills sentenceMappings with per-sentence string_view
// pairs pointing into those two strings. Invalidates this Response.
void Response::move(std::string &source, std::string &translation,
                    SentenceMappings &sentenceMappings) {
  // Build translation_ and the mappings before the strings are moved out;
  // the string_views in sentenceMappings reference the strings' heap
  // buffers, which survive the moves below.
  constructTranslation();
  constructSentenceMappings(sentenceMappings);
  // Move content out.
  source = std::move(source_);
  translation = std::move(translation_);
  // The string_views held in sourceRanges_/targetRanges_ pointed into the
  // strings just moved out; clear them (and histories_) so stale views
  // cannot be dereferenced through this object.
  sourceRanges_.clear();
  targetRanges_.clear();
  histories_.clear();
}
// Lazily builds translation_ (all decoded sentences joined by single spaces)
// and targetRanges_ (a string_view per sentence into translation_). Runs at
// most once; subsequent calls are no-ops.
void Response::constructTranslation() {
  if (translationConstructed_) {
    return;
  }

  // Reserving length at least as much as source_ seems like a reasonable thing
  // to do to avoid reallocations.
  translation_.reserve(source_.size());

  // Step 1: decode each history and append it to translation_, recording
  // (offset, size) index pairs per sentence. Indices rather than
  // string_views are stored here because the growing string may still
  // reallocate its storage.
  std::vector<std::pair<size_t, size_t>> translationRanges;
  // The target vocabulary is the last entry; loop-invariant, hoisted out.
  auto targetVocab = vocabs_->back();
  size_t offset{0};
  bool first{true};
  for (auto &history : histories_) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList onebest = history->nBest(1);
    Result result = onebest[0]; // Expecting only one result;
    Words words = std::get<0>(result);
    std::string decoded = targetVocab->decode(words);
    if (first) {
      first = false;
    } else {
      // Sentences are joined with a single space; account for it in offset.
      translation_ += " ";
      ++offset;
    }
    translation_ += decoded;
    translationRanges.emplace_back(offset, decoded.size());
    offset += decoded.size();
  }

  // Step 2: translation_ no longer grows, so its storage is stable and the
  // stored indices can be materialized as string_views.
  for (auto &range : translationRanges) {
    // TODO(@jerinphilip): Currently considers target tokens as whole text.
    // Needs to be further enhanced in marian-dev to extract alignments.
    std::vector<string_view> targetMappings;
    const char *begin = &translation_[range.first];
    targetMappings.emplace_back(begin, range.second);
    targetRanges_.addSentence(targetMappings);
  }

  translationConstructed_ = true;
}
// Pairs the i-th source sentence with the i-th translated sentence and
// appends each (src, tgt) pair to sentenceMappings. Assumes targetRanges_
// has already been populated by constructTranslation().
void Response::constructSentenceMappings(
    Response::SentenceMappings &sentenceMappings) {
  const size_t sentenceCount = sourceRanges_.numSentences();
  for (size_t idx = 0; idx < sentenceCount; ++idx) {
    sentenceMappings.emplace_back(sourceRanges_.sentence(idx),
                                  targetRanges_.sentence(idx));
  }
}
} // namespace bergamot
} // namespace marian

99
src/translator/response.h Normal file
View File

@ -0,0 +1,99 @@
#ifndef SRC_BERGAMOT_RESPONSE_H_
#define SRC_BERGAMOT_RESPONSE_H_
#include "sentence_ranges.h"
#include "data/types.h"
#include "definitions.h"
#include "translator/beam_search.h"
#include <cassert>
#include <string>
#include <vector>
namespace marian {
namespace bergamot {
class Response {
  // Response is a marian internal class (not a bergamot-translator class)
  // holding source blob of text, vector of TokenRanges corresponding to each
  // sentence in the source text blob and histories obtained from translating
  // these sentences.
  //
  // This class provides an API at a higher level in comparison to History to
  // access translations and additionally use string_view manipulations to
  // recover structure in translation from source-text's structure known through
  // reference string and string_view. As many of these computations are not
  // required until invoked, they are computed as required and stored in data
  // members where it makes sense to do so (translation_, targetRanges_).
  //
  // Examples of such use-cases are:
  //    translation()
  //    translationInSourceStructure() TODO(@jerinphilip)
  //    alignment(idx) TODO(@jerinphilip)
  //    sentenceMappings (for bergamot-translator)

public:
  // vocabs is required for constructing translation lazily; only the last
  // (target) vocabulary is consulted for decoding.
  Response(std::string &&source, SentenceRanges &&sourceRanges,
           Histories &&histories,
           std::vector<Ptr<Vocab const>> &vocabs);

  // Move constructor. Fix: translationConstructed_ must travel along with
  // translation_/targetRanges_ — otherwise a Response moved after its
  // translation was built would rebuild and append the translation a second
  // time on the next translation() call. Members are initialized in
  // declaration order.
  Response(Response &&other)
      : source_(std::move(other.source_)),
        sourceRanges_(std::move(other.sourceRanges_)),
        histories_(std::move(other.histories_)),
        vocabs_(std::move(other.vocabs_)),
        translationConstructed_(other.translationConstructed_),
        translation_(std::move(other.translation_)),
        targetRanges_(std::move(other.targetRanges_)){};

  // Prevents CopyConstruction and CopyAssignment. sourceRanges_ is constituted
  // by string_view and copying invalidates the data member.
  Response(const Response &) = delete;
  Response &operator=(const Response &) = delete;

  typedef std::vector<std::pair<const string_view, const string_view>>
      SentenceMappings;

  // Moves source sentence into source, translated text into translation.
  // Pairs of string_views to corresponding sentences in
  // source and translation are loaded into sentenceMappings. These string_views
  // reference the new source and translation.
  //
  // Calling move() invalidates the Response object as ownership is transferred.
  void move(std::string &source, std::string &translation,
            SentenceMappings &sentenceMappings);

  const Histories &histories() const { return histories_; }
  const std::string &source() const { return source_; }

  // Lazily builds (once) and returns the full translated text.
  const std::string &translation() {
    constructTranslation();
    return translation_;
  }

  // A convenience function provided to return translated text placed within
  // source's structure. This is useful when the source text is a multi-line
  // paragraph or string_views extracted from structured text like HTML and it's
  // desirable to place the individual sentences in the locations of the source
  // sentences.
  // const std::string translationInSourceStructure();
  // const PendingAlignmentType alignment(size_t idx);

private:
  // Builds translation_ and targetRanges_ from histories_; idempotent.
  void constructTranslation();
  // Pairs source and target sentence views; requires constructTranslation().
  void constructSentenceMappings(SentenceMappings &);

  std::string source_;
  SentenceRanges sourceRanges_;
  Histories histories_;
  // Not owned; used only to decode histories into text.
  std::vector<Ptr<Vocab const>> *vocabs_;
  // True once translation_/targetRanges_ have been built.
  bool translationConstructed_{false};
  std::string translation_;
  SentenceRanges targetRanges_;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_RESPONSE_H_

View File

@ -1,44 +0,0 @@
#ifndef SRC_BERGAMOT_SANELOGGING_H_
#define SRC_BERGAMOT_SANELOGGING_H_
#include "spdlog/spdlog.h"
#include <iostream>
namespace marian {
#define PLOG(worker, level, ...)
#define _PLOG(worker, level, ...) checkedPLog(worker, #level, __VA_ARGS__)
template <class... Args>
void checkedPLog(std::string logger, std::string level, Args... args) {
Logger log = spdlog::get(logger);
if (!log) {
try {
log = spdlog::daily_logger_st(logger, "logs/" + logger + ".log");
} catch (const spdlog::spdlog_ex &ex) {
std::cout << "Log initialization failed: " << ex.what() << std::endl;
}
}
if (level == "trace")
log->trace(args...);
else if (level == "debug")
log->debug(args...);
else if (level == "info")
log->info(args...);
else if (level == "warn")
log->warn(args...);
else if (level == "error")
log->error(args...);
else if (level == "critical")
log->critical(args...);
else {
log->warn("Unknown log level '{}' for logger '{}'", level, logger);
}
// Not required when threads clean-exit.
log->flush();
}
} // namespace marian
#endif // SRC_BERGAMOT_SANELOGGING_H_

View File

@ -0,0 +1,46 @@
#include "sentence_ranges.h"
#include <cassert>
#include <iostream>
namespace marian {
namespace bergamot {
void SentenceRanges::addSentence(std::vector<string_view> &wordRanges) {
addSentence(std::begin(wordRanges), std::end(wordRanges));
}
// Appends the word ranges [begin, end) as a new sentence. The sentence's
// start is recorded as an index (not an iterator/pointer) so it survives
// reallocation of the flat storage.
void SentenceRanges::addSentence(WordIterator begin, WordIterator end) {
  sentenceBeginIds_.push_back(flatByteRanges_.size());
  flatByteRanges_.insert(flatByteRanges_.end(), begin, end);
}
// Returns one string_view spanning the index-th sentence: from the first
// byte of its first word to the last byte of its last word.
string_view SentenceRanges::sentence(size_t index) const {
  const string_view firstWord = flatByteRanges_[sentenceBeginIds_[index]];
  string_view lastWord;
  if (index + 1 == numSentences()) {
    // Last sentence: its final word is the final word overall.
    lastWord = flatByteRanges_.back();
  } else {
    assert(index < numSentences());
    // Otherwise the final word sits just before the next sentence's start.
    const size_t lastWordId = sentenceBeginIds_[index + 1] - 1;
    lastWord = flatByteRanges_[lastWordId];
  }
  return sentenceBetween(firstWord, lastWord);
}
// Builds a single view covering everything from the start of firstWord to
// the end of lastWord; assumes both views point into the same buffer with
// firstWord not after lastWord.
string_view SentenceRanges::sentenceBetween(string_view firstWord,
                                            string_view lastWord) const {
  const char *begin = firstWord.data();
  const char *end = lastWord.data() + lastWord.size();
  return string_view(begin, end - begin);
}
} // namespace bergamot
} // namespace marian

View File

@ -0,0 +1,52 @@
#ifndef BERGAMOT_SENTENCE_RANGES_H_
#define BERGAMOT_SENTENCE_RANGES_H_
#include "data/types.h"
#include <cassert>
#include <vector>
namespace marian {
namespace bergamot {
class SentenceRanges {
  // SentenceRanges stores string_views into a source text, with additional
  // annotations to mark sentence boundaries.
  //
  // Given these annotations, this container provides the capability to
  // add sentences and to access individual sentences.

public:
  typedef std::vector<string_view>::iterator WordIterator;

  // Appends all word ranges in wordRanges as one sentence.
  void addSentence(std::vector<string_view> &wordRanges);
  // Appends the word ranges [begin, end) as one sentence.
  void addSentence(WordIterator begin, WordIterator end);

  // Drops all stored ranges and sentence boundaries.
  void clear() {
    flatByteRanges_.clear();
    sentenceBeginIds_.clear();
  }

  size_t numSentences() const { return sentenceBeginIds_.size(); }

  // Returns a string_view into the ith sentence.
  string_view sentence(size_t index) const;

private:
  // A flat storage for string_views. Can be words or sentences.
  std::vector<string_view> flatByteRanges_;

  // The container grows dynamically with addSentence. size_t marking index is
  // used to ensure the sentence boundaries stay same while underlying storage
  // might be changed during reallocation.
  std::vector<size_t> sentenceBeginIds_;

  // Utility function to extract the string starting at firstWord and ending at
  // lastWord as a single string-view.
  string_view sentenceBetween(string_view firstWord,
                              string_view lastWord) const;
};
} // namespace bergamot
} // namespace marian
#endif // BERGAMOT_SENTENCE_RANGES_H_

View File

@ -0,0 +1,53 @@
#include "sentence_splitter.h"
#include "common/cli_helper.h"
#include "common/logging.h"
#include "common/options.h"
#include <string>
namespace marian {
namespace bergamot {
// Configures the splitter from options: split mode (--ssplit-mode) and an
// optional list of protected prefixes (--ssplit-prefix-file), with
// environment variables in the path interpolated before loading.
SentenceSplitter::SentenceSplitter(marian::Ptr<marian::Options> options)
    : options_(options) {
  std::string smode_str = options_->get<std::string>("ssplit-mode", "");
  mode_ = string2splitmode(smode_str);
  std::string ssplit_prefix_file =
      options_->get<std::string>("ssplit-prefix-file", "");
  if (ssplit_prefix_file.size()) {
    ssplit_prefix_file = marian::cli::interpolateEnvVars(ssplit_prefix_file);
    LOG(info, "Loading protected prefixes for sentence splitting from {}",
        ssplit_prefix_file);
    ssplit_.load(ssplit_prefix_file);
  } else {
    // Missing prefix list degrades splitting quality but is not fatal.
    LOG(warn, "Missing list of protected prefixes for sentence splitting. "
              "Set with --ssplit-prefix-file.");
  }
}
// Creates a stream that yields one sentence at a time from the input view.
ug::ssplit::SentenceStream
SentenceSplitter::createSentenceStream(const string_view &input) {
  // Re-wrap marian::string_view as the std::string_view ssplit expects.
  std::string_view input_converted(input.data(), input.size());
  // Return the temporary directly: wrapping a prvalue in std::move disables
  // copy elision (-Wpessimizing-move) and never helps.
  return ug::ssplit::SentenceStream(input_converted, this->ssplit_, mode_);
}
// Maps a textual mode name to an ssplit splitmode. Unknown values fall back
// to wrapped_text with a warning.
ug::ssplit::SentenceStream::splitmode
SentenceSplitter::string2splitmode(const std::string &m) {
  using splitmode = ug::ssplit::SentenceStream::splitmode;
  // @TODO: throw Exception on error
  if (m == "sentence" || m == "Sentence") {
    return splitmode::one_sentence_per_line;
  }
  if (m == "paragraph" || m == "Paragraph") {
    return splitmode::one_paragraph_per_line;
  }
  const bool isWrapped =
      (m == "wrapped_text" || m == "WrappedText" || m == "wrappedText");
  if (!isWrapped) {
    LOG(warn, "Ignoring unknown text input format specification: {}.", m);
  }
  return splitmode::wrapped_text;
}
} // namespace bergamot
} // namespace marian

View File

@ -0,0 +1,31 @@
#ifndef SRC_BERGAMOT_SENTENCE_SPLITTER_H_
#define SRC_BERGAMOT_SENTENCE_SPLITTER_H_
#include "common/options.h"
#include "data/types.h"
#include "ssplit.h"
#include <string>
namespace marian {
namespace bergamot {
class SentenceSplitter {
  // A wrapper around @ugermann's ssplit-cpp compiled from several places in
  // mts. Constructed based on options. Used in TextProcessor below to create
  // sentence-streams, which provide access to one sentence from blob of text at
  // a time.

public:
  explicit SentenceSplitter(Ptr<Options> options);
  // Creates a stream yielding one sentence at a time from input.
  ug::ssplit::SentenceStream createSentenceStream(string_view const &input);

private:
  ug::ssplit::SentenceSplitter ssplit_;
  Ptr<Options> options_;
  // Split mode parsed from --ssplit-mode at construction.
  ug::ssplit::SentenceStream::splitmode mode_;
  // Parses the textual --ssplit-mode value; unknown values map to
  // wrapped_text.
  ug::ssplit::SentenceStream::splitmode string2splitmode(const std::string &m);
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_SENTENCE_SPLITTER_H_

View File

@ -1,6 +1,6 @@
#include "service.h"
#include "batch.h"
#include "definitions.h"
#include "sanelogging.h"
#include <string>
#include <utility>
@ -9,26 +9,53 @@ namespace marian {
namespace bergamot {
Service::Service(Ptr<Options> options)
: requestId_(0), batchNumber_(0),
numWorkers_(options->get<int>("cpu-threads")),
: requestId_(0), numWorkers_(options->get<int>("cpu-threads")),
vocabs_(std::move(loadVocabularies(options))),
text_processor_(vocabs_, options), batcher_(options),
pcqueue_(2 * options->get<int>("cpu-threads")) {
text_processor_(vocabs_, options), batcher_(options)
#ifdef WITH_PTHREADS
,
pcqueue_(2 * options->get<int>("cpu-threads"))
#endif // WITH_PTHREADS
{
workers_.reserve(numWorkers_);
if (numWorkers_ == 0) {
// In case workers are 0, a single-translator is created and initialized
// in the main thread.
marian::DeviceId deviceId(/*cpuId=*/0, DeviceType::cpu);
translators_.emplace_back(deviceId, vocabs_, options);
translators_.back().initialize();
} else {
#ifdef WITH_PTHREADS
// If workers specified are greater than 0, translators_ are populated with
// unitialized instances. These are then initialized inside
// individual threads and set to consume from producer-consumer queue.
workers_.reserve(numWorkers_);
translators_.reserve(numWorkers_);
for (size_t cpuId = 0; cpuId < numWorkers_; cpuId++) {
marian::DeviceId deviceId(cpuId, DeviceType::cpu);
translators_.emplace_back(deviceId, vocabs_, options);
for (int i = 0; i < numWorkers_; i++) {
marian::DeviceId deviceId(i, DeviceType::cpu);
workers_.emplace_back(deviceId, pcqueue_, vocabs_, options);
auto &translator = translators_.back();
workers_.emplace_back([&translator, this] {
translator.initialize();
translator.consumeFrom(pcqueue_);
});
}
#else // WITH_PTHREADS
ABORT(
"Fatal: Service started requesting multiple threadswhile compiled with "
"COMPILE_THREAD_VARIANT=off. Please check your cmake build "
"configuration");
#endif
}
}
std::future<TranslationResult> Service::translateWithCopy(std::string input) {
std::future<Response> Service::translateWithCopy(std::string input) {
return translate(std::move(input));
}
std::future<TranslationResult> Service::translate(std::string &&input) {
// Takes in a blob of text. Segments and std::vector<TokenRanges> are
std::future<Response> Service::translate(std::string &&input) {
// Takes in a blob of text. Segments and SentenceRanges are
// extracted from the input (blob of text) and used to construct a Request
// along with a promise. promise value is set by the worker completing a
// request.
@ -41,59 +68,46 @@ std::future<TranslationResult> Service::translate(std::string &&input) {
// returns future corresponding to the promise.
Segments segments;
std::vector<TokenRanges> sourceAlignments;
text_processor_.process(input, segments, sourceAlignments);
SentenceRanges sourceRanges;
text_processor_.process(input, segments, sourceRanges);
std::promise<TranslationResult> translationResultPromise;
auto future = translationResultPromise.get_future();
std::promise<Response> responsePromise;
auto future = responsePromise.get_future();
Ptr<Request> request = New<Request>(
requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(input),
std::move(segments), std::move(sourceAlignments),
std::move(translationResultPromise));
std::move(segments), std::move(sourceRanges), std::move(responsePromise));
for (int i = 0; i < request->numSegments(); i++) {
RequestSentence requestSentence(i, request);
batcher_.addSentenceWithPriority(requestSentence);
batcher_.addWholeRequest(request);
if (numWorkers_ > 0) {
#ifdef WITH_PTHREADS
batcher_.produceTo(pcqueue_);
#endif
} else {
// Queue single-threaded
Batch batch;
while (batcher_ >> batch) {
translators_[0].translate(batch);
}
}
int numSentences;
do {
RequestSentences batchSentences;
batcher_.cleaveBatch(batchSentences);
numSentences = batchSentences.size();
if (numSentences > 0) {
PCItem pcitem(batchNumber_++, std::move(batchSentences));
pcqueue_.ProduceSwap(pcitem);
}
if (batchNumber_ % 500 == 0) {
LOG(info, "Queuing batch {}", batchNumber_);
}
} while (numSentences > 0);
#ifndef WITH_PTHREADS
workers_[0].mainloop();
#endif
return future;
}
void Service::stop() {
int counter = 0;
#ifdef WITH_PTHREADS
for (auto &worker : workers_) {
PCItem pcitem;
pcqueue_.ProduceSwap(pcitem);
++counter;
Batch poison = Batch::poison();
pcqueue_.ProduceSwap(poison);
}
counter = 0;
for (auto &worker : workers_) {
worker.join();
++counter;
}
workers_.clear(); // Takes care of idempotency.
#endif
}
Service::~Service() { stop(); }

View File

@ -3,15 +3,18 @@
#include "batch_translator.h"
#include "batcher.h"
#include "pcqueue.h"
#include "textops.h"
#include "translation_result.h"
#include "response.h"
#include "text_processor.h"
#include <queue>
#include <vector>
#include "data/types.h"
#ifdef WITH_PTHREADS
#include "pcqueue.h"
#endif
namespace marian {
namespace bergamot {
@ -25,17 +28,17 @@ class Service {
// options = ...;
// service = Service(options);
// std::string input_blob = "Hello World";
// std::future<TranslationResult>
// std::future<Response>
// response = service.translate(std::move(input_blob));
// response.wait();
// TranslationResult result = response.get();
// Response result = response.get();
public:
explicit Service(Ptr<Options> options);
// Constructs new string copying, calls translate internally.
std::future<TranslationResult> translateWithCopy(std::string input);
std::future<TranslationResult> translate(std::string &&input);
std::future<Response> translateWithCopy(std::string input);
std::future<Response> translate(std::string &&input);
void stop();
@ -45,12 +48,11 @@ public:
~Service();
private:
unsigned int requestId_;
unsigned int batchNumber_;
int numWorkers_;
size_t requestId_;
size_t numWorkers_;
// vocabs are used to construct a Request, which later uses it to construct
// TranslationResult (decode from words to string).
// Response (decode from words to string).
std::vector<Ptr<Vocab const>> vocabs_; // ORDER DEPENDENCY
// Consists of:
@ -68,8 +70,12 @@ private:
TextProcessor text_processor_; // ORDER DEPENDENCY
Batcher batcher_;
PCQueue<PCItem> pcqueue_;
std::vector<BatchTranslator> workers_;
std::vector<BatchTranslator> translators_;
#ifdef WITH_PTHREADS
PCQueue<Batch> pcqueue_;
std::vector<std::thread> workers_;
#endif
};
std::vector<Ptr<const Vocab>> loadVocabularies(Ptr<Options> options);

View File

@ -0,0 +1,69 @@
#include "text_processor.h"
#include "data/types.h"
#include "definitions.h"
#include "sentence_ranges.h"
#include "common/options.h"
#include "data/vocab.h"
#include <vector>
namespace marian {
namespace bergamot {
// Encodes one sentence into vocabulary ids (no EOS, inference mode), filling
// wordRanges with the byte-range of each produced token in the input.
Segment TextProcessor::tokenize(const string_view &segment,
                                std::vector<string_view> &wordRanges) {
  // The source vocabulary is the first entry.
  auto const &sourceVocab = vocabs_->front();
  return sourceVocab->encodeWithByteRanges(segment, wordRanges,
                                           /*addEOS=*/false,
                                           /*inference=*/true);
}
// Holds a pointer to the (externally owned) vocabularies and configures the
// sentence splitter; caches --max-length-break minus one token of headroom
// for the EOS appended in truncate().
TextProcessor::TextProcessor(std::vector<Ptr<Vocab const>> &vocabs,
                             Ptr<Options> options)
    : vocabs_(&vocabs), sentence_splitter_(options) {
  // Validate BEFORE the decrement: max_length_break_ is size_t, so the old
  // post-decrement check (max_length_break_ < 0) could never fire, and a
  // configured value of 0 would wrap around to SIZE_MAX.
  int maxLengthBreak = options->get<int>("max-length-break");
  ABORT_IF(maxLengthBreak < 1, "max-length-break cannot be < 1");
  max_length_break_ = maxLengthBreak - 1;
}
// Splits query into sentences, tokenizes each, and appends the resulting
// (possibly length-truncated) segments to segments with their source byte
// ranges recorded in sourceRanges.
void TextProcessor::process(const string_view &query, Segments &segments,
                            SentenceRanges &sourceRanges) {
  auto sentenceStream = sentence_splitter_.createSentenceStream(query);
  std::string_view sentencePiece;
  while (sentenceStream >> sentencePiece) {
    // Re-wrap as marian::string_view for the vocabulary API.
    marian::string_view sentence(sentencePiece.data(), sentencePiece.size());
    std::vector<string_view> wordRanges;
    Segment segment = tokenize(sentence, wordRanges);
    // SentencePiece/vocab can normalize a sentence down to zero tokens;
    // skip those so no empty entries are added.
    if (!segment.empty()) {
      truncate(segment, wordRanges, segments, sourceRanges);
    }
  }
}
// Splits segment into pieces of at most max_length_break_ tokens, appending
// EOS to each piece, and records each piece's word byte-ranges as one
// sentence in sourceRanges.
void TextProcessor::truncate(Segment &segment,
                             std::vector<string_view> &wordRanges,
                             Segments &segments, SentenceRanges &sourceRanges) {
  const size_t total = segment.size();
  for (size_t offset = 0; offset < total; offset += max_length_break_) {
    const size_t pieceSize = std::min(max_length_break_, total - offset);
    auto wordBegin = segment.begin() + offset;
    segments.emplace_back(wordBegin, wordBegin + pieceSize);
    // Each truncated piece is closed with the source EOS token.
    segments.back().push_back(sourceEosId());
    auto rangeBegin = wordRanges.begin() + offset;
    sourceRanges.addSentence(rangeBegin, rangeBegin + pieceSize);
  }
}
} // namespace bergamot
} // namespace marian

View File

@ -0,0 +1,50 @@
#ifndef SRC_BERGAMOT_TEXT_PROCESSOR_H_
#define SRC_BERGAMOT_TEXT_PROCESSOR_H_
#include "data/types.h"
#include "data/vocab.h"
#include "definitions.h"
#include "sentence_ranges.h"
#include "sentence_splitter.h"
#include <vector>
namespace marian {
namespace bergamot {
class TextProcessor {
  // TextProcessor handles loading the sentencepiece vocabulary and also
  // contains an instance of sentence-splitter based on ssplit.
  //
  // Used in Service to convert an incoming blob of text to a vector of
  // sentences (vector of words). In addition, the ByteRanges of the
  // source-tokens in unnormalized text are provided as string_views.
public:
  explicit TextProcessor(std::vector<Ptr<Vocab const>> &vocabs, Ptr<Options>);

  // Splits query into sentences, tokenizes each and appends the resulting
  // segments (with their source byte-ranges) to segments/sourceRanges.
  void process(const string_view &query, Segments &segments,
               SentenceRanges &sourceRanges);

private:
  // Tokenizes an input string, returns the corresponding Words. Loads the
  // corresponding byte-ranges into tokenRanges.
  Segment tokenize(const string_view &input,
                   std::vector<string_view> &tokenRanges);

  // Truncate sentence into max_input_size segments.
  void truncate(Segment &sentence, std::vector<string_view> &tokenRanges,
                Segments &segments, SentenceRanges &sourceRanges);

  // shorthand, used only in truncate()
  const Word sourceEosId() const { return vocabs_->front()->getEosId(); }

  // Not owned; first entry is the source vocabulary.
  std::vector<Ptr<Vocab const>> *vocabs_;
  SentenceSplitter sentence_splitter_;
  // Maximum tokens per segment minus one (headroom for the appended EOS).
  size_t max_length_break_;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_TEXT_PROCESSOR_H_

View File

@ -1,109 +0,0 @@
#include "textops.h"
#include "common/timer.h"
#include <pcrecpp.h>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace marian {
namespace bergamot {
// (Removed-file variant.) Configures the splitter from options: split mode
// (--ssplit-mode) and an optional protected-prefix list
// (--ssplit-prefix-file) with environment variables interpolated.
SentenceSplitter::SentenceSplitter(marian::Ptr<marian::Options> options)
    : options_(options) {
  std::string smode_str = options_->get<std::string>("ssplit-mode", "");
  mode_ = string2splitmode(smode_str);
  std::string ssplit_prefix_file =
      options_->get<std::string>("ssplit-prefix-file", "");
  if (ssplit_prefix_file.size()) {
    ssplit_prefix_file = marian::cli::interpolateEnvVars(ssplit_prefix_file);
    LOG(info, "Loading protected prefixes for sentence splitting from {}",
        ssplit_prefix_file);
    ssplit_.load(ssplit_prefix_file);
  } else {
    // Missing prefix list degrades splitting quality but is not fatal.
    LOG(warn, "Missing list of protected prefixes for sentence splitting. "
              "Set with --ssplit-prefix-file.");
  }
}
// Creates a stream that yields one sentence at a time from the input view.
ug::ssplit::SentenceStream
SentenceSplitter::createSentenceStream(const string_view &input) {
  // Wrap marian::string_view for ssplit's pcrecpp-based API.
  pcrecpp::StringPiece spiece(input.begin(), input.size());
  // Return the temporary directly: wrapping a prvalue in std::move disables
  // copy elision (-Wpessimizing-move) and never helps.
  return ug::ssplit::SentenceStream(spiece, this->ssplit_, mode_);
}
// Maps a textual mode name to an ssplit splitmode; unknown values fall back
// to wrapped_text after logging a warning.
ug::ssplit::SentenceStream::splitmode
SentenceSplitter::string2splitmode(const std::string &m) {
  typedef ug::ssplit::SentenceStream::splitmode splitmode;
  // @TODO: throw Exception on error
  if (m == "sentence" || m == "Sentence")
    return splitmode::one_sentence_per_line;
  if (m == "paragraph" || m == "Paragraph")
    return splitmode::one_paragraph_per_line;
  if (m != "wrapped_text" && m != "WrappedText" && m != "wrappedText") {
    LOG(warn, "Ignoring unknown text input format specification: {}.", m);
  }
  return splitmode::wrapped_text;
}
// Encodes one sentence into vocabulary ids (no EOS, inference mode), filling
// tokenRanges with each token's byte-range in the input.
Segment TextProcessor::tokenize(const string_view &segment,
                                TokenRanges &tokenRanges) {
  return vocabs_->front()->encodeWithByteRanges(
      segment, tokenRanges, /*addEOS=*/false, /*inference=*/true);
}
// Caches --max-input-sentence-tokens minus one (headroom for the EOS token
// appended in truncate()).
TextProcessor::TextProcessor(std::vector<Ptr<Vocab const>> &vocabs,
                             Ptr<Options> options)
    : vocabs_(&vocabs), sentence_splitter_(options) {
  max_input_sentence_tokens_ = options->get<int>("max-input-sentence-tokens");
  max_input_sentence_tokens_ = max_input_sentence_tokens_ - 1;
  // NOTE(review): max_input_sentence_tokens_ is unsigned (see the header), so
  // this < 0 check can never fire and a configured value of 0 wraps around —
  // validation should happen before the decrement.
  ABORT_IF(max_input_sentence_tokens_ < 0,
           "max-input-sentence-tokens cannot be < 0");
}
// Splits query into sentences, tokenizes each and appends the resulting
// (possibly truncated) segments plus their source token byte-ranges.
void TextProcessor::process(const string_view &query, Segments &segments,
                            std::vector<TokenRanges> &sourceRanges) {
  auto sentenceStream = sentence_splitter_.createSentenceStream(query);
  pcrecpp::StringPiece sentenceStringPiece;
  while (sentenceStream >> sentenceStringPiece) {
    // Re-wrap the pcrecpp piece as marian::string_view for the vocab API.
    string_view sentence(sentenceStringPiece.data(),
                         sentenceStringPiece.size());
    TokenRanges tokenRanges;
    Segment segment = tokenize(sentence, tokenRanges);
    // There are some cases where SentencePiece or vocab returns no words
    // after normalization. 0 prevents any empty entries from being added.
    if (segment.size() > 0) {
      // Truncate segment into max_input_size segments.
      truncate(segment, tokenRanges, segments, sourceRanges);
    }
  }
}
// Splits segment into pieces of at most max_input_sentence_tokens_ tokens,
// appending EOS to each piece and recording each piece's token byte-ranges.
void TextProcessor::truncate(Segment &segment, TokenRanges &tokenRanges,
                             Segments &segments,
                             std::vector<TokenRanges> &sourceRanges) {
  // NOTE(review): signed `offset` is compared against the unsigned
  // segment.size() — works for realistic sizes but triggers
  // -Wsign-compare; size_t would be cleaner.
  for (int offset = 0; offset < segment.size();
       offset += max_input_sentence_tokens_) {
    auto start = segment.begin() + offset;
    unsigned int left = segment.size() - offset;
    unsigned int diff = std::min(max_input_sentence_tokens_, left);
    segments.emplace_back(start, start + diff);
    // Each truncated piece is closed with the source EOS token.
    segments.back().push_back(sourceEosId());
    auto astart = tokenRanges.begin() + offset;
    sourceRanges.emplace_back(astart, astart + diff);
  }
}
} // namespace bergamot
} // namespace marian

View File

@ -1,71 +0,0 @@
#ifndef SRC_BERGAMOT_TEXTOPS_H_
#define SRC_BERGAMOT_TEXTOPS_H_
#include "common/definitions.h"
#include "common/logging.h"
#include "common/options.h"
#include "common/types.h" // missing in shortlist.h
#include "common/utils.h"
#include "data/sentencepiece_vocab.h"
#include "data/shortlist.h"
#include "definitions.h"
#include "ssplit.h"
#include <cassert>
#include <iostream>
#include <string>
#include <vector>
namespace marian {
namespace bergamot {
class SentenceSplitter {
  // A wrapper around @ugermann's ssplit-cpp compiled from several places in
  // mts. Constructed based on options. Used in TextProcessor below to create
  // sentence-streams, which provide access to one sentence from blob of text at
  // a time.
public:
  explicit SentenceSplitter(Ptr<Options> options);
  // Creates a stream yielding one sentence at a time from input.
  ug::ssplit::SentenceStream createSentenceStream(string_view const &input);

private:
  ug::ssplit::SentenceSplitter ssplit_;
  Ptr<Options> options_;
  // Split mode parsed from --ssplit-mode at construction.
  ug::ssplit::SentenceStream::splitmode mode_;
  // Parses the textual --ssplit-mode value; unknown values map to
  // wrapped_text.
  ug::ssplit::SentenceStream::splitmode string2splitmode(const std::string &m);
};
class TextProcessor {
  // TextProcessor handles loading the sentencepiece vocabulary and also
  // contains an instance of sentence-splitter based on ssplit.
  //
  // Used in Service to convert an incoming blob of text to a vector of
  // sentences (vector of words). In addition, the ByteRanges of the
  // source-tokens in unnormalized text are provided as string_views.
public:
  explicit TextProcessor(std::vector<Ptr<Vocab const>> &vocabs, Ptr<Options>);

  // Splits query into sentences, tokenizes each and appends the resulting
  // segments together with their source token byte-ranges.
  void process(const string_view &query, Segments &segments,
               std::vector<TokenRanges> &sourceRanges);

private:
  // Tokenizes an input string, returns the corresponding Words. Loads the
  // corresponding byte-ranges into tokenRanges.
  Segment tokenize(const string_view &input, TokenRanges &tokenRanges);

  // Truncate sentence into max_input_size segments.
  void truncate(Segment &sentence, TokenRanges &tokenRanges, Segments &segments,
                std::vector<TokenRanges> &sourceRanges);

  // shorthand, used only in truncate()
  const Word sourceEosId() const { return vocabs_->front()->getEosId(); }

  // Not owned; first entry is the source vocabulary.
  std::vector<Ptr<Vocab const>> *vocabs_;
  SentenceSplitter sentence_splitter_;
  // Maximum tokens per segment minus one (headroom for the appended EOS).
  unsigned int max_input_sentence_tokens_;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_TEXTOPS_H_

View File

@ -1,72 +0,0 @@
#include "translation_result.h"
#include "common/logging.h"
#include "data/alignment.h"
#include <utility>
namespace marian {
namespace bergamot {
// Takes ownership of the source text, per-sentence token ranges and the
// translation histories; eagerly decodes all histories into translation_
// and builds sentence-level (source, target) string_view pairs.
TranslationResult::TranslationResult(std::string &&source,
                                     std::vector<TokenRanges> &&sourceRanges,
                                     Histories &&histories,
                                     std::vector<Ptr<Vocab const>> &vocabs)
    : source_(std::move(source)), sourceRanges_(std::move(sourceRanges)),
      histories_(std::move(histories)) {
  std::vector<string_view> sourceMappings;
  std::vector<string_view> targetMappings;

  // Compile each sentence's token ranges into a single per-sentence
  // string_view spanning first to last token.
  sourceMappings.reserve(sourceRanges_.size());
  for (int i = 0; i < sourceRanges_.size(); i++) {
    string_view first = sourceRanges_[i].front();
    string_view last = sourceRanges_[i].back();
    sourceMappings.emplace_back(first.data(), last.end() - first.begin());
  }

  // Compiles translations into a single std::string translation_
  // Current implementation uses += on std::string, multiple resizes.
  // Stores ByteRanges as indices first, followed by conversion into
  // string_views.
  // TODO(jerin): Add token level string_views here as well.
  std::vector<std::pair<int, int>> translationRanges;
  size_t offset{0};
  bool first{true};
  for (auto &history : histories_) {
    // TODO(jerin): Change hardcode of nBest = 1
    NBestList onebest = history->nBest(1);
    Result result = onebest[0]; // Expecting only one result;
    Words words = std::get<0>(result);
    std::string decoded = (vocabs.back())->decode(words);
    if (first) {
      first = false;
    } else {
      // Sentences are joined with a single space; account for it in offset.
      translation_ += " ";
      ++offset;
    }

    translation_ += decoded;
    translationRanges.emplace_back(offset, decoded.size());
    offset += decoded.size();
  }

  // Converting ByteRanges as indices into string_views.
  targetMappings.reserve(translationRanges.size());
  for (auto &range : translationRanges) {
    const char *begin = &translation_[range.first];
    targetMappings.emplace_back(begin, range.second);
  }

  // Pair each source sentence view with the corresponding target view.
  for (auto src = sourceMappings.begin(), tgt = targetMappings.begin();
       src != sourceMappings.end() && tgt != targetMappings.end();
       ++src, ++tgt) {
    sentenceMappings_.emplace_back(*src, *tgt);
    // NOTE(review): `t` is never used — dead statement left over from
    // debugging.
    auto &t = sentenceMappings_.back();
  }
}
} // namespace bergamot
} // namespace marian

View File

@ -1,76 +0,0 @@
#ifndef SRC_BERGAMOT_TRANSLATION_RESULT_H_
#define SRC_BERGAMOT_TRANSLATION_RESULT_H_
#include "data/types.h"
#include "definitions.h"
#include "translator/beam_search.h"
#include <cassert>
#include <string>
#include <vector>
namespace marian {
namespace bergamot {
// Holds the source text, the assembled translation, and sentence-level
// alignments between them. Move-only: the string_views in sentenceMappings_
// point into source_ and translation_, so a copy would leave dangling views.
class TranslationResult {
public:
// Takes ownership of the source text, its per-sentence token ranges and the
// decoder histories; vocabs are borrowed only to decode target-side tokens.
TranslationResult(std::string &&source,
std::vector<TokenRanges> &&sourceRanges,
Histories &&histories,
std::vector<Ptr<Vocab const>> &vocabs);
// Move constructor transfers the owned strings together with the mappings
// that view into them, keeping sentenceMappings_ valid after the move.
TranslationResult(TranslationResult &&other)
: source_(std::move(other.source_)),
translation_(std::move(other.translation_)),
sourceRanges_(std::move(other.sourceRanges_)),
sentenceMappings_(std::move(other.sentenceMappings_)),
histories_(std::move(other.histories_)){};
// Copying is disallowed; see class comment.
TranslationResult(const TranslationResult &) = delete;
TranslationResult &operator=(const TranslationResult &) = delete;
// Returns const references to source and translated texts, for external
// consumption.
const std::string &getOriginalText() const { return source_; }
const std::string &getTranslatedText() const { return translation_; }
// A mapping of string_views in the source_ and translation_ is provided as a
// pair for external consumption. Each entry corresponds
// to a (source-sentence, target-sentence).
typedef std::vector<std::pair<const string_view, const string_view>>
SentenceMappings;
const SentenceMappings &getSentenceMappings() const {
return sentenceMappings_;
}
// Return the Quality scores of the translated text.
// Not implemented currently, commenting out.
// const QualityScore &getQualityScore() const { return qualityScore; }
// For development use to benchmark with marian-decoder.
const Histories &getHistories() const { return histories_; }
// NOTE(review): these members are intentionally public (not private) to
// support move-semantics with consistent string_views in
// bergamot-translator; callers should still treat them as read-only.
std::string source_;
std::string translation_;
// Kept to complete the bergamot-translator spec; redundant while
// sourceMappings_ and targetMappings_ exist, or vice-versa.
SentenceMappings sentenceMappings_;
private:
// Histories are currently required for interoperability with OutputPrinter
// and OutputCollector and hence comparisons with marian-decoder.
// Future hook to gain alignments.
Histories histories_;
// Token-level string_views into source_, one vector per sentence.
std::vector<TokenRanges> sourceRanges_;
};
} // namespace bergamot
} // namespace marian
#endif // SRC_BERGAMOT_TRANSLATION_RESULT_H_

View File

@ -38,16 +38,11 @@
<div id="divtranslation">
<label for="from">From</label>
<textarea id="from" name="from">
Una estrategia republicana para obstaculizar la reelección de Obama
Los dirigentes republicanos justificaron su política por la necesidad de luchar contra el fraude electoral.
Ahora bien, el Centro Brennan considera esto último un mito y afirma que el fraude electoral es menos frecuente en los Estados Unidos que el número de personas que mueren a causa de la caída de un rayo.
De hecho, los abogados republicanos no han encontrado más que 300 casos de fraude electoral en los Estados Unidos en diez años.
Una cosa es cierta: esas nuevas disposiciones afectarán negativamente a la tasa de participación.
En ese sentido, estas medidas minarán en parte el sistema democrático americano.
Al contrario de lo que ocurre en Canadá, los estados americanos son responsables de la organización de las elecciones federales en los Estados Unidos.
Y en esa misma línea una mayoría de los gobiernos americanos promulgaron, a partir de 2009, nuevas leyes que dificultaban el proceso de inscripción o de votación.
Este fenómeno se ha extendido tras las elecciones de noviembre de 2010, que vieron el aumento de 675 nuevos representantes republicanos en 26 estados.
En consecuencia, durante el año 2011 se introdujeron 180 proyectos de ley que restringían el ejercicio del derecho de voto en 41 estados.
Una estrategia republicana para obstaculizar la reelección de Obama. Los dirigentes republicanos justificaron su política por la necesidad de luchar contra el fraude electoral.
Ahora bien, el Centro Brennan considera esto último un mito y afirma que el fraude electoral es menos frecuente en los Estados Unidos que el número de personas que mueren a causa de la caída de un rayo. De hecho, los abogados republicanos no han encontrado más que 300 casos de fraude electoral en los Estados Unidos en diez años. Una cosa es cierta: esas nuevas disposiciones afectarán negativamente a la tasa de participación.
En ese sentido, estas medidas minarán en parte el sistema democrático americano.
Al contrario de lo que ocurre en Canadá, los estados americanos son responsables de la organización de las elecciones federales en los Estados Unidos. Y en esa misma línea una mayoría de los gobiernos americanos promulgaron, a partir de 2009, nuevas leyes que dificultaban el proceso de inscripción o de votación.
Este fenómeno se ha extendido tras las elecciones de noviembre de 2010, que vieron el aumento de 675 nuevos representantes republicanos en 26 estados. En consecuencia, durante el año 2011 se introdujeron 180 proyectos de ley que restringían el ejercicio del derecho de voto en 41 estados.
</textarea>
<br><br>
<label for="to">To</label>
@ -81,12 +76,12 @@ vocabs:
beam-size: 1
normalize: 1.0
word-penalty: 0
max-input-sentence-tokens: 128
max-input-tokens: 1024
max-length-break: 128
mini-batch-words: 1024
workspace: 128
max-length-factor: 2.0
skip-cost: true
cpu-threads: 1
cpu-threads: 0
quiet: true
quiet-translation: true
shortlist:
@ -112,19 +107,19 @@ maxi-batch-sort: src
model = new Module.TranslationModel(modelConfig);
}
const translate = (sentences) => {
const translate = (paragraphs) => {
// Instantiate the arguments of translate() API i.e. TranslationRequest and input (vector<string>)
var request = new Module.TranslationRequest();
let input = new Module.VectorString;
// Initialize the input
sentences.forEach(sentence => {
// prevent empty sentences - it breaks the translation
if (sentence.trim() === "") {
paragraphs.forEach(paragraph => {
// prevent empty paragraph - it breaks the translation
if (paragraph.trim() === "") {
return;
}
input.push_back(sentence.trim())
input.push_back(paragraph.trim())
})
// Access input (just for debugging)
console.log('Input size=', input.size());
@ -138,14 +133,14 @@ maxi-batch-sort: src
let result = model.translate(input, request);
// Access original and translated text from each entry of vector<TranslationResult>
//console.log('Result size=', result.size(), ' - TimeDiff - ', (Date.now() - start)/1000);
const translatedSentences = [];
const translatedParagraphs = [];
for (let i = 0; i < result.size(); i++) {
translatedSentences.push(result.get(i).getTranslatedText());
translatedParagraphs.push(result.get(i).getTranslatedText());
}
console.log({ translatedSentences });
console.log({ translatedParagraphs });
request.delete();
input.delete();
return translatedSentences;
return translatedParagraphs;
}
document.querySelector("#load").addEventListener("click", () => {
@ -160,17 +155,17 @@ maxi-batch-sort: src
const translateCall = () => {
const text = document.querySelector('#from').value;
const sentences = text.split("\n");
const paragraphs = text.split("\n");
let wordCount = 0;
sentences.forEach(sentence => {
paragraphs.forEach(sentence => {
wordCount += sentence.trim().split(" ").filter(word => word.trim() !== "").length;
})
const start = Date.now();
const translatedSentences = translate(sentences);
const translatedParagraphs = translate(paragraphs);
const secs = (Date.now() - start) / 1000;
log(`Translation of ${translatedSentences.length} sentences (wordCount ${wordCount}) took ${secs} secs (${Math.round(wordCount / secs)} words per second)`);
log(`Translation of (${wordCount}) words took ${secs} secs (${Math.round(wordCount / secs)} words per second)`);
document.querySelector('#to').value = translatedSentences.join("\n");
document.querySelector('#to').value = translatedParagraphs.join("\n");
}
document.querySelector("#translate").addEventListener("click", () => {