From 9a54d2116cc0b26fcc7582c0a99c7905c2d3be66 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 8 Feb 2021 13:46:59 +0100 Subject: [PATCH 01/98] Updated marian-dev submodule - Switch to "wasm" branch of browsermt/marian-dev --- 3rd_party/marian-dev | 2 +- CMakeLists.txt | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/3rd_party/marian-dev b/3rd_party/marian-dev index ee56e02..a4e50b6 160000 --- a/3rd_party/marian-dev +++ b/3rd_party/marian-dev @@ -1 +1 @@ -Subproject commit ee56e02f0525a4651157a07f74b44f456db14c8c +Subproject commit a4e50b66be38a94b90c46c4695d86de9932c34e8 diff --git a/CMakeLists.txt b/CMakeLists.txt index ce48a90..45551ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,10 @@ set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") option(COMPILE_CUDA "Compile GPU version" OFF) option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON) option(USE_STATIC_LIBS "Link statically against non-system libs" ON) -option(USE_MKL "Compile with MKL support" ON) +option(USE_MKL "Compile with MKL support" OFF) +option(COMPILE_DECODER_ONLY "Compile marian-decoder only" ON) +option(COMPILE_WITH_PTHREADS "Compile with pthreads support" OFF) +option(USE_WASM_COMPATIBLE_BLAS "Compile with a WASM compatible blas for decoder only builds" ON) execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) From 47b4bae268bf98dd1fad70ce50731a5f74e09c3b Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 8 Feb 2021 14:31:12 +0100 Subject: [PATCH 02/98] Changed encodePreservingSource -> encodeWithByteRanges - This change happened because marian submodule changed this name - Native builds are working fine -- bergamot-translator-app output is consistent --- src/translator/textops.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/textops.cpp b/src/translator/textops.cpp index 25e48f1..ac93421 100644 --- 
a/src/translator/textops.cpp +++ b/src/translator/textops.cpp @@ -52,7 +52,7 @@ SentenceSplitter::string2splitmode(const std::string &m) { Segment TextProcessor::tokenize(const string_view &segment, TokenRanges &tokenRanges) { - return vocabs_->front()->encodePreservingSource( + return vocabs_->front()->encodeWithByteRanges( segment, tokenRanges, /*addEOS=*/false, /*inference=*/true); } From 5683168a8d0011e7311ec62e13806b23bce52ec9 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Tue, 9 Feb 2021 15:42:02 +0100 Subject: [PATCH 03/98] Updated ssplit submodule to a different repository - Added abhi-agg/ssplit-cpp - Added its wasm branch in bergamot-translator - Native builds of bergamot-translator are successful -- Sentence splitting is NOT WORKING -- Only translation is working --- .gitmodules | 2 +- 3rd_party/ssplit-cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index d3bbf18..e4feab5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "3rd_party/ssplit-cpp"] path = 3rd_party/ssplit-cpp - url = https://github.com/ugermann/ssplit-cpp + url = https://github.com/abhi-agg/ssplit-cpp [submodule "3rd_party/marian-dev"] path = 3rd_party/marian-dev url = https://github.com/browsermt/marian-dev diff --git a/3rd_party/ssplit-cpp b/3rd_party/ssplit-cpp index f5d0229..4f5d134 160000 --- a/3rd_party/ssplit-cpp +++ b/3rd_party/ssplit-cpp @@ -1 +1 @@ -Subproject commit f5d022992f4a00c860eb809389748908bb85ffcf +Subproject commit 4f5d1348a3fba1a8cb70135f68470d613573f9f3 From 584700ce911de9da92489661c42a4ecc7c58d35e Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Wed, 10 Feb 2021 11:15:16 +0100 Subject: [PATCH 04/98] Changed translate() API from non-blocking to blocking - Can be changed back to non-blocking once blocking API becomes integrable via WASM port in browser --- app/main.cpp | 4 ++-- src/AbstractTranslationModel.h | 2 +- src/translator/TranslationModel.cpp | 5 ++--- 
src/translator/TranslationModel.h | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/app/main.cpp b/app/main.cpp index ef61eb2..2f67feb 100644 --- a/app/main.cpp +++ b/app/main.cpp @@ -44,10 +44,10 @@ int main(int argc, char **argv) { "Prague, the University of Sheffield, University of Tartu, and " "Mozilla."); - auto futureResults = model->translate(std::move(texts), translationRequest); + auto results = model->translate(std::move(texts), translationRequest); // Resolve the future and get the actual result - std::vector results = futureResults.get(); + //std::vector results = futureResults.get(); for (auto &result : results) { std::cout << "[original]: " << result.getOriginalText() << std::endl; diff --git a/src/AbstractTranslationModel.h b/src/AbstractTranslationModel.h index 6cb30c4..7562b0a 100644 --- a/src/AbstractTranslationModel.h +++ b/src/AbstractTranslationModel.h @@ -57,7 +57,7 @@ public: * entry of texts list will be moved to its corresponding TranslationResult * object). */ - virtual std::future> + virtual std::vector translate(std::vector &&texts, TranslationRequest request) = 0; /* Check if the model can provide alignment information b/w original and diff --git a/src/translator/TranslationModel.cpp b/src/translator/TranslationModel.cpp index f501678..3d5ae23 100644 --- a/src/translator/TranslationModel.cpp +++ b/src/translator/TranslationModel.cpp @@ -55,7 +55,7 @@ TranslationModel::TranslationModel(const std::string &config) TranslationModel::~TranslationModel() {} -std::future> +std::vector TranslationModel::translate(std::vector &&texts, TranslationRequest request) { // Implementing a non-async version first. Unpleasant, but should work. 
@@ -84,8 +84,7 @@ TranslationModel::translate(std::vector &&texts, std::move(sentenceMappings)); } - promise.set_value(std::move(translationResults)); - return future; + return translationResults; } bool TranslationModel::isAlignmentSupported() const { return false; } diff --git a/src/translator/TranslationModel.h b/src/translator/TranslationModel.h index c922538..d468e2f 100644 --- a/src/translator/TranslationModel.h +++ b/src/translator/TranslationModel.h @@ -54,7 +54,7 @@ public: * entry of texts list will be moved to its corresponding TranslationResult * object). */ - std::future> + std::vector translate(std::vector &&texts, TranslationRequest request) override; From a2d32693448fbbc582efc0da1e05f6731e548845 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Wed, 10 Feb 2021 11:27:16 +0100 Subject: [PATCH 05/98] Updated ssplit submodule --- 3rd_party/ssplit-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rd_party/ssplit-cpp b/3rd_party/ssplit-cpp index 4f5d134..1686496 160000 --- a/3rd_party/ssplit-cpp +++ b/3rd_party/ssplit-cpp @@ -1 +1 @@ -Subproject commit 4f5d1348a3fba1a8cb70135f68470d613573f9f3 +Subproject commit 16864967b7313e76e3b107d11ec39d8d5cedff1e From 9747d9ba83e2eb6f7cf5edfee37a90592d2c220b Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 15:34:27 +0100 Subject: [PATCH 06/98] Add cmake option to compile project on WASM - Set cmake option COMPILE_WASM to ON to compile the project on WASM --- CMakeLists.txt | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 45551ea..b662a78 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,9 @@ project(bergamot_translator CXX C) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") + +# Project specific cmake options +option(COMPILE_WASM "Compile for WASM" OFF) # Custom CMake options to compile marian 
(a 3rd party submodule) for this project option(COMPILE_CUDA "Compile GPU version" OFF) @@ -22,8 +24,19 @@ option(USE_WASM_COMPATIBLE_BLAS "Compile with a WASM compatible blas for decoder execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +if(NOT COMPILE_WASM) + # Set BUILD_ARCH to native only while compiling for non wasm platform + set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.") +endif() + +if(COMPILE_WASM) + add_compile_options(-pthread -O3 -g2 -fPIC -mssse3 -msimd128) + add_compile_options("SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=1" "SHELL:-s DISABLE_EXCEPTION_CATCHING=0" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1") + add_compile_options(-Wno-error=pthreads-mem-growth) +endif(COMPILE_WASM) + add_subdirectory(3rd_party) add_subdirectory(src) -add_subdirectory(app) - - +if(NOT COMPILE_WASM) + add_subdirectory(app) +endif() From b73d4f4cc275277b35545af2a0d35ea7953166d4 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 15:37:38 +0100 Subject: [PATCH 07/98] Set cmake option to compile marian library only - Set COMPILE_LIBRARY_ONLY to ON for marian library --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b662a78..daea560 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ option(USE_MKL "Compile with MKL support" OFF) option(COMPILE_DECODER_ONLY "Compile marian-decoder only" ON) option(COMPILE_WITH_PTHREADS "Compile with pthreads support" OFF) option(USE_WASM_COMPATIBLE_BLAS "Compile with a WASM compatible blas for decoder only builds" ON) +SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.") execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) From 838547e4d582089d6222aadf14e77732d8955d17 Mon Sep 17 00:00:00 2001 
From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 15:42:18 +0100 Subject: [PATCH 08/98] Set cmake options of marian properly for this project --- CMakeLists.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index daea560..09ac2fc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,14 +12,14 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Project specific cmake options option(COMPILE_WASM "Compile for WASM" OFF) -# Custom CMake options to compile marian (a 3rd party submodule) for this project -option(COMPILE_CUDA "Compile GPU version" OFF) -option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON) -option(USE_STATIC_LIBS "Link statically against non-system libs" ON) -option(USE_MKL "Compile with MKL support" OFF) -option(COMPILE_DECODER_ONLY "Compile marian-decoder only" ON) -option(COMPILE_WITH_PTHREADS "Compile with pthreads support" OFF) -option(USE_WASM_COMPATIBLE_BLAS "Compile with a WASM compatible blas for decoder only builds" ON) +# Set marian (3rd party submodule) cmake options to compile for this project +SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version") +SET(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece") +SET(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs") +SET(USE_MKL OFF CACHE BOOL "Compile with MKL support") +SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only") +SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support") +SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds") SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.") execute_process(COMMAND git submodule update --init --recursive --no-fetch From 9b896507e3860b5c3cf0e452659d336fe43958e1 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 15:53:38 +0100 Subject: [PATCH 09/98] cmake compile option changes - Make native 
builds successful with marian decoder - COMPILE_DECODER_ONLY flag requires importing some compile definitions from marian --- src/translator/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index b6fcf69..eab04ab 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -11,6 +11,10 @@ add_library(bergamot-translator STATIC batcher.cpp translation_result.cpp ) +if (COMPILE_DECODER_ONLY) + # A dirty hack because of marian's bad cmake practices + target_compile_definitions(bergamot-translator PUBLIC DECODER_ONLY) +endif() target_link_libraries(bergamot-translator marian ssplit) From 79c445ae3a9c63fa68cd7687e5bdae7b76dc72b1 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 15:57:26 +0100 Subject: [PATCH 10/98] cmake compile option changes for wasm builds - Make WASM builds successful with marian decoder - Setting COMPILE_WASM to ON requires importing some compile definitions from marian --- src/translator/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index eab04ab..b8ed196 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -16,6 +16,11 @@ if (COMPILE_DECODER_ONLY) target_compile_definitions(bergamot-translator PUBLIC DECODER_ONLY) endif() +if(COMPILE_WASM) + # A dirty hack because of marian's bad cmake practices + target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM) +endif(COMPILE_WASM) + target_link_libraries(bergamot-translator marian ssplit) target_include_directories(bergamot-translator From a06530e92b6d16527487c8fa0ead4ae04f0ddbb5 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 16:14:03 +0100 Subject: [PATCH 11/98] Fixed a bug in TranslationModel class - Using bergamot-translator as a library fails at run time because necessary parser options are not set --- 
src/translator/TranslationModel.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/translator/TranslationModel.cpp b/src/translator/TranslationModel.cpp index 3d5ae23..fd2db1d 100644 --- a/src/translator/TranslationModel.cpp +++ b/src/translator/TranslationModel.cpp @@ -15,6 +15,8 @@ // All local project includes #include "TranslationModel.h" #include "translator/service.h" +#include "translator/parser.h" + std::shared_ptr parseOptions(const std::string &config) { marian::Options options; @@ -34,7 +36,7 @@ std::shared_ptr parseOptions(const std::string &config) { // Error: Aborted from void unhandledException() in // 3rd_party/marian-dev/src/common/logging.cpp:113 - marian::ConfigParser configParser(marian::cli::mode::translation); + marian::ConfigParser configParser = marian::bergamot::createConfigParser(); const YAML::Node &defaultConfig = configParser.getConfig(); options.merge(defaultConfig); From 23a952782479401c4ac31bab6eccccb546c1f4ee Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 16:38:36 +0100 Subject: [PATCH 12/98] Source code changes to compile the project without threads - Set COMPILE_THREAD_VARIANT cmake option to ON to compile multithreaded variant of the project --- CMakeLists.txt | 4 ++++ src/translator/CMakeLists.txt | 4 ++++ src/translator/batch_translator.cpp | 16 +++++++++++++++- src/translator/batch_translator.h | 8 ++++++++ src/translator/pcqueue.h | 29 +++++++++++++++++++++++++++++ src/translator/service.cpp | 3 +++ 6 files changed, 63 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 09ac2fc..7327e14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Project specific cmake options option(COMPILE_WASM "Compile for WASM" OFF) +option(COMPILE_THREAD_VARIANT "Compile with thread support" OFF) # Set marian (3rd party submodule) cmake options to compile for this project SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU 
version") @@ -41,3 +42,6 @@ add_subdirectory(src) if(NOT COMPILE_WASM) add_subdirectory(app) endif() +if(COMPILE_WASM) + add_subdirectory(app) +endif(COMPILE_WASM) diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index b8ed196..71bdd97 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -21,6 +21,10 @@ if(COMPILE_WASM) target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM) endif(COMPILE_WASM) +if (COMPILE_THREAD_VARIANT) + target_compile_definitions(bergamot-translator PRIVATE WITH_PTHREADS) +endif() + target_link_libraries(bergamot-translator marian ssplit) target_include_directories(bergamot-translator diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index 6380a00..6dc3993 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -14,7 +14,11 @@ BatchTranslator::BatchTranslator(DeviceId const device, Ptr options) : device_(device), options_(options), pcqueue_(&pcqueue), vocabs_(&vocabs) { +#ifdef WITH_PTHREADS thread_ = std::thread([this] { this->mainloop(); }); +#else + this->initGraph(); +#endif } void BatchTranslator::initGraph() { @@ -100,12 +104,16 @@ void BatchTranslator::translate(RequestSentences &requestSentences, } void BatchTranslator::mainloop() { +#ifdef WITH_PTHREADS initGraph(); +#endif PCItem pcitem; Histories histories; +#ifdef WITH_PTHREADS while (true) { +#endif pcqueue_->ConsumeSwap(pcitem); if (pcitem.isPoison()) { return; @@ -115,10 +123,16 @@ void BatchTranslator::mainloop() { pcitem.sentences[i].completeSentence(histories[i]); } } +#ifdef WITH_PTHREADS } +#endif } -void BatchTranslator::join() { thread_.join(); } +void BatchTranslator::join() { +#ifdef WITH_PTHREADS + thread_.join(); +#endif +} } // namespace bergamot } // namespace marian diff --git a/src/translator/batch_translator.h b/src/translator/batch_translator.h index 069155e..3f1d2e4 100644 --- a/src/translator/batch_translator.h +++ 
b/src/translator/batch_translator.h @@ -29,10 +29,16 @@ public: // convenience function for logging. TODO(jerin) std::string _identifier() { return "worker" + std::to_string(device_.no); } +#ifndef WITH_PTHREADS + void mainloop(); +#endif + private: void initGraph(); void translate(RequestSentences &requestSentences, Histories &histories); +#ifdef WITH_PTHREADS void mainloop(); +#endif Ptr options_; @@ -43,7 +49,9 @@ private: Ptr slgen_; PCQueue *pcqueue_; +#ifdef WITH_PTHREADS std::thread thread_; +#endif }; } // namespace bergamot } // namespace marian diff --git a/src/translator/pcqueue.h b/src/translator/pcqueue.h index f0b3541..79d6b75 100644 --- a/src/translator/pcqueue.h +++ b/src/translator/pcqueue.h @@ -9,6 +9,7 @@ #include #include +#ifdef WITH_PTHREADS #ifdef __APPLE__ #include #include @@ -19,6 +20,7 @@ #else #include #endif +#endif // WITH_PTHREADS #if __GNUC__ >= 3 #define UTIL_UNLIKELY(x) __builtin_expect(!!(x), 0) @@ -29,6 +31,7 @@ namespace marian { namespace bergamot { +#ifdef WITH_PTHREADS /* OS X Maverick and Boost interprocess were doing "Function not implemented." * So this is my own wrapper around the mach kernel APIs. */ @@ -114,6 +117,20 @@ inline void WaitSemaphore(Semaphore &on) { } #endif // Apple +#else // WITH_PTHREADS +// A dummy Semaphore class that does nothing +class Semaphore { +public: + explicit Semaphore(unsigned int value) : count(value) {} + ~Semaphore() {} + void wait() {} + void post() {} +private: + unsigned int count; +}; + +inline void WaitSemaphore(Semaphore &semaphore) { semaphore.wait(); } +#endif // WITH_PTHREADS /** * Producer consumer queue safe for multiple producers and multiple consumers. @@ -134,7 +151,9 @@ public: void Produce(const T &val) { WaitSemaphore(empty_); { + #ifdef WITH_PTHREADS std::lock_guard produce_lock(produce_at_mutex_); + #endif try { *produce_at_ = val; } catch (...) 
{ @@ -151,7 +170,9 @@ public: void ProduceSwap(T &val) { WaitSemaphore(empty_); { + #ifdef WITH_PTHREADS std::lock_guard produce_lock(produce_at_mutex_); + #endif try { std::swap(*produce_at_, val); } catch (...) { @@ -168,7 +189,9 @@ public: T &Consume(T &out) { WaitSemaphore(used_); { + #ifdef WITH_PTHREADS std::lock_guard consume_lock(consume_at_mutex_); + #endif try { out = *consume_at_; } catch (...) { @@ -186,7 +209,9 @@ public: T &ConsumeSwap(T &out) { WaitSemaphore(used_); { + #ifdef WITH_PTHREADS std::lock_guard consume_lock(consume_at_mutex_); + #endif try { std::swap(out, *consume_at_); } catch (...) { @@ -220,11 +245,15 @@ private: // Index for next write in storage_. T *produce_at_; +#ifdef WITH_PTHREADS std::mutex produce_at_mutex_; +#endif // Index for next read from storage_. T *consume_at_; +#ifdef WITH_PTHREADS std::mutex consume_at_mutex_; +#endif }; template struct UnboundedPage { diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 4a5af30..f61ad47 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -73,6 +73,9 @@ std::future Service::translate(std::string &&input) { } } while (numSentences > 0); +#ifndef WITH_PTHREADS + workers_[0].mainloop(); +#endif return future; } From 7b80003a5fd60d5e28beee74d8f45590390581f5 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 16:59:07 +0100 Subject: [PATCH 13/98] Added code to generate proper JS bindings of translator - COMPILE_WASM cmake option sets WASM_BINDINGS compile definition that enables code for generating proper JS bindings --- src/TranslationResult.h | 22 +++++++++++++++++++++- src/translator/CMakeLists.txt | 2 ++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/TranslationResult.h b/src/TranslationResult.h index d743ff5..b4867af 100644 --- a/src/TranslationResult.h +++ b/src/TranslationResult.h @@ -20,7 +20,11 @@ class TranslationResult { public: typedef std::vector> SentenceMappings; - +#ifdef WASM_BINDINGS + 
TranslationResult(const std::string &original, const std::string &translation) + : originalText(original), translatedText(translation), + sentenceMappings() {} +#endif TranslationResult(const std::string &original, const std::string &translation, SentenceMappings &sentenceMappings) : originalText(original), translatedText(translation), @@ -31,13 +35,29 @@ public: translatedText(std::move(other.translatedText)), sentenceMappings(std::move(other.sentenceMappings)) {} +#ifdef WASM_BINDINGS + TranslationResult(const TranslationResult &other) + : originalText(other.originalText), + translatedText(other.translatedText), + sentenceMappings(other.sentenceMappings) {} +#endif + TranslationResult(std::string &&original, std::string &&translation, SentenceMappings &&sentenceMappings) : originalText(std::move(original)), translatedText(std::move(translation)), sentenceMappings(std::move(sentenceMappings)) {} +#ifndef WASM_BINDINGS TranslationResult &operator=(const TranslationResult &) = delete; +#else + TranslationResult &operator=(const TranslationResult &result) { + originalText = result.originalText; + translatedText = result.translatedText; + sentenceMappings = result.sentenceMappings; + return *this; + } +#endif /* Return the original text. 
*/ const std::string &getOriginalText() const { return originalText; } diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index 71bdd97..ba2c2e0 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -19,6 +19,8 @@ endif() if(COMPILE_WASM) # A dirty hack because of marian's bad cmake practices target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM) + # Enable code that is required for generating JS bindings + target_compile_definitions(bergamot-translator PRIVATE WASM_BINDINGS) endif(COMPILE_WASM) if (COMPILE_THREAD_VARIANT) From 74b06d863ebbd0b0b59dfd7be1e541a338c8a3f8 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 19:09:30 +0100 Subject: [PATCH 14/98] Add wasm folder to compile JS bindings --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7327e14..4b6e224 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,5 +43,5 @@ if(NOT COMPILE_WASM) add_subdirectory(app) endif() if(COMPILE_WASM) - add_subdirectory(app) + add_subdirectory(wasm) endif(COMPILE_WASM) From de501e8f963b8fed6cc6f1799d55f2e20b325d3e Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 20:48:29 +0100 Subject: [PATCH 15/98] Added JS binding files and cmake infrastructure to build them - Added "wasm" folder - Contains README file as well --- CMakeLists.txt | 1 + wasm/CMakeLists.txt | 27 ++++++++++ wasm/README.md | 52 +++++++++++++++++++ wasm/bergamot.html | 54 ++++++++++++++++++++ wasm/bindings/TranslationModelBindings.cpp | 23 +++++++++ wasm/bindings/TranslationRequestBindings.cpp | 17 ++++++ wasm/bindings/TranslationResultBindings.cpp | 20 ++++++++ 7 files changed, 194 insertions(+) create mode 100644 wasm/CMakeLists.txt create mode 100644 wasm/README.md create mode 100644 wasm/bergamot.html create mode 100644 wasm/bindings/TranslationModelBindings.cpp create mode 100644 wasm/bindings/TranslationRequestBindings.cpp 
create mode 100644 wasm/bindings/TranslationResultBindings.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b6e224..505d785 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Project specific cmake options option(COMPILE_WASM "Compile for WASM" OFF) option(COMPILE_THREAD_VARIANT "Compile with thread support" OFF) +option(PACKAGE_DIR "Directory including all the files to be packaged (pre-loaded) in wasm builds" "") # Set marian (3rd party submodule) cmake options to compile for this project SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version") diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt new file mode 100644 index 0000000..9ede6a6 --- /dev/null +++ b/wasm/CMakeLists.txt @@ -0,0 +1,27 @@ +add_executable(bergamot-translator-worker + bindings/TranslationModelBindings.cpp + bindings/TranslationRequestBindings.cpp + bindings/TranslationResultBindings.cpp +) + +# This header inclusion needs to go away later as path to public headers of bergamot +# translator should be directly available from "bergamot-translator" target +target_include_directories(bergamot-translator-worker + PRIVATE ${CMAKE_SOURCE_DIR}/src/translator + PRIVATE ${CMAKE_SOURCE_DIR} +) +# This compile definition is required for generating binding code properly +target_compile_definitions(bergamot-translator-worker PRIVATE WASM_BINDINGS) + +set(LINKER_FLAGS "--bind -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1") +if (NOT PACKAGE_DIR STREQUAL "") + set(LINKER_FLAGS "${LINKER_FLAGS} --preload-file ${PACKAGE_DIR}@/") +endif() + +set_target_properties(bergamot-translator-worker PROPERTIES + SUFFIX ".js" + LINK_FLAGS ${LINKER_FLAGS} + ) +#target_link_options(bergamot-translator-worker --preload-file ${PACKAGE_DIR}@/) + +target_link_libraries(bergamot-translator-worker bergamot-translator) diff --git a/wasm/README.md b/wasm/README.md new file mode 100644 index 0000000..83d4738 --- /dev/null +++ 
b/wasm/README.md @@ -0,0 +1,52 @@ +## Using Bergamot Translator in JavaScript +The example file `bergamot.html` in this folder demonstrates how to use the bergamot translator in JavaScript via a ` + + diff --git a/wasm/bindings/TranslationModelBindings.cpp b/wasm/bindings/TranslationModelBindings.cpp new file mode 100644 index 0000000..245416c --- /dev/null +++ b/wasm/bindings/TranslationModelBindings.cpp @@ -0,0 +1,23 @@ +/* + * TranslationModelBindings.cpp + * + * Bindings for TranslationModel class + */ + +#include + +#include "TranslationModel.h" + +using namespace emscripten; + +// Binding code +EMSCRIPTEN_BINDINGS(translation_model) { + class_("TranslationModel") + .constructor() + .function("translate", &TranslationModel::translate) + .function("isAlignmentSupported", &TranslationModel::isAlignmentSupported) + ; + + register_vector("VectorString"); + register_vector("VectorTranslationResult"); +} diff --git a/wasm/bindings/TranslationRequestBindings.cpp b/wasm/bindings/TranslationRequestBindings.cpp new file mode 100644 index 0000000..bb5ec98 --- /dev/null +++ b/wasm/bindings/TranslationRequestBindings.cpp @@ -0,0 +1,17 @@ +/* + * Bindings for TranslationRequest class + * + */ + +#include + +#include "TranslationRequest.h" + +using namespace emscripten; + +// Binding code +EMSCRIPTEN_BINDINGS(translation_request) { + class_("TranslationRequest") + .constructor<>() + ; +} diff --git a/wasm/bindings/TranslationResultBindings.cpp b/wasm/bindings/TranslationResultBindings.cpp new file mode 100644 index 0000000..a3713a1 --- /dev/null +++ b/wasm/bindings/TranslationResultBindings.cpp @@ -0,0 +1,20 @@ +/* + * Bindings for TranslationResult class + * + */ + +#include +#include + +#include "TranslationResult.h" + +using namespace emscripten; + +// Binding code +EMSCRIPTEN_BINDINGS(translation_result) { + class_("TranslationResult") + .constructor() + .function("getOriginalText", &TranslationResult::getOriginalText) + .function("getTranslatedText", 
&TranslationResult::getTranslatedText) + ; +} From e12647076c69c4e0355b598b16127d4112f662bd Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 23:27:16 +0100 Subject: [PATCH 16/98] Updated README with wasm build and use instructions --- README.md | 110 +++++++++++++++++++++++++++++------------------------- 1 file changed, 59 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 52f60b2..e1ad9c3 100644 --- a/README.md +++ b/README.md @@ -3,58 +3,66 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt.github.io/) framework based) neural machine translation functionality in accordance with the [Bergamot](https://browser.mt/) project that focuses on improving client-side machine translation in a web browser. ## Build Instructions -``` -$ git clone https://github.com/browsermt/bergamot-translator -$ cd bergamot-translator -$ mkdir build -$ cd build -$ cmake ../ -$ make -j -``` - -## Usage - -### Bergamot Translator - -The build will generate the library that can be linked to any project. All the public header files are specified in `src` folder. - -### `service-cli` - -An executable `service-cli` is generated by the build in the `app` folder and -provides command line interface to the underlying translator. The models -required to run the command-line are available at -[data.statmt.org/bergamot/models/](http://data.statmt.org/bergamot/models/). -The following example uses an English to German tiny11 student model, available -at: - -* [data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz](http://data.statmt.org/bergamot/models/deen/ende.student.tiny11.tar.gz) +### Build Natively ```bash -MODEL_DIR=... # path to where the model-files are. -ARGS=( - -m $MODEL_DIR/model.intgemm.alphas.bin # Path to model file. 
- --vocabs - $MODEL_DIR/vocab.deen.spm # source-vocabulary - $MODEL_DIR/vocab.deen.spm # target-vocabulary - - # The following increases speed through one-best-decoding, shortlist and quantization. - --beam-size 1 --skip-cost --shortlist $MODEL_DIR/lex.s2t.gz 50 50 --int8shiftAlphaAll - - # Number of CPU threads (workers to launch). Parallelizes over cores and improves speed. - --cpu-threads 4 - - # Hyperparameters of how many tokens to be accounted for in a batch and maximum tokens in a sentence. - --max-input-sentence-tokens 1024 --max-input-tokens 1024 - - # Three modes are supported - # - sentence: One sentence per line - # - paragraph: One paragraph per line. - # - wrapped text: Paragraphs are separated by empty line. - - --ssplit-mode paragraph - -) - -./app/service-cli "${ARGS[@]}" < path-to-input-file +git clone https://github.com/browsermt/bergamot-translator +cd bergamot-translator +mkdir build +cd build +cmake ../ +make -j ``` + +### Build WASM + +To compile WASM, first download and Install Emscripten using following instructions: + +1. Get the latest sdk: `git clone https://github.com/emscripten-core/emsdk.git` +2. Enter the cloned directory: `cd emsdk` +3. Install the lastest sdk tools: `./emsdk install latest` +4. Activate the latest sdk tools: `./emsdk activate latest` +5. Activate path variables: `source ./emsdk_env.sh` + +After the successful installation of Emscripten, perform these steps: + +```bash +git clone https://github.com/browsermt/bergamot-translator +cd bergamot-translator +mkdir build-wasm +cd build-wasm +emcmake cmake -DCOMPILE_WASM=on ../ +emmake make -j +``` + +It should generate the artefacts (.js and .wasm files) in `wasm` folder inside build directory ("build-wasm" in this case). + +The build also allows packaging files into wasm binary (i.e. preloading in Emscripten’s virtual file system) using cmake +option `PACKAGE_DIR`. The compile command below packages all the files in PATH directory into wasm binary. 
+```bash +emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR= ../ +``` +Files packaged this way are preloaded in the root of the virtual file system. + + +After Editing Files: + +```bash +emmake make -j +``` + +After Adding/Removing Files: + +```bash +emcmake cmake -DCOMPILE_WASM=on ../ +emmake make -j +``` + +### Using Native version + +The builds generate library that can be integrated to any project. All the public header files are specified in `src` folder. A short example of how to use the APIs is provided in `app/main.cpp` file + +### Using WASM version + +Please follow the `README` inside the `wasm` folder of this repository that demonstrates how to use the translator in JavaScript. From ff95e37f89e2ed67e4a6420e6a3415bb8e794994 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Thu, 11 Feb 2021 23:51:45 +0100 Subject: [PATCH 17/98] Improved cmake option PACKAGE_DIR --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 505d785..10256c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Project specific cmake options option(COMPILE_WASM "Compile for WASM" OFF) option(COMPILE_THREAD_VARIANT "Compile with thread support" OFF) -option(PACKAGE_DIR "Directory including all the files to be packaged (pre-loaded) in wasm builds" "") +SET(PACKAGE_DIR "" CACHE STRING "Directory including all the files to be packaged (pre-loaded) in wasm builds") # Set marian (3rd party submodule) cmake options to compile for this project SET(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version") From 28dcf55b417549f1b5ba7ec739e416166ac93591 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 12 Feb 2021 11:35:47 +0100 Subject: [PATCH 18/98] Improved cmake to use wasm compilation flags across project --- 3rd_party/CMakeLists.txt | 6 ++++++ CMakeLists.txt | 6 +++--- src/translator/CMakeLists.txt | 1 + wasm/CMakeLists.txt | 2 +- 4 files changed, 11 insertions(+), 4 
deletions(-) diff --git a/3rd_party/CMakeLists.txt b/3rd_party/CMakeLists.txt index 644ac52..74ce906 100644 --- a/3rd_party/CMakeLists.txt +++ b/3rd_party/CMakeLists.txt @@ -1,4 +1,10 @@ add_subdirectory(marian-dev) + +if(COMPILE_WASM) + # This is a bad way of adding compilation flags. Will be improved soon. + add_compile_options(${WASM_COMPILE_FLAGS}) +endif(COMPILE_WASM) + add_subdirectory(ssplit-cpp) # Add include directories for 3rd party targets to be able to use it anywhere in the diff --git a/CMakeLists.txt b/CMakeLists.txt index 10256c2..677963f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,9 +33,9 @@ if(NOT COMPILE_WASM) endif() if(COMPILE_WASM) - add_compile_options(-pthread -O3 -g2 -fPIC -mssse3 -msimd128) - add_compile_options("SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=1" "SHELL:-s DISABLE_EXCEPTION_CATCHING=0" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1") - add_compile_options(-Wno-error=pthreads-mem-growth) + list(APPEND WASM_COMPILE_FLAGS -pthread -O3 -g2 -fPIC -mssse3 -msimd128) + list(APPEND WASM_COMPILE_FLAGS "SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=1" "SHELL:-s DISABLE_EXCEPTION_CATCHING=0" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1") + list(APPEND WASM_COMPILE_FLAGS -Wno-error=pthreads-mem-growth) endif(COMPILE_WASM) add_subdirectory(3rd_party) diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index ba2c2e0..1a664b3 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -21,6 +21,7 @@ if(COMPILE_WASM) target_compile_definitions(bergamot-translator PUBLIC USE_SSE2 WASM) # Enable code that is required for generating JS bindings target_compile_definitions(bergamot-translator PRIVATE WASM_BINDINGS) + target_compile_options(bergamot-translator PRIVATE ${WASM_COMPILE_FLAGS}) endif(COMPILE_WASM) if (COMPILE_THREAD_VARIANT) diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 
9ede6a6..40b08bf 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -12,6 +12,7 @@ target_include_directories(bergamot-translator-worker ) # This compile definition is required for generating binding code properly target_compile_definitions(bergamot-translator-worker PRIVATE WASM_BINDINGS) +target_compile_options(bergamot-translator-worker PRIVATE ${WASM_COMPILE_FLAGS}) set(LINKER_FLAGS "--bind -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1") if (NOT PACKAGE_DIR STREQUAL "") @@ -22,6 +23,5 @@ set_target_properties(bergamot-translator-worker PROPERTIES SUFFIX ".js" LINK_FLAGS ${LINKER_FLAGS} ) -#target_link_options(bergamot-translator-worker --preload-file ${PACKAGE_DIR}@/) target_link_libraries(bergamot-translator-worker bergamot-translator) From 3b7673bf15e9877f3cfc15c17a366db8a494a4d5 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Fri, 12 Feb 2021 14:38:16 +0100 Subject: [PATCH 19/98] Updated marian-dev submodule - This fixes the issue of sentencepiece not being able to checkout properly --- 3rd_party/marian-dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rd_party/marian-dev b/3rd_party/marian-dev index a4e50b6..29ecba1 160000 --- a/3rd_party/marian-dev +++ b/3rd_party/marian-dev @@ -1 +1 @@ -Subproject commit a4e50b66be38a94b90c46c4695d86de9932c34e8 +Subproject commit 29ecba1cb1b8ea26ae582d3851e214769b89e566 From 9108d9f0b3e96c1890746ab740df1901b5cc2245 Mon Sep 17 00:00:00 2001 From: Andre Natal Date: Fri, 12 Feb 2021 15:25:40 -0800 Subject: [PATCH 20/98] Update README.md Add `--recursive` to `git clone` instructions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e1ad9c3..e8adaba 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Bergamot translator provides a unified API for ([Marian NMT](https://marian-nmt. 
### Build Natively ```bash -git clone https://github.com/browsermt/bergamot-translator +git clone --recursive https://github.com/browsermt/bergamot-translator cd bergamot-translator mkdir build cd build From 3a53a68444834aeb6e78bfdb35ae12570187acd7 Mon Sep 17 00:00:00 2001 From: Andre Natal Date: Fri, 12 Feb 2021 15:41:17 -0800 Subject: [PATCH 21/98] Update README.md updating `--recursive` on wasm instructions too --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e8adaba..4b10944 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ make -j To compile WASM, first download and Install Emscripten using following instructions: -1. Get the latest sdk: `git clone https://github.com/emscripten-core/emsdk.git` +1. Get the latest sdk: `git clone --recursive https://github.com/emscripten-core/emsdk.git` 2. Enter the cloned directory: `cd emsdk` 3. Install the lastest sdk tools: `./emsdk install latest` 4. Activate the latest sdk tools: `./emsdk activate latest` From a97bf7b504e151494d3206e8b2459e666482640b Mon Sep 17 00:00:00 2001 From: Andre Natal Date: Fri, 12 Feb 2021 17:00:12 -0800 Subject: [PATCH 22/98] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4b10944..2791ebf 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ make -j To compile WASM, first download and Install Emscripten using following instructions: -1. Get the latest sdk: `git clone --recursive https://github.com/emscripten-core/emsdk.git` +1. Get the latest sdk: `git clone https://github.com/emscripten-core/emsdk.git` 2. Enter the cloned directory: `cd emsdk` 3. Install the lastest sdk tools: `./emsdk install latest` 4. 
Activate the latest sdk tools: `./emsdk activate latest` @@ -28,7 +28,7 @@ To compile WASM, first download and Install Emscripten using following instructi After the successful installation of Emscripten, perform these steps: ```bash -git clone https://github.com/browsermt/bergamot-translator +git clone --recursive https://github.com/browsermt/bergamot-translator cd bergamot-translator mkdir build-wasm cd build-wasm From 47db65972cd791cbb59b4ee9825e1d80a1e9d0f1 Mon Sep 17 00:00:00 2001 From: Andre Natal Date: Fri, 12 Feb 2021 17:18:57 -0800 Subject: [PATCH 23/98] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 2791ebf..3e458df 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,8 @@ After the successful installation of Emscripten, perform these steps: ```bash git clone --recursive https://github.com/browsermt/bergamot-translator cd bergamot-translator +git checkout wasm-integration +git submodule update --recursive mkdir build-wasm cd build-wasm emcmake cmake -DCOMPILE_WASM=on ../ From 4764f11e95cb2ec3c2766949ba58a74ee0d2cc90 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sat, 13 Feb 2021 10:55:07 +0000 Subject: [PATCH 24/98] Move BatchTranslator::thread_ to Service (#10) Service now holds an std::vector instead of BatchTranslators. 
--- src/translator/batch_translator.cpp | 26 +++++++++++--------------- src/translator/batch_translator.h | 19 ++++++++----------- src/translator/service.cpp | 8 +++++--- src/translator/service.h | 2 +- 4 files changed, 25 insertions(+), 30 deletions(-) diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index 860255c..7f801c9 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -8,15 +8,10 @@ namespace marian { namespace bergamot { BatchTranslator::BatchTranslator(DeviceId const device, - PCQueue &pcqueue, std::vector> &vocabs, Ptr options) - : device_(device), options_(options), pcqueue_(&pcqueue), vocabs_(&vocabs) { - - thread_ = std::thread([this] { this->mainloop(); }); -} - -void BatchTranslator::initGraph() { + : device_(device), options_(options), vocabs_(&vocabs) { + // Initializes the graph. if (options_->hasAndNotEmpty("shortlist")) { int srcIdx = 0, trgIdx = 1; bool shared_vcb = vocabs_->front() == vocabs_->back(); @@ -38,7 +33,6 @@ void BatchTranslator::initGraph() { scorer->setShortlistGenerator(slgen_); } } - graph_->forward(); } @@ -98,18 +92,22 @@ void BatchTranslator::translate(RequestSentences &requestSentences, histories = std::move(search->search(graph_, batch)); } -void BatchTranslator::mainloop() { - initGraph(); +// void BatchTranslator::join() { thread_.join(); } + +void translation_loop(DeviceId const &device, PCQueue &pcqueue, + std::vector> &vocabs, + Ptr options) { + + BatchTranslator translator(device, vocabs, options); PCItem pcitem; Histories histories; - while (true) { - pcqueue_->ConsumeSwap(pcitem); + pcqueue.ConsumeSwap(pcitem); if (pcitem.isPoison()) { return; } else { - translate(pcitem.sentences, histories); + translator.translate(pcitem.sentences, histories); for (int i = 0; i < pcitem.sentences.size(); i++) { pcitem.sentences[i].completeSentence(histories[i]); } @@ -117,7 +115,5 @@ void BatchTranslator::mainloop() { } } -void BatchTranslator::join() 
{ thread_.join(); } - } // namespace bergamot } // namespace marian diff --git a/src/translator/batch_translator.h b/src/translator/batch_translator.h index 069155e..c718b32 100644 --- a/src/translator/batch_translator.h +++ b/src/translator/batch_translator.h @@ -22,29 +22,26 @@ class BatchTranslator { // shut down in Service which calls join() on the threads. public: - BatchTranslator(DeviceId const device, PCQueue &pcqueue, - std::vector> &vocabs, Ptr options); - void join(); + BatchTranslator(DeviceId const device, std::vector> &vocabs, + Ptr options); // convenience function for logging. TODO(jerin) std::string _identifier() { return "worker" + std::to_string(device_.no); } + void translate(RequestSentences &requestSentences, Histories &histories); private: - void initGraph(); - void translate(RequestSentences &requestSentences, Histories &histories); - void mainloop(); - Ptr options_; - DeviceId device_; std::vector> *vocabs_; Ptr graph_; std::vector> scorers_; Ptr slgen_; - - PCQueue *pcqueue_; - std::thread thread_; }; + +void translation_loop(DeviceId const &device, PCQueue &pcqueue, + std::vector> &vocabs, + Ptr options); + } // namespace bergamot } // namespace marian diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 2acbbdb..62073f9 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -16,9 +16,11 @@ Service::Service(Ptr options) workers_.reserve(numWorkers_); - for (int i = 0; i < numWorkers_; i++) { - marian::DeviceId deviceId(i, DeviceType::cpu); - workers_.emplace_back(deviceId, pcqueue_, vocabs_, options); + for (int cpuId = 0; cpuId < numWorkers_; cpuId++) { + workers_.emplace_back([&] { + marian::DeviceId deviceId(cpuId, DeviceType::cpu); + translation_loop(deviceId, pcqueue_, vocabs_, options); + }); } } diff --git a/src/translator/service.h b/src/translator/service.h index 0ed8d0c..e516bba 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -69,7 +69,7 @@ private: TextProcessor 
text_processor_; // ORDER DEPENDENCY Batcher batcher_; PCQueue pcqueue_; - std::vector workers_; + std::vector workers_; }; std::vector> loadVocabularies(Ptr options); From f1d9f67b56ed5d84f74236b166fd592c060bf8d2 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sat, 13 Feb 2021 11:42:57 +0000 Subject: [PATCH 25/98] single-threaded run with --cpu-threads 0 (#10) --- src/translator/batch_translator.cpp | 13 +++---- src/translator/batch_translator.h | 2 +- src/translator/batcher.cpp | 25 +++++++++++++ src/translator/batcher.h | 4 ++ src/translator/service.cpp | 57 ++++++++++++++++------------- src/translator/service.h | 3 ++ 6 files changed, 70 insertions(+), 34 deletions(-) diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index 7f801c9..3d2ec41 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -36,8 +36,7 @@ BatchTranslator::BatchTranslator(DeviceId const device, graph_->forward(); } -void BatchTranslator::translate(RequestSentences &requestSentences, - Histories &histories) { +void BatchTranslator::translate(RequestSentences &requestSentences) { std::vector batchVector; for (auto &sentence : requestSentences) { @@ -89,7 +88,10 @@ void BatchTranslator::translate(RequestSentences &requestSentences, auto trgVocab = vocabs_->back(); auto search = New(options_, scorers_, trgVocab); - histories = std::move(search->search(graph_, batch)); + auto histories = std::move(search->search(graph_, batch)); + for (int i = 0; i < requestSentences.size(); i++) { + requestSentences[i].completeSentence(histories[i]); + } } // void BatchTranslator::join() { thread_.join(); } @@ -107,10 +109,7 @@ void translation_loop(DeviceId const &device, PCQueue &pcqueue, if (pcitem.isPoison()) { return; } else { - translator.translate(pcitem.sentences, histories); - for (int i = 0; i < pcitem.sentences.size(); i++) { - pcitem.sentences[i].completeSentence(histories[i]); - } + translator.translate(pcitem.sentences); } 
} } diff --git a/src/translator/batch_translator.h b/src/translator/batch_translator.h index c718b32..4067e59 100644 --- a/src/translator/batch_translator.h +++ b/src/translator/batch_translator.h @@ -27,7 +27,7 @@ public: // convenience function for logging. TODO(jerin) std::string _identifier() { return "worker" + std::to_string(device_.no); } - void translate(RequestSentences &requestSentences, Histories &histories); + void translate(RequestSentences &requestSentences); private: Ptr options_; diff --git a/src/translator/batcher.cpp b/src/translator/batcher.cpp index 2fa4eaf..18bf5fd 100644 --- a/src/translator/batcher.cpp +++ b/src/translator/batcher.cpp @@ -50,5 +50,30 @@ void Batcher::cleaveBatch(RequestSentences &sentences) { } } +void Batcher::addWholeRequest(Ptr request) { + for (int i = 0; i < request->numSegments(); i++) { + RequestSentence requestSentence(i, request); + addSentenceWithPriority(requestSentence); + } +} + +void Batcher::enqueue(PCQueue &pcqueue) { + int numSentences; + do { + RequestSentences batchSentences; + cleaveBatch(batchSentences); + numSentences = batchSentences.size(); + + if (numSentences > 0) { + PCItem pcitem(batchNumber_++, std::move(batchSentences)); + pcqueue.ProduceSwap(pcitem); + } + + if (batchNumber_ % 500 == 0) { + LOG(info, "Queuing batch {}", batchNumber_); + } + } while (numSentences > 0); +} + } // namespace bergamot } // namespace marian diff --git a/src/translator/batcher.h b/src/translator/batcher.h index b60b642..2499cd2 100644 --- a/src/translator/batcher.h +++ b/src/translator/batcher.h @@ -4,6 +4,7 @@ #include "common/options.h" #include "data/corpus_base.h" #include "definitions.h" +#include "pcqueue.h" #include "request.h" #include @@ -19,6 +20,8 @@ public: // sentence. This method inserts the sentence into the internal data-structure // which maintains priority among sentences from multiple concurrent requests. 
void addSentenceWithPriority(RequestSentence &sentence); + void addWholeRequest(Ptr request); + void enqueue(PCQueue &pcqueue); // Loads sentences with sentences compiled from (tentatively) multiple // requests optimizing for both padding and priority. @@ -27,6 +30,7 @@ public: private: unsigned int max_input_tokens_; std::vector> bucket_; + unsigned int batchNumber_{0}; }; } // namespace bergamot diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 62073f9..fc71385 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -14,13 +14,17 @@ Service::Service(Ptr options) text_processor_(vocabs_, options), batcher_(options), pcqueue_(2 * options->get("cpu-threads")) { - workers_.reserve(numWorkers_); - - for (int cpuId = 0; cpuId < numWorkers_; cpuId++) { - workers_.emplace_back([&] { - marian::DeviceId deviceId(cpuId, DeviceType::cpu); - translation_loop(deviceId, pcqueue_, vocabs_, options); - }); + if (numWorkers_ > 0) { + workers_.reserve(numWorkers_); + for (int cpuId = 0; cpuId < numWorkers_; cpuId++) { + workers_.emplace_back([&] { + marian::DeviceId deviceId(cpuId, DeviceType::cpu); + translation_loop(deviceId, pcqueue_, vocabs_, options); + }); + } + } else { + marian::DeviceId deviceId(/*cpuId=*/0, DeviceType::cpu); + translator = new BatchTranslator(deviceId, vocabs_, options); } } @@ -53,27 +57,28 @@ std::future Service::translate(std::string &&input) { std::move(segments), std::move(sourceAlignments), std::move(translationResultPromise)); - for (int i = 0; i < request->numSegments(); i++) { - RequestSentence requestSentence(i, request); - batcher_.addSentenceWithPriority(requestSentence); + batcher_.addWholeRequest(request); + if (numWorkers_ > 0) { + batcher_.enqueue(pcqueue_); + } else { + // Queue single-threaded + int numSentences; + do { + RequestSentences batchSentences; + batcher_.cleaveBatch(batchSentences); + numSentences = batchSentences.size(); + + if (numSentences > 0) { + 
translator->translate(batchSentences); + batchNumber_++; + } + + if (batchNumber_ % 500 == 0) { + LOG(info, "Tranlsating batch {}", batchNumber_); + } + } while (numSentences > 0); } - int numSentences; - do { - RequestSentences batchSentences; - batcher_.cleaveBatch(batchSentences); - numSentences = batchSentences.size(); - - if (numSentences > 0) { - PCItem pcitem(batchNumber_++, std::move(batchSentences)); - pcqueue_.ProduceSwap(pcitem); - } - - if (batchNumber_ % 500 == 0) { - LOG(info, "Queuing batch {}", batchNumber_); - } - } while (numSentences > 0); - return future; } diff --git a/src/translator/service.h b/src/translator/service.h index e516bba..951398d 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -70,6 +70,9 @@ private: Batcher batcher_; PCQueue pcqueue_; std::vector workers_; + + // Optional + BatchTranslator *translator{nullptr}; }; std::vector> loadVocabularies(Ptr options); From 77a600b637afd854a189f96b052f37896d37acb7 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sat, 13 Feb 2021 14:19:10 +0000 Subject: [PATCH 26/98] Removing join() (#10) --- src/translator/batch_translator.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index 3d2ec41..b944bed 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -94,8 +94,6 @@ void BatchTranslator::translate(RequestSentences &requestSentences) { } } -// void BatchTranslator::join() { thread_.join(); } - void translation_loop(DeviceId const &device, PCQueue &pcqueue, std::vector> &vocabs, Ptr options) { From 73a56a8f4fa447fb58e230905c7c6e3d25c366da Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sat, 13 Feb 2021 15:48:23 +0000 Subject: [PATCH 27/98] Refactoring batching-mechanisms into Batcher Guided by an objective to move batching mechanism and queueing request to generate batches into a different thread. This commit is in preparation for this functionality.
First, PCItem from the looks of it is *Batch*. Renamed to reflect the same. Fingers crossed, hopefully no naming conflicts with marian. BatchTranslator translates a "Batch" now, instead of vector. Additional data members are setup at Batch to enable development. Workflows previously in Service, but more adequate in Batcher are now moved, preparing to move Batcher/enqueuing of a request into a new thread making it non-blocking. This will allow service to queue requests into the batcher thread and exit, without waiting until the full-request is queued. Batcher now has a path with and without pcqueue. --- src/translator/batch_translator.cpp | 25 +++++----- src/translator/batch_translator.h | 4 +- src/translator/batcher.cpp | 73 +++++++++++++++-------------- src/translator/batcher.h | 7 +-- src/translator/request.h | 22 +++++---- src/translator/service.cpp | 27 ++++------- src/translator/service.h | 3 +- 7 files changed, 78 insertions(+), 83 deletions(-) diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index b944bed..a6e6b93 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -36,10 +36,10 @@ BatchTranslator::BatchTranslator(DeviceId const device, graph_->forward(); } -void BatchTranslator::translate(RequestSentences &requestSentences) { +void BatchTranslator::translate(Batch &batch) { std::vector batchVector; - for (auto &sentence : requestSentences) { + for (auto &sentence : batch.sentences) { data::SentenceTuple sentence_tuple(sentence.lineNumber()); Segment segment = sentence.getUnderlyingSegment(); sentence_tuple.push_back(segment); @@ -82,32 +82,31 @@ void BatchTranslator::translate(RequestSentences &requestSentences) { for (size_t j = 0; j < maxDims.size(); ++j) subBatches[j]->setWords(words[j]); - auto batch = Ptr(new CorpusBatch(subBatches)); - batch->setSentenceIds(sentenceIds); + auto corpus_batch = Ptr(new CorpusBatch(subBatches)); + corpus_batch->setSentenceIds(sentenceIds); 
auto trgVocab = vocabs_->back(); auto search = New(options_, scorers_, trgVocab); - auto histories = std::move(search->search(graph_, batch)); - for (int i = 0; i < requestSentences.size(); i++) { - requestSentences[i].completeSentence(histories[i]); + auto histories = std::move(search->search(graph_, corpus_batch)); + for (int i = 0; i < batch.sentences.size(); i++) { + batch.sentences[i].completeSentence(histories[i]); } } -void translation_loop(DeviceId const &device, PCQueue &pcqueue, +void translation_loop(DeviceId const &device, PCQueue &pcqueue, std::vector> &vocabs, Ptr options) { BatchTranslator translator(device, vocabs, options); - - PCItem pcitem; + Batch batch; Histories histories; while (true) { - pcqueue.ConsumeSwap(pcitem); - if (pcitem.isPoison()) { + pcqueue.ConsumeSwap(batch); + if (batch.isPoison()) { return; } else { - translator.translate(pcitem.sentences); + translator.translate(batch); } } } diff --git a/src/translator/batch_translator.h b/src/translator/batch_translator.h index 4067e59..2ee4e04 100644 --- a/src/translator/batch_translator.h +++ b/src/translator/batch_translator.h @@ -27,7 +27,7 @@ public: // convenience function for logging. 
TODO(jerin) std::string _identifier() { return "worker" + std::to_string(device_.no); } - void translate(RequestSentences &requestSentences); + void translate(Batch &batch); private: Ptr options_; @@ -38,7 +38,7 @@ private: Ptr slgen_; }; -void translation_loop(DeviceId const &device, PCQueue &pcqueue, +void translation_loop(DeviceId const &device, PCQueue &pcqueue, std::vector> &vocabs, Ptr options); diff --git a/src/translator/batcher.cpp b/src/translator/batcher.cpp index 18bf5fd..13b5635 100644 --- a/src/translator/batcher.cpp +++ b/src/translator/batcher.cpp @@ -6,10 +6,10 @@ namespace marian { namespace bergamot { Batcher::Batcher(Ptr options) { - max_input_tokens_ = options->get("max-input-tokens"); + miniBatchWords = options->get("max-input-tokens"); bucket_.resize(options->get("max-input-sentence-tokens") + 1); ABORT_IF( - max_input_tokens_ < bucket_.size() - 1, + miniBatchWords < bucket_.size() - 1, "max-input-tokens cannot be less than than max-input-sentence-tokens, " "batcher fail"); } @@ -20,34 +20,48 @@ void Batcher::addSentenceWithPriority(RequestSentence &sentence) { bucket_[bucket_id].insert(sentence); } -void Batcher::cleaveBatch(RequestSentences &sentences) { +bool Batcher::operator>>(Batch &batch) { return cleaveBatch(batch); } + +bool Batcher::cleaveBatch(Batch &batch) { // For now simply iterates on buckets and converts batches greedily. This // has to be enhanced with optimizing over priority. The baseline // implementation should at least be as fast as marian's maxi-batch with full // corpus size as maxi-batch size. 
+ batch.reset(); + int paddedBatchSize = 0; - int segments_added = 0; - int current_input_tokens = 0; - int padded_batch_size = 0; - int prev_padded_batch_size; - - for (int i = 0; i < bucket_.size(); i++) { - auto p = bucket_[i].begin(); - while (p != bucket_[i].end()) { - padded_batch_size = (segments_added + 1) * i; - if (padded_batch_size <= max_input_tokens_) { + for (int length = 0; length < bucket_.size(); length++) { + auto p = bucket_[length].begin(); + while (p != bucket_[length].end()) { + paddedBatchSize = (batch.sentences.size() + 1) * length; + if (paddedBatchSize <= miniBatchWords) { auto q = p; ++p; - current_input_tokens += i; - sentences.push_back(*q); - ++segments_added; - bucket_[i].erase(q); - prev_padded_batch_size = padded_batch_size; + + batch.numTokens += length; + batch.sentences.push_back(*q); + batch.maxLength = std::max(batch.maxLength, length); + + bucket_[length].erase(q); } else { - return; + // Check if elements exist + assert(batch.sentences.size() > 0); + batch.Id = ++batchNumber_; + if (batchId % 500 == 0) { + batch.log(); + } + return true; } } } + + if (batch.sentences.size()) { + batch.Id = ++batchNumber_; + batch.log(); + return true; + } else { + return false; + } } void Batcher::addWholeRequest(Ptr request) { @@ -57,22 +71,11 @@ void Batcher::addWholeRequest(Ptr request) { } } -void Batcher::enqueue(PCQueue &pcqueue) { - int numSentences; - do { - RequestSentences batchSentences; - cleaveBatch(batchSentences); - numSentences = batchSentences.size(); - - if (numSentences > 0) { - PCItem pcitem(batchNumber_++, std::move(batchSentences)); - pcqueue.ProduceSwap(pcitem); - } - - if (batchNumber_ % 500 == 0) { - LOG(info, "Queuing batch {}", batchNumber_); - } - } while (numSentences > 0); +void Batcher::enqueue(PCQueue &pcqueue) { + Batch batch; + while (cleaveBatch(batch)) { + pcqueue.ProduceSwap(batch); + } } } // namespace bergamot diff --git a/src/translator/batcher.h b/src/translator/batcher.h index 2499cd2..d6b85f3 100644 
--- a/src/translator/batcher.h +++ b/src/translator/batcher.h @@ -21,14 +21,15 @@ public: // which maintains priority among sentences from multiple concurrent requests. void addSentenceWithPriority(RequestSentence &sentence); void addWholeRequest(Ptr request); - void enqueue(PCQueue &pcqueue); + void enqueue(PCQueue &pcqueue); // Loads sentences with sentences compiled from (tentatively) multiple // requests optimizing for both padding and priority. - void cleaveBatch(RequestSentences &sentences); + bool cleaveBatch(Batch &batch); + bool operator>>(Batch &batch); // alias private: - unsigned int max_input_tokens_; + unsigned int miniBatchWords; std::vector> bucket_; unsigned int batchNumber_{0}; }; diff --git a/src/translator/request.h b/src/translator/request.h index 6f268ba..673f88c 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -24,6 +24,7 @@ #include "definitions.h" #include "translation_result.h" +#include "common/logging.h" #include "data/types.h" #include "translator/beam_search.h" @@ -92,20 +93,23 @@ public: typedef std::vector RequestSentences; -struct PCItem { - int batchNumber; +struct Batch { + int Id; + int numTokens, maxLength; RequestSentences sentences; - // PCItem should be default constructible for PCQueue. Default constructed + // Batch should be default constructible for PCQueue. Default constructed // element is poison. - PCItem() : batchNumber(-1) {} - - // PCItem constructor to construct a legit PCItem. - explicit PCItem(int batchNumber, RequestSentences &&sentences) - : batchNumber(batchNumber), sentences(std::move(sentences)) {} + Batch() { reset(); } + void reset() { Id = -1, numTokens = 0, maxLength = 0, sentences.clear(); } // Convenience function to determine poison. 
- bool isPoison() { return (batchNumber == -1); } + bool isPoison() { return (Id == -1); } + + void log() { + LOG(info, "Batch(Id={}, tokens={}, max-length={}, sentences={})", Id, + numTokens, maxLength, sentences.size()); + } }; } // namespace bergamot diff --git a/src/translator/service.cpp b/src/translator/service.cpp index fc71385..3701955 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -8,8 +8,7 @@ namespace marian { namespace bergamot { Service::Service(Ptr options) - : requestId_(0), batchNumber_(0), - numWorkers_(options->get("cpu-threads")), + : requestId_(0), numWorkers_(options->get("cpu-threads")), vocabs_(std::move(loadVocabularies(options))), text_processor_(vocabs_, options), batcher_(options), pcqueue_(2 * options->get("cpu-threads")) { @@ -58,25 +57,15 @@ std::future Service::translate(std::string &&input) { std::move(translationResultPromise)); batcher_.addWholeRequest(request); + if (numWorkers_ > 0) { batcher_.enqueue(pcqueue_); } else { // Queue single-threaded - int numSentences; - do { - RequestSentences batchSentences; - batcher_.cleaveBatch(batchSentences); - numSentences = batchSentences.size(); - - if (numSentences > 0) { - translator->translate(batchSentences); - batchNumber_++; - } - - if (batchNumber_ % 500 == 0) { - LOG(info, "Tranlsating batch {}", batchNumber_); - } - } while (numSentences > 0); + Batch batch; + while (batcher_ >> batch) { + translator->translate(batch); + } } return future; @@ -85,8 +74,8 @@ std::future Service::translate(std::string &&input) { void Service::stop() { int counter = 0; for (auto &worker : workers_) { - PCItem pcitem; - pcqueue_.ProduceSwap(pcitem); + Batch batch; + pcqueue_.ProduceSwap(batch); ++counter; } diff --git a/src/translator/service.h b/src/translator/service.h index 951398d..c57e609 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -46,7 +46,6 @@ public: private: unsigned int requestId_; - unsigned int batchNumber_; int numWorkers_; // 
vocabs are used to construct a Request, which later uses it to construct @@ -68,7 +67,7 @@ private: TextProcessor text_processor_; // ORDER DEPENDENCY Batcher batcher_; - PCQueue pcqueue_; + PCQueue pcqueue_; std::vector workers_; // Optional From e585a9e7861934e40d3d4e2a5793724be3a9e3a6 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sat, 13 Feb 2021 16:31:30 +0000 Subject: [PATCH 28/98] Sanitizing Batch construction Batch Ids cannot be set by outside classes to values < 0. Batch.Id_ = -1 : Poison, for use in PCQueue 0 : Default constructed, invalid batch. >0 : Legit batch. Book-keeping for batch metrics (maxLength, numTokens, etc) and logging are now moved to Batch. Batch is now a class instead of a struct with accessors controlling how members can be modified to suit above. --- src/translator/batch_translator.cpp | 7 ++-- src/translator/batcher.cpp | 23 ++++--------- src/translator/request.h | 53 ++++++++++++++++++++++------- src/translator/service.cpp | 4 +-- 4 files changed, 53 insertions(+), 34 deletions(-) diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index a6e6b93..13eb58a 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -39,7 +39,8 @@ BatchTranslator::BatchTranslator(DeviceId const device, void BatchTranslator::translate(Batch &batch) { std::vector batchVector; - for (auto &sentence : batch.sentences) { + auto &sentences = batch.sentences(); + for (auto &sentence : sentences) { data::SentenceTuple sentence_tuple(sentence.lineNumber()); Segment segment = sentence.getUnderlyingSegment(); sentence_tuple.push_back(segment); @@ -89,9 +90,7 @@ void BatchTranslator::translate(Batch &batch) { auto search = New(options_, scorers_, trgVocab); auto histories = std::move(search->search(graph_, corpus_batch)); - for (int i = 0; i < batch.sentences.size(); i++) { - batch.sentences[i].completeSentence(histories[i]); - } + batch.completeBatch(histories); } void 
translation_loop(DeviceId const &device, PCQueue &pcqueue, diff --git a/src/translator/batcher.cpp b/src/translator/batcher.cpp index 13b5635..5fdcc3a 100644 --- a/src/translator/batcher.cpp +++ b/src/translator/batcher.cpp @@ -33,31 +33,22 @@ bool Batcher::cleaveBatch(Batch &batch) { for (int length = 0; length < bucket_.size(); length++) { auto p = bucket_[length].begin(); while (p != bucket_[length].end()) { - paddedBatchSize = (batch.sentences.size() + 1) * length; + paddedBatchSize = (batch.size() + 1) * length; if (paddedBatchSize <= miniBatchWords) { - auto q = p; - ++p; - - batch.numTokens += length; - batch.sentences.push_back(*q); - batch.maxLength = std::max(batch.maxLength, length); - + auto q = p++; + batch.add(*q); bucket_[length].erase(q); } else { // Check if elements exist - assert(batch.sentences.size() > 0); - batch.Id = ++batchNumber_; - if (batchId % 500 == 0) { - batch.log(); - } + assert(batch.size() > 0); + batch.setId(++batchNumber_); return true; } } } - if (batch.sentences.size()) { - batch.Id = ++batchNumber_; - batch.log(); + if (batch.size()) { + batch.setId(++batchNumber_); return true; } else { return false; diff --git a/src/translator/request.h b/src/translator/request.h index 673f88c..5fb9c3c 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -28,6 +28,8 @@ #include "data/types.h" #include "translator/beam_search.h" +#include + #include #include @@ -93,23 +95,50 @@ public: typedef std::vector RequestSentences; -struct Batch { - int Id; - int numTokens, maxLength; - RequestSentences sentences; - - // Batch should be default constructible for PCQueue. Default constructed - // element is poison. +class Batch { +public: Batch() { reset(); } - void reset() { Id = -1, numTokens = 0, maxLength = 0, sentences.clear(); } - + void reset() { Id_ = 0, numTokens_ = 0, maxLength_ = 0, sentences_.clear(); } // Convenience function to determine poison. 
- bool isPoison() { return (Id == -1); } + bool isPoison() { return (Id_ == -1); } + static Batch poison() { + Batch poison_; + poison_.Id_ = -1; + return poison_; + } void log() { - LOG(info, "Batch(Id={}, tokens={}, max-length={}, sentences={})", Id, - numTokens, maxLength, sentences.size()); + LOG(info, "Batch(Id_={}, tokens={}, max-length={}, sentences_={})", Id_, + numTokens_, maxLength_, sentences_.size()); } + + void add(const RequestSentence &sentence) { + sentences_.push_back(sentence); + maxLength_ = std::max(sentence.numTokens(), maxLength_); + numTokens_ += sentence.numTokens(); + } + + size_t size() { return sentences_.size(); } + + void setId(int Id) { + assert(Id > 0); + Id_ = Id; + if (Id % 500 == 0) { + log(); + } + } + + const RequestSentences &sentences() { return sentences_; } + void completeBatch(const Histories &histories) { + for (int i = 0; i < sentences_.size(); i++) { + sentences_[i].completeSentence(histories[i]); + } + } + +private: + int Id_; + size_t numTokens_, maxLength_; + RequestSentences sentences_; }; } // namespace bergamot diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 3701955..c93aa5f 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -74,8 +74,8 @@ std::future Service::translate(std::string &&input) { void Service::stop() { int counter = 0; for (auto &worker : workers_) { - Batch batch; - pcqueue_.ProduceSwap(batch); + Batch poison = Batch::poison(); + pcqueue_.ProduceSwap(poison); ++counter; } From 1e413f71cd583bba570af8ed8fde7f797174dd41 Mon Sep 17 00:00:00 2001 From: Andre Natal Date: Sat, 13 Feb 2021 14:54:36 -0800 Subject: [PATCH 29/98] Including a more elaborated test page, a node webserver containing the proper cors headers and wasm mimetype --- .gitignore | 2 + README.md | 6 +- wasm/README.md | 38 +- wasm/bergamot.html | 54 -- wasm/test_page/bergamot.html | 140 +++++ wasm/test_page/helper.js | 40 ++ wasm/test_page/package-lock.json | 904 
+++++++++++++++++++++++++++++++ wasm/test_page/package.json | 7 + wasm/test_page/start_server.sh | 8 + 9 files changed, 1129 insertions(+), 70 deletions(-) delete mode 100644 wasm/bergamot.html create mode 100644 wasm/test_page/bergamot.html create mode 100644 wasm/test_page/helper.js create mode 100644 wasm/test_page/package-lock.json create mode 100644 wasm/test_page/package.json create mode 100644 wasm/test_page/start_server.sh diff --git a/.gitignore b/.gitignore index e63aee1..59363a8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ *.swp *.swo +wasm/test_page/node_modules +build-wasm diff --git a/README.md b/README.md index 3e458df..333e758 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,12 @@ emmake make -j It should generate the artefacts (.js and .wasm files) in `wasm` folder inside build directory ("build-wasm" in this case). +Download the models from `https://github.com/mozilla-applied-ml/bergamot-models`, and place all the desired ones to package in a folder called `models`. + The build also allows packaging files into wasm binary (i.e. preloading in Emscripten’s virtual file system) using cmake -option `PACKAGE_DIR`. The compile command below packages all the files in PATH directory into wasm binary. +option `PACKAGE_DIR`. The compile command below packages all the files in PATH directory (in these case, your models) into wasm binary. ```bash -emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR= ../ +emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR= ./models ``` Files packaged this way are preloaded in the root of the virtual file system. 
diff --git a/wasm/README.md b/wasm/README.md index 83d4738..6be6209 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -1,5 +1,5 @@ ## Using Bergamot Translator in JavaScript -The example file `bergamot.html` in this folder demonstrates how to use the bergamot translator in JavaScript via a ` - - diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html new file mode 100644 index 0000000..49ca50e --- /dev/null +++ b/wasm/test_page/bergamot.html @@ -0,0 +1,140 @@ + + + + + + + + + +
+ + + + +
+ +
+ + +

+ + +

+ +
+ +
+
+ +
+ + + + + diff --git a/wasm/test_page/helper.js b/wasm/test_page/helper.js new file mode 100644 index 0000000..bff116c --- /dev/null +++ b/wasm/test_page/helper.js @@ -0,0 +1,40 @@ +/* + * @author - Based of a file from Gist here: https://gist.github.com/1757658 + * + * @modified - Mike Newell - it was on Gist so I figure I can use it + * + * @Description - Added support for a few more mime types including the new + * .ogv, .webm, and .mp4 file types for HTML5 video. + * + */ + +/* +* @modified - Andre Natal - removed unused types for the purpose of this use +case +*/ + +Helper = { + + types: { + "wasm" : "application/wasm" + , "js" : "application/javascript" + , "html" : "text/html" + , "htm" : "text/html" + , "ico" : "image/vnd.microsoft.icon", + }, + + getMime: function(u) { + + var ext = this.getExt(u.pathname).replace('.', ''); + + return this.types[ext.toLowerCase()] || 'application/octet-stream'; + + }, + + getExt: function(path) { + var i = path.lastIndexOf('.'); + + return (i < 0) ? 
'' : path.substr(i); + } + +}; diff --git a/wasm/test_page/package-lock.json b/wasm/test_page/package-lock.json new file mode 100644 index 0000000..065c92d --- /dev/null +++ b/wasm/test_page/package-lock.json @@ -0,0 +1,904 @@ +{ + "name": "test_page", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "dependencies": { + "cors": "^2.8.5", + "express": "^4.17.1", + "nocache": "^2.1.0" + } + }, + "node_modules/accepts": { + "version": "1.3.7", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz", + "integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==", + "dependencies": { + "mime-types": "~2.1.24", + "negotiator": "0.6.2" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=" + }, + "node_modules/body-parser": { + "version": "1.19.0", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz", + "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==", + "dependencies": { + "bytes": "3.1.0", + "content-type": "~1.0.4", + "debug": "2.6.9", + "depd": "~1.1.2", + "http-errors": "1.7.2", + "iconv-lite": "0.4.24", + "on-finished": "~2.3.0", + "qs": "6.7.0", + "raw-body": "2.4.0", + "type-is": "~1.6.17" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/bytes": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz", + "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/content-disposition": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz", + "integrity": 
"sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==", + "dependencies": { + "safe-buffer": "5.1.2" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/content-type": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", + "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz", + "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=" + }, + "node_modules/cors": { + "version": "2.8.5", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz", + "integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "dependencies": { + "ms": "2.0.0" + } + }, + "node_modules/depd": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", + "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/destroy": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", + "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" + }, + "node_modules/ee-first": { + 
"version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=" + }, + "node_modules/encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=" + }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/express": { + "version": "4.17.1", + "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz", + "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==", + "dependencies": { + "accepts": "~1.3.7", + "array-flatten": "1.1.1", + "body-parser": "1.19.0", + "content-disposition": "0.5.3", + "content-type": "~1.0.4", + "cookie": "0.4.0", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "~1.1.2", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "~1.1.2", + "fresh": "0.5.2", + "merge-descriptors": "1.0.1", + "methods": "~1.1.2", + "on-finished": "~2.3.0", + "parseurl": "~1.3.3", + "path-to-regexp": "0.1.7", + "proxy-addr": "~2.0.5", + "qs": "6.7.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.1.2", + "send": "0.17.1", + "serve-static": "1.14.1", + "setprototypeof": "1.1.1", + "statuses": "~1.5.0", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "engines": { + "node": ">= 0.10.0" + } + }, + "node_modules/finalhandler": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz", + "integrity": 
"sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==", + "dependencies": { + "debug": "2.6.9", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "on-finished": "~2.3.0", + "parseurl": "~1.3.3", + "statuses": "~1.5.0", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/forwarded": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", + "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/http-errors": { + "version": "1.7.2", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz", + "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==", + "dependencies": { + "depd": "~1.1.2", + "inherits": "2.0.3", + "setprototypeof": "1.1.1", + "statuses": ">= 1.5.0 < 2", + "toidentifier": "1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "engines": 
{ + "node": ">= 0.10" + } + }, + "node_modules/media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/merge-descriptors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", + "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=" + }, + "node_modules/methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", + "bin": { + "mime": "cli.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/mime-db": { + "version": "1.45.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.45.0.tgz", + "integrity": "sha512-CkqLUxUk15hofLoLyljJSrukZi8mAtgd+yE5uO4tqRZsdsAJKv0O+rFMhVDRJgozy+yG6md5KwuXhD4ocIoP+w==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.28", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.28.tgz", + "integrity": "sha512-0TO2yJ5YHYr7M2zzT7gDU1tbwHxEUWBCLt0lscSNpcdAfFyJOVEpRYNS7EXVcTLNj/25QO8gulHC5JtTzSE2UQ==", + "dependencies": { + "mime-db": "1.45.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + }, + "node_modules/negotiator": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", + "integrity": 
"sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/nocache": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/nocache/-/nocache-2.1.0.tgz", + "integrity": "sha512-0L9FvHG3nfnnmaEQPjT9xhfN4ISk0A8/2j4M37Np4mcDesJjHgEUfgPhdCyZuFI954tjokaIj/A3NdpFNdEh4Q==", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/on-finished": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", + "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-to-regexp": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", + "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" + }, + "node_modules/proxy-addr": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz", + "integrity": "sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==", + "dependencies": { + "forwarded": "~0.1.2", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/qs": { + "version": "6.7.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz", + "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==", 
+ "engines": { + "node": ">=0.6" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz", + "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==", + "dependencies": { + "bytes": "3.1.0", + "http-errors": "1.7.2", + "iconv-lite": "0.4.24", + "unpipe": "1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "node_modules/send": { + "version": "0.17.1", + "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz", + "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==", + "dependencies": { + "debug": "2.6.9", + "depd": "~1.1.2", + "destroy": "~1.0.4", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "~1.7.2", + "mime": "1.6.0", + "ms": "2.1.1", + "on-finished": "~2.3.0", + "range-parser": "~1.2.1", + "statuses": "~1.5.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/send/node_modules/ms": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", + "integrity": 
"sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" + }, + "node_modules/serve-static": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz", + "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==", + "dependencies": { + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "0.17.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/setprototypeof": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz", + "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==" + }, + "node_modules/statuses": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz", + "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/toidentifier": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz", + "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "dependencies": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/utils-merge": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=", + "engines": { + "node": ">= 0.4.0" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=", + "engines": { + "node": ">= 0.8" + } + } + }, + "dependencies": { + "accepts": { + "version": "1.3.7", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz", + "integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==", + "requires": { + "mime-types": "~2.1.24", + "negotiator": "0.6.2" + } + }, + "array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=" + }, + "body-parser": { + "version": "1.19.0", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz", + "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==", + "requires": { + "bytes": "3.1.0", + "content-type": "~1.0.4", + "debug": "2.6.9", + "depd": "~1.1.2", + "http-errors": "1.7.2", + "iconv-lite": "0.4.24", + "on-finished": "~2.3.0", + "qs": "6.7.0", + "raw-body": "2.4.0", + "type-is": "~1.6.17" + } + }, + "bytes": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz", + "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==" + }, + "content-disposition": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz", + "integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==", + "requires": { + "safe-buffer": "5.1.2" + } + }, + "content-type": { + "version": "1.0.4", + "resolved": 
"https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", + "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==" + }, + "cookie": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz", + "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==" + }, + "cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=" + }, + "cors": { + "version": "2.8.5", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz", + "integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==", + "requires": { + "object-assign": "^4", + "vary": "^1" + } + }, + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + }, + "depd": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", + "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=" + }, + "destroy": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", + "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" + }, + "ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=" + }, + "encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=" + }, + "escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=" + }, + "etag": { + 
"version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=" + }, + "express": { + "version": "4.17.1", + "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz", + "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==", + "requires": { + "accepts": "~1.3.7", + "array-flatten": "1.1.1", + "body-parser": "1.19.0", + "content-disposition": "0.5.3", + "content-type": "~1.0.4", + "cookie": "0.4.0", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "~1.1.2", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "~1.1.2", + "fresh": "0.5.2", + "merge-descriptors": "1.0.1", + "methods": "~1.1.2", + "on-finished": "~2.3.0", + "parseurl": "~1.3.3", + "path-to-regexp": "0.1.7", + "proxy-addr": "~2.0.5", + "qs": "6.7.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.1.2", + "send": "0.17.1", + "serve-static": "1.14.1", + "setprototypeof": "1.1.1", + "statuses": "~1.5.0", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + } + }, + "finalhandler": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz", + "integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==", + "requires": { + "debug": "2.6.9", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "on-finished": "~2.3.0", + "parseurl": "~1.3.3", + "statuses": "~1.5.0", + "unpipe": "~1.0.0" + } + }, + "forwarded": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", + "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=" + }, + "fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=" + }, + "http-errors": { + "version": "1.7.2", + "resolved": 
"https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz", + "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==", + "requires": { + "depd": "~1.1.2", + "inherits": "2.0.3", + "setprototypeof": "1.1.1", + "statuses": ">= 1.5.0 < 2", + "toidentifier": "1.0.0" + } + }, + "iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } + }, + "inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + }, + "ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==" + }, + "media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=" + }, + "merge-descriptors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", + "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=" + }, + "methods": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", + "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=" + }, + "mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==" + }, + "mime-db": { + "version": "1.45.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.45.0.tgz", + "integrity": 
"sha512-CkqLUxUk15hofLoLyljJSrukZi8mAtgd+yE5uO4tqRZsdsAJKv0O+rFMhVDRJgozy+yG6md5KwuXhD4ocIoP+w==" + }, + "mime-types": { + "version": "2.1.28", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.28.tgz", + "integrity": "sha512-0TO2yJ5YHYr7M2zzT7gDU1tbwHxEUWBCLt0lscSNpcdAfFyJOVEpRYNS7EXVcTLNj/25QO8gulHC5JtTzSE2UQ==", + "requires": { + "mime-db": "1.45.0" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" + }, + "negotiator": { + "version": "0.6.2", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", + "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==" + }, + "nocache": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/nocache/-/nocache-2.1.0.tgz", + "integrity": "sha512-0L9FvHG3nfnnmaEQPjT9xhfN4ISk0A8/2j4M37Np4mcDesJjHgEUfgPhdCyZuFI954tjokaIj/A3NdpFNdEh4Q==" + }, + "object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" + }, + "on-finished": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", + "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", + "requires": { + "ee-first": "1.1.1" + } + }, + "parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==" + }, + "path-to-regexp": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", + "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" + }, + "proxy-addr": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz", + "integrity": 
"sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==", + "requires": { + "forwarded": "~0.1.2", + "ipaddr.js": "1.9.1" + } + }, + "qs": { + "version": "6.7.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz", + "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==" + }, + "range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==" + }, + "raw-body": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz", + "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==", + "requires": { + "bytes": "3.1.0", + "http-errors": "1.7.2", + "iconv-lite": "0.4.24", + "unpipe": "1.0.0" + } + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "send": { + "version": "0.17.1", + "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz", + "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==", + "requires": { + "debug": "2.6.9", + "depd": "~1.1.2", + "destroy": "~1.0.4", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "~1.7.2", + "mime": "1.6.0", + "ms": "2.1.1", + "on-finished": "~2.3.0", + "range-parser": "~1.2.1", + "statuses": "~1.5.0" + }, + 
"dependencies": { + "ms": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", + "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" + } + } + }, + "serve-static": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz", + "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==", + "requires": { + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "0.17.1" + } + }, + "setprototypeof": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz", + "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==" + }, + "statuses": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz", + "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=" + }, + "toidentifier": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz", + "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==" + }, + "type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "requires": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + } + }, + "unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=" + }, + "utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=" + }, + "vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + 
"integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=" + } + } +} diff --git a/wasm/test_page/package.json b/wasm/test_page/package.json new file mode 100644 index 0000000..20af6d2 --- /dev/null +++ b/wasm/test_page/package.json @@ -0,0 +1,7 @@ +{ + "dependencies": { + "cors": "^2.8.5", + "express": "^4.17.1", + "nocache": "^2.1.0" + } +} diff --git a/wasm/test_page/start_server.sh b/wasm/test_page/start_server.sh new file mode 100644 index 0000000..b83344b --- /dev/null +++ b/wasm/test_page/start_server.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +cp ../../build-wasm/wasm/bergamot-translator-worker.data . +cp ../../build-wasm/wasm/bergamot-translator-worker.js . +cp ../../build-wasm/wasm/bergamot-translator-worker.wasm . +cp ../../build-wasm/wasm/bergamot-translator-worker.worker.js . +npm install +node bergamot-httpserver.js \ No newline at end of file From 47323d21b93795e19d82a499bfb13b71f7032c40 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sun, 14 Feb 2021 13:05:05 +0000 Subject: [PATCH 30/98] Getting rid of unused variables in Batch --- src/translator/request.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/translator/request.h b/src/translator/request.h index 5fb9c3c..eab0d4b 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -98,7 +98,10 @@ typedef std::vector RequestSentences; class Batch { public: Batch() { reset(); } - void reset() { Id_ = 0, numTokens_ = 0, maxLength_ = 0, sentences_.clear(); } + void reset() { + Id_ = 0; + sentences_.clear(); + } // Convenience function to determine poison. 
bool isPoison() { return (Id_ == -1); } static Batch poison() { @@ -108,15 +111,17 @@ public: } void log() { + int numTokens{0}, maxLength{0}; + for (auto &sentence : sentences_) { + numTokens += sentence.numTokens(); + maxLength = std::max(maxLength, static_cast(sentence.numTokens())); + } + LOG(info, "Batch(Id_={}, tokens={}, max-length={}, sentences_={})", Id_, - numTokens_, maxLength_, sentences_.size()); + numTokens, maxLength, sentences_.size()); } - void add(const RequestSentence &sentence) { - sentences_.push_back(sentence); - maxLength_ = std::max(sentence.numTokens(), maxLength_); - numTokens_ += sentence.numTokens(); - } + void add(const RequestSentence &sentence) { sentences_.push_back(sentence); } size_t size() { return sentences_.size(); } @@ -137,7 +142,6 @@ public: private: int Id_; - size_t numTokens_, maxLength_; RequestSentences sentences_; }; From ecc91c51e3b439b32173e3e4a821fdfe1a538436 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sun, 14 Feb 2021 13:23:46 +0000 Subject: [PATCH 31/98] BatchTranslator* -> unique_ptr --- src/translator/service.cpp | 3 ++- src/translator/service.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/translator/service.cpp b/src/translator/service.cpp index c93aa5f..bdfb7e9 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -23,7 +23,8 @@ Service::Service(Ptr options) } } else { marian::DeviceId deviceId(/*cpuId=*/0, DeviceType::cpu); - translator = new BatchTranslator(deviceId, vocabs_, options); + translator = + UPtr(new BatchTranslator(deviceId, vocabs_, options)); } } diff --git a/src/translator/service.h b/src/translator/service.h index c57e609..db01468 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -71,7 +71,7 @@ private: std::vector workers_; // Optional - BatchTranslator *translator{nullptr}; + UPtr translator{nullptr}; }; std::vector> loadVocabularies(Ptr options); From 0dbc8612c2431722152ca925f1bd7152187a399a Mon Sep 17 00:00:00 
2001 From: Andre Natal Date: Sun, 14 Feb 2021 09:15:08 -0800 Subject: [PATCH 32/98] Adding missing bergamot-httpserver.js --- wasm/test_page/bergamot-httpserver.js | 39 +++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 wasm/test_page/bergamot-httpserver.js diff --git a/wasm/test_page/bergamot-httpserver.js b/wasm/test_page/bergamot-httpserver.js new file mode 100644 index 0000000..f23b3e7 --- /dev/null +++ b/wasm/test_page/bergamot-httpserver.js @@ -0,0 +1,39 @@ +require(__dirname + '/helper.js'); + +var http = require('http'); +var express = require('express'); +var app = express(); +var server = http.createServer(app); +var fs = require('fs'); +var url = require('url'); +const nocache = require('nocache'); +const cors = require('cors'); + +app.use(cors()) +app.use(nocache()); +app.get('/*.*' , cors(), function(req, res) { + var options = url.parse(req.url, true); + var mime = Helper.getMime(options); + serveFile(res, options.pathname, mime); +}); + +function serveFile(res, pathName, mime) { + mime = mime || 'text/html'; + fs.readFile(__dirname + '/' + pathName, function (err, data) { + if (err) { + res.writeHead(500, {"Content-Type": "text/plain"}); + return res.end('Error loading ' + pathName + " with Error: " + err); + } + res.header('Cross-Origin-Embedder-Policy','require-corp'); + res.header('Cross-Origin-Opener-Policy','same-origin'); + res.writeHead(200, {"Content-Type": mime}); + res.end(data); + }); +} + +server.listen(8000); +console.log('HTTP and BinaryJS server started on port 8000'); + + + + From 5bd4a1a3c0ef388249794298b5ed2c0b1cf92d05 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sun, 14 Feb 2021 19:58:29 +0000 Subject: [PATCH 33/98] Refactor: marian-TranslationResult and associated marian-TranslationResult has more guards in place. Switching to a construction on demand model for sentenceMappings. These changes propagate to bergamot translation results.
Integration broke with the change in marian's internals, which are updated accordingly to get back functionality. Changes revealed a few bugs, which are fixed: - ConfigParser already discovered in wasm-integration (https://github.com/browsermt/bergamot-translator/commit/a06530e92b6d16527487c8fa0ead4ae04f0ddbb5). - Lambda captures and undefined values in DeviceId --- app/main-mts.cpp | 2 +- app/marian-decoder-new.cpp | 4 +- src/translator/TranslationModel.cpp | 18 +++++-- src/translator/parser.h | 3 +- src/translator/service.cpp | 10 ++-- src/translator/translation_result.cpp | 67 +++++++++++++++++---------- src/translator/translation_result.h | 42 ++++------------- 7 files changed, 76 insertions(+), 70 deletions(-) diff --git a/app/main-mts.cpp b/app/main-mts.cpp index c94ff30..d8e7567 100644 --- a/app/main-mts.cpp +++ b/app/main-mts.cpp @@ -26,7 +26,7 @@ int main(int argc, char *argv[]) { service.translate(std::move(input)); translation_result_future.wait(); const TranslationResult &translation_result = translation_result_future.get(); - std::cout << translation_result.getTranslatedText() << std::endl; + std::cout << translation_result.translation() << std::endl; // Stop Service. 
service.stop(); diff --git a/app/marian-decoder-new.cpp b/app/marian-decoder-new.cpp index 62b1bb4..6e44fb7 100644 --- a/app/marian-decoder-new.cpp +++ b/app/marian-decoder-new.cpp @@ -54,8 +54,8 @@ int main(int argc, char *argv[]) { translation_result_future.wait(); const TranslationResult &translation_result = translation_result_future.get(); - marian_decoder_minimal(translation_result.getHistories(), - service.targetVocab(), options); + marian_decoder_minimal(translation_result.histories(), service.targetVocab(), + options); LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed()); service.stop(); diff --git a/src/translator/TranslationModel.cpp b/src/translator/TranslationModel.cpp index f501678..9c55422 100644 --- a/src/translator/TranslationModel.cpp +++ b/src/translator/TranslationModel.cpp @@ -14,6 +14,7 @@ // All local project includes #include "TranslationModel.h" +#include "translator/parser.h" #include "translator/service.h" std::shared_ptr parseOptions(const std::string &config) { @@ -34,7 +35,7 @@ std::shared_ptr parseOptions(const std::string &config) { // Error: Aborted from void unhandledException() in // 3rd_party/marian-dev/src/common/logging.cpp:113 - marian::ConfigParser configParser(marian::cli::mode::translation); + marian::ConfigParser configParser = marian::bergamot::createConfigParser(); const YAML::Node &defaultConfig = configParser.getConfig(); options.merge(defaultConfig); @@ -70,18 +71,25 @@ TranslationModel::translate(std::vector &&texts, intermediate.wait(); auto mTranslationResult(std::move(intermediate.get())); + // This mess because marian::string_view != std::string_view + std::string source, translation; + marian::bergamot::TranslationResult::SentenceMappings mSentenceMappings; + mTranslationResult.move(source, translation, mSentenceMappings); + // Convert to UnifiedAPI::TranslationResult TranslationResult::SentenceMappings sentenceMappings; - for (auto &p : mTranslationResult.getSentenceMappings()) { + for (auto &p : 
mSentenceMappings) { std::string_view src(p.first.data(), p.first.size()), tgt(p.second.data(), p.second.size()); sentenceMappings.emplace_back(src, tgt); } // In place construction. - translationResults.emplace_back(std::move(mTranslationResult.source_), - std::move(mTranslationResult.translation_), - std::move(sentenceMappings)); + translationResults.emplace_back( + std::move(source), // &&mTranslationResult.source_ + std::move(translation), // &&mTranslationResult.translation_ + std::move(sentenceMappings) // &&sentenceMappings + ); } promise.set_value(std::move(translationResults)); diff --git a/src/translator/parser.h b/src/translator/parser.h index e273d6a..606b6a4 100644 --- a/src/translator/parser.h +++ b/src/translator/parser.h @@ -5,7 +5,8 @@ namespace marian { namespace bergamot { -marian::ConfigParser createConfigParser() { + +inline marian::ConfigParser createConfigParser() { marian::ConfigParser cp(marian::cli::mode::translation); cp.addOption( "--ssplit-prefix-file", "Bergamot Options", diff --git a/src/translator/service.cpp b/src/translator/service.cpp index bdfb7e9..ef2bacb 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -15,11 +15,11 @@ Service::Service(Ptr options) if (numWorkers_ > 0) { workers_.reserve(numWorkers_); - for (int cpuId = 0; cpuId < numWorkers_; cpuId++) { - workers_.emplace_back([&] { - marian::DeviceId deviceId(cpuId, DeviceType::cpu); - translation_loop(deviceId, pcqueue_, vocabs_, options); - }); + for (size_t cpuId = 0; cpuId < numWorkers_; cpuId++) { + marian::DeviceId deviceId(cpuId, DeviceType::cpu); + workers_.emplace_back(translation_loop, // Function + deviceId, std::ref(pcqueue_), std::ref(vocabs_), + options); } } else { marian::DeviceId deviceId(/*cpuId=*/0, DeviceType::cpu); diff --git a/src/translator/translation_result.cpp b/src/translator/translation_result.cpp index d69259f..ee147be 100644 --- a/src/translator/translation_result.cpp +++ b/src/translator/translation_result.cpp @@ 
-14,22 +14,26 @@ TranslationResult::TranslationResult(std::string &&source, : source_(std::move(source)), sourceRanges_(std::move(sourceRanges)), histories_(std::move(histories)) { - std::vector sourceMappings; - std::vector targetMappings; + constructTargetProperties(vocabs); +} - // Process sourceMappings into sourceMappings. - sourceMappings.reserve(sourceRanges_.size()); - for (int i = 0; i < sourceRanges_.size(); i++) { - string_view first = sourceRanges_[i].front(); - string_view last = sourceRanges_[i].back(); - sourceMappings.emplace_back(first.data(), last.end() - first.begin()); - } +void TranslationResult::move(std::string &source, std::string &translation, + SentenceMappings &sentenceMappings) { - // Compiles translations into a single std::string translation_ - // Current implementation uses += on std::string, multiple resizes. - // Stores ByteRanges as indices first, followed by conversion into - // string_views. - // TODO(jerin): Add token level string_views here as well. + constructSentenceMappings(sentenceMappings); + // Totally illegal stuff. + source = std::move(source_); + translation = std::move(translation_); + + // The above assignment expects source, target be moved. + // which makes the following invalid, hence required to be cleared. + sourceRanges_.clear(); + targetRanges_.clear(); + histories_.clear(); +} + +void TranslationResult::constructTargetProperties( + std::vector> &vocabs) { std::vector> translationRanges; size_t offset{0}; bool first{true}; @@ -52,21 +56,36 @@ TranslationResult::TranslationResult(std::string &&source, offset += decoded.size(); } - // Converting ByteRanges as indices into string_views. - targetMappings.reserve(translationRanges.size()); + // TODO(@jerinphilip): + // Currently considers target tokens as whole text. Needs + // to be further enhanced in marian-dev to extract alignments. 
for (auto &range : translationRanges) { + std::vector targetMappings; const char *begin = &translation_[range.first]; targetMappings.emplace_back(begin, range.second); - } - - // Surely, let's add sentenceMappings_ - for (auto src = sourceMappings.begin(), tgt = targetMappings.begin(); - src != sourceMappings.end() && tgt != targetMappings.end(); - ++src, ++tgt) { - sentenceMappings_.emplace_back(*src, *tgt); - auto &t = sentenceMappings_.back(); + targetRanges_.push_back(std::move(targetMappings)); } } +void TranslationResult::constructSentenceMappings( + TranslationResult::SentenceMappings &sentenceMappings) { + + for (int i = 0; i < sourceRanges_.size(); i++) { + string_view first, last; + + // Handle source-sentence + first = sourceRanges_[i].front(); + last = sourceRanges_[i].back(); + string_view src_sentence(first.data(), last.end() - first.begin()); + + // Handle target-sentence + first = targetRanges_[i].front(); + last = targetRanges_[i].back(); + string_view tgt_sentence(first.data(), last.end() - first.begin()); + + // Add both into sentence-mappings + sentenceMappings.emplace_back(src_sentence, tgt_sentence); + } +} } // namespace bergamot } // namespace marian diff --git a/src/translator/translation_result.h b/src/translator/translation_result.h index edc9a8d..5903145 100644 --- a/src/translator/translation_result.h +++ b/src/translator/translation_result.h @@ -22,53 +22,31 @@ public: : source_(std::move(other.source_)), translation_(std::move(other.translation_)), sourceRanges_(std::move(other.sourceRanges_)), - sentenceMappings_(std::move(other.sentenceMappings_)), + targetRanges_(std::move(other.targetRanges_)), histories_(std::move(other.histories_)){}; TranslationResult(const TranslationResult &) = delete; TranslationResult &operator=(const TranslationResult &) = delete; - // Returns const references to source and translated texts, for external - // consumption. 
- - const std::string &getOriginalText() const { return source_; } - const std::string &getTranslatedText() const { return translation_; } - - // A mapping of string_views in the source_ and translation_ are provide as a - // pair for external consumption. Each entry corresponding - // to a (source-sentence, target-sentence). - typedef std::vector> SentenceMappings; - const SentenceMappings &getSentenceMappings() const { - return sentenceMappings_; - } - // Return the Quality scores of the translated text. - // Not implemented currently, commenting out. - // const QualityScore &getQualityScore() const { return qualityScore; } + void move(std::string &source, std::string &target, + SentenceMappings &sentenceMappings); - // For development use to benchmark with marian-decoder. - const Histories &getHistories() const { return histories_; } + const Histories &histories() const { return histories_; } + const std::string &source() const { return source_; } + const std::string &translation() const { return translation_; } - // @jerinphilip: Why are these members no longer-private? For move-semantics - // with consistent string_views for bergamot-translator. +private: + void constructTargetProperties(std::vector> &vocabs); + void constructSentenceMappings(SentenceMappings &); std::string source_; std::string translation_; - // Adding the following to complete bergamot-translator spec, redundant while - // sourceMappings_ and targetMappings_ exists or vice-versa. - - SentenceMappings sentenceMappings_; - -private: - // Histories are currently required for interoperability with OutputPrinter - // and OutputCollector and hence comparisons with marian-decoder. - // Future hook to gain alignments. Histories histories_; - - // string_views at the token level. 
std::vector sourceRanges_; + std::vector targetRanges_; }; } // namespace bergamot } // namespace marian From 0fc6105df49a4e0f05e1d382ea9909776ad3aeec Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sun, 14 Feb 2021 20:27:53 +0000 Subject: [PATCH 34/98] No more two TranslationResults (sort-of) To avoid confusion, this commit renames marian::bergamot::TranslationResult -> marian::bergamot::Response. Usages of marian::bergamot::TranslationResults are updated across the source to be consistent with the change and get source back working. --- app/main-mts.cpp | 13 ++++++------- app/marian-decoder-new.cpp | 14 ++++++-------- src/translator/TranslationModel.cpp | 10 +++++----- src/translator/TranslationModel.h | 3 ++- src/translator/request.cpp | 11 +++++------ src/translator/request.h | 4 ++-- src/translator/service.cpp | 16 ++++++++-------- src/translator/service.h | 10 +++++----- src/translator/translation_result.cpp | 17 ++++++++--------- src/translator/translation_result.h | 14 ++++++-------- 10 files changed, 53 insertions(+), 59 deletions(-) diff --git a/app/main-mts.cpp b/app/main-mts.cpp index d8e7567..b5a4938 100644 --- a/app/main-mts.cpp +++ b/app/main-mts.cpp @@ -19,14 +19,13 @@ int main(int argc, char *argv[]) { std::ostringstream std_input; std_input << std::cin.rdbuf(); std::string input = std_input.str(); - using marian::bergamot::TranslationResult; + using marian::bergamot::Response; - // Wait on future until TranslationResult is complete - std::future translation_result_future = - service.translate(std::move(input)); - translation_result_future.wait(); - const TranslationResult &translation_result = translation_result_future.get(); - std::cout << translation_result.translation() << std::endl; + // Wait on future until Response is complete + std::future responseFuture = service.translate(std::move(input)); + responseFuture.wait(); + const Response &response = responseFuture.get(); + std::cout << response.translation() << std::endl; // Stop Service. 
service.stop(); diff --git a/app/marian-decoder-new.cpp b/app/marian-decoder-new.cpp index 6e44fb7..8988310 100644 --- a/app/marian-decoder-new.cpp +++ b/app/marian-decoder-new.cpp @@ -46,16 +46,14 @@ int main(int argc, char *argv[]) { std::ostringstream std_input; std_input << std::cin.rdbuf(); std::string input = std_input.str(); - using marian::bergamot::TranslationResult; + using marian::bergamot::Response; - // Wait on future until TranslationResult is complete - std::future translation_result_future = - service.translate(std::move(input)); - translation_result_future.wait(); - const TranslationResult &translation_result = translation_result_future.get(); + // Wait on future until Response is complete + std::future responseFuture = service.translate(std::move(input)); + responseFuture.wait(); + const Response &response = responseFuture.get(); - marian_decoder_minimal(translation_result.histories(), service.targetVocab(), - options); + marian_decoder_minimal(response.histories(), service.targetVocab(), options); LOG(info, "Total time: {:.5f}s wall", decoderTimer.elapsed()); service.stop(); diff --git a/src/translator/TranslationModel.cpp b/src/translator/TranslationModel.cpp index 9c55422..a5d396e 100644 --- a/src/translator/TranslationModel.cpp +++ b/src/translator/TranslationModel.cpp @@ -69,12 +69,12 @@ TranslationModel::translate(std::vector &&texts, // Collect future as marian::bergamot::TranslationResult auto intermediate = service_.translate(std::move(text)); intermediate.wait(); - auto mTranslationResult(std::move(intermediate.get())); + auto marianResponse(std::move(intermediate.get())); // This mess because marian::string_view != std::string_view std::string source, translation; - marian::bergamot::TranslationResult::SentenceMappings mSentenceMappings; - mTranslationResult.move(source, translation, mSentenceMappings); + marian::bergamot::Response::SentenceMappings mSentenceMappings; + marianResponse.move(source, translation, mSentenceMappings); // 
Convert to UnifiedAPI::TranslationResult TranslationResult::SentenceMappings sentenceMappings; @@ -86,8 +86,8 @@ TranslationModel::translate(std::vector &&texts, // In place construction. translationResults.emplace_back( - std::move(source), // &&mTranslationResult.source_ - std::move(translation), // &&mTranslationResult.translation_ + std::move(source), // &&marianResponse.source_ + std::move(translation), // &&marianResponse.translation_ std::move(sentenceMappings) // &&sentenceMappings ); } diff --git a/src/translator/TranslationModel.h b/src/translator/TranslationModel.h index c922538..5f590d9 100644 --- a/src/translator/TranslationModel.h +++ b/src/translator/TranslationModel.h @@ -24,7 +24,8 @@ */ class TranslationModel : public AbstractTranslationModel { public: - /* Construct the model using the model configuration options as yaml-formatted string + /* Construct the model using the model configuration options as yaml-formatted + * string */ TranslationModel(const std::string &config); diff --git a/src/translator/request.cpp b/src/translator/request.cpp index a743389..5433699 100644 --- a/src/translator/request.cpp +++ b/src/translator/request.cpp @@ -14,11 +14,11 @@ Request::Request(unsigned int Id, int lineNumberBegin, std::vector> &vocabs, std::string &&source, Segments &&segments, std::vector &&sourceAlignments, - std::promise translationResultPromise) + std::promise responsePromise) : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs), source_(std::move(source)), segments_(std::move(segments)), sourceAlignments_(std::move(sourceAlignments)), - response_(std::move(translationResultPromise)) { + response_(std::move(responsePromise)) { counter_ = segments_.size(); histories_.resize(segments_.size(), nullptr); @@ -47,10 +47,9 @@ void Request::processHistory(size_t index, Ptr history) { void Request::completeRequest() { // Request no longer needs to hold the content, can transfer it to - // TranslationResult. 
- TranslationResult translation_result(std::move(source_), - std::move(sourceAlignments_), - std::move(histories_), *vocabs_); + // Response. + Response translation_result(std::move(source_), std::move(sourceAlignments_), + std::move(histories_), *vocabs_); response_.set_value(std::move(translation_result)); } diff --git a/src/translator/request.h b/src/translator/request.h index eab0d4b..ddd6ccc 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -48,13 +48,13 @@ private: std::vector sourceAlignments_; std::vector> histories_; - std::promise response_; + std::promise response_; public: Request(unsigned int Id, int lineNumberBegin, std::vector> &vocabs_, std::string &&source, Segments &&segments, std::vector &&sourceAlignments, - std::promise translationResultPromise); + std::promise responsePromise); // Obtain the count of tokens in the segment correponding to index. Used to // insert sentence from multiple requests into the corresponding size bucket. diff --git a/src/translator/service.cpp b/src/translator/service.cpp index ef2bacb..4ab539f 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -28,11 +28,11 @@ Service::Service(Ptr options) } } -std::future Service::translateWithCopy(std::string input) { +std::future Service::translateWithCopy(std::string input) { return translate(std::move(input)); } -std::future Service::translate(std::string &&input) { +std::future Service::translate(std::string &&input) { // Takes in a blob of text. Segments and std::vector are // extracted from the input (blob of text) and used to construct a Request // along with a promise. 
promise value is set by the worker completing a @@ -49,13 +49,13 @@ std::future Service::translate(std::string &&input) { std::vector sourceAlignments; text_processor_.process(input, segments, sourceAlignments); - std::promise translationResultPromise; - auto future = translationResultPromise.get_future(); + std::promise responsePromise; + auto future = responsePromise.get_future(); - Ptr request = New( - requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(input), - std::move(segments), std::move(sourceAlignments), - std::move(translationResultPromise)); + Ptr request = + New(requestId_++, /* lineNumberBegin = */ 0, vocabs_, + std::move(input), std::move(segments), + std::move(sourceAlignments), std::move(responsePromise)); batcher_.addWholeRequest(request); diff --git a/src/translator/service.h b/src/translator/service.h index db01468..6f26bc8 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -25,17 +25,17 @@ class Service { // options = ...; // service = Service(options); // std::string input_blob = "Hello World"; - // std::future + // std::future // response = service.translate(std::move(input_blob)); // response.wait(); - // TranslationResult result = response.get(); + // Response result = response.get(); public: explicit Service(Ptr options); // Constructs new string copying, calls translate internally. - std::future translateWithCopy(std::string input); - std::future translate(std::string &&input); + std::future translateWithCopy(std::string input); + std::future translate(std::string &&input); void stop(); @@ -49,7 +49,7 @@ private: int numWorkers_; // vocabs are used to construct a Request, which later uses it to construct - // TranslationResult (decode from words to string). + // Response (decode from words to string). 
std::vector> vocabs_; // ORDER DEPENDENCY // Consists of: diff --git a/src/translator/translation_result.cpp b/src/translator/translation_result.cpp index ee147be..58f0926 100644 --- a/src/translator/translation_result.cpp +++ b/src/translator/translation_result.cpp @@ -7,18 +7,17 @@ namespace marian { namespace bergamot { -TranslationResult::TranslationResult(std::string &&source, - std::vector &&sourceRanges, - Histories &&histories, - std::vector> &vocabs) +Response::Response(std::string &&source, + std::vector &&sourceRanges, + Histories &&histories, std::vector> &vocabs) : source_(std::move(source)), sourceRanges_(std::move(sourceRanges)), histories_(std::move(histories)) { constructTargetProperties(vocabs); } -void TranslationResult::move(std::string &source, std::string &translation, - SentenceMappings &sentenceMappings) { +void Response::move(std::string &source, std::string &translation, + SentenceMappings &sentenceMappings) { constructSentenceMappings(sentenceMappings); // Totally illegal stuff. 
@@ -32,7 +31,7 @@ void TranslationResult::move(std::string &source, std::string &translation, histories_.clear(); } -void TranslationResult::constructTargetProperties( +void Response::constructTargetProperties( std::vector> &vocabs) { std::vector> translationRanges; size_t offset{0}; @@ -67,8 +66,8 @@ void TranslationResult::constructTargetProperties( } } -void TranslationResult::constructSentenceMappings( - TranslationResult::SentenceMappings &sentenceMappings) { +void Response::constructSentenceMappings( + Response::SentenceMappings &sentenceMappings) { for (int i = 0; i < sourceRanges_.size(); i++) { string_view first, last; diff --git a/src/translator/translation_result.h b/src/translator/translation_result.h index 5903145..6ed8927 100644 --- a/src/translator/translation_result.h +++ b/src/translator/translation_result.h @@ -11,22 +11,20 @@ namespace marian { namespace bergamot { -class TranslationResult { +class Response { public: - TranslationResult(std::string &&source, - std::vector &&sourceRanges, - Histories &&histories, - std::vector> &vocabs); + Response(std::string &&source, std::vector &&sourceRanges, + Histories &&histories, std::vector> &vocabs); - TranslationResult(TranslationResult &&other) + Response(Response &&other) : source_(std::move(other.source_)), translation_(std::move(other.translation_)), sourceRanges_(std::move(other.sourceRanges_)), targetRanges_(std::move(other.targetRanges_)), histories_(std::move(other.histories_)){}; - TranslationResult(const TranslationResult &) = delete; - TranslationResult &operator=(const TranslationResult &) = delete; + Response(const Response &) = delete; + Response &operator=(const Response &) = delete; typedef std::vector> SentenceMappings; From 370e9e2fb619b5f45693a3d4e6e3dac1442b6fed Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sun, 14 Feb 2021 20:35:41 +0000 Subject: [PATCH 35/98] {translation_result -> response}.h; propogates; --- app/main-mts.cpp | 2 +- app/marian-decoder-new.cpp | 2 +- 
src/translator/CMakeLists.txt | 2 +- src/translator/request.cpp | 2 +- src/translator/request.h | 2 +- src/translator/{translation_result.cpp => response.cpp} | 2 +- src/translator/{translation_result.h => response.h} | 6 +++--- src/translator/service.h | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) rename src/translator/{translation_result.cpp => response.cpp} (98%) rename src/translator/{translation_result.h => response.h} (91%) diff --git a/app/main-mts.cpp b/app/main-mts.cpp index b5a4938..78967be 100644 --- a/app/main-mts.cpp +++ b/app/main-mts.cpp @@ -7,8 +7,8 @@ #include "common/utils.h" #include "marian.h" #include "translator/parser.h" +#include "translator/response.h" #include "translator/service.h" -#include "translator/translation_result.h" int main(int argc, char *argv[]) { auto cp = marian::bergamot::createConfigParser(); diff --git a/app/marian-decoder-new.cpp b/app/marian-decoder-new.cpp index 8988310..f807909 100644 --- a/app/marian-decoder-new.cpp +++ b/app/marian-decoder-new.cpp @@ -11,8 +11,8 @@ #include "translator/output_collector.h" #include "translator/output_printer.h" #include "translator/parser.h" +#include "translator/response.h" #include "translator/service.h" -#include "translator/translation_result.h" void marian_decoder_minimal(const marian::Histories &histories, marian::Ptr targetVocab, diff --git a/src/translator/CMakeLists.txt b/src/translator/CMakeLists.txt index 16c3db9..c279ab9 100644 --- a/src/translator/CMakeLists.txt +++ b/src/translator/CMakeLists.txt @@ -10,7 +10,7 @@ add_library(bergamot-translator STATIC request.cpp service.cpp batcher.cpp - translation_result.cpp + response.cpp ) target_link_libraries(bergamot-translator marian ssplit) diff --git a/src/translator/request.cpp b/src/translator/request.cpp index 5433699..23bd679 100644 --- a/src/translator/request.cpp +++ b/src/translator/request.cpp @@ -1,7 +1,7 @@ #include "request.h" #include "definitions.h" -#include "translation_result.h" +#include 
"response.h" #include "common/logging.h" diff --git a/src/translator/request.h b/src/translator/request.h index ddd6ccc..8912a49 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -22,7 +22,7 @@ #define SRC_BERGAMOT_REQUEST_H_ #include "definitions.h" -#include "translation_result.h" +#include "response.h" #include "common/logging.h" #include "data/types.h" diff --git a/src/translator/translation_result.cpp b/src/translator/response.cpp similarity index 98% rename from src/translator/translation_result.cpp rename to src/translator/response.cpp index 58f0926..d40f88d 100644 --- a/src/translator/translation_result.cpp +++ b/src/translator/response.cpp @@ -1,4 +1,4 @@ -#include "translation_result.h" +#include "response.h" #include "common/logging.h" #include "data/alignment.h" diff --git a/src/translator/translation_result.h b/src/translator/response.h similarity index 91% rename from src/translator/translation_result.h rename to src/translator/response.h index 6ed8927..5737717 100644 --- a/src/translator/translation_result.h +++ b/src/translator/response.h @@ -1,5 +1,5 @@ -#ifndef SRC_BERGAMOT_TRANSLATION_RESULT_H_ -#define SRC_BERGAMOT_TRANSLATION_RESULT_H_ +#ifndef SRC_BERGAMOT_RESPONSE_H_ +#define SRC_BERGAMOT_RESPONSE_H_ #include "data/types.h" #include "definitions.h" @@ -49,4 +49,4 @@ private: } // namespace bergamot } // namespace marian -#endif // SRC_BERGAMOT_TRANSLATION_RESULT_H_ +#endif // SRC_BERGAMOT_RESPONSE_H_ diff --git a/src/translator/service.h b/src/translator/service.h index 6f26bc8..38a45c6 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -4,8 +4,8 @@ #include "batch_translator.h" #include "batcher.h" #include "pcqueue.h" +#include "response.h" #include "text_processor.h" -#include "translation_result.h" #include #include From be455a3da101132c5d7c3a283b90cc1cffd8a119 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sun, 14 Feb 2021 22:08:17 +0000 Subject: [PATCH 36/98] Straightening multithreading in 
translator workers BatchTranslators are now held in Service. Threads are separate, and constructed via lambdas. Retaining BatchTranslator class and member function (Probably a matter of taste I guess). This should eliminate complaints in (#10), hopefully. --- src/translator/batch_translator.cpp | 12 +++++------- src/translator/batch_translator.h | 6 ++---- src/translator/service.cpp | 28 +++++++++++++++++++--------- src/translator/service.h | 4 +--- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/translator/batch_translator.cpp b/src/translator/batch_translator.cpp index 13eb58a..7da63cf 100644 --- a/src/translator/batch_translator.cpp +++ b/src/translator/batch_translator.cpp @@ -10,7 +10,9 @@ namespace bergamot { BatchTranslator::BatchTranslator(DeviceId const device, std::vector> &vocabs, Ptr options) - : device_(device), options_(options), vocabs_(&vocabs) { + : device_(device), options_(options), vocabs_(&vocabs) {} + +void BatchTranslator::initialize() { // Initializes the graph. if (options_->hasAndNotEmpty("shortlist")) { int srcIdx = 0, trgIdx = 1; @@ -93,11 +95,7 @@ void BatchTranslator::translate(Batch &batch) { batch.completeBatch(histories); } -void translation_loop(DeviceId const &device, PCQueue &pcqueue, - std::vector> &vocabs, - Ptr options) { - - BatchTranslator translator(device, vocabs, options); +void BatchTranslator::consumeFrom(PCQueue &pcqueue) { Batch batch; Histories histories; while (true) { @@ -105,7 +103,7 @@ void translation_loop(DeviceId const &device, PCQueue &pcqueue, if (batch.isPoison()) { return; } else { - translator.translate(batch); + translate(batch); } } } diff --git a/src/translator/batch_translator.h b/src/translator/batch_translator.h index 2ee4e04..83b911c 100644 --- a/src/translator/batch_translator.h +++ b/src/translator/batch_translator.h @@ -28,6 +28,8 @@ public: // convenience function for logging. 
TODO(jerin) std::string _identifier() { return "worker" + std::to_string(device_.no); } void translate(Batch &batch); + void initialize(); + void consumeFrom(PCQueue &pcqueue); private: Ptr options_; @@ -38,10 +40,6 @@ private: Ptr slgen_; }; -void translation_loop(DeviceId const &device, PCQueue &pcqueue, - std::vector> &vocabs, - Ptr options); - } // namespace bergamot } // namespace marian diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 4ab539f..1b33558 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -13,18 +13,28 @@ Service::Service(Ptr options) text_processor_(vocabs_, options), batcher_(options), pcqueue_(2 * options->get("cpu-threads")) { - if (numWorkers_ > 0) { + if (numWorkers_ == 0) { + // In case workers are 0, a single-translator is created and initialized + // in the main thread. + marian::DeviceId deviceId(/*cpuId=*/0, DeviceType::cpu); + translators_.emplace_back(deviceId, vocabs_, options); + translators_.back().initialize(); + } else { + // If workers specified are greater than 0, translators_ are populated with + // unitialized instances. These are then initialized inside + // individual threads and set to consume from producer-consumer queue. 
workers_.reserve(numWorkers_); + translators_.reserve(numWorkers_); for (size_t cpuId = 0; cpuId < numWorkers_; cpuId++) { marian::DeviceId deviceId(cpuId, DeviceType::cpu); - workers_.emplace_back(translation_loop, // Function - deviceId, std::ref(pcqueue_), std::ref(vocabs_), - options); + translators_.emplace_back(deviceId, vocabs_, options); + + auto &translator = translators_.back(); + workers_.emplace_back([&translator, this] { + translator.initialize(); + translator.consumeFrom(pcqueue_); + }); } - } else { - marian::DeviceId deviceId(/*cpuId=*/0, DeviceType::cpu); - translator = - UPtr(new BatchTranslator(deviceId, vocabs_, options)); } } @@ -65,7 +75,7 @@ std::future Service::translate(std::string &&input) { // Queue single-threaded Batch batch; while (batcher_ >> batch) { - translator->translate(batch); + translators_[0].translate(batch); } } diff --git a/src/translator/service.h b/src/translator/service.h index 38a45c6..55b754a 100644 --- a/src/translator/service.h +++ b/src/translator/service.h @@ -68,10 +68,8 @@ private: TextProcessor text_processor_; // ORDER DEPENDENCY Batcher batcher_; PCQueue pcqueue_; + std::vector translators_; std::vector workers_; - - // Optional - UPtr translator{nullptr}; }; std::vector> loadVocabularies(Ptr options); From 45a8309c6972b121d62f1e9329267f752b8c796b Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Sun, 14 Feb 2021 22:28:08 +0000 Subject: [PATCH 37/98] Missed translation_result -> response rename --- src/translator/request.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/translator/request.cpp b/src/translator/request.cpp index 23bd679..9317f69 100644 --- a/src/translator/request.cpp +++ b/src/translator/request.cpp @@ -48,9 +48,9 @@ void Request::processHistory(size_t index, Ptr history) { void Request::completeRequest() { // Request no longer needs to hold the content, can transfer it to // Response. 
- Response translation_result(std::move(source_), std::move(sourceAlignments_), - std::move(histories_), *vocabs_); - response_.set_value(std::move(translation_result)); + Response response(std::move(source_), std::move(sourceAlignments_), + std::move(histories_), *vocabs_); + response_.set_value(std::move(response)); } bool Request::operator<(const Request &b) const { From d27a96fc53add7b36d063aaf86c528bc03798eea Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 10:04:15 +0200 Subject: [PATCH 38/98] Updated wasm readme --- wasm/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wasm/README.md b/wasm/README.md index 6be6209..131f9eb 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -37,11 +37,12 @@ You can also see everything in action by following the next steps: * Start the test webserver (ensure you have the latest nodejs installed) ``` cd test_page -bash start_server +bash start_server.sh ``` * Open any of the browsers below * Firefox Nightly +87: make sure the following prefs are on (about:config) ```` + dom.postMessage.sharedArrayBuffer.bypassCOOP_COEP.insecure.enabled = true javascript.options.wasm_simd = true javascript.options.wasm_simd_wormhole = true ```` From f7c86518cfbe418ba9db6655a6e093de520c618d Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 10:04:49 +0200 Subject: [PATCH 39/98] Update test page package-lock.json --- wasm/test_page/package-lock.json | 515 +------------------------------ 1 file changed, 1 insertion(+), 514 deletions(-) diff --git a/wasm/test_page/package-lock.json b/wasm/test_page/package-lock.json index 065c92d..ae4cb9d 100644 --- a/wasm/test_page/package-lock.json +++ b/wasm/test_page/package-lock.json @@ -1,519 +1,6 @@ { - "name": "test_page", - "lockfileVersion": 2, "requires": true, - "packages": { - "": { - "dependencies": { - "cors": "^2.8.5", - "express": "^4.17.1", - "nocache": "^2.1.0" - } - }, - "node_modules/accepts": { - "version": "1.3.7", - "resolved": 
"https://registry.npmjs.org/accepts/-/accepts-1.3.7.tgz", - "integrity": "sha512-Il80Qs2WjYlJIBNzNkK6KYqlVMTbZLXgHx2oT0pU/fjRHyEp+PEfEPY0R3WCwAGVOtauxh1hOxNgIf5bv7dQpA==", - "dependencies": { - "mime-types": "~2.1.24", - "negotiator": "0.6.2" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/array-flatten": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", - "integrity": "sha1-ml9pkFGx5wczKPKgCJaLZOopVdI=" - }, - "node_modules/body-parser": { - "version": "1.19.0", - "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.19.0.tgz", - "integrity": "sha512-dhEPs72UPbDnAQJ9ZKMNTP6ptJaionhP5cBb541nXPlW60Jepo9RV/a4fX4XWW9CuFNK22krhrj1+rgzifNCsw==", - "dependencies": { - "bytes": "3.1.0", - "content-type": "~1.0.4", - "debug": "2.6.9", - "depd": "~1.1.2", - "http-errors": "1.7.2", - "iconv-lite": "0.4.24", - "on-finished": "~2.3.0", - "qs": "6.7.0", - "raw-body": "2.4.0", - "type-is": "~1.6.17" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/bytes": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.0.tgz", - "integrity": "sha512-zauLjrfCG+xvoyaqLoV8bLVXXNGC4JqlxFCutSDWA6fJrTo2ZuvLYTqZ7aHBLZSMOopbzwv8f+wZcVzfVTI2Dg==", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/content-disposition": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.3.tgz", - "integrity": "sha512-ExO0774ikEObIAEV9kDo50o+79VCUdEB6n6lzKgGwupcVeRlhrj3qGAfwq8G6uBJjkqLrhT0qEYFcWng8z1z0g==", - "dependencies": { - "safe-buffer": "5.1.2" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/content-type": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", - "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/cookie": { - 
"version": "0.4.0", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.0.tgz", - "integrity": "sha512-+Hp8fLp57wnUSt0tY0tHEXh4voZRDnoIrZPqlo3DPiI4y9lwg/jqx+1Om94/W6ZaPDOUbnjOt/99w66zk+l1Xg==", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/cookie-signature": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", - "integrity": "sha1-4wOogrNCzD7oylE6eZmXNNqzriw=" - }, - "node_modules/cors": { - "version": "2.8.5", - "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz", - "integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==", - "dependencies": { - "object-assign": "^4", - "vary": "^1" - }, - "engines": { - "node": ">= 0.10" - } - }, - "node_modules/debug": { - "version": "2.6.9", - "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", - "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", - "dependencies": { - "ms": "2.0.0" - } - }, - "node_modules/depd": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/depd/-/depd-1.1.2.tgz", - "integrity": "sha1-m81S4UwJd2PnSbJ0xDRu0uVgtak=", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/destroy": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.0.4.tgz", - "integrity": "sha1-l4hXRCxEdJ5CBmE+N5RiBYJqvYA=" - }, - "node_modules/ee-first": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", - "integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0=" - }, - "node_modules/encodeurl": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", - "integrity": "sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k=", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/escape-html": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", - 
"integrity": "sha1-Aljq5NPQwJdN4cFpGI7wBR0dGYg=" - }, - "node_modules/etag": { - "version": "1.8.1", - "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", - "integrity": "sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc=", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/express": { - "version": "4.17.1", - "resolved": "https://registry.npmjs.org/express/-/express-4.17.1.tgz", - "integrity": "sha512-mHJ9O79RqluphRrcw2X/GTh3k9tVv8YcoyY4Kkh4WDMUYKRZUq0h1o0w2rrrxBqM7VoeUVqgb27xlEMXTnYt4g==", - "dependencies": { - "accepts": "~1.3.7", - "array-flatten": "1.1.1", - "body-parser": "1.19.0", - "content-disposition": "0.5.3", - "content-type": "~1.0.4", - "cookie": "0.4.0", - "cookie-signature": "1.0.6", - "debug": "2.6.9", - "depd": "~1.1.2", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "finalhandler": "~1.1.2", - "fresh": "0.5.2", - "merge-descriptors": "1.0.1", - "methods": "~1.1.2", - "on-finished": "~2.3.0", - "parseurl": "~1.3.3", - "path-to-regexp": "0.1.7", - "proxy-addr": "~2.0.5", - "qs": "6.7.0", - "range-parser": "~1.2.1", - "safe-buffer": "5.1.2", - "send": "0.17.1", - "serve-static": "1.14.1", - "setprototypeof": "1.1.1", - "statuses": "~1.5.0", - "type-is": "~1.6.18", - "utils-merge": "1.0.1", - "vary": "~1.1.2" - }, - "engines": { - "node": ">= 0.10.0" - } - }, - "node_modules/finalhandler": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.2.tgz", - "integrity": "sha512-aAWcW57uxVNrQZqFXjITpW3sIUQmHGG3qSb9mUah9MgMC4NeWhNOlNjXEYq3HjRAvL6arUviZGGJsBg6z0zsWA==", - "dependencies": { - "debug": "2.6.9", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "on-finished": "~2.3.0", - "parseurl": "~1.3.3", - "statuses": "~1.5.0", - "unpipe": "~1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/forwarded": { - "version": "0.1.2", - "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.1.2.tgz", - "integrity": "sha1-mMI9qxF1ZXuMBXPozszZGw/xjIQ=", - 
"engines": { - "node": ">= 0.6" - } - }, - "node_modules/fresh": { - "version": "0.5.2", - "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", - "integrity": "sha1-PYyt2Q2XZWn6g1qx+OSyOhBWBac=", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/http-errors": { - "version": "1.7.2", - "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.7.2.tgz", - "integrity": "sha512-uUQBt3H/cSIVfch6i1EuPNy/YsRSOUBXTVfZ+yR7Zjez3qjBz6i9+i4zjNaoqcoFVI4lQJ5plg63TvGfRSDCRg==", - "dependencies": { - "depd": "~1.1.2", - "inherits": "2.0.3", - "setprototypeof": "1.1.1", - "statuses": ">= 1.5.0 < 2", - "toidentifier": "1.0.0" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/iconv-lite": { - "version": "0.4.24", - "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", - "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", - "dependencies": { - "safer-buffer": ">= 2.1.2 < 3" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/inherits": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", - "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" - }, - "node_modules/ipaddr.js": { - "version": "1.9.1", - "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", - "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", - "engines": { - "node": ">= 0.10" - } - }, - "node_modules/media-typer": { - "version": "0.3.0", - "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", - "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/merge-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", - "integrity": "sha1-sAqqVW3YtEVoFQ7J0blT8/kMu2E=" - }, - "node_modules/methods": { - "version": "1.1.2", 
- "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", - "integrity": "sha1-VSmk1nZUE07cxSZmVoNbD4Ua/O4=", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/mime": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", - "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==", - "bin": { - "mime": "cli.js" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/mime-db": { - "version": "1.45.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.45.0.tgz", - "integrity": "sha512-CkqLUxUk15hofLoLyljJSrukZi8mAtgd+yE5uO4tqRZsdsAJKv0O+rFMhVDRJgozy+yG6md5KwuXhD4ocIoP+w==", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/mime-types": { - "version": "2.1.28", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.28.tgz", - "integrity": "sha512-0TO2yJ5YHYr7M2zzT7gDU1tbwHxEUWBCLt0lscSNpcdAfFyJOVEpRYNS7EXVcTLNj/25QO8gulHC5JtTzSE2UQ==", - "dependencies": { - "mime-db": "1.45.0" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/ms": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", - "integrity": "sha1-VgiurfwAvmwpAd9fmGF4jeDVl8g=" - }, - "node_modules/negotiator": { - "version": "0.6.2", - "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", - "integrity": "sha512-hZXc7K2e+PgeI1eDBe/10Ard4ekbfrrqG8Ep+8Jmf4JID2bNg7NvCPOZN+kfF574pFQI7mum2AUqDidoKqcTOw==", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/nocache": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/nocache/-/nocache-2.1.0.tgz", - "integrity": "sha512-0L9FvHG3nfnnmaEQPjT9xhfN4ISk0A8/2j4M37Np4mcDesJjHgEUfgPhdCyZuFI954tjokaIj/A3NdpFNdEh4Q==", - "engines": { - "node": ">=4.0.0" - } - }, - "node_modules/object-assign": { - "version": "4.1.1", - "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", - "integrity": 
"sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/on-finished": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.3.0.tgz", - "integrity": "sha1-IPEzZIGwg811M3mSoWlxqi2QaUc=", - "dependencies": { - "ee-first": "1.1.1" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/parseurl": { - "version": "1.3.3", - "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", - "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/path-to-regexp": { - "version": "0.1.7", - "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", - "integrity": "sha1-32BBeABfUi8V60SQ5yR6G/qmf4w=" - }, - "node_modules/proxy-addr": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.6.tgz", - "integrity": "sha512-dh/frvCBVmSsDYzw6n926jv974gddhkFPfiN8hPOi30Wax25QZyZEGveluCgliBnqmuM+UJmBErbAUFIoDbjOw==", - "dependencies": { - "forwarded": "~0.1.2", - "ipaddr.js": "1.9.1" - }, - "engines": { - "node": ">= 0.10" - } - }, - "node_modules/qs": { - "version": "6.7.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.7.0.tgz", - "integrity": "sha512-VCdBRNFTX1fyE7Nb6FYoURo/SPe62QCaAyzJvUjwRaIsc+NePBEniHlvxFmmX56+HZphIGtV0XeCirBtpDrTyQ==", - "engines": { - "node": ">=0.6" - } - }, - "node_modules/range-parser": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", - "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/raw-body": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.4.0.tgz", - "integrity": "sha512-4Oz8DUIwdvoa5qMJelxipzi/iJIi40O5cGV1wNYp5hvZP8ZN0T+jiNkL0QepXs+EsQ9XJ8ipEDoiH70ySUJP3Q==", - 
"dependencies": { - "bytes": "3.1.0", - "http-errors": "1.7.2", - "iconv-lite": "0.4.24", - "unpipe": "1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/safe-buffer": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" - }, - "node_modules/safer-buffer": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", - "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" - }, - "node_modules/send": { - "version": "0.17.1", - "resolved": "https://registry.npmjs.org/send/-/send-0.17.1.tgz", - "integrity": "sha512-BsVKsiGcQMFwT8UxypobUKyv7irCNRHk1T0G680vk88yf6LBByGcZJOTJCrTP2xVN6yI+XjPJcNuE3V4fT9sAg==", - "dependencies": { - "debug": "2.6.9", - "depd": "~1.1.2", - "destroy": "~1.0.4", - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "etag": "~1.8.1", - "fresh": "0.5.2", - "http-errors": "~1.7.2", - "mime": "1.6.0", - "ms": "2.1.1", - "on-finished": "~2.3.0", - "range-parser": "~1.2.1", - "statuses": "~1.5.0" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/send/node_modules/ms": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.1.tgz", - "integrity": "sha512-tgp+dl5cGk28utYktBsrFqA7HKgrhgPsg6Z/EfhWI4gl1Hwq8B/GmY/0oXZ6nF8hDVesS/FpnYaD/kOWhYQvyg==" - }, - "node_modules/serve-static": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.14.1.tgz", - "integrity": "sha512-JMrvUwE54emCYWlTI+hGrGv5I8dEwmco/00EvkzIIsR7MqrHonbD9pO2MOfFnpFntl7ecpZs+3mW+XbQZu9QCg==", - "dependencies": { - "encodeurl": "~1.0.2", - "escape-html": "~1.0.3", - "parseurl": "~1.3.3", - "send": "0.17.1" - }, - "engines": { - "node": ">= 0.8.0" - } - }, - "node_modules/setprototypeof": { - "version": "1.1.1", - "resolved": 
"https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.1.1.tgz", - "integrity": "sha512-JvdAWfbXeIGaZ9cILp38HntZSFSo3mWg6xGcJJsd+d4aRMOqauag1C63dJfDw7OaMYwEbHMOxEZ1lqVRYP2OAw==" - }, - "node_modules/statuses": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.5.0.tgz", - "integrity": "sha1-Fhx9rBd2Wf2YEfQ3cfqZOBR4Yow=", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/toidentifier": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.0.tgz", - "integrity": "sha512-yaOH/Pk/VEhBWWTlhI+qXxDFXlejDGcQipMlyxda9nthulaxLZUNcUqFxokp0vcYnvteJln5FNQDRrxj3YcbVw==", - "engines": { - "node": ">=0.6" - } - }, - "node_modules/type-is": { - "version": "1.6.18", - "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", - "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", - "dependencies": { - "media-typer": "0.3.0", - "mime-types": "~2.1.24" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/unpipe": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", - "integrity": "sha1-sr9O6FFKrmFltIF4KdIbLvSZBOw=", - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/utils-merge": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", - "integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM=", - "engines": { - "node": ">= 0.4.0" - } - }, - "node_modules/vary": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", - "integrity": "sha1-IpnwLG3tMNSllhsLn3RSShj2NPw=", - "engines": { - "node": ">= 0.8" - } - } - }, + "lockfileVersion": 1, "dependencies": { "accepts": { "version": "1.3.7", From 26ea5bba7a0a37c5785d34be6586f154f1bebb0b Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 10:26:04 +0200 Subject: [PATCH 40/98] Some cleanup --- wasm/README.md | 6 +++--- 
wasm/test_page/bergamot-httpserver.js | 4 ---- wasm/test_page/bergamot.html | 6 +++--- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/wasm/README.md b/wasm/README.md index 131f9eb..bb43144 100644 --- a/wasm/README.md +++ b/wasm/README.md @@ -35,17 +35,17 @@ input.delete(); You can also see everything in action by following the next steps: * Start the test webserver (ensure you have the latest nodejs installed) -``` +```bash cd test_page bash start_server.sh ``` * Open any of the browsers below * Firefox Nightly +87: make sure the following prefs are on (about:config) - ```` + ``` dom.postMessage.sharedArrayBuffer.bypassCOOP_COEP.insecure.enabled = true javascript.options.wasm_simd = true javascript.options.wasm_simd_wormhole = true - ```` + ``` * Chrome Canary +90: start with the following argument ``` diff --git a/wasm/test_page/bergamot-httpserver.js b/wasm/test_page/bergamot-httpserver.js index f23b3e7..b28719f 100644 --- a/wasm/test_page/bergamot-httpserver.js +++ b/wasm/test_page/bergamot-httpserver.js @@ -33,7 +33,3 @@ function serveFile(res, pathName, mime) { server.listen(8000); console.log('HTTP and BinaryJS server started on port 8000'); - - - - diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 49ca50e..e7e1fe5 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -29,9 +29,9 @@
- - - + + +
From d3969bcd2d2430a4bf5f047d791eb768ba4cb013 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 10:34:57 +0200 Subject: [PATCH 41/98] Add support for translating multiple sentences on the test page + report words per second metric in the log --- wasm/test_page/bergamot.html | 42 +++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index e7e1fe5..d093208 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -37,10 +37,13 @@
- +

- +

@@ -65,17 +68,23 @@ model = new Module.TranslationModel(modelConfig); } - const translate = (sentence) => { + const translate = (sentences) => { // Instantiate the arguments of translate() API i.e. TranslationRequest and input (vector) var request = new Module.TranslationRequest(); let input = new Module.VectorString; // Initialize the input - input.push_back(sentence); - /* + sentences.forEach(sentence => { + // prevent empty sentences - it breaks the translation + if (sentence.trim() === "") { + return; + } + input.push_back(sentence.trim()) + }) // Access input (just for debugging) console.log('Input size=', input.size()); + /* for (let i = 0; i < input.size(); i++) { console.log(' val:' + input.get(i)); } @@ -85,14 +94,14 @@ let result = model.translate(input, request); // Access original and translated text from each entry of vector //console.log('Result size=', result.size(), ' - TimeDiff - ', (Date.now() - start)/1000); - let translatedText = ""; + const translatedSentences = []; for (let i = 0; i < result.size(); i++) { - translatedText += result.get(i).getTranslatedText() + " "; + translatedSentences.push(result.get(i).getTranslatedText()); } - console.log(translatedText); + console.log({translatedSentences}); request.delete(); input.delete(); - return translatedText; + return translatedSentences; } document.querySelector("#load").addEventListener("click", () => { @@ -105,10 +114,17 @@ const translateCall = () => { const text = document.querySelector('#from').value; - let start = Date.now(); - const translate_text = translate(text); - log(`sentence translation time ${(Date.now() - start)/1000} secs`); - document.querySelector('#to').value = translate_text; + const sentences = text.split("\n"); + let wordCount = 0; + sentences.forEach(sentence => { + wordCount += sentence.trim().split(" ").length; + }) + const start = Date.now(); + const translatedSentences = translate(sentences); + const secs = (Date.now() - start) / 1000; + log(`Translation of 
${translatedSentences.length} sentences (wordCount ${wordCount}) took ${secs} secs (${Math.round(wordCount / secs)} words per second)`); + + document.querySelector('#to').value = translatedSentences.join("\n"); } document.querySelector("#translate").addEventListener("click", () => { From 28c0ab2e04f6e32b999aac0caa181cd914f92e30 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 10:37:37 +0200 Subject: [PATCH 42/98] Tweak words per second metric in the test page log --- wasm/test_page/bergamot.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index d093208..992d758 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -117,7 +117,7 @@ the sky is blue const sentences = text.split("\n"); let wordCount = 0; sentences.forEach(sentence => { - wordCount += sentence.trim().split(" ").length; + wordCount += sentence.trim().split(" ").filter(word => word.trim() !== "").length; }) const start = Date.now(); const translatedSentences = translate(sentences); From a33b3a3bb5bcac9fe34135d671773a11554dce82 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 11:21:36 +0200 Subject: [PATCH 43/98] Add instructions on how to assemble and package the set of files expected by the test page --- README.md | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 333e758..4bff753 100644 --- a/README.md +++ b/README.md @@ -45,10 +45,25 @@ Download the models from `https://github.com/mozilla-applied-ml/bergamot-models` The build also allows packaging files into wasm binary (i.e. preloading in Emscripten’s virtual file system) using cmake option `PACKAGE_DIR`. The compile command below packages all the files in PATH directory (in these case, your models) into wasm binary. 
```bash -emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR= ./models +emcmake cmake -DCOMPILE_WASM=on -DPACKAGE_DIR=/repo/models ../ ``` Files packaged this way are preloaded in the root of the virtual file system. +To package the set of files expected by the test page: + +```bash +git clone https://github.com/browsermt/students +cd students/esen/ +./download-models.sh +cp esen.student.tiny11/lex.s2t ../../models/lex.esen.s2t +cp esen.student.tiny11/model.npz ../../models/model.esen.npz +cp esen.student.tiny11/vocab.esen.spm ../../models/vocab.esen.spm +cd - +cd students/enes/ +./download-models.sh +cp enes.student.tiny11/lex.s2t ../../models/lex.enes.s2t +cp enes.student.tiny11/model.npz ../../models/model.enes.npz +``` After Editing Files: From 53e0b9fc5c219ae57d79be57acbec0dd580e89a8 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 11:22:23 +0200 Subject: [PATCH 44/98] Fix typo in lexical shortlist argument on test page --- wasm/test_page/bergamot.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 992d758..4ead87d 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -61,7 +61,7 @@ the sky is blue // For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/ // This example captures the most relevant options: model file, vocabulary files and shortlist file // var modelConfig = "{\"models\":[\"/model.enes.npz\"],\"vocabs\":[\"/vocab.esen.spm\"],\"beam-size\":1}";//,\"shortlist\":[\"/lex.s2t\"] - const modelConfig = `{\"models\":[\"/model.${lang}.npz\"],\"vocabs\":[\"/vocab.esen.spm\",\"/vocab.esen.spm\"],\"beam-size\":1} ,\"shortlist\":[\"/lex.s2t\"]`; + const modelConfig = `{\"models\":[\"/model.${lang}.npz\"],\"vocabs\":[\"/vocab.esen.spm\",\"/vocab.esen.spm\"],\"beam-size\":1} ,\"shortlist\":[\"/lex.esen.s2t\"]`; // Instantiate the TranslationModel if (model) model.delete(); From 
e50dd0909f4709a6336b46a0baee175353ed0150 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 11:23:08 +0200 Subject: [PATCH 45/98] Ignore contents in models directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 59363a8..6c301d6 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ wasm/test_page/node_modules build-wasm +models From 7030fa015745070e0d7dc8ab6f0a5d25a1d95a78 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 11:25:13 +0200 Subject: [PATCH 46/98] Ignore test page bundled artifacts --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6c301d6..d7d931f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ wasm/test_page/node_modules build-wasm models +wasm/test_page/bergamot-translator-worker.* From 49ad6514aec6498e2a24a7dd96cff25d4e64ab5d Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 11:27:47 +0200 Subject: [PATCH 47/98] Add reproducible docker-based builds + let test page use these by default --- .gitignore | 2 +- docker/Makefile | 55 ++++++++++++++++++++++++++++++++++ docker/README.md | 27 +++++++++++++++++ docker/wasm/Dockerfile | 36 ++++++++++++++++++++++ wasm/test_page/start_server.sh | 8 ++--- 5 files changed, 123 insertions(+), 5 deletions(-) create mode 100644 docker/Makefile create mode 100644 docker/README.md create mode 100644 docker/wasm/Dockerfile diff --git a/.gitignore b/.gitignore index d7d931f..5a73aac 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,6 @@ *.swo wasm/test_page/node_modules -build-wasm +build-* models wasm/test_page/bergamot-translator-worker.* diff --git a/docker/Makefile b/docker/Makefile new file mode 100644 index 0000000..583a588 --- /dev/null +++ b/docker/Makefile @@ -0,0 +1,55 @@ +# -*- mode: makefile-gmake; indent-tabs-mode: true; tab-width: 4 -*- +SHELL = bash +PWD = $(shell pwd) +WASM_IMAGE = local/bergamot-translator-build-wasm + +all: wasm-image compile-wasm + +# Build the Docker 
image for WASM builds +wasm-image: + docker build -t local/bergamot-translator-build-wasm ./wasm/ + +# Commands for compilation: +cmake_cmd = cmake + +wasm_cmake_cmd = ${cmake_cmd} +wasm_cmake_cmd += -DCOMPILE_WASM=on +wasm_cmake_cmd += -DProtobuf_INCLUDE_DIR=/usr/opt/protobuf-wasm-lib/dist/include +wasm_cmake_cmd += -DProtobuf_LIBRARY=/usr/opt/protobuf-wasm-lib/dist/lib/libprotobuf.a +wasm_cmake_cmd += -DPACKAGE_DIR=/repo/models + +make_cmd = make +#make_cmd += VERBOSE=1 + +# ... and running things on Docker +docker_mounts = ${PWD}/..:/repo +docker_mounts += ${HOME}/.ccache:/.ccache +run_on_docker = docker run --rm +run_on_docker += $(addprefix -v, ${docker_mounts}) +run_on_docker += ${INTERACTIVE_DOCKER_SESSION} + +${HOME}/.ccache: + mkdir -p $@ + +# Remove the bergamot-translator WASM build dir, forcing a clean compilation attempt +clean-wasm: BUILD_DIR = /repo/build-wasm-docker +clean-wasm: ${HOME}/.ccache + ${run_on_docker} ${WASM_IMAGE} bash -c '(rm -rf ${BUILD_DIR} || true)' + +# Compile bergamot-translator to WASM +compile-wasm: BUILD_DIR = /repo/build-wasm-docker +compile-wasm: ${HOME}/.ccache + ${run_on_docker} ${WASM_IMAGE} bash -c 'mkdir -p ${BUILD_DIR} && \ +cd ${BUILD_DIR} && \ +(emcmake ${wasm_cmake_cmd} .. 
&& \ +(emmake ${make_cmd}) || \ +rm CMakeCache.txt)' + +# Start interactive shells for development / debugging purposes +native-shell: INTERACTIVE_DOCKER_SESSION = -it +native-shell: + ${run_on_docker} ${NATIVE_IMAGE} bash + +wasm-shell: INTERACTIVE_DOCKER_SESSION = -it +wasm-shell: + ${run_on_docker} ${WASM_IMAGE} bash diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..d98456a --- /dev/null +++ b/docker/README.md @@ -0,0 +1,27 @@ +## WASM + +Prepare docker image for WASM compilation: + +```bash +make wasm-image +``` + +Compile to wasm: + +```bash +make compile-wasm +``` + +## Debugging + +Remove the marian-decoder build dir, forcing the next compilation attempt to start from scratch: + +```bash +make clean-wasm +``` + +Enter a docker container shell for manually running commands: + +```bash +make wasm-shell +``` diff --git a/docker/wasm/Dockerfile b/docker/wasm/Dockerfile new file mode 100644 index 0000000..f309662 --- /dev/null +++ b/docker/wasm/Dockerfile @@ -0,0 +1,36 @@ +FROM emscripten/emsdk:2.0.9 + +# Install specific version of CMake +WORKDIR /usr +RUN wget https://github.com/Kitware/CMake/releases/download/v3.17.2/cmake-3.17.2-Linux-x86_64.tar.gz -qO-\ + | tar xzf - --strip-components 1 + +# Install Python and Java (needed for Closure Compiler minification) +RUN apt-get update \ + && apt-get install -y \ + python3 \ + default-jre + +# Deps to compile protobuf from source + the protoc binary which we need natively +RUN apt-get update -y && apt-get --no-install-recommends -y install \ + protobuf-compiler \ + autoconf \ + autotools-dev \ + automake \ + autogen \ + libtool && ln -s /usr/bin/libtoolize /usr/bin/libtool \ + && mkdir -p /usr/opt \ + && cd /usr/opt \ + && git clone https://github.com/menduz/protobuf-wasm-lib + +RUN cd /usr/opt/protobuf-wasm-lib \ + && /bin/bash -c "BRANCH=v3.6.1 ./prepare.sh" +RUN cd /usr/opt/protobuf-wasm-lib/protobuf \ + && bash -x ../build.sh +RUN cp /usr/bin/protoc 
/usr/opt/protobuf-wasm-lib/dist/bin/protoc + +RUN apt-get --no-install-recommends -y install \ + libprotobuf-dev + +# Necessary for benchmarking +RUN pip3 install sacrebleu diff --git a/wasm/test_page/start_server.sh b/wasm/test_page/start_server.sh index b83344b..b0b5be1 100644 --- a/wasm/test_page/start_server.sh +++ b/wasm/test_page/start_server.sh @@ -1,8 +1,8 @@ #!/bin/bash -cp ../../build-wasm/wasm/bergamot-translator-worker.data . -cp ../../build-wasm/wasm/bergamot-translator-worker.js . -cp ../../build-wasm/wasm/bergamot-translator-worker.wasm . -cp ../../build-wasm/wasm/bergamot-translator-worker.worker.js . +cp ../../build-wasm-docker/wasm/bergamot-translator-worker.data . +cp ../../build-wasm-docker/wasm/bergamot-translator-worker.js . +cp ../../build-wasm-docker/wasm/bergamot-translator-worker.wasm . +cp ../../build-wasm-docker/wasm/bergamot-translator-worker.worker.js . npm install node bergamot-httpserver.js \ No newline at end of file From 77f39545f314c7a931c91aef0a11e871ff5a880c Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 11:30:45 +0200 Subject: [PATCH 48/98] Add time it takes to arrive to preRun to test page --- wasm/test_page/bergamot.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 4ead87d..7b38cc2 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -141,13 +141,15 @@ the sky is blue document.querySelector("#log").value += message + "\n"; } + const start = Date.now(); let moduleLoadStart; var Module = { preRun: [function() { + log(`Time until Module.preRun: ${(Date.now() - start)/1000} secs`); moduleLoadStart = Date.now(); }], onRuntimeInitialized: function() { - log(`Wasm Runtime initialized in ${(Date.now() - moduleLoadStart)/1000} secs`); + log(`Wasm Runtime initialized (preRun -> onRuntimeInitialized) in ${(Date.now() - moduleLoadStart)/1000} secs`); } }; From dbdcdab1153be9891e2a44aa308b29c0141349aa Mon Sep 
17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 11:59:03 +0200 Subject: [PATCH 49/98] Avoid use of unsafe eval in glue code --- wasm/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 40b08bf..8375158 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -14,7 +14,7 @@ target_include_directories(bergamot-translator-worker target_compile_definitions(bergamot-translator-worker PRIVATE WASM_BINDINGS) target_compile_options(bergamot-translator-worker PRIVATE ${WASM_COMPILE_FLAGS}) -set(LINKER_FLAGS "--bind -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1") +set(LINKER_FLAGS "--bind -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -s NO_DYNAMIC_EXECUTION=1") if (NOT PACKAGE_DIR STREQUAL "") set(LINKER_FLAGS "${LINKER_FLAGS} --preload-file ${PACKAGE_DIR}@/") endif() From 70bdcd436571de532ea202d95edf7cccf9505bb4 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 12:54:32 +0200 Subject: [PATCH 50/98] Fix typo from when fixing typo --- wasm/test_page/bergamot.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 7b38cc2..e5d7a90 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -61,7 +61,7 @@ the sky is blue // For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/ // This example captures the most relevant options: model file, vocabulary files and shortlist file // var modelConfig = "{\"models\":[\"/model.enes.npz\"],\"vocabs\":[\"/vocab.esen.spm\"],\"beam-size\":1}";//,\"shortlist\":[\"/lex.s2t\"] - const modelConfig = `{\"models\":[\"/model.${lang}.npz\"],\"vocabs\":[\"/vocab.esen.spm\",\"/vocab.esen.spm\"],\"beam-size\":1} ,\"shortlist\":[\"/lex.esen.s2t\"]`; + const modelConfig = 
`{\"models\":[\"/model.${lang}.npz\"],\"vocabs\":[\"/vocab.esen.spm\",\"/vocab.esen.spm\"],\"beam-size\":1} ,\"shortlist\":[\"/lex.${lang}.s2t\"]`; // Instantiate the TranslationModel if (model) model.delete(); From da56501c4f255d9bc57c2d244e0979c29676ad3f Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 13:10:10 +0200 Subject: [PATCH 51/98] Finally found the original typo that made it appear as if loading the model in the test page was faster than elsewhere - the lexical shortlist was not being included at the right place in the model config --- wasm/test_page/bergamot.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index e5d7a90..6985cee 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -61,7 +61,7 @@ the sky is blue // For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/ // This example captures the most relevant options: model file, vocabulary files and shortlist file // var modelConfig = "{\"models\":[\"/model.enes.npz\"],\"vocabs\":[\"/vocab.esen.spm\"],\"beam-size\":1}";//,\"shortlist\":[\"/lex.s2t\"] - const modelConfig = `{\"models\":[\"/model.${lang}.npz\"],\"vocabs\":[\"/vocab.esen.spm\",\"/vocab.esen.spm\"],\"beam-size\":1} ,\"shortlist\":[\"/lex.${lang}.s2t\"]`; + const modelConfig = `{\"models\":[\"/model.${lang}.npz\"],\"vocabs\":[\"/vocab.esen.spm\",\"/vocab.esen.spm\"],\"beam-size\":1,\"shortlist\":[\"/lex.${lang}.s2t\"]}`; // Instantiate the TranslationModel if (model) model.delete(); From 1e94d78c4d2b6bb9b763c16c59b0178a8458e18f Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 13:19:39 +0200 Subject: [PATCH 52/98] Formatting --- wasm/test_page/bergamot.html | 230 +++++++++++++++++------------------ 1 file changed, 115 insertions(+), 115 deletions(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 6985cee..541da15 100644 --- 
a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -1,41 +1,41 @@ - + - - - + } + + -
+
-
+
-
+


-
+
-
+

-
+
- - + const translatedSentences = translate(sentences); + const secs = (Date.now() - start) / 1000; + log(`Translation of ${translatedSentences.length} sentences (wordCount ${wordCount}) took ${secs} secs (${Math.round(wordCount / secs)} words per second)`); + + document.querySelector('#to').value = translatedSentences.join("\n"); + } + + document.querySelector("#translate").addEventListener("click", () => { + translateCall(); + }); + + document.querySelector("#from").addEventListener('keyup', function(event) { + if (event.keyCode === 13) { + translateCall(); + } + }); + + const log = (message) => { + document.querySelector("#log").value += message + "\n"; + } + + const start = Date.now(); + let moduleLoadStart; + var Module = { + preRun: [function() { + log(`Time until Module.preRun: ${(Date.now() - start) / 1000} secs`); + moduleLoadStart = Date.now(); + }], + onRuntimeInitialized: function() { + log(`Wasm Runtime initialized (preRun -> onRuntimeInitialized) in ${(Date.now() - moduleLoadStart) / 1000} secs`); + } + }; + + From fcc998ffa4c2468baed11889951685ff0b923cf7 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 13:30:07 +0200 Subject: [PATCH 53/98] Add 10 lines of esen benchmark sentences to test page --- wasm/test_page/bergamot.html | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 541da15..cbd2665 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -30,16 +30,24 @@
- - + +


From f3ff1d29ae4d6d036f68bc993420c36015f10b09 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 13:30:46 +0200 Subject: [PATCH 54/98] Make modelConfig an object instead of string (less likelihood of typos) --- wasm/test_page/bergamot.html | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index cbd2665..0de9925 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -67,13 +67,25 @@ En consecuencia, durante el a const loadModel = (lang) => { // Set the Model Configuration as YAML formatted string. // For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/ - // This example captures the most relevant options: model file, vocabulary files and shortlist file - // var modelConfig = "{\"models\":[\"/model.enes.npz\"],\"vocabs\":[\"/vocab.esen.spm\"],\"beam-size\":1}";//,\"shortlist\":[\"/lex.s2t\"] - const modelConfig = `{\"models\":[\"/model.${lang}.npz\"],\"vocabs\":[\"/vocab.esen.spm\",\"/vocab.esen.spm\"],\"beam-size\":1,\"shortlist\":[\"/lex.${lang}.s2t\"]}`; + + const modelConfig = { + "models": [ + `/model.${lang}.npz` + ], + "vocabs": [ + "/vocab.esen.spm", + "/vocab.esen.spm" + ], + "shortlist": [ + `/lex.${lang}.s2t`, + 50, + 50, + ] + }; // Instantiate the TranslationModel if (model) model.delete(); - model = new Module.TranslationModel(modelConfig); + model = new Module.TranslationModel(JSON.stringify(modelConfig)); } const translate = (sentences) => { From 7d6346d3b0b000f281e99f972bb2fe663b93b27f Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 13:35:22 +0200 Subject: [PATCH 55/98] Add model config used in pr6 benchmarks --- wasm/test_page/bergamot.html | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 0de9925..9322368 100644 --- a/wasm/test_page/bergamot.html +++ 
b/wasm/test_page/bergamot.html @@ -76,11 +76,22 @@ En consecuencia, durante el a "/vocab.esen.spm", "/vocab.esen.spm" ], + "beam-size": 1, + "mini-batch": 32, + "maxi-batch": 100, + "maxi-batch-sort": "src", + "workspace": 128, + "skip-cost": true, + "cpu-threads": 1, "shortlist": [ `/lex.${lang}.s2t`, 50, 50, ] + // TODO: Enable when wormhole is enabled + // "int8shift": true, + // TODO: Enable when loading of binary models is supported and we use model.intgemm.alphas.bin + // "int8shiftAlphaAll": true, }; // Instantiate the TranslationModel From 64d57d8aa089957f5c8ffe88f7ce805de0423e6e Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 13:50:59 +0200 Subject: [PATCH 56/98] Use yaml for modelConfig on test page --- wasm/test_page/bergamot.html | 51 +++++++++++++++++------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 9322368..04ff5ae 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -67,36 +67,33 @@ En consecuencia, durante el a const loadModel = (lang) => { // Set the Model Configuration as YAML formatted string. 
// For available configuration options, please check: https://marian-nmt.github.io/docs/cmd/marian-decoder/ - - const modelConfig = { - "models": [ - `/model.${lang}.npz` - ], - "vocabs": [ - "/vocab.esen.spm", - "/vocab.esen.spm" - ], - "beam-size": 1, - "mini-batch": 32, - "maxi-batch": 100, - "maxi-batch-sort": "src", - "workspace": 128, - "skip-cost": true, - "cpu-threads": 1, - "shortlist": [ - `/lex.${lang}.s2t`, - 50, - 50, - ] - // TODO: Enable when wormhole is enabled - // "int8shift": true, - // TODO: Enable when loading of binary models is supported and we use model.intgemm.alphas.bin - // "int8shiftAlphaAll": true, - }; + const modelConfig = `models: + - /model.${lang}.npz +vocabs: + - /vocab.esen.spm + - /vocab.esen.spm +beam-size: 1 +normalize: 1.0 +word-penalty: 0 +mini-batch: 32 +maxi-batch: 100 +maxi-batch-sort: src +workspace: 128 +max-length-factor: 2.0 +skip-cost: true +shortlist: + - lex.${lang}.s2t + - 50 + - 50 +`; +// TODO: Use in model config when wormhole is enabled: +// gemm-precision: int8shift +// TODO: Use in model config when loading of binary models is supported and we use model.intgemm.alphas.bin: +// gemm-precision: int8shiftAlphaAll // Instantiate the TranslationModel if (model) model.delete(); - model = new Module.TranslationModel(JSON.stringify(modelConfig)); + model = new Module.TranslationModel(modelConfig); } const translate = (sentences) => { From 3dd7a60b3511e5ebc09169f33d37913834e83a1d Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 15 Feb 2021 12:50:40 +0100 Subject: [PATCH 57/98] Enabled simd shuffle pattern for intgemm compilation - WORMHOLE cmake option is set to ON when compiling for WASM - WASM module might not run on Chrome --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 677963f..ccaf652 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,10 @@ SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only") 
SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support") SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds") SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.") +if(COMPILE_WASM) + # Set WORMHOLE to ON for marian whenever compiling for wasm platform + SET(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160") +endif() execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) From 91e45cb4f08a1b9f59757c82c61fbd5b86d88915 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 13:58:12 +0200 Subject: [PATCH 58/98] Prepend shortlist path with / --- wasm/test_page/bergamot.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 04ff5ae..8fc7824 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -82,7 +82,7 @@ workspace: 128 max-length-factor: 2.0 skip-cost: true shortlist: - - lex.${lang}.s2t + - /lex.${lang}.s2t - 50 - 50 `; From 9a5ae9568e50856d520839854dc00ee2662b2d04 Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 14:24:59 +0200 Subject: [PATCH 59/98] Turn of assertions and disable exception catching for wasm builds --- CMakeLists.txt | 2 +- wasm/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ccaf652..8044cb0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,7 +38,7 @@ endif() if(COMPILE_WASM) list(APPEND WASM_COMPILE_FLAGS -pthread -O3 -g2 -fPIC -mssse3 -msimd128) - list(APPEND WASM_COMPILE_FLAGS "SHELL:-s WASM=1" "SHELL:-s ASSERTIONS=1" "SHELL:-s DISABLE_EXCEPTION_CATCHING=0" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1") + list(APPEND WASM_COMPILE_FLAGS "SHELL:-s WASM=1" 
"SHELL:-s ASSERTIONS=0" "SHELL:-s DISABLE_EXCEPTION_CATCHING=1" "SHELL:-s LLD_REPORT_UNDEFINED" "SHELL:-s FORCE_FILESYSTEM=1" "SHELL:-s ALLOW_MEMORY_GROWTH=1") list(APPEND WASM_COMPILE_FLAGS -Wno-error=pthreads-mem-growth) endif(COMPILE_WASM) diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 8375158..748762d 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -14,7 +14,7 @@ target_include_directories(bergamot-translator-worker target_compile_definitions(bergamot-translator-worker PRIVATE WASM_BINDINGS) target_compile_options(bergamot-translator-worker PRIVATE ${WASM_COMPILE_FLAGS}) -set(LINKER_FLAGS "--bind -s ASSERTIONS=1 -s DISABLE_EXCEPTION_CATCHING=0 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -s NO_DYNAMIC_EXECUTION=1") +set(LINKER_FLAGS "--bind -s ASSERTIONS=0 -s DISABLE_EXCEPTION_CATCHING=1 -s FORCE_FILESYSTEM=1 -s ALLOW_MEMORY_GROWTH=1 -s NO_DYNAMIC_EXECUTION=1") if (NOT PACKAGE_DIR STREQUAL "") set(LINKER_FLAGS "${LINKER_FLAGS} --preload-file ${PACKAGE_DIR}@/") endif() From 9a5cf30bbbdee83d98e933ee122aed00b26b161a Mon Sep 17 00:00:00 2001 From: Motin Date: Mon, 15 Feb 2021 15:03:00 +0200 Subject: [PATCH 60/98] Revert "Enabled simd shuffle pattern for intgemm compilation" This reverts commit 3dd7a60b3511e5ebc09169f33d37913834e83a1d. 
--- CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8044cb0..1083384 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,10 +23,6 @@ SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only") SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support") SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds") SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.") -if(COMPILE_WASM) - # Set WORMHOLE to ON for marian whenever compiling for wasm platform - SET(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160") -endif() execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) From ca6ca154b9ee74899f1a801a8a3c91972ca10043 Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 15 Feb 2021 15:22:31 +0000 Subject: [PATCH 61/98] Changing fn name from enqueue to produceTo(pcqueue) --- src/translator/batcher.cpp | 2 +- src/translator/batcher.h | 2 +- src/translator/service.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/translator/batcher.cpp b/src/translator/batcher.cpp index 5fdcc3a..9ba0d03 100644 --- a/src/translator/batcher.cpp +++ b/src/translator/batcher.cpp @@ -62,7 +62,7 @@ void Batcher::addWholeRequest(Ptr request) { } } -void Batcher::enqueue(PCQueue &pcqueue) { +void Batcher::produceTo(PCQueue &pcqueue) { Batch batch; while (cleaveBatch(batch)) { pcqueue.ProduceSwap(batch); diff --git a/src/translator/batcher.h b/src/translator/batcher.h index d6b85f3..3427257 100644 --- a/src/translator/batcher.h +++ b/src/translator/batcher.h @@ -21,7 +21,7 @@ public: // which maintains priority among sentences from multiple concurrent requests. 
void addSentenceWithPriority(RequestSentence &sentence); void addWholeRequest(Ptr request); - void enqueue(PCQueue &pcqueue); + void produceTo(PCQueue &pcqueue); // Loads sentences with sentences compiled from (tentatively) multiple // requests optimizing for both padding and priority. diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 1b33558..96f391c 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -70,7 +70,7 @@ std::future Service::translate(std::string &&input) { batcher_.addWholeRequest(request); if (numWorkers_ > 0) { - batcher_.enqueue(pcqueue_); + batcher_.produceTo(pcqueue_); } else { // Queue single-threaded Batch batch; From 0374ac4696b124ed9e015325aef3c1501a514736 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 15 Feb 2021 14:28:06 +0100 Subject: [PATCH 62/98] Updated marian submodule - Includes try/catch free builds - Has ASSERTION=0 and DISABLE_EXCEPTION_CATCHING=1 for wasm builds --- 3rd_party/marian-dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rd_party/marian-dev b/3rd_party/marian-dev index 29ecba1..467c43a 160000 --- a/3rd_party/marian-dev +++ b/3rd_party/marian-dev @@ -1 +1 @@ -Subproject commit 29ecba1cb1b8ea26ae582d3851e214769b89e566 +Subproject commit 467c43a292a68b7913af2a00d353de97c1740f92 From 3607523c24ca69fa3b195f1aae1aaf0c0bb44f65 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 15 Feb 2021 16:54:50 +0100 Subject: [PATCH 63/98] Enabled COMPILE_WITHOUT_EXCEPTIONS for marian submodule --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1083384..a2aec07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ SET(COMPILE_DECODER_ONLY ON CACHE BOOL "Compile marian-decoder only") SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support") SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds") 
SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.") +SET(COMPILE_WITHOUT_EXCEPTIONS ON CACHE BOOL "Compile without exceptions") execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) From c5c5339489d6d209271f76ac2f53ce7ac92fa7c0 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Mon, 15 Feb 2021 17:18:59 +0100 Subject: [PATCH 64/98] Re-enable simd shuffle pattern for intgemm compilation --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a2aec07..8d1ff1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,10 @@ SET(COMPILE_WITH_PTHREADS OFF CACHE BOOL "Compile with pthreads support") SET(USE_WASM_COMPATIBLE_BLAS ON CACHE BOOL "Compile with a WASM compatible blas for decoder only builds") SET(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.") SET(COMPILE_WITHOUT_EXCEPTIONS ON CACHE BOOL "Compile without exceptions") +if(COMPILE_WASM) + # Set WORMHOLE to ON for marian whenever compiling for wasm platform + SET(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160") +endif() execute_process(COMMAND git submodule update --init --recursive --no-fetch WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) From d5a5e754510aeb158fea3e82939426e4d29885ed Mon Sep 17 00:00:00 2001 From: Jerin Philip Date: Mon, 15 Feb 2021 20:21:10 +0000 Subject: [PATCH 65/98] Renaming variables; Enhancing documentation --- src/translator/request.cpp | 45 +++++++++++- src/translator/request.h | 142 ++++++++++++++++++++++--------------- src/translator/service.cpp | 6 +- 3 files changed, 130 insertions(+), 63 deletions(-) diff --git a/src/translator/request.cpp b/src/translator/request.cpp index 9317f69..303f9cc 100644 --- a/src/translator/request.cpp +++ b/src/translator/request.cpp @@ -10,14 +10,15 @@ namespace marian { 
namespace bergamot { +// ----------------------------------------------------------------- Request::Request(unsigned int Id, int lineNumberBegin, std::vector> &vocabs, std::string &&source, Segments &&segments, - std::vector &&sourceAlignments, + std::vector &&sourceTokenRanges, std::promise responsePromise) : Id_(Id), lineNumberBegin_(lineNumberBegin), vocabs_(&vocabs), source_(std::move(source)), segments_(std::move(segments)), - sourceAlignments_(std::move(sourceAlignments)), + sourceTokenRanges_(std::move(sourceTokenRanges)), response_(std::move(responsePromise)) { counter_ = segments_.size(); @@ -48,7 +49,7 @@ void Request::processHistory(size_t index, Ptr history) { void Request::completeRequest() { // Request no longer needs to hold the content, can transfer it to // Response. - Response response(std::move(source_), std::move(sourceAlignments_), + Response response(std::move(source_), std::move(sourceTokenRanges_), std::move(histories_), *vocabs_); response_.set_value(std::move(response)); } @@ -58,6 +59,8 @@ bool Request::operator<(const Request &b) const { return Id_ < b.Id_; } +// ------------------------------------------------------------------ + RequestSentence::RequestSentence(size_t index, Ptr request) : index_(index), request_(request) {} @@ -87,5 +90,41 @@ bool operator<(const RequestSentence &a, const RequestSentence &b) { return a.request_ < b.request_; } +// ---------------------------------------------------------------------- + +void Batch::reset() { + Id_ = 0; + sentences_.clear(); +} + +void Batch::log() { + int numTokens{0}, maxLength{0}; + for (auto &sentence : sentences_) { + numTokens += sentence.numTokens(); + maxLength = std::max(maxLength, static_cast(sentence.numTokens())); + } + + LOG(info, "Batch(Id_={}, tokens={}, max-length={}, sentences_={})", Id_, + numTokens, maxLength, sentences_.size()); +} + +void Batch::add(const RequestSentence &sentence) { + sentences_.push_back(sentence); +} + +void Batch::setId(int Id) { + assert(Id > 
0); + Id_ = Id; + if (Id % 500 == 0) { + log(); + } +} + +void Batch::completeBatch(const Histories &histories) { + for (int i = 0; i < sentences_.size(); i++) { + sentences_[i].completeSentence(histories[i]); + } +} + } // namespace bergamot } // namespace marian diff --git a/src/translator/request.h b/src/translator/request.h index 8912a49..095a03c 100644 --- a/src/translator/request.h +++ b/src/translator/request.h @@ -3,20 +3,19 @@ // // Request: holds the input blob of a text, Segments (vector) which are // to go to the batching mechanism and alignments between the processed -// segments and the input blob (sourceAlignments). In addition, Request takes +// segments and the input blob (sourceTokenRanges). In addition, Request takes // care of the barrier which fires when all the Segments in a request are done -// translating by the workers (BatchTranslator). Request is to be extended with -// notions of Priority (sequence, user-given). +// translating by the workers (BatchTranslator). +// TODO(jerinphilip): Extend Request with notions of Priority (sequence, +// user-given). // -// RequestSentence: is a tuple of (index, Request*). This provides the +// RequestSentence: is a tuple of (index, Ptr). This provides the // batching mechanism access to the segment within the request. The backref to // Request allows event triggering the barrier upon completion of the last // sentence by a worker. // -// PCItem: is a vector of RequestSentences and a batchNumber, which is what the -// PCQueue holds. The batches are constructed from segments returned by a -// RequestSentence. Can be enhanced with paddingSize, countTokens eventually for -// logging. +// Batch: is a vector of RequestSentences tagged with a batchNumber, which is +// what the PCQueue holds. Batch is "produced" by the Batcher. 
#ifndef SRC_BERGAMOT_REQUEST_H_ #define SRC_BERGAMOT_REQUEST_H_ @@ -37,23 +36,10 @@ namespace marian { namespace bergamot { class Request { -private: - unsigned int Id_; - int lineNumberBegin_; - std::string source_; - std::atomic counter_; - std::vector> *vocabs_; - - Segments segments_; - std::vector sourceAlignments_; - std::vector> histories_; - - std::promise response_; - public: Request(unsigned int Id, int lineNumberBegin, std::vector> &vocabs_, std::string &&source, - Segments &&segments, std::vector &&sourceAlignments, + Segments &&segments, std::vector &&sourceTokenRanges, std::promise responsePromise); // Obtain the count of tokens in the segment correponding to index. Used to @@ -68,7 +54,8 @@ public: // several requests. Segment getSegment(size_t index) const; - // For notions of priority among requests (used to enable in Batcher). + // For notions of priority among requests, used to enable std::set in + // Batcher. bool operator<(const Request &request) const; // Processes a history obtained after translating in a heterogenous batch @@ -77,20 +64,60 @@ public: // On completion of last segment, sets value of the promise. void completeRequest(); + +private: + unsigned int Id_; + int lineNumberBegin_; + + // Multiple translation-workers can concurrently access the same Request. The + // following atomic atomically operates on the variable holding sentences + // remaining to be translated. + std::atomic counter_; + + // source_ holds the source string to be translated. segments_ hold the + // sentences generated from source_ in vector. sourceTokenRanges_ are + // string_views of the text corresponding to these words, pointing to + // sequences in source_. histories_ is a buffer which eventually stores the + // translations of each segment in the corresponding index. 
+ std::string source_; + Segments segments_; + std::vector sourceTokenRanges_; + std::vector> histories_; + + // Members above are moved into newly constructed Response on completion + // of translation of all segments. The promise below is set to this Response + // value. future to this promise is made available to the user through + // Service. + std::promise response_; + + // Constructing Response requires the vocabs_ used to generate Request. + std::vector> *vocabs_; }; class RequestSentence { -private: - size_t index_; - Ptr request_; + // A RequestSentence provides a view to a sentence within a Request. Existence + // of this class allows the sentences and associated information to be kept + // within Request. public: RequestSentence(size_t, Ptr); size_t numTokens() const; + + // lineNumber in Request, used for matching marian-decoder. SentenceTuple + // requires lineNumber to be set for Corpus based batches. size_t lineNumber() const; + + // Accessor to the segment represented by the RequestSentence. Segment getUnderlyingSegment() const; + + // Forwards call to Request, checking for completion. void completeSentence(Ptr history); + friend bool operator<(const RequestSentence &a, const RequestSentence &b); + +private: + size_t index_; + Ptr request_; }; typedef std::vector RequestSentences; @@ -98,47 +125,48 @@ typedef std::vector RequestSentences; class Batch { public: Batch() { reset(); } - void reset() { - Id_ = 0; - sentences_.clear(); - } - // Convenience function to determine poison. - bool isPoison() { return (Id_ == -1); } + // Reset is required to reuse the same batch by consumer. + void reset(); + + // Methods to construct and determine poison. 
static Batch poison() { Batch poison_; poison_.Id_ = -1; return poison_; } + bool isPoison() const { return (Id_ == -1); } - void log() { - int numTokens{0}, maxLength{0}; - for (auto &sentence : sentences_) { - numTokens += sentence.numTokens(); - maxLength = std::max(maxLength, static_cast(sentence.numTokens())); - } + size_t size() const { return sentences_.size(); } - LOG(info, "Batch(Id_={}, tokens={}, max-length={}, sentences_={})", Id_, - numTokens, maxLength, sentences_.size()); - } + // Accessors to load data into a batch. Use add(...) to add sentences into a + // batch. Once complete with a legal batch, use setId to set Id_ accordingly. + // setId only allows setting Id > 0. For use in Batcher, which acts as a + // producer to a PCQueue holding "Batch"es. + // + // Id_ = + // -1 : Batch::Poison + // 0 : Empty Batch + // >0 : Legal batch containing sentences - void add(const RequestSentence &sentence) { sentences_.push_back(sentence); } - - size_t size() { return sentences_.size(); } - - void setId(int Id) { - assert(Id > 0); - Id_ = Id; - if (Id % 500 == 0) { - log(); - } - } + void add(const RequestSentence &sentence); + void setId(int Id); + // Accessors to read from a Batch. For use in BatchTranslator (consumer on a + // PCQueue holding batches). + // + // sentences() are used to access sentences to construct marian internal + // batch. const RequestSentences &sentences() { return sentences_; } - void completeBatch(const Histories &histories) { - for (int i = 0; i < sentences_.size(); i++) { - sentences_[i].completeSentence(histories[i]); - } - } + + // On obtaining Histories after translating a batch, completeBatch can be + // called with Histories , which forwards the call to Request through + // RequestSentence and triggers completion, by setting the promised value to + // the future given to client. + void completeBatch(const Histories &histories); + + // Convenience function to log batch-statistics. numTokens, max-length. 
+ // TODO(jerinphilip): Use to log and report packing efficiency. + void log(); private: int Id_; diff --git a/src/translator/service.cpp b/src/translator/service.cpp index 96f391c..2163eef 100644 --- a/src/translator/service.cpp +++ b/src/translator/service.cpp @@ -56,8 +56,8 @@ std::future Service::translate(std::string &&input) { // returns future corresponding to the promise. Segments segments; - std::vector sourceAlignments; - text_processor_.process(input, segments, sourceAlignments); + std::vector sourceTokenRanges; + text_processor_.process(input, segments, sourceTokenRanges); std::promise responsePromise; auto future = responsePromise.get_future(); @@ -65,7 +65,7 @@ std::future Service::translate(std::string &&input) { Ptr request = New(requestId_++, /* lineNumberBegin = */ 0, vocabs_, std::move(input), std::move(segments), - std::move(sourceAlignments), std::move(responsePromise)); + std::move(sourceTokenRanges), std::move(responsePromise)); batcher_.addWholeRequest(request); From 921c2eedf812b3304a06ebfad890fb025755c2a0 Mon Sep 17 00:00:00 2001 From: Abhishek Aggarwal Date: Tue, 16 Feb 2021 14:21:46 +0100 Subject: [PATCH 66/98] Updated config for min inference time - This combination gives min inference time (~ 200 WPS) on local machine --- wasm/test_page/bergamot.html | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index 8fc7824..d91a9a1 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -75,17 +75,25 @@ vocabs: beam-size: 1 normalize: 1.0 word-penalty: 0 -mini-batch: 32 -maxi-batch: 100 -maxi-batch-sort: src +max-input-sentence-tokens: 128 +max-input-tokens: 1024 workspace: 128 max-length-factor: 2.0 skip-cost: true +cpu-threads: 1 +quiet: true +quiet-translation: true shortlist: - /lex.${lang}.s2t - 50 - 50 `; +/* +This config is not valid anymore in new APIs +mini-batch: 32 +maxi-batch: 100 +maxi-batch-sort: src +*/ // TODO: Use 
in model config when wormhole is enabled: // gemm-precision: int8shift // TODO: Use in model config when loading of binary models is supported and we use model.intgemm.alphas.bin: From b1e72ce75e2bce611b6dee11408278f8b3e3e4ec Mon Sep 17 00:00:00 2001 From: Motin Date: Tue, 16 Feb 2021 15:46:15 +0200 Subject: [PATCH 67/98] Updated instructions on how to get all relevant models in place for the upcoming release --- README.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4bff753..0d55686 100644 --- a/README.md +++ b/README.md @@ -52,17 +52,10 @@ Files packaged this way are preloaded in the root of the virtual file system. To package the set of files expected by the test page: ```bash -git clone https://github.com/browsermt/students -cd students/esen/ -./download-models.sh -cp esen.student.tiny11/lex.s2t ../../models/lex.esen.s2t -cp esen.student.tiny11/model.npz ../../models/model.esen.npz -cp esen.student.tiny11/vocab.esen.spm ../../models/vocab.esen.spm -cd - -cd students/enes/ -./download-models.sh -cp enes.student.tiny11/lex.s2t ../../models/lex.enes.s2t -cp enes.student.tiny11/model.npz ../../models/model.enes.npz +mkdir models +git clone https://github.com/motin/bergamot-models +cp -r bergamot-models/* models +gunzip models/*/* ``` After Editing Files: From d907400a80d59cac771dad7f31d67bcb67411270 Mon Sep 17 00:00:00 2001 From: Motin Date: Tue, 16 Feb 2021 17:00:45 +0200 Subject: [PATCH 68/98] Updated test page to use the model structure from bergamot-models repo --- wasm/test_page/bergamot.html | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/wasm/test_page/bergamot.html b/wasm/test_page/bergamot.html index d91a9a1..7956544 100644 --- a/wasm/test_page/bergamot.html +++ b/wasm/test_page/bergamot.html @@ -64,14 +64,20 @@ En consecuencia, durante el a