2020-11-03 11:00:33 +03:00
# Oldest CMake release this project accepts; also fixes the policy defaults.
cmake_minimum_required(VERSION 3.5.1)
2021-05-17 18:34:57 +03:00
# Make the helper modules in <source>/cmake visible to include() and
# find_package(). The path is quoted so it always stays a single list entry.
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
2020-11-03 11:00:33 +03:00
# Opt in to CMP0074 (introduced in CMake 3.12) whenever the running CMake
# knows the policy: find_package() then honours <PackageName>_ROOT variables.
if(POLICY CMP0074)
  cmake_policy(SET CMP0074 NEW)
endif()
2021-05-10 13:28:37 +03:00
# Opt in to CMP0077 whenever the running CMake knows the policy: option()
# then leaves an already-set normal variable of the same name untouched.
if(POLICY CMP0077)
  cmake_policy(SET CMP0077 NEW)
endif()
2020-11-03 11:00:33 +03:00
# Declare the project with the C++ and C languages enabled, and make C++17 a
# hard requirement (no silent fallback to an older standard).
project(bergamot_translator LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
2021-05-01 02:29:23 +03:00
# Fall back to a Release build when no configuration was requested.
# Note: with a CMake MSVC build, CMAKE_BUILD_TYPE is derived automatically from
# the 'configurationType' key of the CMakeSettings.json configurations.
if(NOT CMAKE_BUILD_TYPE)
  message(WARNING "CMAKE_BUILD_TYPE not set; setting to Release")
  set(CMAKE_BUILD_TYPE Release)
endif()
2021-12-15 02:53:53 +03:00
# Native (non-WASM) builds only: choose the CPU architecture to compile for and,
# on MSVC, translate it into the matching /arch switch (MSVC_BUILD_ARCH).
if(NOT COMPILE_WASM)
  # Setting BUILD_ARCH to native invokes CPU intrinsic detection logic below.
  # Prevent invoking that logic for WASM builds.
  set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")

  # Unfortunately MSVC supports a limited subset of BUILD_ARCH flags. Instead try to guess
  # what architecture we can compile to by reading BUILD_ARCH and mapping it to MSVC values.
  # References:
  #   https://clang.llvm.org/docs/UsersManual.html
  #   https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
  #   https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/i386-and-x86-64-Options.html
  #   https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86?redirectedfrom=MSDN&view=vs-2019&view=msvc-170
  #   https://devblogs.microsoft.com/oldnewthing/20201026-00/?p=104397
  # This is by no means an exhaustive list but should match the most common flags
  # Linux programmers expect to pass to MSVC.
  if(MSVC)
    # BUILD_ARCH spellings grouped by the MSVC /arch level they translate to.
    set(_msvc_arch_avx512 skylake-avx512 cannonlake x86-64-v4 tigerlake cooperlake cascadelake)
    # "native" lands on AVX2 too: it is a good default, very few desktop
    # systems support avx512.
    set(_msvc_arch_avx2 native core-avx2 haswell x86-64-v3 broadwell skylake)
    set(_msvc_arch_avx sandybridge corei7-avx core-avx-i ivybridge)
    # SSE2 is the MSVC default. We won't go down to SSE because we don't support
    # that hardware at all with intgemm. Marian recommends to only go down to
    # SSE4.1 at most.
    set(_msvc_arch_sse2 nehalem westmere x86-64-v2 corei7 core2)

    if(BUILD_ARCH IN_LIST _msvc_arch_avx512)
      set(MSVC_BUILD_ARCH "/arch:AVX512")
    elseif(BUILD_ARCH IN_LIST _msvc_arch_avx2)
      set(MSVC_BUILD_ARCH "/arch:AVX2")
    elseif(BUILD_ARCH IN_LIST _msvc_arch_avx)
      set(MSVC_BUILD_ARCH "/arch:AVX")
    elseif(BUILD_ARCH IN_LIST _msvc_arch_sse2)
      set(MSVC_BUILD_ARCH "/arch:SSE2")
    else()
      message(WARNING "Unknown BUILD_ARCH ${BUILD_ARCH} provided. Default to SSE2 for Windows build")
      set(MSVC_BUILD_ARCH "/arch:SSE2")
    endif()

    # The lookup tables are only needed for the mapping above.
    unset(_msvc_arch_avx512)
    unset(_msvc_arch_avx2)
    unset(_msvc_arch_avx)
    unset(_msvc_arch_sse2)
  endif()
endif()
2021-05-01 02:29:23 +03:00
# MSVC can't seem to pick up correct flags otherwise, so the compiler and
# linker flag variables are spelled out explicitly for Windows builds.
if(MSVC)
  add_definitions(-DUSE_SSE2=1) # Supposed to fix something in the sse_mathfun.h but not sure it does

  # /arch switch we're targeting on win32 (MSVC_BUILD_ARCH is chosen from
  # BUILD_ARCH earlier in this file). @TODO variable
  set(INTRINSICS ${MSVC_BUILD_ARCH})

  # Common flags, then the per-configuration variants built on top of them.
  set(CMAKE_CXX_FLAGS "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /MP /GL /DNDEBUG")
  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG")

  # ignores warning LNK4049: locally defined symbol free imported - this comes from zlib
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG /LTCG:incremental /INCREMENTAL:NO /ignore:4049")
  set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT")
  set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRTD")
  set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /LTCG:incremental")
endif()
2021-02-23 17:53:05 +03:00
# Brings in cmake_dependent_option(), which the option declarations in this file use.
include(CMakeDependentOption)
2021-02-11 17:34:27 +03:00
# Project specific cmake options.
# COMPILE_WASM switches the build to the WebAssembly target (off by default).
option(COMPILE_WASM "Compile for WASM" OFF)
2021-05-10 13:28:37 +03:00
# User-selectable (default OFF) for native builds; forced ON whenever
# COMPILE_WASM is enabled.
cmake_dependent_option(USE_WASM_COMPATIBLE_SOURCE "Use wasm compatible sources" OFF
                       "NOT COMPILE_WASM" ON)
2021-06-14 17:02:42 +03:00
# WASM disables a million libraries, which also includes the unit test-library.
# Hence: user-selectable (default OFF) while USE_WASM_COMPATIBLE_SOURCE is set,
# forced ON otherwise.
cmake_dependent_option(COMPILE_UNIT_TESTS "Compile unit tests" OFF
                       "USE_WASM_COMPATIBLE_SOURCE" ON)
Alignments + weak quality scores capability in Service (#46)
* Draft adjustments to API
* Adjustments to docs
* Let's call the word + sentence ranges annotations
* Editing confusing comment on size()
* Fixing compilation for template adjustments for SentenceRanges
* string_view template hacks
This commit shifts AnnotatedBlob into a templated type and gets the
troubled part to compile. All to manage absl::string_view and
std::string_view.
Objective: marian::bergamot stays C++ 11 to pluck and put in marian
code, bergamot-translator somehow flexes C++17. Simplify development in
one place.
* Fixing the wiring: Gets source to build
Runtime errors exist, but AnnotatedBlobs are consistent.
* Bugfix: Matching old-state after factoring AnnotatedBlob in
* Removing vocabs_ from Response.
(For the umpteenth time).
* Alignment API ready in marian::bergamot::Response
* Wiring alignments upto TranslationResult
* Adjustment to get alignments; bergamot-translator-app has alignments available
* Accessing words instead of Ids
This code sets up access of word string_views from annotations instead
of printing Ids. However, we have segfault. This is likely due to
targetRanges not being set, pending from
https://github.com/browsermt/bergamot-translator/issues/25.
Could also be a rogue EOS token which we're filtering for in string_view
annotations, but not so in alignments.
* Switching to browsermt/marian-dev@jp/decode-string-view for targetTokenRanges
* Target word byte range annotations available
Issues corresponding to #25 should be resolved. There is still a
segfault. Could be due to EOS. Pending investigation.
* Bugfix: Tokens for alignments are now through.
Was not EOS.
* browsermt/marian-dev@master
ByteRange changes work downstream and has been merged to master.
Updating submodule to point to master.
* Style and documentation enhancements: response.cpp
* Style and documentation enhancements: TranslationResult.h
* Descriptions for SentenceRanges templating
* Switching to marian-dev@wasm-sync
* AnnotatedBlob can be copy-ctord/copy-assigned
* TranslationResult: Empty ctor + WASM Bindings
Allows empty construction of TranslationResult. Using this empty
constructor, WASM bindings are adjusted. Unsure of the results, maybe
@abhi-agg can test.
* Cosmetic: SentenceRangesT -> Annotation
- SentenceRangesT is renamed to AnnotationT;
- Further comments to explain heavily templated files.
* Response: Cleaning up unused members and adding docs
* Adding quality scores - attempt
* Stub QualityScores
This adjustment adds capability to get "scores", which should
potentially indicate how confident (at least relative in a
target-sentence) should be. This enables writing the code forward for
TranslationResult, and an example quality-score people can be pointed
at.
- These are not between [0,1] yet.
- In addition, guards to check out-of-bounds access have been placed so
illegal accesses are caught early on during development.
* Removing token debug statements
* Reworking Annotation without templates
https://github.com/mozilla/bergamot-translator/issues/8 provides
ByteRanges.
- This ByteRange data-type is used in Annotation and converted
to marian::string_view(=absl::string-view) on demand.
- Since Annotation[using ByteRange] is not bound to anything else, it
can be unit tested. A unit test is added (originally to test
independently for integration after).
- Annotation with ByteRange is now propagated across marian::bergamot
and functionality matched to how it was previously working.
This eliminates the string-view conversion and template code.
* Nit: Removing std::endl flushes
* Bring TranslationResult and Response closer
Helps https://github.com/browsermt/bergamot-translator/issues/53.
In preparation , the data-export types for Quality and Alignment are
pushed down to Response from TranslationResult and computed during
construction. This brings TranslationResult closer to Response, paving
way to avoid having two TranslationResults.
histories_ only remain for marian-decoder replacement usage, which can
be removed in a separate PR.
* Clean up hacks originally added for a unit-test to compile
* Moving Annotation functions to cpp and documenting header file
* Shifting alignments, qualityScore testing capability into main-mts
* Restore Unified API files to previous state
* Adaptations to fix Response with Quality, Alignments to connect to old Unified API
* Missing reset on TranslationResultBindings
* Cleaning up Response documentation to reflect newer code
* Minor adjustments to get build back after main sync
* Marian seems to make available Catch somehow
* Disable COMPILE_BERGAMOT_TESTS for WASM
* Add COMPILE_BERGAMOT_TESTS as a CMakeDependent option
* Use the COMPILE_TESTS flag instead to skip macos.yml
* Trigger unit-tests on GitHub runners for Annotation
* Reordering enable_testing() to before inclusion of test directory
* doc constructs required to operate with alignments
Documents with doxygen compatible documentation for Response,
AnnotatedBlob, Annotation, ByteRange.
Incorporates doxygen compatible documentation for
* Updates ByteRange consistent with general C++
Also little documentation enhancements in the process.
* Updating marian-dev@9337105
* Copy-paste documentation because lazy
* Turn off autoformat and manually edit to fix style changes
* AnnotatedBlob -> AnnotatedText; blob -> text
* text.text in test app renamed
* text of text -> blob of text in places of documentation
2021-03-31 19:41:36 +03:00
# Opt-in switch for building bergamot-tests (off by default).
option(COMPILE_TESTS "Compile bergamot-tests" OFF)
2022-01-02 15:33:30 +03:00
# User-selectable (default ON) while COMPILE_TESTS is enabled; forced OFF otherwise.
cmake_dependent_option(ENABLE_CACHE_STATS "Enable stats on cache" ON
                       "COMPILE_TESTS" OFF)
Alignments + weak quality scores capability in Service (#46)
* Draft adjustments to API
* Adjustments to docs
* Let's call the word + sentence ranges annotations
* Editing confusing comment on size()
* Fixing compilation for template adjustments for SentenceRanges
* string_view template hacks
This commit shifts AnnotatedBlob into a templated type and gets the
troubled part to compile. All to manage absl::string_view and
std::string_view.
Objective: marian::bergamot stays C++ 11 to pluck and put in marian
code, bergamot-translator somehow flexes C++17. Simplify development in
one place.
* Fixing the wiring: Gets source to build
Runtime errors exist, but AnnotatedBlobs are consistent.
* Bugfix: Matching old-state after factoring AnnotatedBlob in
* Removing vocabs_ from Response.
(For the umpteenth time).
* Alignment API ready in marian::bergamot::Response
* Wiring alignments upto TranslationResult
* Adjustment to get alignments; bergamot-translator-app has alignments available
* Accessing words instead of Ids
This code sets up access of word string_views from annotations instead
of printing Ids. However, we have segfault. This is likely due to
targetRanges not being set, pending from
https://github.com/browsermt/bergamot-translator/issues/25.
Could also be a rogue EOS token which we're filtering for in string_view
annotations, but not so in alignments.
* Switching to browsermt/marian-dev@jp/decode-string-view for targetTokenRanges
* Target word byte range annotations available
Issues corresponding to #25 should be resolved. There is still a
segfault. Could be due to EOS. Pending investigation.
* Bugfix: Tokens for alignments are now through.
Was not EOS.
* browsermt/marian-dev@master
ByteRange changes work downstream and has been merged to master.
Updating submodule to point to master.
* Style and documentation enhancements: response.cpp
* Style and documentation enhancements: TranslationResult.h
* Descriptions for SentenceRanges templating
* Switching to marian-dev@wasm-sync
* AnnotatedBlob can be copy-ctord/copy-assigned
* TranslationResult: Empty ctor + WASM Bindings
Allows empty construction of TranslationResult. Using this empty
constructor, WASM bindings are adjusted. Unsure of the results, maybe
@abhi-agg can test.
* Cosmetic: SentenceRangesT -> Annotation
- SentenceRangesT is renamed to AnnotationT;
- Further comments to explain heavily templated files.
* Response: Cleaning up unused members and adding docs
* Adding quality scores - attempt
* Stub QualityScores
This adjustment adds capability to get "scores", which should
potentially indicate how confident (at least relative in a
target-sentence) should be. This enables writing the code forward for
TranslationResult, and an example quality-score people can be pointed
at.
- These are not between [0,1] yet.
- In addition, guards to check out-of-bounds access have been placed so
illegal accesses are caught early on during development.
* Removing token debug statements
* Reworking Annotation without templates
https://github.com/mozilla/bergamot-translator/issues/8 provides
ByteRanges.
- This ByteRange data-type is used in Annotation and converted
to marian::string_view(=absl::string-view) on demand.
- Since Annotation[using ByteRange] is not bound to anything else, it
can be unit tested. A unit test is added (originally to test
independently for integration after).
- Annotation with ByteRange is now propagated across marian::bergamot
and functionality matched to how it was previously working.
This eliminates the string-view conversion and template code.
* Nit: Removing std::endl flushes
* Bring TranslationResult and Response closer
Helps https://github.com/browsermt/bergamot-translator/issues/53.
In preparation , the data-export types for Quality and Alignment are
pushed down to Response from TranslationResult and computed during
construction. This brings TranslationResult closer to Response, paving
way to avoid having two TranslationResults.
histories_ only remain for marian-decoder replacement usage, which can
be removed in a separate PR.
* Clean up hacks originally added for a unit-test to compile
* Moving Annotation functions to cpp and documenting header file
* Shifting alignments, qualityScore testing capability into main-mts
* Restore Unified API files to previous state
* Adaptations to fix Response with Quality, Alignments to connect to old Unified API
* Missing reset on TranslationResultBindings
* Cleaning up Response documentation to reflect newer code
* Minor adjustments to get build back after main sync
* Marian seems to make available Catch somehow
* Disable COMPILE_BERGAMOT_TESTS for WASM
* Add COMPILE_BERGAMOT_TESTS as a CMakeDependent option
* Use the COMPILE_TESTS flag instead to skip macos.yml
* Trigger unit-tests on GitHub runners for Annotation
* Reordering enable_testing() to before inclusion of test directory
* doc constructs required to operate with alignments
Documents with doxygen compatible documentation for Response,
AnnotatedBlob, Annotation, ByteRange.
Incorporates doxygen compatible documentation for
* Updates ByteRange consistent with general C++
Also little documentation enhancements in the process.
* Updating marian-dev@9337105
* Copy-paste documentation because lazy
* Turn off autoformat and manually edit to fix style changes
* AnnotatedBlob -> AnnotatedText; blob -> text
* text.text in test app renamed
* text of text -> blob of text in places of documentation
2021-03-31 19:41:36 +03:00
2021-03-25 14:32:06 +03:00
# Set 3rd party submodule specific cmake options for this project
2021-02-11 17:42:18 +03:00
# Cache defaults handed to the 3rd party submodules: CPU-only build,
# SentencePiece enabled, static linking against non-system libraries.
set(COMPILE_CUDA OFF CACHE BOOL "Compile GPU version")
set(USE_SENTENCEPIECE ON CACHE BOOL "Download and compile SentencePiece")
set(USE_STATIC_LIBS ON CACHE BOOL "Link statically against non-system libs")
2021-05-01 02:29:23 +03:00
# Cache default for the ssplit submodule: build the library only, skip its tests.
set(SSPLIT_COMPILE_LIBRARY_ONLY ON CACHE BOOL "Do not compile ssplit tests")
2021-04-01 18:29:02 +03:00
# Extra submodule cache defaults that apply only when the WASM-compatible
# sources are in use.
if(USE_WASM_COMPATIBLE_SOURCE)
  set(COMPILE_LIBRARY_ONLY ON CACHE BOOL "Build only the Marian library and exclude all executables.")
  set(USE_MKL OFF CACHE BOOL "Compile with MKL support")
  # Setting the ssplit-cpp submodule specific cmake options for wasm
  set(SSPLIT_USE_INTERNAL_PCRE2 ON CACHE BOOL "Use internal PCRE2 instead of system PCRE2")
endif()
2020-11-11 18:19:54 +03:00
2021-02-17 14:48:00 +03:00
# Documentation: https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html
# Ensures the submodules are set correctly during a build.
#
# Only runs when git is available and the source tree is a git checkout.
# Setting GIT_SUBMODULE=OFF skips the update entirely.
find_package(Git QUIET)
if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
  # Update submodules as needed
  option(GIT_SUBMODULE "Check submodules during build" ON)
  if(GIT_SUBMODULE)
    message(STATUS "Submodule update")
    execute_process(
      COMMAND "${GIT_EXECUTABLE}" submodule update --init --recursive
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      RESULT_VARIABLE GIT_SUBMOD_RESULT)
    # A non-zero exit status from git aborts the configure step.
    if(NOT GIT_SUBMOD_RESULT EQUAL "0")
      message(FATAL_ERROR "git submodule update --init failed with ${GIT_SUBMOD_RESULT}, please checkout submodules")
    endif()
  endif()
endif()
2021-02-02 17:41:26 +03:00
2021-05-17 18:34:57 +03:00
# Project versioning: load the GetVersionFromFile module, then report the
# project name and full version string in the configure log.
# NOTE(review): PROJECT_VERSION_STRING_FULL is presumably defined by
# GetVersionFromFile - confirm against that module.
include(GetVersionFromFile)
message(STATUS "Project name: ${PROJECT_NAME}")
message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")
2021-02-11 17:34:27 +03:00
# Emscripten compile and link flags, collected into WASM_COMPILE_FLAGS and
# WASM_LINK_FLAGS when building for WebAssembly.
if(COMPILE_WASM)
  # See https://github.com/emscripten-core/emscripten/blob/main/src/settings.js
  set(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160")

  list(APPEND WASM_COMPILE_FLAGS
    -O3
    # Preserve whitespaces in JS even for release builds; this doesn't increase wasm binary size
    $<$<CONFIG:Release>:-g1>
    # Relevant Debug info only for release with debug builds as this increases wasm binary size
    $<$<CONFIG:RelWithDebInfo>:-g2>
    -fPIC
    -mssse3
    -msimd128
    # -fno-exceptions # Can't do that because spdlog uses exceptions
    -sDISABLE_EXCEPTION_CATCHING=1
    -sSTRICT=1
  )

  list(APPEND WASM_LINK_FLAGS
    -O3
    # Preserve whitespaces in JS even for release builds; this doesn't increase wasm binary size
    $<$<CONFIG:Release>:-g1>
    # Relevant Debug info only for release with debug builds as this increases wasm binary size
    $<$<CONFIG:RelWithDebInfo>:-g2>
    -lembind
    # Save some code, and some speed
    -sASSERTIONS=0
    -sDISABLE_EXCEPTION_CATCHING=1
    # the intgemm functions we call will be undefined since these are linked at
    # runtime by our own javascript.
    -sLLD_REPORT_UNDEFINED
    -sERROR_ON_UNDEFINED_SYMBOLS=0
    # Cause we can!
    -sSTRICT=1
    # You know we need it
    -sALLOW_MEMORY_GROWTH=1
    -sENVIRONMENT=web,worker
    # No need to call main(), there's nothing there.
    -sINVOKE_RUN=0
    # No need for filesystem code in the generated Javascript
    -sFILESYSTEM=0
    # If you turn this on, it will mangle names which makes the dynamic linking hard.
    -sDECLARE_ASM_MODULE_EXPORTS=0
    # Export all of the intgemm functions in case we need to fall back to using the embedded intgemm
    -sEXPORTED_FUNCTIONS=[_int8PrepareAFallback,_int8PrepareBFallback,_int8PrepareBFromTransposedFallback,_int8PrepareBFromQuantizedTransposedFallback,_int8PrepareBiasFallback,_int8MultiplyAndAddBiasFallback,_int8SelectColumnsOfBFallback]
    # Necessary for mozintgemm linking. This prepares the `wasmMemory` variable ahead of time as
    # opposed to delegating that task to the wasm binary itself. This way we can link MozIntGEMM
    # module to the same memory as the main bergamot-translator module.
    -sIMPORTED_MEMORY=1
    # Dynamic execution is either frowned upon or blocked inside browser extensions
    -sDYNAMIC_EXECUTION=0
  )
endif()
2021-02-02 17:39:19 +03:00
Alignments + weak quality scores capability in Service (#46)
* Draft adjustments to API
* Adjustments to docs
* Let's call the word + sentence ranges annotations
* Editing confusing comment on size()
* Fixing compilation for template adjustments for SentenceRanges
* string_view template hacks
This commit shifts AnnotatedBlob into a templated type and gets the
troubled part to compile. All to manage absl::string_view and
std::string_view.
Objective: marian::bergamot stays C++ 11 to pluck and put in marian
code, bergamot-translator somehow flexes C++17. Simplify development in
one place.
* Fixing the wiring: Gets source to build
Runtime errors exist, but AnnotatedBlobs are consistent.
* Bugfix: Matching old-state after factoring AnnotatedBlob in
* Removing vocabs_ from Response.
(For the umpteenth time).
* Alignment API ready in marian::bergamot::Response
* Wiring alignments upto TranslationResult
* Adjustment to get alignments; bergamot-translator-app has alignments available
* Accessing words instead of Ids
This code sets up access of word string_views from annotations instead
of printing Ids. However, we have segfault. This is likely due to
targetRanges not being set, pending from
https://github.com/browsermt/bergamot-translator/issues/25.
Could also be a rogue EOS token which we're filtering for in string_view
annotations, but not so in alignments.
* Switching to browsermt/marian-dev@jp/decode-string-view for targetTokenRanges
* Target word byte range annotations available
Issues corresponding to #25 should be resolved. There is still a
segfault. Could be due to EOS. Pending investigation.
* Bugfix: Tokens for alignments are now through.
Was not EOS.
* browsermt/marian-dev@master
ByteRange changes work downstream and has been merged to master.
Updating submodule to point to master.
* Style and documentation enhancements: response.cpp
* Style and documentation enhancements: TranslationResult.h
* Descriptions for SentenceRanges templating
* Switching to marian-dev@wasm-sync
* AnnotatedBlob can be copy-ctord/copy-assigned
* TranslationResult: Empty ctor + WASM Bindings
Allows empty construction of TranslationResult. Using this empty
constructor, WASM bindings are adjusted. Unsure of the results, maybe
@abhi-agg can test.
* Cosmetic: SentenceRangesT -> Annotation
- SentenceRangesT is renamed to AnnotationT;
- Further comments to explain heavily templated files.
* Response: Cleaning up unused members and adding docs
* Adding quality scores - attempt
* Stub QualityScores
This adjustment adds capability to get "scores", which should
potentially indicate how confident (at least relative in a
target-sentence) should be. This enables writing the code forward for
TranslationResult, and an example quality-score people can be pointed
at.
- These are not between [0,1] yet.
- In addition, guards to check out-of-bounds access have been placed so
illegal accesses are caught early on during development.
* Removing token debug statements
* Reworking Annotation without templates
https://github.com/mozilla/bergamot-translator/issues/8 provides
ByteRanges.
- This ByteRange data-type is used in Annotation and converted
to marian::string_view(=absl::string-view) on demand.
- Since Annotation[using ByteRange] is not bound to anything else, it
can be unit tested. A unit test is added (originally to test
independently for integration after).
- Annotation with ByteRange is now propagated across marian::bergamot
and functionality matched to how it was previously working.
This eliminates the string-view conversion and template code.
* Nit: Removing std::endl flushes
* Bring TranslationResult and Response closer
Helps https://github.com/browsermt/bergamot-translator/issues/53.
In preparation , the data-export types for Quality and Alignment are
pushed down to Response from TranslationResult and computed during
construction. This brings TranslationResult closer to Response, paving
way to avoid having two TranslationResults.
histories_ only remain for marian-decoder replacement usage, which can
be removed in a separate PR.
* Clean up hacks originally added for a unit-test to compile
* Moving Annotation functions to cpp and documenting header file
* Shifting alignments, qualityScore testing capability into main-mts
* Restore Unified API files to previous state
* Adaptations to fix Response with Quality, Alignments to connect to old Unified API
* Missing reset on TranslationResultBindings
* Cleaning up Response documentation to reflect newer code
* Minor adjustments to get build back after main sync
* Marian seems to make available Catch somehow
* Disable COMPILE_BERGAMOT_TESTS for WASM
* Add COMPILE_BERGAMOT_TESTS as a CMakeDependent option
* Use the COMPILE_TESTS flag instead to skip macos.yml
* Trigger unit-tests on GitHub runners for Annotation
* Reordering enable_testing() to before inclusion of test directory
* doc constructs required to operate with alignments
Documents with doxygen compatible documentation for Response,
AnnotatedBlob, Annotation, ByteRange.
Incorporates doxygen compatible documentation for
* Updates ByteRange consistent with general C++
Also little documentation enhancements in the process.
* Updating marian-dev@9337105
* Copy-paste documentation because lazy
* Turn off autoformat and manually edit to fix style changes
* AnnotatedBlob -> AnnotatedText; blob -> text
* text.text in test app renamed
* text of text -> blob of text in places of documentation
2021-03-31 19:41:36 +03:00
# Testing support has to be switched on before the directory containing the
# tests (src/tests) is added, so this must stay ahead of add_subdirectory().
if(COMPILE_TESTS)
  enable_testing()
endif()
2021-02-11 17:34:27 +03:00
# Third-party dependencies are configured first, then the project's own sources.
add_subdirectory(3rd_party)
add_subdirectory(src)
2021-02-26 16:55:30 +03:00
2021-02-11 18:38:36 +03:00
# Front-end selection: the wasm directory for WebAssembly builds, the app
# directory for every other build.
if(COMPILE_WASM)
  add_subdirectory(wasm)
else()
  add_subdirectory(app)
endif()
Alignments + weak quality scores capability in Service (#46)
* Draft adjustments to API
* Adjustments to docs
* Let's call the word + sentence ranges annotations
* Editing confusing comment on size()
* Fixing compilation for template adjustments for SentenceRanges
* string_view template hacks
This commit shifts AnnotatedBlob into a templated type and gets the
troubled part to compile. All to manage absl::string_view and
std::string_view.
Objective: marian::bergamot stays C++ 11 to pluck and put in marian
code, bergamot-translator somehow flexes C++17. Simplify development in
one place.
* Fixing the wiring: Gets source to build
Runtime errors exist, but AnnotatedBlobs are consistent.
* Bugfix: Matching old-state after factoring AnnotatedBlob in
* Removing vocabs_ from Response.
(For the umpteenth time).
* Alignment API ready in marian::bergamot::Response
* Wiring alignments upto TranslationResult
* Adjustment to get alignments; bergamot-translator-app has alignments available
* Accessing words instead of Ids
This code sets up access of word string_views from annotations instead
of printing Ids. However, we have segfault. This is likely due to
targetRanges not being set, pending from
https://github.com/browsermt/bergamot-translator/issues/25.
Could also be a rogue EOS token which we're filtering for in string_view
annotations, but not so in alignments.
* Switching to browsermt/marian-dev@jp/decode-string-view for targetTokenRanges
* Target word byte range annotations available
Issues corresponding to #25 should be resolved. There is still a
segfault. Could be due to EOS. Pending investigation.
* Bugfix: Tokens for alignments are now through.
Was not EOS.
* browsermt/marian-dev@master
ByteRange changes work downstream and has been merged to master.
Updating submodule to point to master.
* Style and documentation enhancements: response.cpp
* Style and documentation enhancements: TranslationResult.h
* Descriptions for SentenceRanges templating
* Switching to marian-dev@wasm-sync
* AnnotatedBlob can be copy-ctord/copy-assigned
* TranslationResult: Empty ctor + WASM Bindings
Allows empty construction of TranslationResult. Using this empty
constructor, WASM bindings are adjusted. Unsure of the results, maybe
@abhi-agg can test.
* Cosmetic: SentenceRangesT -> Annotation
- SentenceRangesT is renamed to AnnotationT;
- Further comments to explain heavily templated files.
* Response: Cleaning up unused members and adding docs
* Adding quality scores - attempt
* Stub QualityScores
This adjustment adds capability to get "scores", which should
potentially indicate how confident (at least relative in a
target-sentence) should be. This enables writing the code forward for
TranslationResult, and an example quality-score people can be pointed
at.
- These are not between [0,1] yet.
- In addition, guards to check out-of-bounds access have been placed so
illegal accesses are caught early on during development.
* Removing token debug statements
* Reworking Annotation without templates
https://github.com/mozilla/bergamot-translator/issues/8 provides
ByteRanges.
- This ByteRange data-type is used in Annotation and converted
to marian::string_view(=absl::string-view) on demand.
- Since Annotation[using ByteRange] is not bound to anything else, it
can be unit tested. A unit test is added (originally to test
independently for integration after).
- Annotation with ByteRange is now propagated across marian::bergamot
and functionality matched to how it was previously working.
This eliminates the string-view conversion and template code.
* Nit: Removing std::endl flushes
* Bring TranslationResult and Response closer
Helps https://github.com/browsermt/bergamot-translator/issues/53.
In preparation , the data-export types for Quality and Alignment are
pushed down to Response from TranslationResult and computed during
construction. This brings TranslationResult closer to Response, paving
way to avoid having two TranslationResults.
histories_ only remain for marian-decoder replacement usage, which can
be removed in a separate PR.
* Clean up hacks originally added for a unit-test to compile
* Moving Annotation functions to cpp and documenting header file
* Shifting alignments, qualityScore testing capability into main-mts
* Restore Unified API files to previous state
* Adaptations to fix Response with Quality, Alignments to connect to old Unified API
* Missing reset on TranslationResultBindings
* Cleaning up Response documentation to reflect newer code
* Minor adjustments to get build back after main sync
* Marian seems to make available Catch somehow
* Disable COMPILE_BERGAMOT_TESTS for WASM
* Add COMPILE_BERGAMOT_TESTS as a CMakeDependent option
* Use the COMPILE_TESTS flag instead to skip macos.yml
* Trigger unit-tests on GitHub runners for Annotation
* Reordering enable_testing() to before inclusion of test directory
* doc constructs required to operate with alignments
Documents with doxygen compatible documentation for Response,
AnnotatedBlob, Annotation, ByteRange.
Incorporates doxygen compatible documentation for
* Updates ByteRange consistent with general C++
Also little documentation enhancements in the process.
* Updating marian-dev@9337105
* Copy-paste documentation because lazy
* Turn off autoformat and manually edit to fix style changes
* AnnotatedBlob -> AnnotatedText; blob -> text
* text.text in test app renamed
* text of text -> blob of text in places of documentation
2021-03-31 19:41:36 +03:00
2022-01-26 23:33:43 +03:00
# Optional Python bindings (off by default); the help text says this is
# intended to be switched on by setup.py.
option(COMPILE_PYTHON "Compile python bindings. Intended to be activated with setup.py" OFF)
if(COMPILE_PYTHON)
  add_subdirectory(bindings/python)
endif()