Batteries included python package (#310)

Imports python bindings and associated sources incubated in
https://github.com/jerinphilip/lemonade to bergamot-translator. Adds
 a pybind11 dependency for python bindings.

Following the import, the python build is integrated into the existing 
CMake based build system here. There is a command-line application 
provided through python which provides the ability to fetch and prepare 
models from model-repositories (like browsermt/students or OPUS).

Wheels built for a few common operating systems are provided via GitHub
releases through automated actions configured to run at tagged semantic
versions and pushes to main.

The documentation for python is also integrated into our existing
documentation setup. Previous documentation GitHub action is now
configured to run behind python builds in Ubuntu 18.04 Python3.7,
in order to pick up the bergamot module packaged as a wheel, and to build
the sphinx documentation using the python module.

Formatting checks (black, and isort with the black profile) and the pytype
type checker are configured for the python component residing in this repository.
This commit is contained in:
Jerin Philip 2022-01-26 20:33:43 +00:00 committed by GitHub
parent 3dde0fe245
commit c0f311a8c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 1433 additions and 128 deletions

View File

@ -26,7 +26,7 @@ jobs:
- name: Run clang-format
run:
python3 run-clang-format.py --style file -r src wasm
python3 run-clang-format.py --style file -r src wasm bindings/python
- name: Prepare build, compilation database etc.

View File

@ -1,89 +0,0 @@
name: Documentation

on:
  push:
    branches: [ main, ci-sandbox ]
    tags: ['v[0-9]+.[0-9]+.[0-9]+']
  pull_request:
    branches: [ '**' ]

jobs:
  api-documentation:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      # Runs javascript to extract push events from both tags and branch (only main, due to workflow trigger)
      # converts refs/<>/<name> -> <name>
      # eg:
      #    refs/heads/main -> main
      #    refs/tags/v0.1.0 -> v0.1.0
      - name: Extract tag name
        id: tag
        uses: actions/github-script@0.2.0
        if: ${{ github.event_name == 'push' }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const args = context.payload.ref.split("/");
            [refs, category, ...rest] = args;
            return rest.join("/");

      # Patches the BERGAMOT_VERSION file used by sphinx-docs at run time to
      # obtain names like 'main' or 'ci-sandbox' to not confuse with version
      # based documentation built separately.
      - name: Deploy-time patch version
        run: |
          echo ${{ steps.tag.outputs.result }} > BERGAMOT_VERSION

      - name: Set up Doxygen
        run: sudo apt-get install -y doxygen

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML does not coerce the version to a float
          # (an unquoted 3.10-style version would parse as 3.1).
          python-version: "3.7"

      - name: Set up dependency cache
        uses: actions/cache@v2
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('doc/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        working-directory: ./doc
        run: python3 -m pip install -r requirements.txt

      - name: Build documentation
        working-directory: ./doc
        run: sphinx-build -b html ./ build/

      - name: Deploy 🚀
        uses: JamesIves/github-pages-deploy-action@4.1.3
        if: ${{ github.event_name == 'push' && github.repository == 'browsermt/bergamot-translator' }}
        with:
          repository-name: 'browsermt/docs'
          branch: gh-pages # The branch the action should deploy to.
          folder: './doc/build/' # The folder the action should deploy.
          target-folder: '${{ steps.tag.outputs.result }}'
          ssh-key: ${{ secrets.BERGAMOT_SSH_PRIVATE_KEY }}

      # This artifact contains the HTML output of Sphinx only.
      # With index.html at the root of the produced zip file.
      # For use for maintainers to download the zip and check render of
      # documentation while generated at pull-request.
      - name: Upload documentation
        uses: actions/upload-artifact@v2
        if: ${{ github.event_name == 'pull_request' }}
        with:
          name: api-docs
          path: ./doc/build/
          if-no-files-found: error

378
.github/workflows/python.yml vendored Normal file
View File

@ -0,0 +1,378 @@
name: "Python Bindings"

# 'on' is quoted: a bare `on` is a YAML 1.1 boolean, and generic parsers
# would otherwise read the key as `true`.
'on':
  push:
    branches:
      - main
      - ci-sandbox
    tags:
      - "v*.*.*"
  pull_request:
    branches:
      - '**'

env:
  # NOTE(review): no job in this workflow appears to reference qt_version
  # (there is no build-macos job here) -- confirm before removing.
  qt_version: "6.2.1" # only used by build-macos
  ccache_basedir: ${{ github.workspace }}
  ccache_dir: "${{ github.workspace }}/.ccache"
  ccache_compilercheck: content
  ccache_compress: 'true'
  ccache_compresslevel: 9
  ccache_maxsize: 200M
  ccache_cmake: -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache

jobs:
  # Build and smoke-test wheels across Ubuntu / Python combinations.
  python-ubuntu:
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: "Ubuntu 18.04 / py3.6"
            os: "ubuntu-18.04"
            python-version: "3.6"
          - name: "Ubuntu 18.04 / py3.7"
            os: "ubuntu-18.04"
            python-version: "3.7"
          - name: "Ubuntu 20.04 / py3.8"
            os: "ubuntu-20.04"
            python-version: "3.8"
          - name: "Ubuntu 20.04 / py3.9"
            os: "ubuntu-20.04"
            python-version: "3.9"
          - name: "Ubuntu 20.04 / py3.10"
            os: "ubuntu-20.04"
            python-version: "3.10"

    name: ${{ matrix.name }}
    runs-on: ${{ matrix.os }}

    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Dependencies
        run: |-
          sudo apt-get update
          sudo apt-get install -y \
            ccache libprotobuf-dev protobuf-compiler \
            python3-setuptools python3-pybind11

      - name: Install MKL
        run: |-
          wget -qO- "https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB" | sudo apt-key add -
          sudo sh -c "echo deb https://apt.repos.intel.com/mkl all main > /etc/apt/sources.list.d/intel-mkl.list"
          sudo apt-get update -o Dir::Etc::sourcelist="/etc/apt/sources.list.d/intel-mkl.list"
          sudo apt-get install -y --no-install-recommends intel-mkl-64bit-2020.0-088

      - name: Generate ccache_vars for ccache based on machine
        shell: bash
        id: ccache_vars
        # The ::set-output workflow command is deprecated and disabled by
        # GitHub Actions; step outputs are written to the GITHUB_OUTPUT
        # environment file instead.
        run: |-
          echo "hash=${{ env.ccache_compilercheck }}" >> $GITHUB_OUTPUT
          echo "timestamp=$(date '+%Y-%m-%dT%H.%M.%S')" >> $GITHUB_OUTPUT

      - name: Cache-op for build-cache through ccache
        uses: actions/cache@v2
        with:
          path: ${{ env.ccache_dir }}
          key: ccache-${{ matrix.name }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }}-${{ steps.ccache_vars.outputs.timestamp }}
          restore-keys: |-
            ccache-${{ matrix.name }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }}
            ccache-${{ matrix.name }}-${{ steps.ccache_vars.outputs.hash }}
            ccache-${{ matrix.name }}

      - name: ccache environment setup
        run: |-
          echo "CCACHE_COMPILER_CHECK=${{ env.ccache_compilercheck }}" >> $GITHUB_ENV
          echo "CCACHE_BASEDIR=${{ env.ccache_basedir }}" >> $GITHUB_ENV
          echo "CCACHE_COMPRESS=${{ env.ccache_compress }}" >> $GITHUB_ENV
          echo "CCACHE_COMPRESSLEVEL=${{ env.ccache_compresslevel }}" >> $GITHUB_ENV
          echo "CCACHE_DIR=${{ env.ccache_dir }}" >> $GITHUB_ENV
          echo "CCACHE_MAXSIZE=${{ env.ccache_maxsize }}" >> $GITHUB_ENV

      - name: ccache prolog
        run: |-
          ccache -s # Print current cache stats
          ccache -z # Zero cache entry

      - name: Inject local version identifier for non tag builds
        if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
        run: |-
          echo "PYTHON_LOCAL_VERSION_IDENTIFIER=$(git rev-parse --short HEAD)" >> $GITHUB_ENV

      - name: setup.py
        run: |-
          python3 -m pip install wheel
          BUILD_ARCH=core-avx-i python3 setup.py bdist_wheel --universal

      # We're happy with just compile for the moment, so cache gets some seeding.
      - name: Install onto root python lib
        run: |-
          python3 -m pip install --ignore-installed dist/bergamot-*.whl

      - name: Fetch models from translateLocally repository.
        run: |-
          python3 -m bergamot download -m en-de-tiny
          python3 -m bergamot download -m de-en-tiny
          python3 -m bergamot ls

      - name: Fetch models from opus repository.
        run: |-
          python3 -m bergamot download -m eng-fin-tiny -r opus
          python3 -m bergamot ls -r opus

      - name: Run the sample python script shipped with module
        run: |-
          python3 -m bergamot translate --model en-de-tiny <<< "Hello World"
          python3 -m bergamot translate --model en-de-tiny de-en-tiny <<< "Hello World"
          python3 -m bergamot translate --model eng-fin-tiny --repository opus <<< "Hello World"

      - name: ccache epilog
        run: 'ccache -s # Print current cache stats'

      - uses: actions/upload-artifact@v2
        with:
          path: ${{ github.workspace }}/dist/bergamot-*.whl

  python-macos:
    name: "MacOS 10.15 / py3.10"
    runs-on: "macos-10.15"
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      - name: Install Dependencies
        run: |-
          brew update
          brew install openblas protobuf ccache boost pybind11
          brew install coreutils findutils libarchive

      - name: Generate ccache_vars for ccache based on machine
        shell: bash
        id: ccache_vars
        # See the note in python-ubuntu: ::set-output is deprecated; write
        # step outputs to the GITHUB_OUTPUT environment file.
        run: |-
          echo "hash=${{ env.ccache_compilercheck }}" >> $GITHUB_OUTPUT
          echo "timestamp=$(date '+%Y-%m-%dT%H.%M.%S')" >> $GITHUB_OUTPUT

      - name: Cache-op for build-cache through ccache
        uses: actions/cache@v2
        with:
          path: ${{ env.ccache_dir }}
          # `id` is not a property of the `job` context (it only exposes
          # status/container/services), so the previous ${{ job.id }}
          # expanded to an empty string and degraded the cache key. Use the
          # job's literal id instead.
          key: ccache-python-macos-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }}-${{ steps.ccache_vars.outputs.timestamp }}
          restore-keys: |-
            ccache-python-macos-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }}
            ccache-python-macos-${{ steps.ccache_vars.outputs.hash }}
            ccache-python-macos

      - name: ccache environment setup
        run: |-
          echo "CCACHE_COMPILER_CHECK=${{ env.ccache_compilercheck }}" >> $GITHUB_ENV
          echo "CCACHE_BASEDIR=${{ env.ccache_basedir }}" >> $GITHUB_ENV
          echo "CCACHE_COMPRESS=${{ env.ccache_compress }}" >> $GITHUB_ENV
          echo "CCACHE_COMPRESSLEVEL=${{ env.ccache_compresslevel }}" >> $GITHUB_ENV
          echo "CCACHE_DIR=${{ env.ccache_dir }}" >> $GITHUB_ENV
          echo "CCACHE_MAXSIZE=${{ env.ccache_maxsize }}" >> $GITHUB_ENV

      - name: ccache prolog
        run: |-
          ccache -s # Print current cache stats
          ccache -z # Zero cache entry

      - name: Apply required patches
        run: |-
          patch -p1 < patches/01-marian-fstream-for-macos.patch

      # Appears to be required per GitHub CI;
      - name: Set MACOSX DEPLOYMENT TARGET via environment variable
        run: |-
          echo "MACOSX_DEPLOYMENT_TARGET=10.15" >> $GITHUB_ENV

      - name: Inject local version identifier for non tag builds
        if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
        run: |-
          echo "PYTHON_LOCAL_VERSION_IDENTIFIER=$(git rev-parse --short HEAD)" >> $GITHUB_ENV

      - name: setup.py
        run: |-
          python3 -m pip install --upgrade packaging wheel
          BUILD_ARCH=core-avx-i python3 setup.py bdist_wheel --universal

      # We're happy with just compile for the moment, so cache gets some seeding.
      - name: Install onto root python lib
        run: |-
          python3 -m pip install dist/bergamot-*.whl

      - name: Fetch models from translateLocally repository.
        run: |-
          python3 -m bergamot download -m en-de-tiny
          python3 -m bergamot download -m de-en-tiny

      - name: Fetch models from opus repository.
        run: |-
          python3 -m bergamot download -m eng-fin-tiny -r opus
          python3 -m bergamot ls -r opus

      - name: Run the sample python script shipped with module
        run: |-
          python3 -m bergamot translate --model en-de-tiny <<< "Hello World"
          python3 -m bergamot translate --model en-de-tiny de-en-tiny <<< "Hello World"
          python3 -m bergamot translate --model eng-fin-tiny --repository opus <<< "Hello World"

      - name: ccache epilog
        run: 'ccache -s # Print current cache stats'

      - uses: actions/upload-artifact@v2
        with:
          path: ${{ github.workspace }}/dist/bergamot-*.whl

  # Try to upload a release using https://github.com/marvinpinto/actions/issues/177#issuecomment-917605585 as a model
  release-latest:
    name: Release Latest Build
    runs-on: ubuntu-latest
    needs: [python-ubuntu, python-macos]
    if: github.ref == 'refs/heads/main'
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v2

      - name: Update GitHub prerelease
        uses: marvinpinto/action-automatic-releases@latest
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          automatic_release_tag: latest
          prerelease: true
          title: "Latest Build"
          files: |
            ${{ github.workspace }}/artifact/*.whl

  release-version:
    name: Release version
    runs-on: ubuntu-latest
    needs: [python-ubuntu, python-macos]
    permissions:
      contents: "write"
      packages: "write"
      pull-requests: "read"
    if: startsWith(github.ref, 'refs/tags/v')
    steps:
      - name: Download artifacts
        uses: actions/download-artifact@v2

      - name: Update GitHub release
        uses: marvinpinto/action-automatic-releases@latest
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          automatic_release_tag: ${{ github.ref_name }}
          prerelease: false
          title: "${{ github.ref_name }}"
          files: |
            ${{ github.workspace }}/artifact/*.whl

  python-checks:
    name: "formatting and typechecks"
    runs-on: "ubuntu-latest"
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      - name: Install Dependencies
        run: |-
          python3 -m pip install black isort pytype

      - name: "Formatting checks: black, isort"
        run: |
          python3 -m black --check bindings/python/ setup.py doc/conf.py
          python3 -m isort --profile black --diff --check bindings/python setup.py doc/conf.py

      - name: "Static typing checks: pytype"
        run: |-
          python3 -m pytype bindings/python

  docs:
    runs-on: ubuntu-18.04
    needs: [python-ubuntu]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      # Runs javascript to extract push events from both tags and branch (only main, due to workflow trigger)
      # converts refs/<>/<name> -> <name>
      # eg:
      #    refs/heads/main -> main
      #    refs/tags/v0.1.0 -> v0.1.0
      - name: Download artifacts
        uses: actions/download-artifact@v2

      - name: Extract tag name
        id: tag
        uses: actions/github-script@0.2.0
        if: ${{ github.event_name == 'push' }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const args = context.payload.ref.split("/");
            [refs, category, ...rest] = args;
            return rest.join("/");

      # Patches the BERGAMOT_VERSION file used by sphinx-docs at run time to
      # obtain names like 'main' or 'ci-sandbox' to not confuse with version
      # based documentation built separately.
      - name: Deploy-time patch version
        run: |
          echo ${{ steps.tag.outputs.result }} > BERGAMOT_VERSION

      - name: Set up Doxygen
        run: sudo apt-get install -y doxygen

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML does not coerce the version to a float.
          python-version: "3.7"

      - name: Set up dependency cache
        uses: actions/cache@v2
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('doc/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        working-directory: ./doc
        run: |
          python3 -m pip install -r requirements.txt
          python3 -m pip install ${{ github.workspace }}/artifact/bergamot-*-cp37*.whl

      - name: Build documentation
        working-directory: ./doc
        run: sphinx-build -b html ./ build/

      - name: Deploy 🚀
        uses: JamesIves/github-pages-deploy-action@4.1.3
        if: ${{ github.event_name == 'push' && github.repository == 'browsermt/bergamot-translator' }}
        with:
          repository-name: 'browsermt/docs'
          branch: gh-pages # The branch the action should deploy to.
          folder: './doc/build/' # The folder the action should deploy.
          target-folder: '${{ steps.tag.outputs.result }}'
          ssh-key: ${{ secrets.BERGAMOT_SSH_PRIVATE_KEY }}

      # This artifact contains the HTML output of Sphinx only.
      # With index.html at the root of the produced zip file.
      # For use for maintainers to download the zip and check render of
      # documentation while generated at pull-request.
      - name: Upload documentation
        uses: actions/upload-artifact@v2
        if: ${{ github.event_name == 'pull_request' }}
        with:
          name: api-docs
          path: ./doc/build/
          if-no-files-found: error

3
.gitmodules vendored
View File

@ -7,3 +7,6 @@
[submodule "bergamot-translator-tests"]
path = bergamot-translator-tests
url = https://github.com/browsermt/bergamot-translator-tests
[submodule "3rd_party/pybind11"]
path = 3rd_party/pybind11
url = https://github.com/pybind/pybind11.git

View File

@ -23,3 +23,7 @@ get_directory_property(CMAKE_C_FLAGS DIRECTORY marian-dev DEFINITION CMAKE_C_FLA
get_directory_property(CMAKE_CXX_FLAGS DIRECTORY marian-dev DEFINITION CMAKE_CXX_FLAGS)
set(CMAKE_C_FLAGS ${CMAKE_C_FLAGS} PARENT_SCOPE)
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} PARENT_SCOPE)
if(COMPILE_PYTHON)
add_subdirectory(pybind11)
endif(COMPILE_PYTHON)

1
3rd_party/pybind11 vendored Submodule

@ -0,0 +1 @@
Subproject commit 9ec1128c7aac3d069a4ec2bd1dfc7f57c6526d1c

View File

@ -131,3 +131,8 @@ else()
add_subdirectory(app)
endif(COMPILE_WASM)
option(COMPILE_PYTHON "Compile python bindings. Intended to be activated with setup.py" OFF)
if(COMPILE_PYTHON)
add_subdirectory(bindings/python)
endif(COMPILE_PYTHON)

2
MANIFEST.in Normal file
View File

@ -0,0 +1,2 @@
include README.md LICENSE

1
bindings/CMakeLists.txt Normal file
View File

@ -0,0 +1 @@
add_subdirectory(python)

View File

@ -0,0 +1,9 @@
# Build the `_bergamot` Python extension module via pybind11.
find_package(Python COMPONENTS Interpreter Development REQUIRED)
message("Using Python: " ${Python_EXECUTABLE})
# pybind11 method:
# SHARED overrides pybind11_add_module's default MODULE library type.
pybind11_add_module(_bergamot SHARED bergamot.cpp)
target_link_libraries(_bergamot PUBLIC pybind11::module pybind11::headers bergamot-translator)
# Include bergamot-translator sources plus the build tree, where the
# generated headers (e.g. project_version.h) land.
target_include_directories(_bergamot PUBLIC ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/src
${CMAKE_BINARY_DIR}/src)

View File

@ -0,0 +1,18 @@
# Package initializer: re-exports the native pybind11 extension module and
# wires up the default model repositories.
import typing

from ._bergamot import *  # type: ignore
from .repository import Aggregator, TranslateLocallyLike

# Aggregates the browsermt (translateLocally) and OPUS model providers;
# queried by (model-provider: str, model-code: str).
REPOSITORY = Aggregator(
    [
        TranslateLocallyLike("browsermt", "https://translatelocally.com/models.json"),
        TranslateLocallyLike(
            "opus", "https://object.pouta.csc.fi/OPUS-MT-models/app/models.json"
        ),
    ]
)
"""
REPOSITORY is a global object that aggregates multiple model-providers to
provide a (model-provider: str, model-code: str) based query mechanism to
get models.
"""

View File

@ -0,0 +1,20 @@
import argparse
import sys
from argparse import ArgumentParser
from .cmds import CMDS, make_parser
def main() -> None:
    """Entry point for ``python -m bergamot``: parse arguments and dispatch.

    Prints usage to stderr and exits with status 1 when no recognised
    action is supplied.
    """
    cli = make_parser()
    arguments = cli.parse_args()

    handler = CMDS.get(arguments.action)
    if handler is None:
        # Unknown or missing sub-command: show help and signal failure.
        cli.print_help(sys.stderr)
        sys.exit(1)

    handler.execute(arguments)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,216 @@
#include <pybind11/iostream.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>
#include <translator/annotation.h>
#include <translator/parser.h>
#include <translator/project_version.h>
#include <translator/response.h>
#include <translator/response_options.h>
#include <translator/service.h>
#include <translator/translation_model.h>
#include <iostream>
#include <string>
#include <vector>
namespace py = pybind11;
using marian::bergamot::AnnotatedText;
using marian::bergamot::ByteRange;
using marian::bergamot::ConcatStrategy;
using marian::bergamot::Response;
using marian::bergamot::ResponseOptions;
using Service = marian::bergamot::AsyncService;
using _Model = marian::bergamot::TranslationModel;
using Model = std::shared_ptr<_Model>;
using Alignment = std::vector<std::vector<float>>;
using Alignments = std::vector<Alignment>;
PYBIND11_MAKE_OPAQUE(std::vector<Response>);
PYBIND11_MAKE_OPAQUE(std::vector<std::string>);
PYBIND11_MAKE_OPAQUE(Alignments);
// Adapter wrapping marian::bergamot::AsyncService for Python: converts the
// service's callback-based async API into blocking batch calls, and
// redirects C++ stdout/stderr into Python's sys.stdout/sys.stderr.
class ServicePyAdapter {
 public:
  ServicePyAdapter(const Service::Config &config) : service_(make_service(config)) {}

  // Builds a model from an in-memory configuration string.
  std::shared_ptr<_Model> modelFromConfig(const std::string &config) {
    auto parsedConfig = marian::bergamot::parseOptionsFromString(config);
    return service_.createCompatibleModel(parsedConfig);
  }

  // Builds a model from a configuration file on disk.
  std::shared_ptr<_Model> modelFromConfigPath(const std::string &configPath) {
    auto config = marian::bergamot::parseOptionsFromFilePath(configPath);
    return service_.createCompatibleModel(config);
  }

  // Translates a batch of inputs through `model`, blocking until every
  // response is available. NOTE: inputs are moved from; the caller's
  // strings are consumed.
  std::vector<Response> translate(Model model, std::vector<std::string> &inputs, const ResponseOptions &options) {
    py::scoped_ostream_redirect outstream(std::cout,                                 // std::ostream&
                                          py::module_::import("sys").attr("stdout")  // Python output
    );
    py::scoped_ostream_redirect errstream(std::cerr,                                 // std::ostream&
                                          py::module_::import("sys").attr("stderr")  // Python output
    );
    // NOTE(review): py::call_guard is a tag type meant for use inside
    // def(...) bindings; constructing it as a local variable is a no-op and
    // does NOT release the GIL. If releasing the GIL here is intended, this
    // should likely be `py::gil_scoped_release` -- confirm before changing.
    py::call_guard<py::gil_scoped_release> gil_guard;

    // Prepare promises, save respective futures. Have callback's in async set
    // value to the promises.
    std::vector<std::future<Response>> futures;
    std::vector<std::promise<Response>> promises;
    promises.resize(inputs.size());

    for (size_t i = 0; i < inputs.size(); i++) {
      // Each callback fulfils its own promise; `i` is captured by value so
      // the index stays stable across iterations.
      auto callback = [&promises, i](Response &&response) { promises[i].set_value(std::move(response)); };
      service_.translate(model, std::move(inputs[i]), std::move(callback), options);
      futures.push_back(std::move(promises[i].get_future()));
    }

    // Wait on all futures to be ready.
    std::vector<Response> responses;
    for (size_t i = 0; i < futures.size(); i++) {
      futures[i].wait();
      responses.push_back(std::move(futures[i].get()));
    }

    return responses;
  }

  // Pivot translation: source -> `first` model -> `second` model. Otherwise
  // mirrors translate() above (same promise/future plumbing, same caveats).
  std::vector<Response> pivot(Model first, Model second, std::vector<std::string> &inputs,
                              const ResponseOptions &options) {
    py::scoped_ostream_redirect outstream(std::cout,                                 // std::ostream&
                                          py::module_::import("sys").attr("stdout")  // Python output
    );
    py::scoped_ostream_redirect errstream(std::cerr,                                 // std::ostream&
                                          py::module_::import("sys").attr("stderr")  // Python output
    );
    // NOTE(review): same no-op call_guard construction as in translate().
    py::call_guard<py::gil_scoped_release> gil_guard;

    // Prepare promises, save respective futures. Have callback's in async set
    // value to the promises.
    std::vector<std::future<Response>> futures;
    std::vector<std::promise<Response>> promises;
    promises.resize(inputs.size());

    for (size_t i = 0; i < inputs.size(); i++) {
      auto callback = [&promises, i](Response &&response) { promises[i].set_value(std::move(response)); };
      service_.pivot(first, second, std::move(inputs[i]), std::move(callback), options);
      futures.push_back(std::move(promises[i].get_future()));
    }

    // Wait on all futures to be ready.
    std::vector<Response> responses;
    for (size_t i = 0; i < futures.size(); i++) {
      futures[i].wait();
      responses.push_back(std::move(futures[i].get()));
    }

    return responses;
  }

 private /*functions*/:
  // Constructs the underlying Service with C++ streams redirected to
  // Python's, so any start-up logging surfaces in the interpreter.
  static Service make_service(const Service::Config &config) {
    py::scoped_ostream_redirect outstream(std::cout,                                 // std::ostream&
                                          py::module_::import("sys").attr("stdout")  // Python output
    );
    py::scoped_ostream_redirect errstream(std::cerr,                                 // std::ostream&
                                          py::module_::import("sys").attr("stderr")  // Python output
    );
    // NOTE(review): same no-op call_guard construction as in translate().
    py::call_guard<py::gil_scoped_release> gil_guard;
    return Service(config);
  }

 private /*data*/:
  Service service_;
};
// Module definition: exposes bergamot-translator's response types, options
// and the service adapter to Python as the `_bergamot` extension module.
PYBIND11_MODULE(_bergamot, m) {
  m.doc() = "Bergamot pybind11 bindings";
  m.attr("__version__") = marian::bergamot::bergamotBuildVersion();

  // Half-open [begin, end) span of bytes into an AnnotatedText's text.
  py::class_<ByteRange>(m, "ByteRange")
      .def(py::init<>())
      .def_readonly("begin", &ByteRange::begin)
      .def_readonly("end", &ByteRange::end)
      .def("__repr__", [](const ByteRange &range) {
        return "{" + std::to_string(range.begin) + ", " + std::to_string(range.end) + "}";
      });

  // Text together with sentence/word segmentation. word()/sentence() return
  // owned std::string copies because the C++ API hands back views into the
  // held text.
  py::class_<AnnotatedText>(m, "AnnotatedText")
      .def(py::init<>())
      .def("numWords", &AnnotatedText::numWords)
      .def("numSentences", &AnnotatedText::numSentences)
      .def("word",
           [](const AnnotatedText &annotatedText, size_t sentenceIdx, size_t wordIdx) -> std::string {
             auto view = annotatedText.word(sentenceIdx, wordIdx);
             return std::string(view.data(), view.size());
           })
      .def("sentence",
           [](const AnnotatedText &annotatedText, size_t sentenceIdx) -> std::string {
             auto view = annotatedText.sentence(sentenceIdx);
             return std::string(view.data(), view.size());
           })
      .def("wordAsByteRange", &AnnotatedText::wordAsByteRange)
      .def("sentenceAsByteRange", &AnnotatedText::sentenceAsByteRange)
      .def_readonly("text", &AnnotatedText::text);

  py::class_<Response>(m, "Response")
      .def(py::init<>())
      .def_readonly("source", &Response::source)
      .def_readonly("target", &Response::target)
      .def_readonly("alignments", &Response::alignments);

  // Opaque vector types (declared with PYBIND11_MAKE_OPAQUE above) bound as
  // dedicated Python sequence classes.
  py::bind_vector<std::vector<std::string>>(m, "VectorString");
  py::bind_vector<std::vector<Response>>(m, "VectorResponse");

  py::enum_<ConcatStrategy>(m, "ConcatStrategy")
      .value("FAITHFUL", ConcatStrategy::FAITHFUL)
      .value("SPACE", ConcatStrategy::SPACE)
      .export_values();

  // Keyword-constructible options controlling what a Response carries.
  py::class_<ResponseOptions>(m, "ResponseOptions")
      .def(
          py::init<>([](bool qualityScores, bool alignment, bool HTML, bool sentenceMappings, ConcatStrategy strategy) {
            return ResponseOptions{qualityScores, alignment, HTML, sentenceMappings, strategy};
          }),
          py::arg("qualityScores") = true, py::arg("alignment") = false, py::arg("HTML") = false,
          py::arg("sentenceMappings") = true, py::arg("concatStrategy") = ConcatStrategy::FAITHFUL)
      .def_readwrite("qualityScores", &ResponseOptions::qualityScores)
      .def_readwrite("HTML", &ResponseOptions::HTML)
      .def_readwrite("alignment", &ResponseOptions::alignment)
      .def_readwrite("concatStrategy", &ResponseOptions::concatStrategy)
      .def_readwrite("sentenceMappings", &ResponseOptions::sentenceMappings);

  // The adapter class above is what Python sees as `Service`.
  py::class_<ServicePyAdapter>(m, "Service")
      .def(py::init<const Service::Config &>())
      .def("modelFromConfig", &ServicePyAdapter::modelFromConfig)
      .def("modelFromConfigPath", &ServicePyAdapter::modelFromConfigPath)
      .def("translate", &ServicePyAdapter::translate)
      .def("pivot", &ServicePyAdapter::pivot);

  // NOTE(review): the init lambda's last parameter is named `logging` in
  // C++ but exposed to Python as py::arg("logLevel"); keep the two in sync.
  py::class_<Service::Config>(m, "ServiceConfig")
      .def(py::init<>([](size_t numWorkers, bool cacheEnabled, size_t cacheSize, size_t cacheMutexBuckets,
                         std::string logging) {
             Service::Config config;
             config.numWorkers = numWorkers;
             config.cacheEnabled = cacheEnabled;
             config.cacheSize = cacheSize;
             config.cacheMutexBuckets = cacheMutexBuckets;
             config.logger.level = logging;
             return config;
           }),
           py::arg("numWorkers") = 1, py::arg("cacheEnabled") = false, py::arg("cacheSize") = 20000,
           py::arg("cacheMutexBuckets") = 1, py::arg("logLevel") = "off")
      .def_readwrite("numWorkers", &Service::Config::numWorkers)
      .def_readwrite("cacheEnabled", &Service::Config::cacheEnabled)
      .def_readwrite("cacheSize", &Service::Config::cacheSize)
      .def_readwrite("cacheMutexBuckets", &Service::Config::cacheMutexBuckets);

  // Bound only so Python can hold shared_ptr<TranslationModel> handles
  // returned by modelFromConfig*; no methods are exposed.
  py::class_<_Model, std::shared_ptr<_Model>>(m, "TranslationModel");
}

177
bindings/python/cmds.py Normal file
View File

@ -0,0 +1,177 @@
import argparse
import sys
from collections import Counter, defaultdict
from . import REPOSITORY, ResponseOptions, Service, ServiceConfig, VectorString
CMDS = {}
def _register_cmd(cmd: str):
"""
Convenience decorator function, which populates the dictionary above with
commands created in a declarative fashion.
"""
def __inner(cls):
CMDS[cmd] = cls
return cls
return __inner
@_register_cmd("translate")
class Translate:
    """CLI action: translate stdin using one model (forward) or two (pivot)."""

    @staticmethod
    def embed_subparser(key: str, subparsers: argparse._SubParsersAction):
        """Attach the ``translate`` sub-command and its arguments to ``subparsers``."""
        translate = subparsers.add_parser(
            key,
            description="translate using a given model. Multiple models mean pivoting",
        )

        translate.add_argument(
            "-m",
            "--model",
            type=str,
            nargs="+",
            help="Path to model file(s) to use in forward or pivot translation",
            required=True,
        )

        translate.add_argument(
            "-r",
            "--repository",
            type=str,
            help="Repository to download model from",
            choices=REPOSITORY.available(),
            default="browsermt",
        )

        translate.add_argument(
            "--num-workers",
            type=int,
            help="Number of worker threads to use to translate",
            default=4,
        )

        translate.add_argument(
            "--log-level",
            type=str,
            default="off",
            help="Set verbosity level of logging: trace, debug, info, warn, err(or), critical, off",
        )

        # Tweak response-options for quick HTML in out via commandline
        # argparse's type=bool is a trap: bool("False") is True, so any
        # supplied value -- including "False" -- enabled the option. Expose
        # these as proper flags instead (absent -> False, present -> True).
        options = translate.add_argument_group("response-options")
        options.add_argument("--html", action="store_true")
        options.add_argument("--alignment", action="store_true")
        options.add_argument("--quality-scores", action="store_true")

    @staticmethod
    def execute(args: argparse.Namespace):
        """Build a Service, load the model(s), translate stdin to stdout."""
        # Build service
        config = ServiceConfig(numWorkers=args.num_workers, logLevel=args.log_level)
        service = Service(config)

        models = [
            service.modelFromConfigPath(
                REPOSITORY.modelConfigPath(args.repository, model)
            )
            for model in args.model
        ]

        # Configure a few options which require how a Response is constructed
        options = ResponseOptions(
            alignment=args.alignment, qualityScores=args.quality_scores, HTML=args.html
        )

        source = sys.stdin.read()
        responses = None
        if len(models) == 1:
            # Single model: direct forward translation.
            [model] = models
            responses = service.translate(model, VectorString([source]), options)
        else:
            # Exactly two models: pivot through the first into the second.
            [first, second] = models
            responses = service.pivot(first, second, VectorString([source]), options)

        for response in responses:
            print(response.target.text, end="")
@_register_cmd("download")
class Download:
    """CLI action: fetch model archives from a configured repository."""

    @staticmethod
    def embed_subparser(key: str, subparsers: argparse._SubParsersAction):
        """Attach the ``download`` sub-command and its arguments to ``subparsers``."""
        download = subparsers.add_parser(
            key, description="Download models from the web."
        )

        download.add_argument(
            "-m",
            "--model",
            type=str,
            required=False,
            default=None,
            help="Fetch model with given code. Use ls to list available models. Optional, if none supplied all models are downloaded.",
        )

        download.add_argument(
            "-r",
            "--repository",
            type=str,
            help="Repository to download model from",
            choices=REPOSITORY.available(),
            default="browsermt",
        )

    @staticmethod
    def execute(args: argparse.Namespace):
        """Download one model when ``--model`` was given, otherwise all of them."""
        if args.model is None:
            # No code supplied: fetch every model the repository knows about,
            # including ones already present locally.
            for code in REPOSITORY.models(args.repository, filter_downloaded=False):
                REPOSITORY.download(args.repository, code)
        else:
            REPOSITORY.download(args.repository, args.model)
@_register_cmd("ls")
class List:
    """CLI action: print models downloaded from a repository."""

    @staticmethod
    def embed_subparser(key: str, subparsers: argparse._SubParsersAction):
        """Attach the ``ls`` sub-command and its arguments to ``subparsers``."""
        ls = subparsers.add_parser(key, description="List available models.")
        ls.add_argument(
            "-r",
            "--repository",
            type=str,
            help="Repository to list models from",
            choices=REPOSITORY.available(),
            default="browsermt",
        )

    @staticmethod
    def execute(args: argparse.Namespace):
        """Print a numbered listing of locally-downloaded models."""
        print("Available models: ")
        identifiers = REPOSITORY.models(args.repository, filter_downloaded=True)
        for counter, identifier in enumerate(identifiers, 1):
            entry = REPOSITORY.model(args.repository, identifier)
            # Right-justified index column, then model code and display name.
            index = " {}.".format(str(counter).rjust(4))
            print(index, entry["code"], entry["name"])
        print()
def make_parser() -> argparse.ArgumentParser:
    """Build the top-level ``bergamot`` parser with one subparser per CMDS entry."""
    parser = argparse.ArgumentParser("bergamot")
    subparsers = parser.add_subparsers(
        title="actions",
        description="The following actions are available through the bergamot package",
        help="To obtain help on how to run these actions supply <cmd> -h.",
        dest="action",
    )

    # Each registered command wires its own arguments onto the subparser.
    for name, command in CMDS.items():
        command.embed_subparser(name, subparsers)

    return parser

View File

@ -0,0 +1,185 @@
import json
import os
import tarfile
import typing as t
from abc import ABC, abstractmethod
from functools import partial
from urllib.parse import urlparse
import requests
from appdirs import AppDirs
from .typing_utils import URL, PathLike
from .utils import download_resource, patch_marian_for_bergamot
APP = "bergamot"
class Repository(ABC):
    """
    An interface for several repositories. Intended to enable interchangable
    use of translateLocally and Mozilla repositories for usage through python.
    """

    @property
    @abstractmethod
    def name(self):
        """Identifier of the repository (e.g. the name used on the CLI)."""
        pass

    @abstractmethod
    def update(self):
        """Updates the model list"""
        pass

    @abstractmethod
    def models(self) -> t.List[str]:
        """returns identifiers for available models"""
        pass

    @abstractmethod
    def model(self, model_identifier: str) -> t.Any:
        """returns the catalogue entry for the given model-identifier"""
        pass

    @abstractmethod
    def modelConfigPath(self, model_identifier: str) -> str:
        """returns modelConfigPath for a given model-identifier"""
        pass

    @abstractmethod
    def download(self, model_identifier: str):
        """Fetches and unpacks the model named by model_identifier."""
        pass
class TranslateLocallyLike(Repository):
    """
    This class implements Repository to fetch models from translateLocally.
    AppDirs is used to standardize directories and further specialization
    happens with translateLocally identifier.
    """

    def __init__(self, name: str, url: str):
        # `url` points at a translateLocally-style models.json inventory.
        self.url = url
        self._name = name
        appDir = AppDirs(APP)
        # All storage directories are namespaced by the repository name.
        f = lambda *args: os.path.join(*args, self._name)
        self.dirs = {
            "cache": f(appDir.user_cache_dir),
            "config": f(appDir.user_config_dir),
            "data": f(appDir.user_data_dir),
            "archive": f(appDir.user_data_dir, "archives"),
            "models": f(appDir.user_data_dir, "models"),
        }

        for directory in self.dirs.values():
            os.makedirs(directory, exist_ok=True)

        self.models_file_path = os.path.join(self.dirs["config"], "models.json")
        # NOTE(review): update() performs a network fetch on construction, so
        # instantiating this class fails without connectivity -- confirm this
        # is intended.
        self.update()

    @property
    def name(self) -> str:
        return self._name

    def update(self) -> None:
        """Re-fetch models.json, persist it, and rebuild the code -> entry index."""
        inventory = requests.get(self.url).text
        with open(self.models_file_path, "w+") as models_file:
            models_file.write(inventory)

        self.data = json.loads(inventory)

        # Update inverse lookup.
        self.data_by_code = {}
        for model in self.data["models"]:
            self.data_by_code[model["code"]] = model

    def models(self, filter_downloaded: bool = True) -> t.List[str]:
        """Return model codes; with filter_downloaded, only those whose
        extracted directory exists on disk."""
        codes = []
        for model in self.data["models"]:
            if filter_downloaded:
                fprefix = self._archive_name_without_extension(model["url"])
                model_dir = os.path.join(self.dirs["models"], fprefix)
                if os.path.exists(model_dir):
                    codes.append(model["code"])
            else:
                codes.append(model["code"])
        return codes

    def modelConfigPath(self, model_identifier: str) -> str:
        """Path of the bergamot-patched config inside the extracted model dir."""
        model = self.model(model_identifier)
        fprefix = self._archive_name_without_extension(model["url"])
        model_dir = os.path.join(self.dirs["models"], fprefix)
        return os.path.join(model_dir, "config.bergamot.yml")

    def model(self, model_identifier: str) -> t.Any:
        """Return the catalogue entry for a model code (KeyError if unknown)."""
        return self.data_by_code[model_identifier]

    def download(self, model_identifier: str):
        """Download, extract, symlink by code, and patch the named model."""
        # Download path
        model = self.model(model_identifier)
        model_archive = "{}.tar.gz".format(model["shortName"])
        save_location = os.path.join(self.dirs["archive"], model_archive)
        download_resource(model["url"], save_location)

        # NOTE(review): the str `model_archive` above is shadowed by the
        # TarFile below; rename one of them for clarity.
        # NOTE(review): extractall() on a downloaded archive is vulnerable to
        # path traversal via malicious member names; validate members or use
        # the extraction filter (`filter="data"`, Python 3.12+).
        with tarfile.open(save_location) as model_archive:
            model_archive.extractall(self.dirs["models"])
            fprefix = self._archive_name_without_extension(model["url"])
            model_dir = os.path.join(self.dirs["models"], fprefix)
            symlink = os.path.join(self.dirs["models"], model["code"])

            print(
                "Downloading and extracting {} into ... {}".format(
                    model["code"], model_dir
                ),
                end=" ",
            )

            # NOTE(review): a pre-existing symlink is left untouched, even if
            # it points at an older extraction -- confirm staleness is OK.
            if not os.path.exists(symlink):
                os.symlink(model_dir, symlink)

            config_path = os.path.join(symlink, "config.intgemm8bitalpha.yml")
            bergamot_config_path = os.path.join(symlink, "config.bergamot.yml")

            # Finally patch so we don't have to reload this again.
            patch_marian_for_bergamot(config_path, bergamot_config_path)

            print("Done.")

    def _archive_name_without_extension(self, url: URL) -> str:
        """Archive basename with its .tar.gz suffix stripped (the extraction
        directory name)."""
        o = urlparse(url)
        fname = os.path.basename(o.path)  # something tar.gz.
        fname_without_extension = fname.replace(".tar.gz", "")
        return fname_without_extension
class Aggregator:
    """Aggregates several Repository instances behind name-based dispatch.

    Every lookup method takes a repository name; unknown names fall back to
    the default repository (the first one supplied at construction).
    """

    def __init__(self, repositories: t.List[Repository]):
        """
        :param repositories: Non-empty list of repositories; the first one
            becomes the fallback for unknown names.
        :raises ValueError: if two repositories share the same name.
        """
        self.repositories = {}
        for repository in repositories:
            if repository.name in self.repositories:
                raise ValueError("Duplicate repository found.")
            self.repositories[repository.name] = repository

        # Default is the first repository supplied (IndexError if empty).
        self.default_repository = repositories[0]

    def update(self, name: str) -> None:
        """Refreshes the inventory of the named (or default) repository."""
        self.repositories.get(name, self.default_repository).update()

    def modelConfigPath(self, name: str, code: str) -> PathLike:
        """Returns the config path for `code` in the named (or default) repository."""
        return self.repositories.get(name, self.default_repository).modelConfigPath(
            code
        )

    def models(self, name: str, filter_downloaded: bool = True) -> t.List[str]:
        """Lists model codes in the named (or default) repository.

        Fix: filter_downloaded is now forwarded; previously it was accepted
        but silently ignored.
        """
        return self.repositories.get(name, self.default_repository).models(
            filter_downloaded
        )

    def model(self, name: str, model_identifier: str) -> t.Any:
        """Returns the inventory entry for model_identifier."""
        return self.repositories.get(name, self.default_repository).model(
            model_identifier
        )

    def available(self):
        """Returns the names of all registered repositories."""
        return list(self.repositories.keys())

    def download(self, name: str, model_identifier: str) -> None:
        """Downloads model_identifier from the named (or default) repository."""
        self.repositories.get(name, self.default_repository).download(model_identifier)

View File

@ -0,0 +1,5 @@
import pathlib
import typing as t

# Path-like values may be plain strings or pathlib.Path objects.
# Fix: this alias was previously declared with t.TypeVar, which is meant for
# generic functions/classes, not for use as a standalone annotation; a Union
# expresses the intended "str OR Path" contract correctly.
PathLike = t.Union[str, pathlib.Path]

# URLs are carried around as plain strings; the alias documents intent only.
URL = str

52
bindings/python/utils.py Normal file
View File

@ -0,0 +1,52 @@
import os
import requests
import yaml
from .typing_utils import URL, PathLike
def download_resource(url: URL, save_location: PathLike, force_download=False, timeout=None):
    """
    Downloads a resource from url into save_location, overwriting only if
    force_download is true.

    :param url: Source URL to fetch.
    :param save_location: Destination path on disk.
    :param force_download: Re-download even if save_location already exists.
    :param timeout: Optional seconds forwarded to requests.get. The default
        (None) preserves the previous wait-indefinitely behaviour; callers
        should pass a value to avoid hanging on unresponsive servers.
    """
    if force_download or not os.path.exists(save_location):
        response = requests.get(url, stream=True, timeout=timeout)
        # Throw an error for bad status codes
        response.raise_for_status()
        with open(save_location, "wb") as handle:
            # Stream in 1 KiB chunks so large archives never sit in memory.
            for block in response.iter_content(1024):
                handle.write(block)
def patch_marian_for_bergamot(
    marian_config_path: PathLike, bergamot_config_path: PathLike, quality: bool = False
):
    """
    Accepts path to a config-file from marian-training and following
    quantization, and adjusts parameters for use in bergamot.

    :param marian_config_path: Path to the marian-generated YAML config.
    :param bergamot_config_path: Destination path for the patched config.
    :param quality: When truthy, also enables quality-score related keys.
    """
    # Load marian_config_path. safe_load is sufficient here: marian configs
    # are plain scalars/maps and should never construct python objects.
    with open(marian_config_path) as fp:
        data = yaml.safe_load(fp)

    # Update a few entries. Things here are hardcoded.
    data.update(
        {
            "ssplit-prefix-file": "",
            "ssplit-mode": "paragraph",
            "max-length-break": 128,
            "mini-batch-words": 1024,
            "workspace": 128,  # shipped models use big workspaces. We'd prefer to keep it low.
            "alignment": "soft",
        }
    )

    if quality:
        # NOTE(review): the boolean `quality` is stored verbatim under the
        # "quality" key — confirm the consumer expects a bool here rather
        # than, say, a model path.
        data.update({"quality": quality, "skip-cost": False})

    # Write-out. Dumping straight to the stream avoids the extra trailing
    # newline that print() previously appended to the file.
    with open(bergamot_config_path, "w") as output_file:
        yaml.dump(data, output_file, sort_keys=False)

View File

@ -6,29 +6,32 @@
# -- Path setup --------------------------------------------------------------
import datetime
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import datetime
import sys
sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath("."))
# -- Project information -----------------------------------------------------
project = 'Bergamot Translator'
copyright = '2021, Bergamot Translator Team'
author = 'Bergamot Translator Team'
project = "Bergamot Translator"
copyright = "2021-2022 Bergamot Translator Team"
author = "Bergamot Translator Team"
# The full version, including alpha/beta/rc tags
# TODO: add GitHub commit hash to the version
version_file = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'BERGAMOT_VERSION')
version_file = os.path.join(
os.path.dirname(os.path.dirname(__file__)), "BERGAMOT_VERSION"
)
with open(os.path.abspath(version_file)) as f:
version = f.read().strip()
release = version + ' ' + str(datetime.date.today())
release = version + " " + str(datetime.date.today())
# -- General configuration ---------------------------------------------------
@ -37,24 +40,26 @@ release = version + ' ' + str(datetime.date.today())
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.mathjax',
'sphinx.ext.todo',
'breathe',
'exhale',
'recommonmark',
"sphinx.ext.mathjax",
"sphinx.ext.todo",
"breathe",
"exhale",
"recommonmark",
"sphinx.ext.autodoc",
"sphinxarg.ext",
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = [
'build',
'doxygen',
'venv',
'README.md',
"build",
"doxygen",
"venv",
"README.md",
]
@ -63,23 +68,23 @@ exclude_patterns = [
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
htmlhelp_basename = 'bergamot-translator'
html_theme = "sphinx_rtd_theme"
htmlhelp_basename = "bergamot-translator"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = ['css/custom.css']
html_static_path = ["_static"]
html_css_files = ["css/custom.css"]
# The base URL which points to the root of the HTML documentation
html_baseurl = 'http://jerinphilip.github.io/bergamot-translator'
html_baseurl = "https://browser.mt/docs"
# -- Extension configuration -------------------------------------------------
breathe_projects = { 'bergamot-translator': './doxygen/xml' }
breathe_default_project = 'bergamot-translator'
breathe_projects = {"bergamot-translator": "./doxygen/xml"}
breathe_default_project = "bergamot-translator"
doxygen_config = """
INPUT = ../src ../app
@ -94,27 +99,28 @@ WARN_IF_UNDOCUMENTED = NO
"""
exhale_args = {
'containmentFolder' : './api',
'rootFileName' : 'library_index.rst',
'rootFileTitle' : 'Library API',
'doxygenStripFromPath' : '..',
'createTreeView' : True,
'exhaleExecutesDoxygen' : True,
'exhaleDoxygenStdin' : doxygen_config.strip(),
"containmentFolder": "./api",
"rootFileName": "library_index.rst",
"rootFileTitle": "Library API",
"doxygenStripFromPath": "..",
"createTreeView": True,
"exhaleExecutesDoxygen": True,
"exhaleDoxygenStdin": doxygen_config.strip(),
}
primary_domain = 'cpp'
highlight_language = 'cpp'
primary_domain = "cpp"
highlight_language = "cpp"
# A trick to include markdown files from outside the source directory using
# 'mdinclude'. Warning: all other markdown files not included via 'mdinclude'
# will be rendered using recommonmark as recommended by Sphinx
from m2r import MdInclude
def setup(app):
# from m2r to make `mdinclude` work
app.add_config_value('no_underscore_emphasis', False, 'env')
app.add_config_value('m2r_parse_relative_links', False, 'env')
app.add_config_value('m2r_anonymous_references', False, 'env')
app.add_config_value('m2r_disable_inline_math', False, 'env')
app.add_directive('mdinclude', MdInclude)
app.add_config_value("no_underscore_emphasis", False, "env")
app.add_config_value("m2r_parse_relative_links", False, "env")
app.add_config_value("m2r_anonymous_references", False, "env")
app.add_config_value("m2r_disable_inline_math", False, "env")
app.add_directive("mdinclude", MdInclude)

View File

@ -17,6 +17,7 @@ This is developer documentation.
marian-integration
wasm-example
api/library_index
python

87
doc/python.rst Normal file
View File

@ -0,0 +1,87 @@
.. Bergamot documentation master file, created by
sphinx-quickstart on Tue Jan 18 17:26:57 2022.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Python
=======
.. toctree::
:maxdepth: 3
:caption: Contents:
This document describes the python bindings for bergamot-translator and the
batteries-included python package supplied for easy use. The library also
provides an entry point via a command-line interface, making it easier for
the average user to get started.
As bergamot-translator is built on top of marian, the python API should also
work as python bindings for marian trained models, if they need to be
integrated into python code-bases.
*Disclaimer*: The package is still in early stages and unstable. Functions and
classes might move around quite fast. Use at your own risk.
Command Line Interface
----------------------
.. argparse::
:ref: bergamot.cmds.make_parser
:prog: bergamot
Module Documentation
--------------------
.. automodule:: bergamot
:members:
:undoc-members:
bergamot-translator
+++++++++++++++++++
The following components are exported from C++ via python-bindings and form
library primitives that can be used to build translation workflows.
.. autoclass:: bergamot.ServiceConfig
:members:
:undoc-members:
.. autoclass:: bergamot.Service
:members:
:undoc-members:
.. autoclass:: bergamot.TranslationModel
:members:
:undoc-members:
.. autoclass:: bergamot.ResponseOptions
:members:
:undoc-members:
Model Inventory
+++++++++++++++
.. autoclass:: bergamot.repository.Repository
:members:
:undoc-members:
.. autoclass:: bergamot.repository.TranslateLocallyLike
:members:
:undoc-members:
Utilities
+++++++++
.. autofunction:: bergamot.utils.patch_marian_for_bergamot
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

View File

@ -5,3 +5,4 @@ sphinx_rtd_theme
mistune<2.0.0
recommonmark
m2r
sphinx-argparse

View File

@ -0,0 +1,13 @@
diff --git a/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp b/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp
index 7b1173931df977e69021f3995fa064a492f89d38..948e91eaf99b6b29ce41cf793fba6717f3b5f5b5 100644
--- a/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp
+++ b/3rd_party/marian-dev/src/3rd_party/zstr/strict_fstream.hpp
@@ -27,7 +27,7 @@ static std::string strerror()
{
buff = "Unknown error";
}
-#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || __APPLE__) && ! _GNU_SOURCE
+#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || __APPLE__)
// XSI-compliant strerror_r()
if (strerror_r(errno, &buff[0], buff.size()) != 0)
{

210
setup.py Normal file
View File

@ -0,0 +1,210 @@
import io
import os
import re
import subprocess
import sys
from setuptools import Command, Extension, find_packages, setup
from setuptools.command.build_ext import build_ext
from setuptools.command.build_py import build_py as _build_py
# Convert distutils Windows platform specifiers to CMake -A arguments
# (consumed below when selecting the architecture for the MSVC generator).
PLAT_TO_CMAKE = {
    "win32": "Win32",
    "win-amd64": "x64",
    "win-arm32": "ARM",
    "win-arm64": "ARM64",
}
# A CMakeExtension needs a sourcedir instead of a file list.
# The name must be the _single_ output extension from the CMake build.
# If you need multiple extensions, see scikit-build.
class CMakeExtension(Extension):
    """A setuptools Extension backed by a CMake source directory.

    The name must be the _single_ output extension produced by the CMake
    build; no source files are listed because CMake owns compilation.
    """

    def __init__(self, name, sourcedir=""):
        super().__init__(name, sources=[])
        # Resolve to an absolute path so cwd changes during the build are safe.
        self.sourcedir = os.path.abspath(sourcedir)
class CMakeBuild(build_ext):
    """build_ext replacement that delegates compilation to CMake.

    Configures the project and builds only the `_bergamot` target, steering
    the produced library into the directory setuptools expects.
    """

    def build_extension(self, ext):
        # Directory where setuptools expects the finished extension module.
        extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name)))

        # required for auto-detection & inclusion of auxiliary "native" libs
        if not extdir.endswith(os.path.sep):
            extdir += os.path.sep

        # Honour self.debug when setuptools sets it; otherwise the DEBUG env var.
        debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug
        cfg = "Debug" if debug else "Release"

        # CMake lets you override the generator - we need to check this.
        # Can be set with Conda-Build, for example.
        cmake_generator = os.environ.get("CMAKE_GENERATOR", "")

        # Target architecture forwarded to the CMake build (-DBUILD_ARCH).
        build_arch = os.environ.get("BUILD_ARCH", "native")

        # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON
        # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code
        # from Python.
        cmake_args = [
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}",
            f"-DPYTHON_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={cfg}",  # not used on MSVC, but no harm
            f"-DCMAKE_CXX_COMPILER_LAUNCHER=ccache",
            f"-DCMAKE_C_COMPILER_LAUNCHER=ccache",
            f"-DCOMPILE_PYTHON=ON",
            f"-DSSPLIT_USE_INTERNAL_PCRE2=ON",
            f"-DBUILD_ARCH={build_arch}",
        ]
        # Build only the python-bindings target, not the whole project.
        build_args = ["-t", "_bergamot"]

        # Adding CMake arguments set as environment variable
        # (needed e.g. to build for ARM OSx on conda-forge)
        if "CMAKE_ARGS" in os.environ:
            cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item]

        # In this example, we pass in the version to C++. You might not need to.
        cmake_args += [f"-DEXAMPLE_VERSION_INFO={self.distribution.get_version()}"]

        if self.compiler.compiler_type != "msvc":
            # Using Ninja-build since it a) is available as a wheel and b)
            # multithreads automatically. MSVC would require all variables be
            # exported for Ninja to pick it up, which is a little tricky to do.
            # Users can override the generator with CMAKE_GENERATOR in CMake
            # 3.15+.
            if not cmake_generator:
                try:
                    import ninja  # noqa: F401

                    cmake_args += ["-GNinja"]
                except ImportError:
                    pass

        else:
            # Single config generators are handled "normally"
            single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})

            # CMake allows an arch-in-generator style for backward compatibility
            contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"})

            # Specify the arch if using MSVC generator, but only if it doesn't
            # contain a backward-compatibility arch spec already in the
            # generator name.
            if not single_config and not contains_arch:
                cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]]

            # Multi-config generators have a different way to specify configs
            if not single_config:
                cmake_args += [
                    f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}"
                ]
                build_args += ["--config", cfg]

        if sys.platform.startswith("darwin"):
            # Cross-compile support for macOS - respect ARCHFLAGS if set
            archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", ""))
            if archs:
                cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]

        # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
        # across all generators.
        if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
            # self.parallel is a Python 3 only way to set parallel jobs by hand
            # using -j in the build_ext call, not supported by pip or PyPA-build.
            if hasattr(self, "parallel") and self.parallel:
                # CMake 3.12+ only.
                build_args += [f"-j{self.parallel}"]

        if not os.path.exists(self.build_temp):
            os.makedirs(self.build_temp)

        # Configure, then build, inside the scratch build directory.
        print("cmake", ext.sourcedir, " ".join(cmake_args))
        subprocess.check_call(
            ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp
        )
        subprocess.check_call(
            ["cmake", "--build", "."] + build_args, cwd=self.build_temp
        )
# Directory containing this setup.py; all paths below are resolved against it.
here = os.path.abspath(os.path.dirname(__file__))

# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
with io.open(os.path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = "\n" + f.read()

# Version is sourced from the BERGAMOT_VERSION file shared with the CMake build.
version = None
with open(os.path.join(here, "BERGAMOT_VERSION")) as f:
    version = f.read().strip()

# Optional PEP 440 local-version suffix ("+<suffix>"), e.g. set by CI builds.
suffix = os.environ.get("PYTHON_LOCAL_VERSION_IDENTIFIER", None)
if suffix is not None:
    version = "{}+{}".format(version, suffix)
class UploadCommand(Command):
    """Support setup.py upload: clean, build sdist/wheel, tag, and twine-upload."""

    description = "Build and publish the package."
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print("\033[1m{0}\033[0m".format(s))

    def initialize_options(self):
        # Required by the Command interface; no options to configure.
        pass

    def finalize_options(self):
        pass

    def run(self):
        # Fix: the original called rmtree without ever importing it, so this
        # command crashed with NameError the moment it ran. Imported locally
        # because shutil is only needed for this maintenance command.
        from shutil import rmtree

        try:
            self.status("Removing previous builds…")
            rmtree(os.path.join(here, "dist"))
        except OSError:
            # No previous build directory — nothing to clean.
            pass

        self.status("Building Source and Wheel (universal) distribution…")
        os.system("{0} setup.py sdist bdist_wheel --universal".format(sys.executable))

        self.status("Pushing git tags…")
        os.system("git push --tags")

        self.status("Uploading the package to PyPI via Twine…")
        os.system("twine upload dist/*")

        sys.exit()
class build_py(_build_py):
    """build_py variant that forces the native extension to build first."""

    def run(self):
        # Compile the CMake-built extension before python sources are copied,
        # so the generated artifact is available for packaging.
        self.run_command("build_ext")
        return _build_py.run(self)
# The information here can also be placed in setup.cfg - better separation of
# logic and declaration, and simpler if you include description/version in a file.
setup(
    name="bergamot",
    version=version,
    author="Jerin Philip",
    author_email="jerinphilip@live.in",
    url="https://github.com/browsermt/bergamot-translator/",
    description="Bergamot translator python binding.",
    # Fix: the README text was read into `long_description` above but an
    # empty string was passed here, so PyPI showed no project description.
    long_description=long_description,
    long_description_content_type="text/markdown",
    ext_modules=[CMakeExtension("bergamot/_bergamot")],
    # Fix: register UploadCommand so `setup.py upload` actually works;
    # the class was previously defined but never wired in.
    cmdclass={"build_py": build_py, "build_ext": CMakeBuild, "upload": UploadCommand},
    zip_safe=False,
    extras_require={"test": ["pytest>=6.0"]},
    license_files=("LICENSE",),
    python_requires=">=3.6",
    packages=["bergamot"],
    package_dir={"bergamot": "bindings/python"},
    install_requires=["requests", "pyyaml", "appdirs"],
    entry_points={
        "console_scripts": [
            "bergamot = bergamot.__main__:main",
        ],
    },
)