Merged PR 25919: Sync with public master - no review required

Sync with public master, checking compilation, regression tests etc.
This commit is contained in:
Marcin Junczys-Dowmunt 2022-10-04 00:42:52 +00:00
parent 2c55cdb3c0
commit 1e92cff93d
18 changed files with 1322 additions and 812 deletions

View File

@ -9,7 +9,7 @@ on:
jobs:
build-macos:
name: MacOS CPU-only
runs-on: macos-10.15
runs-on: macos-12
steps:
- name: Checkout
@ -18,10 +18,12 @@ jobs:
submodules: recursive
- name: Install dependencies
run: brew install boost openssl protobuf
run: brew install boost openblas openssl protobuf
- name: Configure CMake
run: |
export LDFLAGS="-L/usr/local/opt/openblas/lib"
export CPPFLAGS="-I/usr/local/opt/openblas/include"
mkdir -p build
cd build
cmake .. \
@ -48,4 +50,4 @@ jobs:
./marian-decoder --version
./marian-scorer --version
./spm_encode --version
ls -hlv $(find . -maxdepth 1 -type f -perm +ugo+x \( -name "marian*" -o -name "spm*" \))

View File

@ -13,42 +13,58 @@ jobs:
include:
# Ubuntu CPU-only build
- name: "Ubuntu CPU-only"
os: ubuntu-18.04
os: ubuntu-20.04
cuda: ""
gcc: 7
gcc: 9
clang: ""
cpu: true
gpu: false
unit_tests: true
examples: false
# Using Clang compiler
- name: "Ubuntu CPU-only clang-14"
os: ubuntu-22.04
cuda: ""
gcc: ""
clang: 14
cpu: true
gpu: false
unit_tests: true
examples: false
# Ubuntu GPU-only build
- name: "Ubuntu GPU-only"
os: ubuntu-18.04
cuda: "10.2"
gcc: 7
os: ubuntu-20.04
cuda: "11.1"
gcc: 9
clang: ""
cpu: false
gpu: true
unit_tests: false
examples: true
# Ubuntu 20.04 supports CUDA 11+
# Ubuntu 22.04 supports CUDA 11.7
# Unit tests and examples are not compiled to save disk space
- name: "Ubuntu 20.04 CUDA 11.2 gcc-9"
os: ubuntu-20.04
cuda: "11.2"
gcc: 9
- name: "Ubuntu 22.04 CUDA 11.7 gcc-11"
os: ubuntu-22.04
cuda: "11.7"
gcc: 11
clang: ""
cpu: false
gpu: true
unit_tests: false
examples: false
# Ubuntu 18.04 supports CUDA 10.1+
# Ubuntu 20.04 supports CUDA 11+
# Unit tests and examples are not compiled to save disk space
- name: "Ubuntu 18.04 CUDA 10.2 gcc-8"
os: ubuntu-18.04
cuda: "10.2"
gcc: 8
- name: "Ubuntu 20.04 CUDA 11.1 gcc-9"
os: ubuntu-20.04
cuda: "11.1"
gcc: 9
clang: ""
cpu: true
gpu: true
unit_tests: false
examples: false
# Ubuntu 18.04 supports CUDA 10.1+
# But it will soon be removed from GitHub workflows
# Ubuntu 16.04 supports CUDA 8+
# But it is no longer available in GitHub workflows
@ -64,10 +80,13 @@ jobs:
# The following packages are already installed on GitHub-hosted runners: build-essential openssl libssl-dev
# No need to install libprotobuf{17,10,9v5} on Ubuntu {20,18,16}.04 because it is installed together with libprotobuf-dev
# Boost is no longer pre-installed on GitHub-hosted runners
# Clang 12, 13 and 14 are pre-installed on the ubuntu-22.04 image
# Note that installation of libunwind-dev is a bug fix for ubuntu-22.04 images on Azure/GitHub-hosted machines
# and is normally not required
- name: Install dependencies
run: |
sudo apt-get install -y libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-system-dev \
gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }}
sudo apt-get install -y libunwind-dev libgoogle-perftools-dev libprotobuf-dev protobuf-compiler libboost-system-dev
[ -z "${{ matrix.gcc }}" ] || sudo apt-get install -y gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }}
# https://software.intel.com/content/www/us/en/develop/articles/installing-intel-free-libs-and-python-apt-repo.html
- name: Install MKL
@ -86,9 +105,10 @@ jobs:
# https://github.com/actions/virtual-environments/issues/687#issuecomment-610471671
- name: Configure CMake
run: |
[ -z "${{ matrix.gcc }}" ] || export CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}
[ -z "${{ matrix.clang }}" ] || export CC=/usr/bin/clang-${{ matrix.clang }} CXX=/usr/bin/clang++-${{ matrix.clang }}
mkdir -p build
cd build
CC=/usr/bin/gcc-${{ matrix.gcc }} CXX=/usr/bin/g++-${{ matrix.gcc }} CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }} \
cmake .. \
-DBoost_ARCHITECTURE=-x64 \
-DCMAKE_BUILD_TYPE=Release \
@ -122,4 +142,4 @@ jobs:
./marian-scorer --version
./marian-server --version
./spm_encode --version
ls -hlv $(find . -maxdepth 1 -type f -executable \( -name "marian*" -o -name "spm*" \))

View File

@ -18,23 +18,26 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Read/restore checkpoints from main process only when training with MPI
- Multi-loss casts type to first loss-type before accumulation (aborted before due to missing cast)
- Throw `ShapeSizeException` if total expanded shape size exceeds numeric capacity of the maximum int value (2^31-1)
- During mini-batch-fitting, catch `ShapeSizeException` and use another sizing hint. Aborts outside mini-batch-fitting.
- During mini-batch-fitting, catch `ShapeSizeException` and use another sizing hint. Aborts outside mini-batch-fitting.
- Fix incorrect/missing gradient accumulation with delay > 1 or large effective batch size of biases of affine operations.
- Fixed case augmentation with multi-threaded reading.
- Scripts using PyYAML now use `safe_load`; see https://msg.pyyaml.org/load
- Fixed check for `fortran_ordering` in cnpy
- Fixed fp16 training/inference with factors-combine concat method
- Fixed clang 13.0.1 compatibility
- Fixed potential vulnerabilities from lxml<4.9.1 or mistune<2.0.31
### Changed
- Parameter synchronization in local sharding model now executes hash checksum before syncing
- Make guided-alignment faster via sparse memory layout, add alignment points for EOS, remove losses other than ce
- Negative `--workspace -N` value allocates workspace as total available GPU memory minus N megabytes.
- Negative `--workspace -N` value allocates workspace as total available GPU memory minus N megabytes.
- Set default parameters for cost-scaling to 8.f 10000 1.f 8.f, i.e. when scaling, scale by 8 and do not try to automatically scale up or down. This seems to be the most stable setting.
- Make guided-alignment faster via sparse memory layout, add alignment points for EOS, remove losses other than ce.
- Changed minimum required C++ standard to C++17
- Faster LSH top-k search on CPU
- Updated intgemm to the latest upstream version
- Parameters in npz files are no longer implicitly assumed to be row-ordered. Non row-ordered parameters will result in an abort
- Updated Catch2 header from 2.10.1 to 2.13.9
## [1.11.0] - 2022-02-08

View File

@ -1,11 +1,9 @@
Marian
======
[![Build Status CUDA 10](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cuda-10.2.svg?label=CUDA%2010.2)](http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cuda-10.2/)
[![Build Status CUDA 11](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cuda-11.4.svg?label=CUDA%2011.4)](http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cuda-11.4/)
[![Build Status CPU](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cpu.svg?label=CPU)](http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cpu/)
[![Tests Status](https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-regression-tests.svg?label=tests)](http://vali.inf.ed.ac.uk/jenkins/job/marian-regression-tests/)
[![Ubuntu](https://github.com/marian-nmt/marian-dev/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/marian-nmt/marian-dev/actions/workflows/ubuntu.yml)
[![Windows](https://github.com/marian-nmt/marian-dev/actions/workflows/windows.yml/badge.svg)](https://github.com/marian-nmt/marian-dev/actions/workflows/windows.yml)
[![MacOS](https://github.com/marian-nmt/marian-dev/actions/workflows/macos.yml/badge.svg)](https://github.com/marian-nmt/marian-dev/actions/workflows/macos.yml)
[![Latest release](https://img.shields.io/github/release/marian-nmt/marian.svg?label=release)](https://github.com/marian-nmt/marian/releases)
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE.md)
[![Twitter](https://img.shields.io/twitter/follow/marian_nmt.svg?style=social)](https://twitter.com/intent/follow?screen_name=marian_nmt)

View File

@ -1 +1 @@
v1.11.12
v1.11.13

1
doc/.gitignore vendored
View File

@ -2,3 +2,4 @@ api
build
doxygen
venv
CONTRIBUTING.md

View File

@ -14,10 +14,11 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
cp $(SOURCEDIR)/../CONTRIBUTING.md $(SOURCEDIR)/
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# Clean target as recommended by Exhale
# https://exhale.readthedocs.io/en/latest/usage.html#optional-create-a-proper-clean-target
clean:
rm -rf doxygen/ api/
rm -rf doxygen/ api/ $(SOURCEDIR)/CONTRIBUTING.md
@$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

View File

@ -114,16 +114,3 @@ exhale_args = {
primary_domain = 'cpp'
highlight_language = 'cpp'
# A trick to include markdown files from outside the source directory using
# 'mdinclude'. Warning: all other markdown files not included via 'mdinclude'
# will be rendered using recommonmark as recommended by Sphinx
from m2r import MdInclude
def setup(app):
# from m2r to make `mdinclude` work
app.add_config_value('no_underscore_emphasis', False, 'env')
app.add_config_value('m2r_parse_relative_links', False, 'env')
app.add_config_value('m2r_anonymous_references', False, 'env')
app.add_config_value('m2r_disable_inline_math', False, 'env')
app.add_directive('mdinclude', MdInclude)

View File

@ -1,7 +1,7 @@
Welcome to Marian's documentation!
==================================
|buildgpu| |buildcpu| |tests| |release| |license|
|ubuntu| |windows| |macos| |release| |license|
Marian is an efficient and self-contained Neural Machine Translation framework with an integrated
automatic differentiation engine based on dynamic computation graphs, written entirely in C++.
@ -19,7 +19,7 @@ This is developer documentation. User documentation is available at https://mari
factors
api/library_index
contributing
CONTRIBUTING
doc_guide
@ -30,17 +30,17 @@ Indices and tables
* :ref:`genindex`
.. |buildgpu| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cuda-10.2.svg?label=CUDAC%20Build
:target: http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cuda-10.2/
:alt: GPU build status
.. |ubuntu| image:: https://github.com/marian-nmt/marian-dev/actions/workflows/ubuntu.yml/badge.svg
:target: https://github.com/marian-nmt/marian-dev/actions/workflows/ubuntu.yml
:alt: Ubuntu build status
.. |buildcpu| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-dev-cpu.svg?label=CPU%20Build
:target: http://vali.inf.ed.ac.uk/jenkins/job/marian-dev-cpu/
:alt: CPU build status
.. |windows| image:: https://github.com/marian-nmt/marian-dev/actions/workflows/windows.yml/badge.svg
:target: https://github.com/marian-nmt/marian-dev/actions/workflows/windows.yml
:alt: Windows build status
.. |tests| image:: https://img.shields.io/jenkins/s/http/vali.inf.ed.ac.uk/jenkins/view/marian/job/marian-regression-tests.svg?label=Tests
:target: http://vali.inf.ed.ac.uk/jenkins/job/marian-regression-tests/
:alt: Tests status
.. |macos| image:: https://github.com/marian-nmt/marian-dev/actions/workflows/macos.yml/badge.svg
:target: https://github.com/marian-nmt/marian-dev/actions/workflows/macos.yml
:alt: MacOS build status
.. |release| image:: https://img.shields.io/github/release/marian-nmt/marian.svg?label=Release
:target: https://github.com/marian-nmt/marian/releases

View File

@ -1,9 +1,9 @@
lxml>=4.9.1
docutils<=0.17
sphinx==2.4.4
breathe==4.13.0
exhale
sphinx_rtd_theme
myst-parser==0.14.0a3
mistune<2.0.0
m2r
sphinx-mathjax-offline
Jinja2<3.1

@ -1 +1 @@
Subproject commit 29f4f7c380c860a95b9375813f4b199b2e6b5556
Subproject commit 25e84383225a29f769e362250654ddf256d06261

@ -1 +1 @@
Subproject commit 4fa9ff55af68bc87d8bd04c9b410f1e1d3874718
Subproject commit 92e116efa369d6ed848c8eb19dfcef8bf7245d71

1993
src/3rd_party/catch.hpp vendored

File diff suppressed because it is too large Load Diff

@ -1 +1 @@
Subproject commit a05a2e51ab524bcee954a39ee72005193f3adf7c
Subproject commit 0eda93a95a4472af0a50c78b5df58e7fc459ac7a

@ -1 +1 @@
Subproject commit 1d7e84aeb3f1ebdc78f6965d79ad3ca3003789fe
Subproject commit 8909c57b5473cb95e197fa7f034edabb474535ba

View File

@ -54,8 +54,10 @@ void ConfigValidator::validateOptionsTranslation() const {
ABORT_IF(models.empty() && configs.empty(),
"You need to provide at least one model file or a config file");
#ifdef COMPILE_CPU
ABORT_IF(get<bool>("model-mmap") && get<size_t>("cpu-threads") == 0,
"Model MMAP is CPU-only, please use --cpu-threads");
#endif
for(const auto& modelFile : models) {
filesystem::Path modelPath(modelFile);

View File

@ -130,7 +130,7 @@ namespace marian {
// @TODO: add checks for empty factor groups until it stops crashing (training already works; decoder still crashes)
io::InputFileStream in(modelPath);
for (WordIndex v = 0; io::getline(in, line); v++) {
for(; io::getline(in, line);) {
utils::splitAny(line, tokBuf, " \t");
factorMapTokenized.push_back(tokBuf);
}

View File

@ -221,7 +221,6 @@ private:
}
void prune(float threshold = 0.f) {
size_t i = 0;
for(auto& probs : data_) {
std::vector<std::pair<float, WordIndex>> sorter;
for(auto& it : probs)
@ -237,8 +236,6 @@ private:
else
break;
}
++i;
}
}