Mirror of https://github.com/marian-nmt/marian.git (synced 2024-11-03 20:13:47 +03:00)
add -DDETERMINISTIC=ON/OFF flag (#912)
* Add -DDETERMINISTIC=ON/OFF flag to CMake
* Use -DDETERMINISTIC=on in GitHub/Azure workflows

Co-authored-by: Roman Grundkiewicz <rgrundkiewicz@gmail.com>
parent: a365bb5ce9
commit: 05ba9e4c31
.github/workflows/ubuntu.yml (vendored): 1 addition
@@ -98,6 +98,7 @@ jobs:
             -DCOMPILE_SERVER=on \
             -DCOMPILE_TESTS=${{ matrix.unit_tests }} \
             -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-${{ matrix.cuda }} \
+            -DDETERMINISTIC=on \
             -DUSE_FBGEMM=${{ matrix.cpu }} \
             -DUSE_SENTENCEPIECE=on \
             -DUSE_STATIC_LIBS=on \
.github/workflows/windows.yml (vendored): 2 additions
@@ -81,6 +81,7 @@ jobs:
           -DCOMPILE_CUDA="${{ matrix.gpu }}"
           -DCOMPILE_SERVER="FALSE"
           -DCOMPILE_TESTS="TRUE"
+          -DDETERMINISTIC="TRUE"
           -DUSE_FBGEMM="TRUE"
           -DUSE_MPI="FALSE"
           -DUSE_NCCL="FALSE"

@@ -110,6 +111,7 @@ jobs:
           -DCOMPILE_CUDA="${{ matrix.gpu }}"
           -DCOMPILE_SERVER="FALSE"
           -DCOMPILE_TESTS="TRUE"
+          -DDETERMINISTIC="TRUE"
           -DUSE_FBGEMM="TRUE"
           -DUSE_MPI="FALSE"
           -DUSE_NCCL="FALSE"
CMakeLists.txt: 10 additions

@@ -31,6 +31,7 @@ option(USE_NCCL "Use NCCL library" ON)
 option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON)
 option(USE_STATIC_LIBS "Link statically against non-system libs" OFF)
 option(GENERATE_MARIAN_INSTALL_TARGETS "Generate Marian install targets (requires CMake 3.12+)" OFF)
+option(DETERMINISTIC "Try to make training results as deterministic as possible (e.g. for testing)" OFF)

 # fbgemm and sentencepiece are both defined with "non-local" installation targets (the source projects don't define them,
 # so we define them in src\3rd_party\CMakeLists.txt), but that isn't supported until CMake 3.12. Prior to CMake 3.12,
@@ -571,6 +572,15 @@ if(USE_STATIC_LIBS)
   set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
 endif()

+if(DETERMINISTIC)
+  message(WARNING "Option DETERMINISTIC=ON: Trying to make training as deterministic as possible, may result in slow-down")
+  add_definitions(-DDETERMINISTIC=1)
+  list(APPEND CUDA_NVCC_FLAGS -DDETERMINISTIC=1; )
+else()
+  add_definitions(-DDETERMINISTIC=0)
+  list(APPEND CUDA_NVCC_FLAGS -DDETERMINISTIC=0; )
+endif()
+
 # Find MPI
 if(USE_MPI)
   # 2.0 refers to MPI2 standard. OpenMPI is an implementation of that standard regardless of the specific OpenMPI version
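Since the build always defines DETERMINISTIC as either 1 or 0 (via add_definitions for host code and CUDA_NVCC_FLAGS for device code), source files can branch on it with #if rather than #ifdef. A minimal standalone C++ sketch of that pattern, illustrative only and not part of the commit:

// Sketch: the build system is expected to define DETERMINISTIC as 0 or 1;
// the fallback below exists only so this snippet also compiles on its own.
#include <cstdio>

#ifndef DETERMINISTIC
#define DETERMINISTIC 0
#endif

int main() {
#if DETERMINISTIC
  std::printf("deterministic build: choosing reproducible, possibly slower code paths\n");
#else
  std::printf("default build: choosing faster, possibly non-deterministic code paths\n");
#endif
  return 0;
}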
Azure Pipelines configuration: 2 additions, 1 comment fix

@@ -470,7 +470,7 @@ stages:


 # Marian is built in the same job where the regression tests are run to make sure that executables
-# is compiled and run on a machine with the same CPU architecture, which is required for
+# are compiled and run on a machine with the same CPU architecture, which is required for
 # compilations with FBGEMM.
 - stage: Tests
   jobs:

@@ -530,6 +530,7 @@ stages:
             -DCMAKE_MAKE_PROGRAM="ninja.exe" ^
             -DCMAKE_TOOLCHAIN_FILE="$(VCPKG_DIR)\scripts\buildsystems\vcpkg.cmake" ^
             -DVCPKG_TARGET_TRIPLET="x64-windows-static" ^
+            -DDETERMINISTIC="TRUE" ^
             ^
             -DCOMPILE_CPU="TRUE" ^
             -DCOMPILE_CUDA="FALSE" ^

@@ -634,6 +635,7 @@ stages:
             -DCMAKE_BUILD_TYPE=slim \
             -DCOMPILE_CPU=on \
             -DCOMPILE_CUDA=off \
+            -DDETERMINISTIC=on \
             -DUSE_FBGEMM=on \
             -DUSE_SENTENCEPIECE=on \
             -DUSE_STATIC_LIBS=on
src/common/config_parser.cpp: 10 additions

@@ -897,8 +897,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) {
   cli.add<bool>("--shuffle-in-ram",
       "Keep shuffled corpus in RAM, do not write to temp file");

+#if DETERMINISTIC
+  cli.add<size_t>("--data-threads",
+      "Number of concurrent threads to use during data reading and processing", 1);
+#else
   cli.add<size_t>("--data-threads",
       "Number of concurrent threads to use during data reading and processing", 8);
+#endif

   // @TODO: Consider making the next two options options of the vocab instead, to make it more local in scope.
   cli.add<size_t>("--all-caps-every",
@@ -919,8 +924,13 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) {
       "Round up batch size to next power of 2 for more efficient training, but this can make batch size less stable. Disable with --mini-batch-round-up=false",
       true);
   } else {
+#if DETERMINISTIC
+    cli.add<size_t>("--data-threads",
+        "Number of concurrent threads to use during data reading and processing", 1);
+#else
     cli.add<size_t>("--data-threads",
         "Number of concurrent threads to use during data reading and processing", 8);
+#endif
   }
   // clang-format on
 }
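In both branches above, the only effect of DETERMINISTIC is the default of --data-threads: 1 in deterministic builds, so the corpus is read and processed in a fixed order, and 8 otherwise. A standalone sketch of the same compile-time default selection; the helper name is made up for illustration and is not Marian API:

#include <cstdio>
#include <cstddef>

#ifndef DETERMINISTIC
#define DETERMINISTIC 0   // assume a non-deterministic build if the flag is absent
#endif

// Hypothetical helper mirroring the change above.
constexpr std::size_t defaultDataThreads() {
#if DETERMINISTIC
  return 1;  // a single reader keeps sentence and batch order reproducible
#else
  return 8;  // several readers for throughput
#endif
}

int main() {
  std::printf("--data-threads default: %zu\n", defaultDataThreads());
  return 0;
}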
src/tensors/gpu/tensor_operators.cu: 1 change

@@ -1163,7 +1163,7 @@ void PasteRows(Tensor out,
   size_t rowsToCopy = indices->size();

   int threads = std::min(MAX_THREADS, (int)cols);
-#if 0 // @TODO: make this configurable with a 'deterministic' flag
+#if DETERMINISTIC
   // If we only use one block, then each core operates on a different column,
   // hence the summation becomes deterministic.
   // However, we only use e.g. 512 cores out of possibly 3000+, so this will be
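This path was previously disabled with #if 0 and is now compiled in deterministic builds. The surrounding comments explain the trade-off: floating-point addition is not associative, so a parallel reduction is bit-reproducible only if the order of the partial sums is fixed, which the single-block scheme (one thread per column) guarantees at the cost of using only a fraction of the GPU. A standalone C++ illustration of the underlying non-associativity, not Marian code:

#include <cstdio>

int main() {
  // The grouping of the additions changes the result, so any reduction whose
  // order can vary between runs can also vary in its output.
  float a = 1.0e8f, b = -1.0e8f, c = 1.0f;
  std::printf("(a + b) + c = %.6f\n", (a + b) + c);  // 1.000000
  std::printf("a + (b + c) = %.6f\n", a + (b + c));  // 0.000000, c is absorbed into b + c
  return 0;
}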