mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-19 02:37:14 +03:00
Allow to choose fine-grained CPU intrinsics on as CMake options (#849)
* allow to choose fine-grained CPU intrinsics on as CMake options * inform user that e.g. -DCOMPILE_AVX2=off will be ignored with -march=native if there is compiler support
This commit is contained in:
parent
a17ee300f4
commit
be65065623
@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- Allow for fine-grained CPU intrinsics overrides when BUILD_ARCH != native e.g. -DBUILD_ARCH=x86-64 -DCOMPILE_AVX512=off
|
||||
- Adds custom bias epilogue kernel.
|
||||
- Adds support for fusing relu and bias addition into gemms when using cuda 11.
|
||||
- Better suppression of unwanted output symbols, specifically "\n" from SentencePiece with byte-fallback. Can be deactivated with --allow-special
|
||||
@ -36,6 +37,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
||||
- Broken links to MNIST data sets
|
||||
|
||||
### Changed
|
||||
- For BUILD_ARCH != native enable all intrinsics types by default, can be disabled like this: -DCOMPILE_AVX512=off
|
||||
- Moved FBGEMM pointer to commit c258054 for gcc 9.3+ fix
|
||||
- Change compile options a la -DCOMPILE_CUDA_SM35 to -DCOMPILE_KEPLER, -DCOMPILE_MAXWELL,
|
||||
-DCOMPILE_PASCAL, -DCOMPILE_VOLTA, -DCOMPILE_TURING and -DCOMPILE_AMPERE
|
||||
|
@ -124,50 +124,81 @@ else(MSVC)
|
||||
|
||||
# Detect supported CPU intrinsics for the current platform. This will
|
||||
# only be used with BUILD_ARCH=native. For overridden BUILD_ARCH we
|
||||
# minimally use -msse4.1. This seems to work with MKL.
|
||||
# force intrinsics as set in the options.
|
||||
set(INTRINSICS "")
|
||||
list(APPEND INTRINSICS_NVCC)
|
||||
|
||||
option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON)
|
||||
option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON)
|
||||
option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON)
|
||||
option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON)
|
||||
option(COMPILE_AVX "Compile CPU code with AVX support" ON)
|
||||
option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON)
|
||||
option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON)
|
||||
|
||||
if(BUILD_ARCH STREQUAL "native")
|
||||
message(STATUS "Building with -march=native and intrinsics will be chosen automatically by the compiler to match the current machine.")
|
||||
message(STATUS "Checking support for CPU intrinsics")
|
||||
include(FindSSE)
|
||||
if(SSE2_FOUND)
|
||||
message(STATUS "SSE2 support found")
|
||||
if(SSE2_FOUND AND NOT COMPILE_SSE2)
|
||||
message(WARNING "SSE2 enabled due to -march=native and -DCOMPILE_SSE2=${COMPILE_SSE2} is ignored.")
|
||||
endif(SSE2_FOUND AND NOT COMPILE_SSE2)
|
||||
if(SSE3_FOUND AND NOT COMPILE_SSE3)
|
||||
message(WARNING "SSE3 enabled due to -march=native and -DCOMPILE_SSE3=${COMPILE_SSE3} is ignored.")
|
||||
endif(SSE3_FOUND AND NOT COMPILE_SSE3)
|
||||
if(SSE4_1_FOUND AND NOT COMPILE_SSE4_1)
|
||||
message(WARNING "SSE4.1 enabled due to -march=native and -DCOMPILE_SSE4_1=${COMPILE_SSE4_1} is ignored.")
|
||||
endif(SSE4_1_FOUND AND NOT COMPILE_SSE4_1)
|
||||
if(SSE4_2_FOUND AND NOT COMPILE_SSE4_2)
|
||||
message(WARNING "SSE4.2 enabled due to -march=native and -DCOMPILE_SSE4_2=${COMPILE_SSE4_2} is ignored.")
|
||||
endif(SSE4_2_FOUND AND NOT COMPILE_SSE4_2)
|
||||
if(AVX_FOUND AND NOT COMPILE_AVX)
|
||||
message(WARNING "AVX enabled due to -march=native and -DCOMPILE_AVX=${COMPILE_AVX} is ignored.")
|
||||
endif(AVX_FOUND AND NOT COMPILE_AVX)
|
||||
if(AVX2_FOUND AND NOT COMPILE_AVX2)
|
||||
message(WARNING "AVX2 enabled due to -march=native and -DCOMPILE_AVX2=${COMPILE_AVX2} is ignored.")
|
||||
endif(AVX2_FOUND AND NOT COMPILE_AVX2)
|
||||
if(AVX512_FOUND AND NOT COMPILE_AVX512)
|
||||
message(WARNING "AVX512 enabled due to -march=native and -DCOMPILE_AVX512=${COMPILE_AVX512} is ignored.")
|
||||
endif(AVX512_FOUND AND NOT COMPILE_AVX512)
|
||||
else()
|
||||
# force building with the requested intrinsics; requires compiler support
|
||||
message(STATUS "Building with -march=${BUILD_ARCH} and forcing intrisics as requested")
|
||||
if(COMPILE_SSE2)
|
||||
message(STATUS "SSE2 support requested")
|
||||
set(INTRINSICS "${INTRINSICS} -msse2")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse2)
|
||||
endif(SSE2_FOUND)
|
||||
if(SSE3_FOUND)
|
||||
message(STATUS "SSE3 support found")
|
||||
endif(COMPILE_SSE2)
|
||||
if(COMPILE_SSE3)
|
||||
message(STATUS "SSE3 support requested")
|
||||
set(INTRINSICS "${INTRINSICS} -msse3")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse3)
|
||||
endif(SSE3_FOUND)
|
||||
if(SSE4_1_FOUND)
|
||||
message(STATUS "SSE4.1 support found")
|
||||
endif(COMPILE_SSE3)
|
||||
if(COMPILE_SSE4_1)
|
||||
message(STATUS "SSE4.1 support requested")
|
||||
set(INTRINSICS "${INTRINSICS} -msse4.1")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.1)
|
||||
endif(SSE4_1_FOUND)
|
||||
if(SSE4_2_FOUND)
|
||||
message(STATUS "SSE4.2 support found")
|
||||
endif(COMPILE_SSE4_1)
|
||||
if(COMPILE_SSE4_2)
|
||||
message(STATUS "SSE4.2 support requested")
|
||||
set(INTRINSICS "${INTRINSICS} -msse4.2")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.2)
|
||||
endif(SSE4_2_FOUND)
|
||||
if(AVX_FOUND)
|
||||
message(STATUS "AVX support found")
|
||||
endif(COMPILE_SSE4_2)
|
||||
if(COMPILE_AVX)
|
||||
message(STATUS "AVX support requested")
|
||||
set(INTRINSICS "${INTRINSICS} -mavx")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx)
|
||||
endif(AVX_FOUND)
|
||||
if(AVX2_FOUND)
|
||||
message(STATUS "AVX2 support found")
|
||||
endif(COMPILE_AVX)
|
||||
if(COMPILE_AVX2)
|
||||
message(STATUS "AVX2 support requested")
|
||||
set(INTRINSICS "${INTRINSICS} -mavx2")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx2)
|
||||
endif(AVX2_FOUND)
|
||||
if(AVX512_FOUND)
|
||||
message(STATUS "AVX512 support found")
|
||||
endif(COMPILE_AVX2)
|
||||
if(COMPILE_AVX512)
|
||||
message(STATUS "AVX512 support requested")
|
||||
set(INTRINSICS "${INTRINSICS} -mavx512f")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx512f)
|
||||
endif(AVX512_FOUND)
|
||||
else()
|
||||
set(INTRINSICS "-msse4.1")
|
||||
endif(COMPILE_AVX512)
|
||||
endif()
|
||||
|
||||
if(USE_FBGEMM)
|
||||
|
Loading…
Reference in New Issue
Block a user