Allow choosing fine-grained CPU intrinsics as CMake options (#849)

* allow choosing fine-grained CPU intrinsics as CMake options
* inform user that e.g. -DCOMPILE_AVX2=off will be ignored with -march=native if there is compiler support
This commit is contained in:
Marcin Junczys-Dowmunt 2021-04-09 09:02:34 -07:00 committed by GitHub
parent a17ee300f4
commit be65065623
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 57 additions and 24 deletions

View File

@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]
### Added
- Allow for fine-grained CPU intrinsics overrides when BUILD_ARCH != native e.g. -DBUILD_ARCH=x86-64 -DCOMPILE_AVX512=off
- Adds custom bias epilogue kernel.
- Adds support for fusing relu and bias addition into gemms when using cuda 11.
- Better suppression of unwanted output symbols, specifically "\n" from SentencePiece with byte-fallback. Can be deactivated with --allow-special
@ -36,6 +37,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Broken links to MNIST data sets
### Changed
- For BUILD_ARCH != native, enable all intrinsics types by default; individual types can be disabled like this: -DCOMPILE_AVX512=off
- Moved FBGEMM pointer to commit c258054 for gcc 9.3+ fix
- Change compile options a la -DCOMPILE_CUDA_SM35 to -DCOMPILE_KEPLER, -DCOMPILE_MAXWELL,
-DCOMPILE_PASCAL, -DCOMPILE_VOLTA, -DCOMPILE_TURING and -DCOMPILE_AMPERE

View File

@ -124,50 +124,81 @@ else(MSVC)
# Detect supported CPU intrinsics for the current platform. This will
# only be used with BUILD_ARCH=native. For overridden BUILD_ARCH we
# minimally use -msse4.1. This seems to work with MKL.
# force intrinsics as set in the options.
set(INTRINSICS "")
list(APPEND INTRINSICS_NVCC)
option(COMPILE_SSE2 "Compile CPU code with SSE2 support" ON)
option(COMPILE_SSE3 "Compile CPU code with SSE3 support" ON)
option(COMPILE_SSE4_1 "Compile CPU code with SSE4.1 support" ON)
option(COMPILE_SSE4_2 "Compile CPU code with SSE4.2 support" ON)
option(COMPILE_AVX "Compile CPU code with AVX support" ON)
option(COMPILE_AVX2 "Compile CPU code with AVX2 support" ON)
option(COMPILE_AVX512 "Compile CPU code with AVX512 support" ON)
if(BUILD_ARCH STREQUAL "native")
message(STATUS "Building with -march=native and intrinsics will be chosen automatically by the compiler to match the current machine.")
message(STATUS "Checking support for CPU intrinsics")
include(FindSSE)
if(SSE2_FOUND)
message(STATUS "SSE2 support found")
if(SSE2_FOUND AND NOT COMPILE_SSE2)
message(WARNING "SSE2 enabled due to -march=native and -DCOMPILE_SSE2=${COMPILE_SSE2} is ignored.")
endif(SSE2_FOUND AND NOT COMPILE_SSE2)
if(SSE3_FOUND AND NOT COMPILE_SSE3)
message(WARNING "SSE3 enabled due to -march=native and -DCOMPILE_SSE3=${COMPILE_SSE3} is ignored.")
endif(SSE3_FOUND AND NOT COMPILE_SSE3)
if(SSE4_1_FOUND AND NOT COMPILE_SSE4_1)
message(WARNING "SSE4.1 enabled due to -march=native and -DCOMPILE_SSE4_1=${COMPILE_SSE4_1} is ignored.")
endif(SSE4_1_FOUND AND NOT COMPILE_SSE4_1)
if(SSE4_2_FOUND AND NOT COMPILE_SSE4_2)
message(WARNING "SSE4.2 enabled due to -march=native and -DCOMPILE_SSE4_2=${COMPILE_SSE4_2} is ignored.")
endif(SSE4_2_FOUND AND NOT COMPILE_SSE4_2)
if(AVX_FOUND AND NOT COMPILE_AVX)
message(WARNING "AVX enabled due to -march=native and -DCOMPILE_AVX=${COMPILE_AVX} is ignored.")
endif(AVX_FOUND AND NOT COMPILE_AVX)
if(AVX2_FOUND AND NOT COMPILE_AVX2)
message(WARNING "AVX2 enabled due to -march=native and -DCOMPILE_AVX2=${COMPILE_AVX2} is ignored.")
endif(AVX2_FOUND AND NOT COMPILE_AVX2)
if(AVX512_FOUND AND NOT COMPILE_AVX512)
message(WARNING "AVX512 enabled due to -march=native and -DCOMPILE_AVX512=${COMPILE_AVX512} is ignored.")
endif(AVX512_FOUND AND NOT COMPILE_AVX512)
else()
# force building with the requested intrinsics; requires compiler support
message(STATUS "Building with -march=${BUILD_ARCH} and forcing intrisics as requested")
if(COMPILE_SSE2)
message(STATUS "SSE2 support requested")
set(INTRINSICS "${INTRINSICS} -msse2")
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse2)
endif(SSE2_FOUND)
if(SSE3_FOUND)
message(STATUS "SSE3 support found")
endif(COMPILE_SSE2)
if(COMPILE_SSE3)
message(STATUS "SSE3 support requested")
set(INTRINSICS "${INTRINSICS} -msse3")
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse3)
endif(SSE3_FOUND)
if(SSE4_1_FOUND)
message(STATUS "SSE4.1 support found")
endif(COMPILE_SSE3)
if(COMPILE_SSE4_1)
message(STATUS "SSE4.1 support requested")
set(INTRINSICS "${INTRINSICS} -msse4.1")
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.1)
endif(SSE4_1_FOUND)
if(SSE4_2_FOUND)
message(STATUS "SSE4.2 support found")
endif(COMPILE_SSE4_1)
if(COMPILE_SSE4_2)
message(STATUS "SSE4.2 support requested")
set(INTRINSICS "${INTRINSICS} -msse4.2")
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.2)
endif(SSE4_2_FOUND)
if(AVX_FOUND)
message(STATUS "AVX support found")
endif(COMPILE_SSE4_2)
if(COMPILE_AVX)
message(STATUS "AVX support requested")
set(INTRINSICS "${INTRINSICS} -mavx")
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx)
endif(AVX_FOUND)
if(AVX2_FOUND)
message(STATUS "AVX2 support found")
endif(COMPILE_AVX)
if(COMPILE_AVX2)
message(STATUS "AVX2 support requested")
set(INTRINSICS "${INTRINSICS} -mavx2")
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx2)
endif(AVX2_FOUND)
if(AVX512_FOUND)
message(STATUS "AVX512 support found")
endif(COMPILE_AVX2)
if(COMPILE_AVX512)
message(STATUS "AVX512 support requested")
set(INTRINSICS "${INTRINSICS} -mavx512f")
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx512f)
endif(AVX512_FOUND)
else()
set(INTRINSICS "-msse4.1")
endif(COMPILE_AVX512)
endif()
if(USE_FBGEMM)