Enable compute75 when using cuda10 (#698)

* Enable compute75 when using cuda10 or newer and disable compute <50 when using CUDA11
* Re-enable deprecated architectures with CUDA11
2024-09-17 09:47:34 +03:00 · 2020-09-01 16:56:24 +01:00 · 2020-09-01 16:56:24 +01:00 · 4d9d15649e
commit 4d9d15649e
parent cd04725639
2 changed files with 35 additions and 4 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ## [Unreleased]

 ### Added
+- Turing and Ampere GPU optimisation support, if the CUDA version supports it.
 - Printing word-level scores in marian-scorer
 - Optimize LayerNormalization on CPU by 6x through vectorization (ffast-math) and fixing performance regression introduced with strides in 77a420
 - Decoding multi-source models in marian-server with --tsv
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -13,10 +13,6 @@ set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")
 # Custom CMake options
 option(COMPILE_CPU "Compile CPU version" ON)
 option(COMPILE_CUDA "Compile GPU version" ON)
-option(COMPILE_CUDA_SM35 "Compile GPU version with SM35 support" ON)
-option(COMPILE_CUDA_SM50 "Compile GPU version with SM50 support" ON)
-option(COMPILE_CUDA_SM60 "Compile GPU version with SM60 support" ON)
-option(COMPILE_CUDA_SM70 "Compile GPU version with SM70 support" ON)
 option(COMPILE_EXAMPLES "Compile examples" OFF)
 option(COMPILE_SERVER "Compile marian-server" OFF)
 option(COMPILE_TESTS "Compile tests" OFF)
@ -243,6 +239,30 @@ if(CUDA_FOUND)
      message(WARNING "On some Unix systems CUDA 10.0+ requires CMake 3.12.2+; you use CMake ${CMAKE_VERSION}")
  endif()

+  # Each COMPILE_CUDA_SM* option is declared only when the detected CUDA_VERSION can target
+  # that architecture, so a default configuration never asks nvcc for an unknown target:
+  #   CUDA 9.0+  : SM35 (default OFF from CUDA 12.0, see below), SM50, SM60, SM70
+  #   CUDA 10.0+ : additionally SM75
+  #   CUDA 11.0+ : additionally SM80
+  # No option is declared here for CUDA versions older than 9.0.
+  # The tiers are cumulative, so every option is declared in exactly one place.
+  # Comparisons are spelled VERSION_EQUAL OR VERSION_GREATER instead of VERSION_GREATER_EQUAL,
+  # because the latter only exists from CMake 3.7 onwards.
+  if(CUDA_VERSION VERSION_EQUAL "9.0" OR CUDA_VERSION VERSION_GREATER "9.0")
+    if(CUDA_VERSION VERSION_LESS "12.0")
+      option(COMPILE_CUDA_SM35 "Compile GPU version with SM35 support" ON)
+    else()
+      # CUDA 12.0 removed compute_35/sm_35 from nvcc; defaulting to ON would break the build.
+      # The option is still declared so that it stays visible and can be set explicitly.
+      option(COMPILE_CUDA_SM35 "Compile GPU version with SM35 support" OFF)
+    endif()
+    option(COMPILE_CUDA_SM50 "Compile GPU version with SM50 support" ON)
+    option(COMPILE_CUDA_SM60 "Compile GPU version with SM60 support" ON)
+    option(COMPILE_CUDA_SM70 "Compile GPU version with SM70 support" ON)
+  endif()
+  if(CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0")
+    option(COMPILE_CUDA_SM75 "Compile GPU version with SM75 support" ON)
+  endif()
+  if(CUDA_VERSION VERSION_EQUAL "11.0" OR CUDA_VERSION VERSION_GREATER "11.0")
+    option(COMPILE_CUDA_SM80 "Compile GPU version with SM80 support" ON)
+  endif()
+
  # SM35 targets: Tesla K40 and above.
  if(COMPILE_CUDA_SM35)
    list(APPEND COMPUTE
      -arch=sm_35
      -gencode=arch=compute_35,code=sm_35)
  endif()
@ -255,6 +275,16 @@ if(CUDA_FOUND)
  # Volta GPUs: request both code=sm_70 and code=compute_70 for arch compute_70.
  if(COMPILE_CUDA_SM70)
    list(APPEND COMPUTE
      -gencode=arch=compute_70,code=sm_70
      -gencode=arch=compute_70,code=compute_70)
  endif()
+  # Turing GPUs: added only when the option is on and CUDA is 10.0 or newer.
+  if(COMPILE_CUDA_SM75 AND (CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0"))
+    list(APPEND COMPUTE
+      -gencode=arch=compute_75,code=sm_75
+      -gencode=arch=compute_75,code=compute_75)
+  endif()
+  # Ampere GPUs: added only when the option is on and CUDA is 11.0 or newer.
+  if(COMPILE_CUDA_SM80 AND (CUDA_VERSION VERSION_EQUAL "11.0" OR CUDA_VERSION VERSION_GREATER "11.0"))
+    list(APPEND COMPUTE
+      -gencode=arch=compute_80,code=sm_80
+      -gencode=arch=compute_80,code=compute_80)
+  endif()

  if(USE_STATIC_LIBS)
    set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})