cmake_minimum_required(VERSION 3.5.1)

# Look for additional CMake modules (find/helper scripts) in ./cmake.
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

# CMP0074 was introduced in CMake 3.12; only opt in when the running CMake knows it.
if(POLICY CMP0074)
  cmake_policy(SET CMP0074 NEW)
endif()

project(marian CXX C)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(BUILD_ARCH native CACHE STRING "Compile for this CPU architecture.")

# ----------------------------------------------------------------------------
# Custom CMake options
# ----------------------------------------------------------------------------

# What to build
option(COMPILE_CPU      "Compile CPU version"   ON)
option(COMPILE_CUDA     "Compile GPU version"   ON)
option(COMPILE_EXAMPLES "Compile examples"      OFF)
option(COMPILE_SERVER   "Compile marian-server" OFF)
option(COMPILE_TESTS    "Compile tests"         OFF)

# Apple Accelerate defaults to ON on Apple platforms and OFF everywhere else.
set(_marian_accelerate_default OFF)
if(APPLE)
  set(_marian_accelerate_default ON)
endif()
option(USE_APPLE_ACCELERATE "Compile with Apple Accelerate" ${_marian_accelerate_default})
unset(_marian_accelerate_default)

# Optional tools and third-party libraries
option(USE_CCACHE        "Use ccache compiler cache (https://ccache.dev)" OFF)
option(USE_CUDNN         "Use CUDNN library"                              OFF)
option(USE_DOXYGEN       "Build documentation with Doxygen"               ON)
option(USE_FBGEMM        "Use FBGEMM"                                     OFF)
option(USE_MKL           "Compile with MKL support"                       ON)
option(USE_MPI           "Use MPI library"                                OFF)
option(USE_NCCL          "Use NCCL library"                               ON)
option(USE_SENTENCEPIECE "Download and compile SentencePiece"             ON)
option(USE_STATIC_LIBS   "Link statically against non-system libs"        OFF)

# Packaging and reproducibility
option(GENERATE_MARIAN_INSTALL_TARGETS "Generate Marian install targets (requires CMake 3.12+)" OFF)
option(DETERMINISTIC "Try to make training results as deterministic as possible (e.g. for testing)" OFF)

# fbgemm and sentencepiece are both defined with "non-local" installation targets (the source
# projects don't define them, so we define them in src\3rd_party\CMakeLists.txt), but that isn't
# supported until CMake 3.12. Prior to CMake 3.12, targets could only be install(...)ed in the
# same CMakeLists.txt they were defined in. We currently target CMake 3.5.1 as our minimum
# supported CMake version, so GENERATE_MARIAN_INSTALL_TARGETS exists to provide compatibility
# by allowing install targets to be disabled.
# Install targets need CMake 3.12+. On older versions the request is downgraded to a warning
# and the cache entry is forced OFF; otherwise the install/export machinery is pulled in.
if(GENERATE_MARIAN_INSTALL_TARGETS)
  if(CMAKE_VERSION VERSION_LESS "3.12")
    message(WARNING "Marian install targets cannot be generated on CMake <3.12. Please upgrade your CMake version or set GENERATE_MARIAN_INSTALL_TARGETS=OFF to remove this warning. Disabling installation targets.")
    set(GENERATE_MARIAN_INSTALL_TARGETS OFF
        CACHE BOOL "Forcing disabled installation targets due to CMake <3.12." FORCE)
  else()
    # Default values for installation directories (all platforms, even if named GNU)
    include(GNUInstallDirs)
    # Make the `install` target also install required system libraries (e.g. Windows SDK)
    include(InstallRequiredSystemLibraries)
    # Helper to create relocatable packages
    include(CMakePackageConfigHelpers)

    # Installation target
    install(EXPORT marian-targets
            DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)
  endif()
endif()

# Route compiler invocations through ccache (https://ccache.dev) when requested and available.
if(USE_CCACHE)
  find_program(CCACHE_PROGRAM ccache)
  if(NOT CCACHE_PROGRAM)
    message(WARNING "Compilation with ccache requested but no ccache found.")
  else()
    message(STATUS "Will be using ccache for faster repeat compilation (use cmake -DUSE_CCACHE=off to disable).")
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
  endif()
endif()

# Project versioning: GetVersionFromFile is a project-local module; it is expected to provide
# PROJECT_VERSION_STRING_FULL, which is printed below.
find_package(Git QUIET)
include(GetVersionFromFile)

message(STATUS "Project name: ${PROJECT_NAME}")
message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")

# Make sure all git submodules are checked out; the result of the command is not inspected.
execute_process(
  COMMAND git submodule update --init --recursive --no-fetch
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

# Default to a Release build when nothing was requested. Note that with a CMake MSVC build,
# CMAKE_BUILD_TYPE is automatically derived from the key 'configurationType' in the
# CMakeSettings.json configurations.
if(NOT CMAKE_BUILD_TYPE)
  message(WARNING "CMAKE_BUILD_TYPE not set; setting to Release")
  set(CMAKE_BUILD_TYPE "Release")
endif()
###############################################################################
# Set compilation flags
if(MSVC)
  # ALL_WARNINGS is consumed in src/CMakeLists.txt on a per-target basis.
  list(APPEND ALL_WARNINGS /WX /W4)

  # Globally disabled bogus warnings for CPU intrinsics and Protobuf:
  #   C4310: cast truncates constant value
  #   C4324: 'marian::cpu::int16::`anonymous-namespace'::ScatterPut': structure was padded due to alignment specifier
  #   C4702: unreachable code; note it is also disabled globally in the VS project file
  #   C4996: warning STL4015: The std::iterator class template (used as a base class to provide typedefs) is deprecated in C++17
  #   C4100: 'identifier' : unreferenced formal parameter (only suppressed when SentencePiece is compiled in)
  set(DISABLE_GLOBALLY "/wd\"4310\" /wd\"4324\" /wd\"4702\" /wd\"4996\"")
  if(USE_SENTENCEPIECE)
    string(APPEND DISABLE_GLOBALLY " /wd\"4100\"")
  endif()

  add_definitions(-DUSE_SSE2=1)

  # Instruction set passed to the compiler. "/arch:AVX" and "/arch:AVX512" are the
  # alternatives that were considered here.
  set(INTRINSICS "/arch:AVX2")

  # /bigobj is necessary for expression_operators.cpp.
  # See https://stackoverflow.com/questions/15110580/penalty-of-the-msvs-compiler-flag-bigobj
  set(CMAKE_CXX_FLAGS         "/EHsc /DWIN32 /D_WINDOWS /DUNICODE /D_UNICODE /D_CRT_NONSTDC_NO_WARNINGS /D_CRT_SECURE_NO_WARNINGS /bigobj ${DISABLE_GLOBALLY}")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} /MT /O2 ${INTRINSICS} /Zi /MP /GL /DNDEBUG")
  set(CMAKE_CXX_FLAGS_DEBUG   "${CMAKE_CXX_FLAGS} /MTd /Od /Ob0 ${INTRINSICS} /RTC1 /Zi /D_DEBUG")

  # /ignore:4049 silences "LNK4049: locally defined symbol free imported", which comes from zlib.
  string(APPEND CMAKE_EXE_LINKER_FLAGS " /DEBUG /LTCG:incremental /INCREMENTAL:NO /ignore:4049")
  set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRT")
  set(CMAKE_EXE_LINKER_FLAGS_DEBUG   "${CMAKE_EXE_LINKER_FLAGS} /NODEFAULTLIB:MSVCRTD")
  string(APPEND CMAKE_STATIC_LINKER_FLAGS " /LTCG:incremental")

  # NOTE(review): the library is appended by its bare name, not via the find_library result.
  find_library(SHLWAPI Shlwapi.lib)
  list(APPEND EXT_LIBS SHLWAPI)

  if(USE_FBGEMM)
    # FBGEMM on Windows can be compiled only statically via CMake
    if(NOT USE_STATIC_LIBS)
      message(FATAL_ERROR "FATAL ERROR: FBGEMM must be compiled statically on Windows,  add -DUSE_STATIC_LIBS=on to the cmake command")
    endif()
    list(APPEND EXT_LIBS fbgemm)
    add_definitions(-DUSE_FBGEMM=1 -DFBGEMM_STATIC=1)
  endif()
else()
  # Check we are using at least g++ 5.0
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
    message(FATAL_ERROR "FATAL ERROR: Compiling Marian requires at least g++ 5.0, your version is ${CMAKE_CXX_COMPILER_VERSION}")
  endif()

  # Detect supported CPU intrinsics for the current platform. Detection is only used with
  # BUILD_ARCH=native. For an overridden BUILD_ARCH the intrinsics are forced as set in the
  # COMPILE_<ISA> options.
  set(INTRINSICS "")
  list(APPEND INTRINSICS_NVCC)

  # One row per instruction set: <option/variable suffix>|<display name>|<compiler flag>
  set(_marian_isa_table
      "SSE2|SSE2|-msse2"
      "SSE3|SSE3|-msse3"
      "SSE4_1|SSE4.1|-msse4.1"
      "SSE4_2|SSE4.2|-msse4.2"
      "AVX|AVX|-mavx"
      "AVX2|AVX2|-mavx2"
      "AVX512|AVX512|-mavx512f")

  # Declare the COMPILE_<ISA> options (all default to ON).
  foreach(_marian_isa_row ${_marian_isa_table})
    string(REPLACE "|" ";" _marian_isa_fields "${_marian_isa_row}")
    list(GET _marian_isa_fields 0 _marian_isa_key)
    list(GET _marian_isa_fields 1 _marian_isa_label)
    option(COMPILE_${_marian_isa_key} "Compile CPU code with ${_marian_isa_label} support" ON)
  endforeach()

  if(BUILD_ARCH STREQUAL "native")
    message(STATUS "Building with -march=native and intrinsics will be chosen automatically by the compiler to match the current machine.")
    message(STATUS "Checking support for CPU intrinsics")
    include(FindSSE)

    # With -march=native an explicit COMPILE_<ISA>=OFF has no effect; tell the user so.
    foreach(_marian_isa_row ${_marian_isa_table})
      string(REPLACE "|" ";" _marian_isa_fields "${_marian_isa_row}")
      list(GET _marian_isa_fields 0 _marian_isa_key)
      list(GET _marian_isa_fields 1 _marian_isa_label)
      if(${_marian_isa_key}_FOUND AND NOT COMPILE_${_marian_isa_key})
        message(WARNING "${_marian_isa_label} enabled due to -march=native and -DCOMPILE_${_marian_isa_key}=${COMPILE_${_marian_isa_key}} is ignored.")
      endif()
    endforeach()
  else()
    # Force building with the requested intrinsics; requires compiler support.
    message(STATUS "Building with -march=${BUILD_ARCH} and forcing intrisics as requested")

    foreach(_marian_isa_row ${_marian_isa_table})
      string(REPLACE "|" ";" _marian_isa_fields "${_marian_isa_row}")
      list(GET _marian_isa_fields 0 _marian_isa_key)
      list(GET _marian_isa_fields 1 _marian_isa_label)
      list(GET _marian_isa_fields 2 _marian_isa_flag)
      if(COMPILE_${_marian_isa_key})
        message(STATUS "${_marian_isa_label} support requested")
        # Host compiler gets the flag directly; NVCC gets it as one "-Xcompiler <flag>" entry.
        string(APPEND INTRINSICS " ${_marian_isa_flag}")
        list(APPEND INTRINSICS_NVCC "-Xcompiler ${_marian_isa_flag}")
      endif()
    endforeach()
  endif()

  unset(_marian_isa_table)
  unset(_marian_isa_row)
  unset(_marian_isa_fields)
  unset(_marian_isa_key)
  unset(_marian_isa_label)
  unset(_marian_isa_flag)

  if(USE_FBGEMM)
    list(APPEND EXT_LIBS fbgemm dl)
    add_definitions(-DUSE_FBGEMM=1)
  endif()

  # Clang-10.0.0 complains when CUDA is newer than 10.1
  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
    set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-warning-option -Wno-unknown-cuda-version")
  endif()
  set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA}")

  # ALL_WARNINGS is consumed in src/CMakeLists.txt on a per-target basis.
  list(APPEND ALL_WARNINGS
       -Wall
       -Werror
       -Wextra
       -Wno-unused-result
       -Wno-deprecated
       -Wno-pragmas
       -Wno-unused-parameter
       -Wno-unused-function
       -Wno-unused-value
       -Wno-unknown-pragmas
       -Wno-sign-compare
       -Wno-missing-field-initializers)

  if(CMAKE_COMPILER_IS_GNUCC)
    # These warnings do not exist prior to gcc 5.0
    if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
      list(APPEND ALL_WARNINGS -Wsuggest-override -Wno-int-in-bool-context)
    endif()

    # These flags are not known to clang
    set(CMAKE_GCC_FLAGS "-Wl,--no-as-needed")
    set(CMAKE_RDYNAMIC_FLAG "-rdynamic")
  endif()

  # Flags shared by the C and C++ compilers; the C++ flags additionally pin the language standard.
  # NOTE(review): -std=c++11 is requested here while CMAKE_CXX_STANDARD is set to 17 near the
  # top of this file - confirm which standard is actually intended.
  set(_marian_common_flags "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
  set(CMAKE_CXX_FLAGS "-std=c++11 ${_marian_common_flags}")
  set(CMAKE_C_FLAGS   "${_marian_common_flags}")
  unset(_marian_common_flags)

  # Per-configuration flags are identical for both languages but need to be set separately.
  foreach(_marian_lang CXX C)
    set(CMAKE_${_marian_lang}_FLAGS_RELEASE        "-O3 -m64 -funroll-loops -g ${CMAKE_RDYNAMIC_FLAG}")
    set(CMAKE_${_marian_lang}_FLAGS_DEBUG          "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
    set(CMAKE_${_marian_lang}_FLAGS_SLIM           "-O3 -m64 -funroll-loops -DNDEBUG")
    set(CMAKE_${_marian_lang}_FLAGS_RELWITHDEBINFO "${CMAKE_${_marian_lang}_FLAGS_RELEASE}")
    set(CMAKE_${_marian_lang}_FLAGS_PROFILE        "${CMAKE_${_marian_lang}_FLAGS_RELEASE} -pg")
    set(CMAKE_${_marian_lang}_FLAGS_PROFGEN        "${CMAKE_${_marian_lang}_FLAGS_RELEASE} -fprofile-generate -fprofile-correction")
    set(CMAKE_${_marian_lang}_FLAGS_PROFUSE        "${CMAKE_${_marian_lang}_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
  endforeach()
  unset(_marian_lang)
endif()

# With gcc 7.0 and above we need to mark fallthrough in switch case statements.
# That can be done in comments for backward compatibility, but ccache removes comments.
# -C makes gcc keep comments.
# When compiling through ccache, pass -C so that comments survive preprocessing.
if(USE_CCACHE)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -C")
endif()

###############################################################################
# Downloading SentencePiece if requested and set to compile with it.
# Requires all the dependencies imposed by SentencePiece
if(USE_SENTENCEPIECE)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_SENTENCEPIECE")
  list(APPEND CUDA_NVCC_FLAGS -DUSE_SENTENCEPIECE)
  set(EXT_LIBS ${EXT_LIBS} sentencepiece sentencepiece_train)
endif()

if(USE_ONNX)
  message(STATUS "Enabling experimental ONNX support")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_ONNX")
  # TODO: likely required to find protobuf by itself, we should check/fix this.
  # Before it would take advantage of sentencepiece doing that.
  set(EXT_LIBS ${EXT_LIBS} protobuf)
  include_directories(${Protobuf_INCLUDE_DIRS})
endif()

# Find packages
set(EXT_LIBS ${EXT_LIBS} ${CMAKE_DL_LIBS})

###############################################################################
# CUDA toolkit, target GPU architectures and CUDA companion libraries
if(COMPILE_CUDA)

  if(USE_STATIC_LIBS)
    # link statically to stdlib libraries
    if(NOT MSVC)
      set(CMAKE_EXE_LINKER_FLAGS "-static-libgcc -static-libstdc++")
    endif()

    # Prefer static archives while searching for the CUDA libraries; the original suffix
    # list is saved here and restored once the CUDA libraries have been located.
    set(_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
    if(WIN32)
      list(INSERT CMAKE_FIND_LIBRARY_SUFFIXES 0 .lib .a)
    else()
      set(CMAKE_FIND_LIBRARY_SUFFIXES .a _static.a)
    endif()
  endif()

  find_package(CUDA "9.0") # TODO: only enable FP16-related options for compute_70 and higher.
  if(CUDA_FOUND)
    # CUDA >= 10.0 requires CMake >= 3.12.2
    if((NOT CUDA_VERSION VERSION_LESS "10.0") AND (CMAKE_VERSION VERSION_LESS "3.12.2"))
      message(WARNING "On some Unix systems CUDA 10.0+ requires CMake 3.12.2+; you use CMake ${CMAKE_VERSION}")
    endif()

    # We want to compile as many targets as possible but different CUDA versions support
    # different targets, so the COMPILE_<ARCH> options are only declared for CUDA versions
    # that know the architecture. (`NOT x VERSION_LESS y` is used because VERSION_GREATER_EQUAL
    # is not available in the minimum supported CMake version.)
    if(NOT CUDA_VERSION VERSION_LESS "9.0")
      option(COMPILE_KEPLER  "Compile GPU version with SM35 support" OFF) # deprecated for CUDA 11
      option(COMPILE_MAXWELL "Compile GPU version with SM50 support" OFF) # deprecated for CUDA 11
      option(COMPILE_PASCAL  "Compile GPU version with SM60 support" ON)
      option(COMPILE_VOLTA   "Compile GPU version with SM70 support" ON)
    endif()
    if(NOT CUDA_VERSION VERSION_LESS "10.0")
      option(COMPILE_TURING "Compile GPU version with SM75 support" ON)
    endif()
    if(NOT CUDA_VERSION VERSION_LESS "11.0")
      option(COMPILE_AMPERE "Compile GPU version with SM80 support" ON)
      # Kepler/Maxwell are deprecated from CUDA 11 on; silence the corresponding NVCC warning.
      list(APPEND COMPUTE -Wno-deprecated-gpu-targets)
    endif()
    if(NOT CUDA_VERSION VERSION_LESS "11.1")
      option(COMPILE_AMPERE_RTX "Compile GPU version with SM86 support" ON)
    endif()

    # Translate the selected architectures into NVCC code generation flags.
    if(COMPILE_KEPLER)
      message(STATUS "Compiling code for Kepler GPUs")
      list(APPEND COMPUTE -gencode=arch=compute_35,code=sm_35) # Tesla K40 and above
    endif()
    if(COMPILE_MAXWELL)
      message(STATUS "Compiling code for Maxwell GPUs")
      list(APPEND COMPUTE -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_52,code=sm_52) # Maxwell GPUs
    endif()
    if(COMPILE_PASCAL)
      message(STATUS "Compiling code for Pascal GPUs")
      list(APPEND COMPUTE -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61) # Pascal GPUs
    endif()
    if(COMPILE_VOLTA)
      message(STATUS "Compiling code for Volta GPUs")
      list(APPEND COMPUTE -arch=sm_70 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70) # Volta GPUs
    endif()
    # The version checks below stay in place so that a COMPILE_<ARCH> value supplied on the
    # command line cannot enable an architecture the toolkit does not support.
    if((NOT CUDA_VERSION VERSION_LESS "10.0") AND COMPILE_TURING)
      message(STATUS "Compiling code for Turing GPUs")
      list(APPEND COMPUTE -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_75,code=compute_75) # Turing GPUs
    endif()
    if((NOT CUDA_VERSION VERSION_LESS "11.0") AND COMPILE_AMPERE)
      message(STATUS "Compiling code for Ampere GPUs")
      list(APPEND COMPUTE -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80) # Ampere GPUs
    endif()
    if((NOT CUDA_VERSION VERSION_LESS "11.1") AND COMPILE_AMPERE_RTX)
      message(STATUS "Compiling code for Ampere RTX GPUs")
      list(APPEND COMPUTE -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86) # Ampere RTX GPUs
    endif()

    if(USE_STATIC_LIBS)
      set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_cusparse_LIBRARY})
      set(CUDA_LIBS ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_cusparse_LIBRARY})

      find_library(CUDA_culibos_LIBRARY NAMES culibos
                   PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64)
      # The cuLIBOS library does not seem to exist in Windows CUDA toolkit installs
      if(CUDA_culibos_LIBRARY)
        set(EXT_LIBS ${EXT_LIBS} ${CUDA_culibos_LIBRARY})
        set(CUDA_LIBS ${CUDA_LIBS} ${CUDA_culibos_LIBRARY})
      elseif(NOT WIN32)
        message(FATAL_ERROR "cuLIBOS library not found")
      endif()

      # CUDA 10.1 introduces cublasLt library that is required on static build
      if(NOT CUDA_VERSION VERSION_LESS "10.1")
        find_library(CUDA_cublasLt_LIBRARY NAMES cublasLt
                     PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64)
        if(NOT CUDA_cublasLt_LIBRARY)
          message(FATAL_ERROR "cuBLASLt library not found")
        endif()
        set(EXT_LIBS ${EXT_LIBS} ${CUDA_cublasLt_LIBRARY})
        set(CUDA_LIBS ${CUDA_LIBS} ${CUDA_cublasLt_LIBRARY})
      endif()
      message(STATUS "Found CUDA libraries: ${CUDA_LIBS}")
    else()
      set(CUDA_LIBS ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})

      # We actually only need cublasLt here after cuda 11. Marian will work fine without it
      # pre cuda 11. We want to force CMake to use the cublas version that ships with CUDA 11
      # so we force the search to occur inside of the cuda toolkit directory.
      if(NOT CUDA_VERSION VERSION_LESS "11.0")
        find_library(CUDA_cublasLt_LIBRARY NAMES cublasLt
                     PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
                     NO_DEFAULT_PATH)
        if(NOT CUDA_cublasLt_LIBRARY)
          message(FATAL_ERROR "cuBLASLt library not found")
        endif()
        set(EXT_LIBS ${EXT_LIBS} ${CUDA_cublasLt_LIBRARY})
        set(CUDA_LIBS ${CUDA_LIBS} ${CUDA_cublasLt_LIBRARY})
      endif()
      set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY} ${CUDA_CUBLAS_LIBRARIES})
      message(STATUS "Found CUDA libraries: ${CUDA_LIBS}")
    endif()

    if(USE_CUDNN)
      find_package(CUDNN "7.0")
      if(CUDNN_FOUND)
        include_directories(${CUDNN_INCLUDE_DIRS})
        set(EXT_LIBS ${EXT_LIBS} ${CUDNN_LIBRARIES})
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDNN")
        list(APPEND CUDA_NVCC_FLAGS -DCUDNN)
      endif()
    endif()

    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDA_FOUND")
    list(APPEND CUDA_NVCC_FLAGS -DCUDA_FOUND)

    if(MSVC)
      list(APPEND CUDA_NVCC_FLAGS -DBOOST_PP_VARIADICS=0)
    endif()

    if(USE_NCCL)
      # Only the imported target is declared here; no library location is set in this file.
      add_library(nccl STATIC IMPORTED)
      set(EXT_LIBS ${EXT_LIBS} nccl)
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_NCCL")
      list(APPEND CUDA_NVCC_FLAGS -DUSE_NCCL)
    endif()

    # Restore the library suffixes that were overridden for the CUDA search above.
    if(USE_STATIC_LIBS)
      set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
    endif()

  else()
    message("
Cannot find suitable CUDA libraries. Specify the path explicitly with
  -DCUDA_TOOLKIT_ROOT_DIR=/path/to/appropriate/cuda/installation
   (hint: try /usr/local/$(readlink /usr/local/cuda))
OR compile the CPU-only version of Marian with
  -DCOMPILE_CUDA=off
")
    message(FATAL_ERROR "FATAL ERROR: No suitable CUDA library found.")
  endif()

else()
  message(WARNING "COMPILE_CUDA=off : Building only CPU version")
endif()

# NVCC command line. Only the optimisation level differs between Debug and other build types.
# TODO: make compatible with older CUDA versions
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O0; -g; --use_fast_math; ${COMPUTE})
else()
  list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O3; -g; --use_fast_math; ${COMPUTE})
endif()

if(NOT MSVC)
  # @TODO: add warnings here too
  list(APPEND CUDA_NVCC_FLAGS -ccbin ${CMAKE_C_COMPILER}; -std=c++11; -Xcompiler\ -fPIC; -Xcompiler\ -Wno-unused-result; -Xcompiler\ -Wno-deprecated; -Xcompiler\ -Wno-pragmas; -Xcompiler\ -Wno-unused-value; -Xcompiler\ -Werror;)
  list(APPEND CUDA_NVCC_FLAGS ${INTRINSICS_NVCC})
else()
  # Match the host runtime library selection: /MTd for the Debug configuration, /MT otherwise.
  # The nested generator expression appends the trailing "d" only when CONFIG is Debug.
  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler /FS" "-Xcompiler /MT$<$<CONFIG:Debug>:d>")
endif()

list(REMOVE_DUPLICATES CUDA_NVCC_FLAGS)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# From here on, prefer static archives for the remaining find_* calls when linking statically.
# The original suffix list is restored further below.
if(USE_STATIC_LIBS)
  set(_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
  if(WIN32)
    list(INSERT CMAKE_FIND_LIBRARY_SUFFIXES 0 .lib .a)
  else()
    set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
  endif()
endif()

###############################################################################
# Find Tcmalloc_minimal
# re-used from sentencepiece
if(NOT WIN32)
  if(USE_STATIC_LIBS)
    find_library(TCMALLOC_LIB NAMES libtcmalloc_minimal.a)
  else()
    find_library(TCMALLOC_LIB NAMES tcmalloc_minimal)
  endif()
  if(TCMALLOC_LIB)
    message(STATUS "Found TCMalloc: ${TCMALLOC_LIB}")
    # Link the library that find_library located. (Tcmalloc_LIBRARIES, which used to be
    # appended here, is not set anywhere in this file, so TCMalloc was never actually linked
    # even though the -fno-builtin-* flags below were applied.)
    set(EXT_LIBS ${EXT_LIBS} ${TCMALLOC_LIB})
    add_definitions(-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free)
  else()
    message(STATUS "Not Found TCMalloc: ${TCMALLOC_LIB}")
  endif()
endif()

###############################################################################
# Find BLAS library
if(COMPILE_CPU)
  # NOTE(review): both intgemm and the COMPILE_CPU definition are skipped when install
  # targets are generated - confirm that the definition is meant to be inside this guard.
  if(NOT GENERATE_MARIAN_INSTALL_TARGETS)
    set(EXT_LIBS ${EXT_LIBS} intgemm) # Enable intgemm when compiling CPU
    add_definitions(-DCOMPILE_CPU=1)
  endif()

  if(USE_APPLE_ACCELERATE)
    if(NOT APPLE)
      message(FATAL_ERROR "FATAL ERROR: Apple Accelerate only works on macOS.")
    endif()
    set(BLAS_VENDOR "Accelerate")
    # see https://developer.apple.com/documentation/accelerate for more info
    # you may need to install Xcode command line tools if you don't have them already (https://developer.apple.com/xcode/features/)
    include_directories("/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers")
    set(EXT_LIBS ${EXT_LIBS} "-framework Accelerate")
    add_definitions(-DBLAS_FOUND=1)
  else()
    if(USE_MKL)
      find_package(MKL)
    endif()
    if(MKL_FOUND)
      include_directories(${MKL_INCLUDE_DIR})
      set(EXT_LIBS ${EXT_LIBS} ${MKL_LIBRARIES})
      set(BLAS_FOUND TRUE)
      add_definitions(-DBLAS_FOUND=1 -DMKL_FOUND=1)
    else()
      # Fall back to OpenBLAS; BLAS support is only enabled when CBLAS is found as well.
      set(BLAS_VENDOR "OpenBLAS")
      find_package(BLAS)
      if(BLAS_FOUND)
        include(FindCBLAS)
        if(CBLAS_FOUND)
          include_directories(${BLAS_INCLUDE_DIR} ${CBLAS_INCLUDE_DIR})
          set(EXT_LIBS ${EXT_LIBS} ${BLAS_LIBRARIES} ${CBLAS_LIBRARIES})
          add_definitions(-DBLAS_FOUND=1)
        endif()
      endif()
    endif()
  endif()
endif()

###############################################################################
# Find OpenSSL (only needed for marian-server)
set(BOOST_COMPONENTS "")
if(COMPILE_SERVER)
  find_package(OpenSSL)
  if(OpenSSL_FOUND)
    message(STATUS "Found OpenSSL")
    include_directories(${OPENSSL_INCLUDE_DIR})
    set(EXT_LIBS ${EXT_LIBS} ${OPENSSL_CRYPTO_LIBRARY})
    if(MSVC AND USE_STATIC_LIBS)
      # "If you link with static OpenSSL libraries then you're expected to additionally link your
      # application with WS2_32.LIB, GDI32.LIB, ADVAPI32.LIB, CRYPT32.LIB and USER32.LIB"
      # See https://github.com/openssl/openssl/blob/OpenSSL_1_1_1d/NOTES.WIN#L127
      # Linking with crypt32.lib seems to be enough.
      set(EXT_LIBS ${EXT_LIBS} crypt32.lib)
    endif()
    # The server additionally needs Boost.System; Boost itself is located further below.
    set(BOOST_COMPONENTS ${BOOST_COMPONENTS} system)
  else()
    message(WARNING "Cannot find OpenSSL library. Not compiling server.")
    set(COMPILE_SERVER "off")
  endif()
endif()

###############################################################################
# Undo static lib search and put non-static searches here:
if(USE_STATIC_LIBS)
  set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
endif()

# DETERMINISTIC is always defined (as 1 or 0) for both the host compiler and NVCC.
if(DETERMINISTIC)
  message(WARNING "Option DETERMINISTIC=ON: Trying to make training as deterministic as possible, may result in slow-down")
  add_definitions(-DDETERMINISTIC=1)
  list(APPEND CUDA_NVCC_FLAGS -DDETERMINISTIC=1)
else()
  add_definitions(-DDETERMINISTIC=0)
  list(APPEND CUDA_NVCC_FLAGS -DDETERMINISTIC=0)
endif()

# Find MPI
if(USE_MPI)
  # 2.0 refers to MPI2 standard. OpenMPI is an implementation of that standard regardless of
  # the specific OpenMPI version, e.g. OpenMPI 1.10 implements MPI2 and will be found correctly.
  find_package(MPI 2.0 REQUIRED)
  if(MPI_FOUND)
    include_directories(${MPI_INCLUDE_PATH})
    set(EXT_LIBS ${EXT_LIBS} ${MPI_LIBRARIES})
    if(USE_STATIC_LIBS)
      # alternatively this could install OpenMPI like NCCL and link against that statically with greater control
      message(WARNING "MPI implementations are notoriously difficult to link statically, linking ${MPI_LIBRARIES} dynamically despite -DUSE_STATIC_LIBS=on")
    endif()
    add_definitions(-DMPI_FOUND=1)
  endif()
endif()

###############################################################################
# Find Boost if required
if(BOOST_COMPONENTS)
  if(USE_STATIC_LIBS)
    set(Boost_USE_STATIC_LIBS ON)
  endif()

  find_package(Boost COMPONENTS ${BOOST_COMPONENTS})
  if(Boost_FOUND)
    include_directories(${Boost_INCLUDE_DIRS})
    set(EXT_LIBS ${EXT_LIBS} ${Boost_LIBRARIES})
    set(EXT_LIBS ${EXT_LIBS} ${ZLIB_LIBRARIES}) # hack for static compilation
    if(MSVC)
      add_definitions(-DBOOST_ALL_NO_LIB=1) # hack for missing date-time stub
    endif()
  else()
    message(SEND_ERROR "Cannot find Boost libraries. Terminating.")
  endif()
endif()

###############################################################################
if(COMPILE_TESTS)
  enable_testing()
endif()

if(COMPILE_EXAMPLES)
  add_definitions(-DCOMPILE_EXAMPLES=1)
endif()

# Generate project_version.h to reflect our version number
# (note that this header is written into the source tree).
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/common/project_version.h.in
               ${CMAKE_CURRENT_SOURCE_DIR}/src/common/project_version.h @ONLY)

# Generate build_info.cpp with CMake cache variables
include(GetCacheVariables)

# Make sure no stale src/common/build_info.cpp is left in the source tree; the current one is
# generated into the binary directory below. file(REMOVE) is used instead of spawning `rm` so
# that this also works on platforms without that tool; a missing file is silently ignored.
file(REMOVE "${CMAKE_CURRENT_SOURCE_DIR}/src/common/build_info.cpp")

configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/common/build_info.cpp.in
               ${CMAKE_CURRENT_BINARY_DIR}/src/common/build_info.cpp @ONLY)

# to be able to check if this is a CMake-based compilation, which always adds
# build-info option, even on Windows.
add_definitions(-DBUILD_INFO_AVAILABLE=1)

# Compile source files
include_directories(${marian_SOURCE_DIR}/src)
add_subdirectory(src)

###############################################################################
if(USE_DOXYGEN)
  # Add a `doc` target to generate API documentation with Doxygen
  find_package(Doxygen)
  if(DOXYGEN_FOUND)
    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in
                   ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
    add_custom_target(doc
      ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      COMMENT "Generating API documentation with Doxygen" VERBATIM
    )
  endif()
endif()