llama-cpp: fix cuda support; integrate upstream

This commit is contained in:
happysalada 2023-12-31 16:57:28 +01:00
parent 98a0c372a3
commit 47fc482e58

View File

@@ -5,7 +5,6 @@
, fetchpatch , fetchpatch
, nix-update-script , nix-update-script
, stdenv , stdenv
, symlinkJoin
, config , config
, cudaSupport ? config.cudaSupport , cudaSupport ? config.cudaSupport
@@ -17,30 +16,19 @@
, openclSupport ? false , openclSupport ? false
, clblast , clblast
, openblasSupport ? !rocmSupport , blasSupport ? !rocmSupport && !cudaSupport
, openblas , openblas
, pkg-config , pkg-config
, metalSupport ? stdenv.isDarwin && stdenv.isAarch64 && !openclSupport
}: }:
assert lib.assertMsg
(lib.count lib.id [openclSupport openblasSupport rocmSupport] == 1)
"llama-cpp: exactly one of openclSupport, openblasSupport and rocmSupport should be enabled";
let let
cudatoolkit_joined = symlinkJoin { # It's necessary to consistently use backendStdenv when building with CUDA support,
name = "${cudaPackages.cudatoolkit.name}-merged"; # otherwise we get libstdc++ errors downstream.
paths = [ # cuda imposes an upper bound on the gcc version, e.g. the latest gcc compatible with cudaPackages_11 is gcc11
cudaPackages.cudatoolkit.lib effectiveStdenv = if cudaSupport then cudaPackages.backendStdenv else stdenv;
cudaPackages.cudatoolkit.out
] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [
# for some reason some of the required libs are in the targets/x86_64-linux
# directory; not sure why but this works around it
"${cudaPackages.cudatoolkit}/targets/${stdenv.system}"
];
};
metalSupport = stdenv.isDarwin && stdenv.isAarch64;
in in
stdenv.mkDerivation (finalAttrs: { effectiveStdenv.mkDerivation (finalAttrs: {
pname = "llama-cpp"; pname = "llama-cpp";
version = "1710"; version = "1710";
@@ -67,25 +55,42 @@ stdenv.mkDerivation (finalAttrs: {
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
''; '';
nativeBuildInputs = [ cmake ] ++ lib.optionals openblasSupport [ pkg-config ]; nativeBuildInputs = [ cmake ] ++ lib.optionals blasSupport [ pkg-config ] ++ lib.optionals cudaSupport [
cudaPackages.cuda_nvcc
buildInputs = lib.optionals metalSupport # TODO: Replace with autoAddDriverRunpath
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
cudaPackages.autoAddOpenGLRunpathHook
];
buildInputs = lib.optionals effectiveStdenv.isDarwin
(with darwin.apple_sdk.frameworks; [ (with darwin.apple_sdk.frameworks; [
Accelerate Accelerate
CoreGraphics CoreGraphics
CoreVideo CoreVideo
Foundation Foundation
MetalKit
]) ])
++ lib.optionals cudaSupport [ ++ lib.optionals metalSupport (with darwin.apple_sdk.frameworks; [
cudatoolkit_joined MetalKit
] ++ lib.optionals rocmSupport [ ])
++ lib.optionals cudaSupport (with cudaPackages; [
cuda_cccl.dev # <nv/target>
# A temporary hack for reducing the closure size, remove once cudaPackages
# have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
cuda_cudart.dev
cuda_cudart.lib
cuda_cudart.static
libcublas.dev
libcublas.lib
libcublas.static
]) ++ lib.optionals rocmSupport [
rocmPackages.clr rocmPackages.clr
rocmPackages.hipblas rocmPackages.hipblas
rocmPackages.rocblas rocmPackages.rocblas
] ++ lib.optionals openclSupport [ ] ++ lib.optionals openclSupport [
clblast clblast
] ++ lib.optionals openblasSupport [ ] ++ lib.optionals blasSupport [
openblas openblas
]; ];
@@ -109,7 +114,7 @@ stdenv.mkDerivation (finalAttrs: {
++ lib.optionals openclSupport [ ++ lib.optionals openclSupport [
"-DLLAMA_CLBLAST=ON" "-DLLAMA_CLBLAST=ON"
] ]
++ lib.optionals openblasSupport [ ++ lib.optionals blasSupport [
"-DLLAMA_BLAS=ON" "-DLLAMA_BLAS=ON"
"-DLLAMA_BLAS_VENDOR=OpenBLAS" "-DLLAMA_BLAS_VENDOR=OpenBLAS"
]; ];
@@ -140,7 +145,7 @@ stdenv.mkDerivation (finalAttrs: {
license = licenses.mit; license = licenses.mit;
mainProgram = "llama-cpp-main"; mainProgram = "llama-cpp-main";
maintainers = with maintainers; [ dit7ya elohmeier ]; maintainers = with maintainers; [ dit7ya elohmeier ];
broken = stdenv.isDarwin && stdenv.isx86_64; broken = (effectiveStdenv.isDarwin && effectiveStdenv.isx86_64) || lib.count lib.id [openclSupport blasSupport rocmSupport cudaSupport] == 0;
platforms = platforms.unix; platforms = platforms.unix;
}; };
}) })