# nixpkgs/pkgs/tools/misc/ollama/default.nix

# Package inputs: nixpkgs helpers first, then build tools and gpu/platform
# libraries, then the user-facing feature flags.
{ lib
, buildGoModule
, fetchFromGitHub
, fetchpatch
, buildEnv
, linkFarm
, overrideCC
, makeWrapper
, stdenv

, cmake
, gcc12
, clblast
, libdrm
, rocmPackages
, cudaPackages
, linuxPackages
, darwin

# Opt-in gpu acceleration, both disabled by default. They only take effect on
# linux; on other platforms a warning is emitted and the cpu build is used.
, enableRocm ? false
, enableCuda ? false
}:

let
  pname = "ollama";
  version = "0.1.24";

  # Evaluates to `stdenv.isLinux`. When that is false, an evaluation warning
  # naming the requested gpu api is emitted first.
  linuxOnlyGpu = api:
    lib.warnIfNot stdenv.isLinux
      "building ollama with ${api} is only supported on linux; falling back to cpu"
      stdenv.isLinux;
  # The platform check (and its warning) is only evaluated when the matching
  # flag was actually requested.
  rocmIsEnabled = if enableRocm then linuxOnlyGpu "rocm" else false;
  cudaIsEnabled = if enableCuda then linuxOnlyGpu "cuda" else false;
  # true when at least one linux gpu backend is active
  enableLinuxGpu = rocmIsEnabled || cudaIsEnabled;

  # frameworks taken from `darwin.apple_sdk_11_0`
  appleFrameworks = darwin.apple_sdk_11_0.frameworks;
  # the frameworks used for the darwin build (see `stdenv.isDarwin` uses below)
  metalFrameworks = with appleFrameworks; [ Accelerate Metal MetalKit MetalPerformanceShaders ];

  # Upstream sources at the release tag. Submodules are fetched as well; the
  # build patches files under `llm/llama.cpp/`.
  src = fetchFromGitHub {
    owner = "jmorganca";
    repo = "ollama";
    rev = "v" + version;
    fetchSubmodules = true;
    hash = "sha256-GwZA1QUH8I8m2bGToIcMMaB5MBnioQP4+n1SauUJYP8=";
  };
  # Turns a patch file shipped in ollama's own `llm/patches/` directory into a
  # `fetchpatch` result whose paths have one leading component stripped and
  # `llm/llama.cpp/` prepended.
  #   patchFile: file name inside `llm/patches/`
  #   patchHash: expected hash of the normalized patch
  preparePatch = patchFile: patchHash:
    fetchpatch {
      hash = patchHash;
      url = "file://${src}/llm/patches/${patchFile}";
      extraPrefix = "llm/llama.cpp/";
      stripLen = 1;
    };
  inherit (lib) licenses maintainers platforms;
  # Base arguments handed to the go builder. The gpu- and darwin-specific
  # attribute sets are merged on top of this at the end of the file.
  ollama = {
    inherit pname version src;
    vendorHash = "sha256-wXRbfnkbeXPTOalm7SFLvHQ9j46S/yLNbFy+OWNSamQ=";

    nativeBuildInputs =
      [ cmake ]
      # `makeWrapper` is only used by the gpu runtime library wrapper
      ++ lib.optional enableLinuxGpu makeWrapper
      ++ lib.optionals stdenv.isDarwin metalFrameworks;

    patches = [
      # the `go generate` script must not call `git`;
      # it uses `patch` instead where needed
      ./remove-git.patch
      # use `$CXX` rather than a hardcoded `g++`
      ./replace-gcc.patch

      # patches ollama itself applies to llama.cpp's example server, see
      # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
      (preparePatch "01-cache.diff" "sha256-PC4yN98hFvK+PEITiDihL8ki3bJuLVXrAm0CGf8GPJE=")
      (preparePatch "02-shutdown.diff" "sha256-cElAp9Z9exxN964vB/YFuBhZoEcoAwGSMCnbh+l/V4Q=")
    ];
    postPatch = ''
      # use a patch from the nix store in the `go generate` script
      substituteInPlace llm/generate/gen_common.sh \
        --subst-var-by cmakeIncludePatch '${./cmake-include.patch}'
      # `ollama/llm/generate/gen_common.sh` -> "avoid duplicate main symbols when we link into the cgo binary"
      substituteInPlace llm/llama.cpp/examples/server/server.cpp \
        --replace-fail 'int main(' 'int __main('
      # replace inaccurate version number with actual release version
      substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
    '';
    preBuild = ''
      export OLLAMA_SKIP_PATCHING=true
      # build llama.cpp libraries for ollama
      go generate ./...
    '';

    # strip symbol/debug info and stamp the version and release mode into the binary
    ldflags = [
      "-s"
      "-w"
      "-X=github.com/jmorganca/ollama/version.Version=${version}"
      "-X=github.com/jmorganca/ollama/server.mode=release"
    ];

    meta = {
      homepage = "https://github.com/jmorganca/ollama";
      description = "Get up and running with large language models locally";
      mainProgram = "ollama";
      license = licenses.mit;
      platforms = platforms.unix;
      maintainers = [
        maintainers.abysssol
        maintainers.dit7ya
        maintainers.elohmeier
      ];
    };
  };


  # a directory whose single entry `llvm` links to rocm's clang
  rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };
  # merged tree of the rocm device libraries and the clang link above
  rocmPath = buildEnv {
    name = "rocm-path";
    paths = [ rocmPackages.rocm-device-libs rocmClang ];
  };
  # extra derivation attributes added to the build when rocm is enabled
  rocmVars = {
    CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
    ROCM_PATH = rocmPath;
  };

  # merged tree of the cuda toolkit and the cuda runtime
  cudaToolkit = buildEnv {
    name = "cuda-toolkit";
    paths = with cudaPackages; [ cudatoolkit cuda_cudart ];
    # FIXME: ignoring collisions is a workaround; find a cleaner way to combine these
    ignoreCollisions = true;
  };
  # extra derivation attributes added to the build when cuda is enabled
  cudaVars = {
    CUDAToolkit_ROOT = cudaToolkit;
    CUDACXX = "${cudaToolkit}/bin/nvcc";
    CUDA_LIB_DIR = "${cudaToolkit}/lib";
  };

  # `buildInputs` for linux gpu builds: each backend contributes its
  # libraries only when it is enabled
  linuxGpuLibs = {
    buildInputs =
      lib.optionals rocmIsEnabled (
        (with rocmPackages; [ clr hipblas rocblas rocsolver rocsparse ])
        ++ [ libdrm ]
      )
      ++ lib.optional cudaIsEnabled cudaPackages.cuda_cudart;
  };

  # `buildInputs` for darwin builds
  appleGpuLibs = {
    buildInputs = metalFrameworks;
  };

  # libraries exposed to the wrapped binary through `LD_LIBRARY_PATH`
  runtimeLibs =
    lib.optional rocmIsEnabled rocmPackages.rocm-smi
    ++ lib.optional cudaIsEnabled linuxPackages.nvidia_x11;
  # Renames the real binary to a hidden `-unwrapped` file and installs a
  # wrapper under the original name that appends `runtimeLibs` to
  # `LD_LIBRARY_PATH`.
  runtimeLibWrapper = {
    postFixup = ''
      mv "$out/bin/${pname}" "$out/bin/.${pname}-unwrapped"
      makeWrapper "$out/bin/.${pname}-unwrapped" "$out/bin/${pname}" \
        --suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath runtimeLibs}'
    '';
  };

  # cuda builds use a `buildGoModule` whose stdenv compiles with gcc12;
  # every other configuration uses the stock builder
  goBuild =
    let
      gcc12Stdenv = overrideCC stdenv gcc12;
    in
    if !cudaIsEnabled
    then buildGoModule
    else buildGoModule.override { stdenv = gcc12Stdenv; };
in
# Layer the optional attribute sets over the base arguments, in order; on a
# clashing attribute name the later set wins.
goBuild (builtins.foldl' (acc: extra: acc // extra) ollama [
  (lib.optionalAttrs rocmIsEnabled rocmVars)
  (lib.optionalAttrs cudaIsEnabled cudaVars)
  (lib.optionalAttrs enableLinuxGpu linuxGpuLibs)
  (lib.optionalAttrs enableLinuxGpu runtimeLibWrapper)
  (lib.optionalAttrs stdenv.isDarwin appleGpuLibs)
])