services-flake/nix/ollama.nix
2024-06-14 03:11:07 +05:30

151 lines
4.5 KiB
Nix

# Based on https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix
{ pkgs, lib, name, config, ... }:
let
  inherit (lib) types;
  # Default package: nixpkgs' ollama built with the requested acceleration
  # backend (null / false / "rocm" / "cuda").
  ollamaPackage = pkgs.ollama.override {
    inherit (config) acceleration;
    # NOTE(review): inside a services-flake service module, `config` is this
    # service's submodule config — it normally does not define
    # `boot.kernelPackages` or `hardware.nvidia.package` (those are NixOS
    # options). Nix laziness means this only fails if the override actually
    # consumes `linuxPackages`; confirm whether this path is ever exercised.
    linuxPackages = config.boot.kernelPackages // {
      nvidia_x11 = config.hardware.nvidia.package;
    };
  };
in
{
  options = {
    # `mkEnableOption` renders as "Whether to enable <text>.", so pass only the
    # noun phrase (passing "Enable the Ollama service" would render as
    # "Whether to enable Enable the Ollama service.").
    enable = lib.mkEnableOption "Ollama service";
    package = lib.mkOption {
      type = types.package;
      default = ollamaPackage;
      description = "The Ollama package to use";
    };
    port = lib.mkOption {
      type = types.port;
      default = 11434;
      description = "The port on which the Ollama service's REST API will listen";
    };
    host = lib.mkOption {
      type = types.str;
      default = "127.0.0.1";
      example = "0.0.0.0";
      description = "The host on which the Ollama service's REST API will listen";
    };
    dataDir = lib.mkOption {
      type = types.str;
      # Relative to the process-compose working directory.
      default = "./data/${name}";
      description = ''
        The directory containing the Ollama models.
        Sets the `OLLAMA_MODELS` environment variable.
      '';
    };
    keepAlive = lib.mkOption {
      type = types.str;
      default = "5m";
      description = ''
        The duration that models stay loaded in memory.
        Sets the `OLLAMA_KEEP_ALIVE` environment variable.
        Note: Use a duration string like "5m" for 5 minutes. Or "70" for 70 seconds.
      '';
      example = "70";
    };
    models = lib.mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = ''
        The models to load post start.
        Search for models of your choice from: https://ollama.com/library
      '';
    };
    acceleration = lib.mkOption {
      type = types.nullOr (types.enum [ false "rocm" "cuda" ]);
      default = null;
      example = "rocm";
      description = ''
        What interface to use for hardware acceleration.
        - `null`: default behavior
        - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
        - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
        - otherwise defaults to `false`
        - `false`: disable GPU, only use CPU
        - `"rocm"`: supported by most modern AMD GPUs
        - `"cuda"`: supported by most modern NVIDIA GPUs
      '';
    };
    environment = lib.mkOption {
      type = types.attrsOf types.str;
      default = { };
      example = {
        OLLAMA_DEBUG = "1";
      };
      description = ''
        Extra environment variables passed to the `ollama-server` process.
      '';
    };
    # Internal: the process-compose settings this service contributes.
    outputs.settings = lib.mkOption {
      type = types.deferredModule;
      internal = true;
      readOnly = true;
      default = {
        processes = {
          # Main server process: ensures the model directory exists, then
          # execs `ollama serve` (configured via environment variables below).
          "${name}" =
            let
              startScript = pkgs.writeShellApplication {
                name = "ollama-server";
                text = ''
                  if [ ! -d "${config.dataDir}" ]; then
                    echo "Creating directory ${config.dataDir}"
                    mkdir -p "${config.dataDir}"
                  fi
                  ${lib.getExe config.package} serve
                '';
              };
            in
            {
              command = startScript;
              environment = {
                OLLAMA_MODELS = config.dataDir;
                OLLAMA_HOST = "${config.host}:${toString config.port}";
                OLLAMA_KEEP_ALIVE = config.keepAlive;
              } // config.environment;
              # Ollama answers plain HTTP GET on / once the server is up; the
              # model puller below waits for this probe via `process_healthy`.
              readiness_probe = {
                http_get = {
                  host = config.host;
                  port = config.port;
                };
                initial_delay_seconds = 2;
                period_seconds = 10;
                timeout_seconds = 4;
                success_threshold = 1;
                failure_threshold = 5;
              };
              namespace = name;
              availability.restart = "on_failure";
            };
          # One-shot process that pulls each configured model once the server
          # is healthy.
          "${name}-models" = {
            command = pkgs.writeShellApplication {
              name = "ollama-models";
              text = ''
                set -x
                OLLAMA_HOST=${config.host}:${toString config.port}
                export OLLAMA_HOST
                # One `pull` per model, generated at eval time; escaping each
                # name avoids the unquoted word-split loop (ShellCheck SC2086)
                # the previous version relied on.
                ${lib.concatMapStringsSep "\n"
                  (model: "${lib.getExe config.package} pull ${lib.escapeShellArg model}")
                  config.models}
              '';
            };
            namespace = name;
            depends_on."${name}".condition = "process_healthy";
          };
        };
      };
    };
  };
}