services-flake/nix/ollama.nix
2024-06-14 03:11:07 +05:30

151 lines
4.5 KiB
Nix

# Based on https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix
{ pkgs, lib, name, config, ... }:
let
  inherit (lib) types;
  # Default package: nixpkgs' ollama built with the requested acceleration
  # backend (null / false / "rocm" / "cuda").
  ollamaPackage = pkgs.ollama.override {
    inherit (config) acceleration;
    # NOTE(review): inside a services-flake service module, `config` is this
    # service's submodule config — it normally does not define
    # `boot.kernelPackages` or `hardware.nvidia.package` (those are NixOS
    # options). Nix laziness means this only fails if the override actually
    # consumes `linuxPackages`; confirm whether this path is ever exercised.
    linuxPackages = config.boot.kernelPackages // {
      nvidia_x11 = config.hardware.nvidia.package;
    };
  };
in
{
  options = {
    # `mkEnableOption` renders as "Whether to enable <text>.", so pass only the
    # noun phrase (passing "Enable the Ollama service" would render as
    # "Whether to enable Enable the Ollama service.").
    enable = lib.mkEnableOption "Ollama service";
    package = lib.mkOption {
      type = types.package;
      default = ollamaPackage;
      description = "The Ollama package to use";
    };
    port = lib.mkOption {
      type = types.port;
      default = 11434;
      description = "The port on which the Ollama service's REST API will listen";
    };
    host = lib.mkOption {
      type = types.str;
      default = "127.0.0.1";
      example = "0.0.0.0";
      description = "The host on which the Ollama service's REST API will listen";
    };
    dataDir = lib.mkOption {
      type = types.str;
      # Relative to the process-compose working directory.
      default = "./data/${name}";
      description = ''
        The directory containing the Ollama models.
        Sets the `OLLAMA_MODELS` environment variable.
      '';
    };
    keepAlive = lib.mkOption {
      type = types.str;
      default = "5m";
      description = ''
        The duration that models stay loaded in memory.
        Sets the `OLLAMA_KEEP_ALIVE` environment variable.
        Note: Use a duration string like "5m" for 5 minutes. Or "70" for 70 seconds.
      '';
      example = "70";
    };
    models = lib.mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = ''
        The models to load post start.
        Search for models of your choice from: https://ollama.com/library
      '';
    };
    acceleration = lib.mkOption {
      type = types.nullOr (types.enum [ false "rocm" "cuda" ]);
      default = null;
      example = "rocm";
      description = ''
        What interface to use for hardware acceleration.
        - `null`: default behavior
        - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
        - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
        - otherwise defaults to `false`
        - `false`: disable GPU, only use CPU
        - `"rocm"`: supported by most modern AMD GPUs
        - `"cuda"`: supported by most modern NVIDIA GPUs
      '';
    };
    environment = lib.mkOption {
      type = types.attrsOf types.str;
      default = { };
      example = {
        OLLAMA_DEBUG = "1";
      };
      description = ''
        Extra environment variables passed to the `ollama-server` process.
      '';
    };
    # Internal: the process-compose settings this service contributes.
    outputs.settings = lib.mkOption {
      type = types.deferredModule;
      internal = true;
      readOnly = true;
      default = {
        processes = {
          # Main server process: ensures the model directory exists, then
          # execs `ollama serve` (configured via environment variables below).
          "${name}" =
            let
              startScript = pkgs.writeShellApplication {
                name = "ollama-server";
                text = ''
                  if [ ! -d "${config.dataDir}" ]; then
                    echo "Creating directory ${config.dataDir}"
                    mkdir -p "${config.dataDir}"
                  fi
                  ${lib.getExe config.package} serve
                '';
              };
            in
            {
              command = startScript;
              environment = {
                OLLAMA_MODELS = config.dataDir;
                OLLAMA_HOST = "${config.host}:${toString config.port}";
                OLLAMA_KEEP_ALIVE = config.keepAlive;
              } // config.environment;
              # Ollama answers plain HTTP GET on / once the server is up; the
              # model puller below waits for this probe via `process_healthy`.
              readiness_probe = {
                http_get = {
                  host = config.host;
                  port = config.port;
                };
                initial_delay_seconds = 2;
                period_seconds = 10;
                timeout_seconds = 4;
                success_threshold = 1;
                failure_threshold = 5;
              };
              namespace = name;
              availability.restart = "on_failure";
            };
          # One-shot process that pulls each configured model once the server
          # is healthy.
          "${name}-models" = {
            command = pkgs.writeShellApplication {
              name = "ollama-models";
              text = ''
                set -x
                OLLAMA_HOST=${config.host}:${toString config.port}
                export OLLAMA_HOST
                # One `pull` per model, generated at eval time; escaping each
                # name avoids the unquoted word-split loop (ShellCheck SC2086)
                # the previous version relied on.
                ${lib.concatMapStringsSep "\n"
                  (model: "${lib.getExe config.package} pull ${lib.escapeShellArg model}")
                  config.models}
              '';
            };
            namespace = name;
            depends_on."${name}".condition = "process_healthy";
          };
        };
      };
    };
  };
}