diff --git a/doc/ollama.md b/doc/ollama.md
new file mode 100644
index 0000000..f605f91
--- /dev/null
+++ b/doc/ollama.md
@@ -0,0 +1,54 @@
+# Ollama
+
+[Ollama](https://github.com/ollama/ollama) enables you to get up and running with Llama 3, Mistral, Gemma, and other large language models.
+
+## Getting Started
+
+```nix
+# In `perSystem.process-compose.<name>`
+{
+  services.ollama."ollama1".enable = true;
+}
+```
+
+## Acceleration
+
+By default, Ollama uses the CPU for inference. To enable GPU acceleration:
+
+### CUDA
+
+```nix
+# In `perSystem.process-compose.<name>`
+{
+  services.ollama."ollama1" = {
+    enable = true;
+    acceleration = "cuda";
+  };
+}
+```
+
+### ROCm
+
+```nix
+# In `perSystem.process-compose.<name>`
+{
+  services.ollama."ollama1" = {
+    enable = true;
+    acceleration = "rocm";
+  };
+}
+```
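+
+## Loading models
+
+Models listed under the `models` option are pulled from the [Ollama library](https://ollama.com/library) by a companion process once the server is healthy; note that pulling requires an internet connection. For example, to pull `llama3` (an illustrative choice; any model name from the library works):
+
+```nix
+# In `perSystem.process-compose.<name>`
+{
+  services.ollama."ollama1" = {
+    enable = true;
+    models = [ "llama3" ];
+  };
+}
+```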
diff --git a/nix/default.nix b/nix/default.nix
index a1fc194..f389d38 100644
--- a/nix/default.nix
+++ b/nix/default.nix
@@ -9,6 +9,7 @@ in
     ./elasticsearch.nix
     ./mysql
     ./nginx
+    ./ollama.nix
     ./postgres
     ./redis-cluster.nix
     ./redis.nix
diff --git a/nix/ollama.nix b/nix/ollama.nix
new file mode 100644
index 0000000..ce75708
--- /dev/null
+++ b/nix/ollama.nix
@@ -0,0 +1,149 @@
+# Based on https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix
+{ pkgs, lib, name, config, ... }:
+let
+  inherit (lib) types;
+  # `config` here is the service submodule's own config (not a NixOS system
+  # config), so only `acceleration` is passed to the package override.
+  ollamaPackage = pkgs.ollama.override {
+    inherit (config) acceleration;
+  };
+in
+{
+  options = {
+    enable = lib.mkEnableOption "the Ollama service";
+    package = lib.mkOption {
+      type = types.package;
+      default = ollamaPackage;
+      description = "The Ollama package to use";
+    };
+    port = lib.mkOption {
+      type = types.port;
+      default = 11434;
+      description = "The port on which the Ollama service's REST API will listen";
+    };
+    host = lib.mkOption {
+      type = types.str;
+      default = "127.0.0.1";
+      example = "0.0.0.0";
+      description = "The host on which the Ollama service's REST API will listen";
+    };
+    dataDir = lib.mkOption {
+      type = types.str;
+      default = "./data/ollama";
+      description = ''
+        The directory containing the Ollama models.
+        Sets the `OLLAMA_MODELS` environment variable.
+      '';
+    };
+    keepAlive = lib.mkOption {
+      type = types.str;
+      default = "5m";
+      description = ''
+        The duration that models stay loaded in memory.
+        Sets the `OLLAMA_KEEP_ALIVE` environment variable.
+
+        Note: use a duration string like "5m" for 5 minutes, or a plain number like "70" for 70 seconds.
+      '';
+      example = "70";
+    };
+    models = lib.mkOption {
+      type = types.listOf types.str;
+      default = [ ];
+      description = ''
+        The models to pull after the server starts.
+        Search for models of your choice at https://ollama.com/library.
+      '';
+    };
+    acceleration = lib.mkOption {
+      type = types.nullOr (types.enum [ false "rocm" "cuda" ]);
+      default = null;
+      example = "rocm";
+      description = ''
+        What interface to use for hardware acceleration.
+
+        - `null`: default behavior
+          - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
+          - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
+          - otherwise defaults to `false`
+        - `false`: disable GPU, only use CPU
+        - `"rocm"`: supported by most modern AMD GPUs
+        - `"cuda"`: supported by most modern NVIDIA GPUs
+      '';
+    };
+    environment = lib.mkOption {
+      type = types.attrsOf types.str;
+      default = { };
+      example = {
+        OLLAMA_DEBUG = "1";
+      };
+      description = ''
+        Extra environment variables passed to the `ollama-server` process.
+      '';
+    };
+
+    outputs.settings = lib.mkOption {
+      type = types.deferredModule;
+      internal = true;
+      readOnly = true;
+      default = {
+        processes = {
+          "${name}" =
+            let
+              startScript = pkgs.writeShellApplication {
+                name = "ollama-server";
+                text = ''
+                  if [ ! -d "${config.dataDir}" ]; then
+                    echo "Creating directory ${config.dataDir}"
+                    mkdir -p "${config.dataDir}"
+                  fi
+
+                  ${lib.getExe config.package} serve
+                '';
+              };
+            in
+            {
+              command = startScript;
+
+              environment = {
+                OLLAMA_MODELS = config.dataDir;
+                OLLAMA_HOST = "${config.host}:${toString config.port}";
+                OLLAMA_KEEP_ALIVE = config.keepAlive;
+              } // config.environment;
+
+              readiness_probe = {
+                http_get = {
+                  host = config.host;
+                  port = config.port;
+                };
+                initial_delay_seconds = 2;
+                period_seconds = 10;
+                timeout_seconds = 4;
+                success_threshold = 1;
+                failure_threshold = 5;
+              };
+              namespace = name;
+              availability.restart = "on_failure";
+            };
+
+          "${name}-models" = {
+            command = pkgs.writeShellApplication {
+              name = "ollama-models";
+              text = ''
+                set -x
+                OLLAMA_HOST=${config.host}:${toString config.port}
+                export OLLAMA_HOST
+                models="${lib.concatStringsSep " " config.models}"
+                for model in $models
+                do
+                  ${lib.getExe config.package} pull "$model"
+                done
+              '';
+            };
+            namespace = name;
+            depends_on."${name}".condition = "process_healthy";
+          };
+        };
+      };
+    };
+  };
+}
diff --git a/nix/ollama_test.nix b/nix/ollama_test.nix
new file mode 100644
index 0000000..41ced18
--- /dev/null
+++ b/nix/ollama_test.nix
@@ -0,0 +1,16 @@
+{ pkgs, ... }: {
+  services.ollama."ollama1".enable = true;
+
+  # Cannot test auto-loading of models yet, because that requires an internet connection.
+  settings.processes.test =
+    {
+      command = pkgs.writeShellApplication {
+        name = "ollama-test";
+        runtimeInputs = [ pkgs.curl ];
+        text = ''
+          curl http://127.0.0.1:11434
+        '';
+      };
+      depends_on."ollama1".condition = "process_healthy";
+    };
+}
diff --git a/test/flake.nix b/test/flake.nix
index 40e7c53..b7faa31 100644
--- a/test/flake.nix
+++ b/test/flake.nix
@@ -38,6 +38,7 @@
         "${inputs.services-flake}/nix/elasticsearch_test.nix"
         "${inputs.services-flake}/nix/mysql/mysql_test.nix"
         "${inputs.services-flake}/nix/nginx/nginx_test.nix"
+        "${inputs.services-flake}/nix/ollama_test.nix"
         "${inputs.services-flake}/nix/postgres/postgres_test.nix"
         "${inputs.services-flake}/nix/redis_test.nix"
         "${inputs.services-flake}/nix/redis-cluster_test.nix"