shivaraj-bh 2024-06-11 10:11:00 +05:30 committed by Shivaraj B H
parent 3e68d66e04
commit d84efa4788
5 changed files with 208 additions and 0 deletions

doc/ollama.md (new file, +40 lines)

@ -0,0 +1,40 @@
# Ollama
[Ollama](https://github.com/ollama/ollama) enables you to get up and running with Llama 3, Mistral, Gemma, and other large language models.
## Getting Started
```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1".enable = true;
}
```
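The server listens on `127.0.0.1:11434` and stores models under `./data/ollama` by default. These can be changed via the `host`, `port` and `dataDir` options; a sketch with illustrative values:
```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    host = "0.0.0.0";   # listen on all interfaces
    port = 11435;       # default: 11434
    dataDir = "./data/ollama";
  };
}
```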
## Acceleration
By default Ollama uses the CPU for inference. To enable GPU acceleration:
### CUDA
```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    acceleration = "cuda";
  };
}
```
### ROCm
```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    acceleration = "rocm";
  };
}
```
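## Pulling models
Models can also be pulled automatically once the server is healthy, via the `models` option (names as listed on https://ollama.com/library; `llama3` below is just an example):
```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    models = [ "llama3" ];
  };
}
```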


@ -9,6 +9,7 @@ in
  ./elasticsearch.nix
  ./mysql
  ./nginx
  ./ollama.nix
  ./postgres
  ./redis-cluster.nix
  ./redis.nix

nix/ollama.nix (new file, +150 lines)

@ -0,0 +1,150 @@
# Based on https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix
{ pkgs, lib, name, config, ... }:
let
  inherit (lib) types;
  # Build Ollama with the requested acceleration backend baked in.
  ollamaPackage = pkgs.ollama.override {
    inherit (config) acceleration;
    linuxPackages = config.boot.kernelPackages // {
      nvidia_x11 = config.hardware.nvidia.package;
    };
  };
in
{
  options = {
    enable = lib.mkEnableOption "the Ollama service";
    package = lib.mkOption {
      type = types.package;
      default = ollamaPackage;
      description = "The Ollama package to use";
    };
    port = lib.mkOption {
      type = types.port;
      default = 11434;
      description = "The port on which the Ollama service's REST API will listen";
    };
    host = lib.mkOption {
      type = types.str;
      default = "127.0.0.1";
      example = "0.0.0.0";
      description = "The host on which the Ollama service's REST API will listen";
    };
    dataDir = lib.mkOption {
      type = types.str;
      default = "./data/ollama";
      description = ''
        The directory containing the Ollama models.
        Sets the `OLLAMA_MODELS` environment variable.
      '';
    };
    keepAlive = lib.mkOption {
      type = types.str;
      default = "5m";
      description = ''
        The duration for which models stay loaded in memory.
        Sets the `OLLAMA_KEEP_ALIVE` environment variable.
        Note: use a duration string like "5m" for 5 minutes, or a plain number like "70" for 70 seconds.
      '';
      example = "70";
    };
    models = lib.mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = ''
        The models to pull after the server starts.
        Browse the available models at https://ollama.com/library.
      '';
    };
    acceleration = lib.mkOption {
      type = types.nullOr (types.enum [ false "rocm" "cuda" ]);
      default = null;
      example = "rocm";
      description = ''
        What interface to use for hardware acceleration.
        - `null`: default behavior
          - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
          - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
          - otherwise defaults to `false`
        - `false`: disable GPU, only use CPU
        - `"rocm"`: supported by most modern AMD GPUs
        - `"cuda"`: supported by most modern NVIDIA GPUs
      '';
    };
    environment = lib.mkOption {
      type = types.attrsOf types.str;
      default = { };
      example = {
        OLLAMA_DEBUG = "1";
      };
      description = ''
        Extra environment variables passed to the `ollama-server` process.
      '';
    };
    outputs.settings = lib.mkOption {
      type = types.deferredModule;
      internal = true;
      readOnly = true;
      default = {
        processes = {
          # The Ollama server itself.
          "${name}" =
            let
              startScript = pkgs.writeShellApplication {
                name = "ollama-server";
                text = ''
                  if [ ! -d "${config.dataDir}" ]; then
                    echo "Creating directory ${config.dataDir}"
                    mkdir -p "${config.dataDir}"
                  fi
                  ${lib.getExe config.package} serve
                '';
              };
            in
            {
              command = startScript;
              environment = {
                OLLAMA_MODELS = config.dataDir;
                OLLAMA_HOST = "${config.host}:${toString config.port}";
                OLLAMA_KEEP_ALIVE = config.keepAlive;
              } // config.environment;
              readiness_probe = {
                http_get = {
                  host = config.host;
                  port = config.port;
                };
                initial_delay_seconds = 2;
                period_seconds = 10;
                timeout_seconds = 4;
                success_threshold = 1;
                failure_threshold = 5;
              };
              namespace = name;
              availability.restart = "on_failure";
            };
          # Pulls the configured models once the server is healthy.
          "${name}-models" = {
            command = pkgs.writeShellApplication {
              name = "ollama-models";
              text = ''
                set -x
                OLLAMA_HOST=${config.host}:${toString config.port}
                export OLLAMA_HOST
                models="${lib.concatStringsSep " " config.models}"
                for model in $models
                do
                  ${lib.getExe config.package} pull "$model"
                done
              '';
            };
            namespace = name;
            depends_on."${name}".condition = "process_healthy";
          };
        };
      };
    };
  };
}
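For completeness, the remaining user-facing options (`keepAlive`, `environment`) compose the same way; a usage sketch with illustrative values:
```nix
# In `perSystem.process-compose.<name>` (illustrative values)
{
  services.ollama."ollama1" = {
    enable = true;
    keepAlive = "10m";               # keep models loaded for 10 minutes
    environment.OLLAMA_DEBUG = "1";  # extra env vars for `ollama-server`
  };
}
```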

nix/ollama_test.nix (new file, +16 lines)

@ -0,0 +1,16 @@
{ pkgs, ... }: {
  services.ollama."ollama1".enable = true;
  # Cannot test auto-loading models yet, because that requires an internet connection.
  settings.processes.test = {
    command = pkgs.writeShellApplication {
      runtimeInputs = [ pkgs.curl ];
      text = ''
        curl http://127.0.0.1:11434
      '';
      name = "ollama-test";
    };
    depends_on."ollama1".condition = "process_healthy";
  };
}
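A slightly stricter variant could query Ollama's `/api/tags` endpoint (which lists locally available models) and fail on non-2xx responses; a sketch, not part of this commit:
```nix
{ pkgs, ... }: {
  services.ollama."ollama1".enable = true;
  settings.processes.test = {
    command = pkgs.writeShellApplication {
      name = "ollama-test";
      runtimeInputs = [ pkgs.curl ];
      text = ''
        # --fail makes curl exit non-zero on HTTP errors.
        curl --fail http://127.0.0.1:11434/api/tags
      '';
    };
    depends_on."ollama1".condition = "process_healthy";
  };
}
```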


@ -38,6 +38,7 @@
"${inputs.services-flake}/nix/elasticsearch_test.nix"
"${inputs.services-flake}/nix/mysql/mysql_test.nix"
"${inputs.services-flake}/nix/nginx/nginx_test.nix"
"${inputs.services-flake}/nix/ollama_test.nix"
"${inputs.services-flake}/nix/postgres/postgres_test.nix"
"${inputs.services-flake}/nix/redis_test.nix"
"${inputs.services-flake}/nix/redis-cluster_test.nix"