feat(ollama): init
Ported from https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix. Also see the NixOS module: https://github.com/NixOS/nixpkgs/blob/master/nixos/modules/services/misc/ollama.nix
parent 3e68d66e04
commit d84efa4788
doc/ollama.md (new file)
@@ -0,0 +1,40 @@
# Ollama

[Ollama](https://github.com/ollama/ollama) enables you to get up and running with Llama 3, Mistral, Gemma, and other large language models.

## Getting Started

```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1".enable = true;
}
```
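The snippet above assumes a `process-compose` module already wired up via [process-compose-flake](https://github.com/Platonic-Systems/process-compose-flake). For a self-contained flake, a minimal sketch could look like the following (input pins and the `default` name are illustrative):

```nix
# flake.nix
{
  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
    flake-parts.url = "github:hercules-ci/flake-parts";
    process-compose-flake.url = "github:Platonic-Systems/process-compose-flake";
    services-flake.url = "github:juspay/services-flake";
  };
  outputs = inputs:
    inputs.flake-parts.lib.mkFlake { inherit inputs; } {
      systems = [ "x86_64-linux" "aarch64-darwin" ];
      imports = [ inputs.process-compose-flake.flakeModule ];
      perSystem = { ... }: {
        # `nix run .#default` starts process-compose with the ollama-server process
        process-compose."default" = {
          imports = [ inputs.services-flake.processComposeModules.default ];
          services.ollama."ollama1".enable = true;
        };
      };
    };
}
```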
## Acceleration

By default, Ollama uses the CPU for inference. To enable GPU acceleration:

### CUDA

```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    acceleration = "cuda";
  };
}
```

### ROCm

```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    acceleration = "rocm";
  };
}
```
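The module can also pull models automatically once the server is up (see the `models` option in `nix/ollama.nix` below). A sketch, where the model names are illustrative and come from https://ollama.com/library:

```nix
# In `perSystem.process-compose.<name>`
{
  services.ollama."ollama1" = {
    enable = true;
    # Pulled by the `ollama1-models` process once the server passes its readiness probe
    models = [ "llama3" "mistral" ];
  };
}
```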
@@ -9,6 +9,7 @@ in
      ./elasticsearch.nix
      ./mysql
      ./nginx
      ./ollama.nix
      ./postgres
      ./redis-cluster.nix
      ./redis.nix
nix/ollama.nix (new file)
@@ -0,0 +1,150 @@
# Based on https://github.com/shivaraj-bh/ollama-flake/blob/main/services/ollama.nix
{ pkgs, lib, name, config, ... }:
let
  inherit (lib) types;
  ollamaPackage = pkgs.ollama.override {
    inherit (config) acceleration;
    linuxPackages = config.boot.kernelPackages // {
      nvidia_x11 = config.hardware.nvidia.package;
    };
  };
in
{
  options = {
    enable = lib.mkEnableOption "Enable the Ollama service";
    package = lib.mkOption {
      type = types.package;
      default = ollamaPackage;
      description = "The Ollama package to use";
    };
    port = lib.mkOption {
      type = types.port;
      default = 11434;
      description = "The port on which the Ollama service's REST API will listen";
    };
    host = lib.mkOption {
      type = types.str;
      default = "127.0.0.1";
      example = "0.0.0.0";
      description = "The host on which the Ollama service's REST API will listen";
    };
    dataDir = lib.mkOption {
      type = types.str;
      default = "./data/ollama";
      description = ''
        The directory containing the Ollama models.
        Sets the `OLLAMA_MODELS` environment variable.
      '';
    };
    keepAlive = lib.mkOption {
      type = types.str;
      default = "5m";
      description = ''
        The duration that models stay loaded in memory.
        Sets the `OLLAMA_KEEP_ALIVE` environment variable.

        Note: use a duration string like "5m" for 5 minutes, or "70" for 70 seconds.
      '';
      example = "70";
    };
    models = lib.mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = ''
        The models to pull after the server starts.
        Search for models of your choice at: https://ollama.com/library
      '';
    };
    acceleration = lib.mkOption {
      type = types.nullOr (types.enum [ false "rocm" "cuda" ]);
      default = null;
      example = "rocm";
      description = ''
        What interface to use for hardware acceleration.

        - `null`: default behavior
          - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
          - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
          - otherwise defaults to `false`
        - `false`: disable GPU, only use CPU
        - `"rocm"`: supported by most modern AMD GPUs
        - `"cuda"`: supported by most modern NVIDIA GPUs
      '';
    };
    environment = lib.mkOption {
      type = types.attrsOf types.str;
      default = { };
      example = {
        OLLAMA_DEBUG = "1";
      };
      description = ''
        Extra environment variables passed to the `ollama-server` process.
      '';
    };

    outputs.settings = lib.mkOption {
      type = types.deferredModule;
      internal = true;
      readOnly = true;
      default = {
        processes = {
          "${name}" =
            let
              startScript = pkgs.writeShellApplication {
                name = "ollama-server";
                text = ''
                  if [ ! -d ${config.dataDir} ]; then
                    echo "Creating directory ${config.dataDir}"
                    mkdir -p ${config.dataDir}
                  fi

                  ${lib.getExe config.package} serve
                '';
              };
            in
            {
              command = startScript;

              environment = {
                OLLAMA_MODELS = config.dataDir;
                OLLAMA_HOST = "${config.host}:${toString config.port}";
                OLLAMA_KEEP_ALIVE = config.keepAlive;
              } // config.environment;

              readiness_probe = {
                http_get = {
                  host = config.host;
                  port = config.port;
                };
                initial_delay_seconds = 2;
                period_seconds = 10;
                timeout_seconds = 4;
                success_threshold = 1;
                failure_threshold = 5;
              };
              namespace = name;
              availability.restart = "on_failure";
            };

          "${name}-models" = {
            command = pkgs.writeShellApplication {
              name = "ollama-models";
              text = ''
                set -x
                OLLAMA_HOST=${config.host}:${toString config.port}
                export OLLAMA_HOST
                models="${lib.concatStringsSep " " config.models}"
                for model in $models
                do
                  ${lib.getExe config.package} pull "$model"
                done
              '';
            };
            namespace = name;
            depends_on."${name}".condition = "process_healthy";
          };
        };
      };
    };
  };
}
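Because `ollama-server` exposes an HTTP readiness probe, other process-compose processes can wait on it via the `process_healthy` condition, exactly as the test below does. A sketch with a hypothetical `my-client` process:

```nix
# In `perSystem.process-compose.<name>`; `my-client` is a hypothetical name
{ pkgs, ... }: {
  services.ollama."ollama1".enable = true;
  settings.processes.my-client = {
    command = pkgs.writeShellApplication {
      name = "my-client";
      runtimeInputs = [ pkgs.curl ];
      # Hits the REST API root once the server reports healthy
      text = "curl http://127.0.0.1:11434";
    };
    depends_on."ollama1".condition = "process_healthy";
  };
}
```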
nix/ollama_test.nix (new file)
@@ -0,0 +1,16 @@
{ pkgs, ... }: {
  services.ollama."ollama1".enable = true;

  # Cannot test auto-loading models yet, because that requires an internet connection.
  settings.processes.test = {
    command = pkgs.writeShellApplication {
      runtimeInputs = [ pkgs.curl ];
      text = ''
        curl http://127.0.0.1:11434
      '';
      name = "ollama-test";
    };
    depends_on."ollama1".condition = "process_healthy";
  };
}
@@ -38,6 +38,7 @@
        "${inputs.services-flake}/nix/elasticsearch_test.nix"
        "${inputs.services-flake}/nix/mysql/mysql_test.nix"
        "${inputs.services-flake}/nix/nginx/nginx_test.nix"
        "${inputs.services-flake}/nix/ollama_test.nix"
        "${inputs.services-flake}/nix/postgres/postgres_test.nix"
        "${inputs.services-flake}/nix/redis_test.nix"
        "${inputs.services-flake}/nix/redis-cluster_test.nix"