clickhouse init script (#91)

* clickhouse init script

* clickhouse schema test

* clickhouse folder

* docs: add initialDatabases to tips & tricks

* docs: use heading anchor

* add TODO to find an alternative to start clickhouse-server during init

---------

Co-authored-by: shivaraj-bh <sbh69840@gmail.com>
This commit is contained in:
roman-bodavskiy 2024-02-07 19:33:06 +03:00 committed by GitHub
parent 9b806b53f3
commit a0bde519ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 216 additions and 92 deletions

View File

@ -29,3 +29,26 @@ Clickhouse has [HTTP Interface](https://clickhouse.com/docs/en/interfaces/http)
};
}
```
{#initial-database}
### Initial database schema
To load a database schema, you can use the `initialDatabases` option:
```nix
{
services.clickhouse."clickhouse-1" = {
enable = true;
initialDatabases = [
{
name = "sample_db";
schemas = [ ./test.sql ];
}
# or just create the database:
{
name = "sample_db_without_schema";
}
];
};
}
```

View File

@ -1,87 +0,0 @@
# Based on: https://github.com/cachix/devenv/blob/main/src/modules/services/clickhouse.nix
{ pkgs, lib, name, config, ... }:
let
inherit (lib) types;
in
{
options = {
enable = lib.mkEnableOption name;
package = lib.mkOption {
type = types.package;
description = "Which package of clickhouse to use";
default = pkgs.clickhouse;
defaultText = lib.literalExpression "pkgs.clickhouse";
};
port = lib.mkOption {
type = types.int;
description = "Which port to run clickhouse on. This port is for `clickhouse-client` program";
default = 9000;
};
dataDir = lib.mkOption {
type = types.str;
default = "./data/${name}";
description = "The clickhouse data directory";
};
extraConfig = lib.mkOption {
type = types.lines;
description = "Additional configuration to be appended to `clickhouse-config.yaml`.";
default = "";
};
outputs.settings = lib.mkOption {
type = types.deferredModule;
internal = true;
readOnly = true;
default = {
processes = {
"${name}" =
let
clickhouseConfig = pkgs.writeText "clickhouse-config.yaml" ''
logger:
level: warning
console: 1
tcp_port: ${toString config.port}
default_profile: default
default_database: default
path: ${config.dataDir}/clickhouse
tmp_path: ${config.dataDir}/clickhouse/tmp
user_files_path: ${config.dataDir}/clickhouse/user_files
format_schema_path: ${config.dataDir}/clickhouse/format_schemas
user_directories:
users_xml:
path: ${config.package}/etc/clickhouse-server/users.xml
${config.extraConfig}
'';
startScript = pkgs.writeShellApplication {
name = "start-clickhouse";
runtimeInputs = [ config.package ];
text = ''
clickhouse-server --config-file=${clickhouseConfig}
'';
};
in
{
command = "${lib.getExe startScript}";
readiness_probe = {
exec.command = ''${config.package}/bin/clickhouse-client --query "SELECT 1" --port ${builtins.toString config.port}'';
initial_delay_seconds = 2;
period_seconds = 10;
timeout_seconds = 4;
success_threshold = 1;
failure_threshold = 5;
};
namespace = name;
# https://github.com/F1bonacc1/process-compose#-auto-restart-if-not-healthy
availability.restart = "on_failure";
};
};
};
};
};
}

View File

@ -1,14 +1,30 @@
{ pkgs, config, ... }: {
services.clickhouse."clickhouse" = {
services.clickhouse."clickhouse1" = {
enable = true;
port = 9000;
extraConfig = ''
http_port: 9050
'';
};
services.clickhouse."clickhouse2" = {
enable = true;
port = 9001;
extraConfig = ''
http_port: 9051
'';
initialDatabases = [
{
name = "sample_db";
schemas = [ ./test.sql ];
}
];
};
# Run the two init processes one after the other so that they do not both try to create the `data` directory at the same time
settings.processes."clickhouse2-init".depends_on."clickhouse1-init".condition = "process_completed_successfully";
settings.processes.test =
let
cfg = config.services.clickhouse."clickhouse";
cfg = config.services.clickhouse."clickhouse1";
in
{
command = pkgs.writeShellApplication {
@ -27,9 +43,12 @@
# Test clickhouse http port
curl http://localhost:9050 | grep Ok
# schemas test
clickhouse-client --host 127.0.0.1 --port 9001 --query "SELECT * FROM sample_db.ride WHERE short_id = 'test_ride';" | grep test_ride
'';
name = "clickhouse-test";
};
depends_on."clickhouse".condition = "process_healthy";
depends_on."clickhouse2".condition = "process_healthy";
};
}

166
nix/clickhouse/default.nix Normal file
View File

@ -0,0 +1,166 @@
# Based on: https://github.com/cachix/devenv/blob/main/src/modules/services/clickhouse.nix
{ pkgs, lib, name, config, ... }:
let
inherit (lib) types;
in
{
options = {
# Toggle for this named clickhouse instance.
enable = lib.mkEnableOption name;
# Package that provides the `clickhouse-server` and `clickhouse-client` binaries used below.
package = lib.mkOption {
  type = types.package;
  description = "Which package of clickhouse to use";
  default = pkgs.clickhouse;
  defaultText = lib.literalExpression "pkgs.clickhouse";
};
# Native TCP port: rendered as `tcp_port` in the generated config and passed to
# `clickhouse-client --port` by the init script and the readiness probe.
port = lib.mkOption {
  # `types.port` (16-bit unsigned) rejects negative or out-of-range values at
  # evaluation time instead of letting clickhouse fail at startup.
  type = types.port;
  description = "Which port to run clickhouse on. This port is for `clickhouse-client` program";
  default = 9000;
};
# Root of the instance's on-disk state; the init process treats an existing
# directory as "already initialised".
dataDir = lib.mkOption {
  type = types.str;
  default = "./data/${name}";
  description = "The clickhouse data directory";
};
# Raw YAML appended at the end (top level) of the generated config file.
extraConfig = lib.mkOption {
  type = types.lines;
  description = "Additional configuration to be appended to `clickhouse-config.yaml`.";
  default = "";
};
# Databases (and optional schema files) that the `<name>-init` process creates
# when the data directory does not exist yet.
initialDatabases = lib.mkOption {
  type = types.listOf (types.submodule {
    options = {
      name = lib.mkOption {
        type = types.str;
        description = ''
          The name of the database to create.
        '';
      };
      schemas = lib.mkOption {
        type = types.nullOr (types.listOf types.path);
        default = null;
        description = ''
          The initial list of schema files for the database, loaded in the given order;
          if null (the default), an empty database is created.
        '';
      };
    };
  });
  default = [ ];
  description = ''
    List of database names and their initial schemas that should be used to create databases on the first startup
    of Clickhouse. The `schemas` attribute is optional: If not specified, an empty database is created.
  '';
  # Every entry of `schemas` is read as a single SQL file, so the example lists files only.
  example = lib.literalExpression ''
    [
      {
        name = "foodatabase";
        schemas = [ ./foo.sql ./bar.sql ];
      }
      { name = "bardatabase"; }
    ]
  '';
};
outputs.settings = lib.mkOption {
type = types.deferredModule;
internal = true;
readOnly = true;
default = {
processes =
let
# Server configuration written to the Nix store and shared by the init process
# and the long-running server process. `logger` and `user_directories` are
# nested mappings, so their children must be indented under them; everything
# else (including `extraConfig`) lives at the top level of the YAML document.
clickhouseConfig = pkgs.writeText "clickhouse-config.yaml" ''
  logger:
    level: warning
    console: 1
  tcp_port: ${toString config.port}
  default_profile: default
  default_database: default
  path: ${config.dataDir}/clickhouse
  tmp_path: ${config.dataDir}/clickhouse/tmp
  user_files_path: ${config.dataDir}/clickhouse/user_files
  format_schema_path: ${config.dataDir}/clickhouse/format_schemas
  user_directories:
    users_xml:
      path: ${config.package}/etc/clickhouse-server/users.xml
  ${config.extraConfig}
'';
in
{
# DB initialization: a one-shot process that, when the data directory does not
# exist yet, starts a temporary server, creates every entry of
# `initialDatabases`, loads its schema files and stops the server again.
"${name}-init" =
  let
    port = builtins.toString config.port;
    # Newlines are squashed into spaces before the file is piped to the client:
    # https://github.com/ClickHouse/ClickHouse/issues/4491
    setupInitialSchema = schema: '' < ${schema} tr -s '\r\n' ' ' | clickhouse-client -mn --port ${port}; '';
    # Shell snippet that creates each database and then loads its schemas in list order.
    setupInitialDatabases =
      lib.concatMapStrings
        (database: ''
          echo "Creating database: ${database.name}"
          clickhouse-client --port ${port} --query "CREATE DATABASE IF NOT EXISTS ${database.name}"
          echo "Database successfully created: ${database.name}"
          ${lib.optionalString (database.schemas != null)
            (lib.concatMapStrings (schema: setupInitialSchema schema) database.schemas)}
        '')
        config.initialDatabases;
    setupScript = pkgs.writeShellApplication {
      name = "setup-clickhouse";
      runtimeInputs = with pkgs; [ config.package coreutils gnugrep gawk ];
      # The temporary server is polled until it answers a query (at most 30
      # attempts, one second apart) instead of sleeping for a fixed time, and
      # it is waited for after being stopped so that the main process never
      # races a server that is still shutting down.
      text = ''
        if test -d "${config.dataDir}"
        then echo "Clickhouse database directory ${config.dataDir} appears to contain a database; Skipping initialization"
        else
          echo "Clickhouse is setting up the initial database."
          set -m
          clickhouse-server --config-file=${clickhouseConfig} &
          server_pid=$!
          # Never leave the temporary server behind, even when a schema fails to load.
          trap 'kill "$server_pid" 2>/dev/null || true' EXIT
          attempts=0
          until clickhouse-client --port ${port} --query "SELECT 1" >/dev/null 2>&1
          do
            attempts=$((attempts + 1))
            if test "$attempts" -ge 30
            then
              echo "Clickhouse server did not become ready after 30 attempts." >&2
              exit 1
            fi
            sleep 1
          done
          echo "Clickhouse server started."
          ${setupInitialDatabases}
          echo "Clickhouse db setting is done."
          kill "$server_pid"
          # The exit status of a signalled server must not fail the init step.
          wait "$server_pid" || true
          echo "Clickhouse server stopped."
        fi
      '';
    };
  in
  {
    command = setupScript;
    namespace = name;
  };
# DB process: the long-running server. It is started only after the
# `<name>-init` process has completed successfully and is reported healthy once
# `clickhouse-client` can run a trivial query against the configured port.
"${name}" = {
  namespace = name;
  depends_on."${name}-init".condition = "process_completed_successfully";
  # Wrapper script that puts the configured package on PATH and launches the
  # server with the generated configuration file.
  command = lib.getExe (pkgs.writeShellApplication {
    name = "start-clickhouse";
    runtimeInputs = [ config.package ];
    text = ''
      clickhouse-server --config-file=${clickhouseConfig}
    '';
  });
  # Restart policy, see:
  # https://github.com/F1bonacc1/process-compose#-auto-restart-if-not-healthy
  availability.restart = "on_failure";
  readiness_probe = {
    exec.command = ''${config.package}/bin/clickhouse-client --query "SELECT 1" --port ${toString config.port}'';
    initial_delay_seconds = 2;
    period_seconds = 10;
    timeout_seconds = 4;
    success_threshold = 1;
    failure_threshold = 5;
  };
};
};
};
};
};
}

3
nix/clickhouse/test.sql Normal file
View File

@ -0,0 +1,3 @@
/* Fixture for the schema test: one table with a single known row.
   Keep comments in block form: the loader joins all lines of this file into
   one, so a line comment would swallow every statement after it. */
CREATE TABLE sample_db.ride
(
    `id` Int64,
    `short_id` String
)
ENGINE = MergeTree()
PRIMARY KEY (id);

INSERT INTO sample_db.ride VALUES (1, 'test_ride');

View File

@ -5,7 +5,7 @@ in
{
imports = builtins.map multiService [
./apache-kafka.nix
./clickhouse.nix
./clickhouse
./elasticsearch.nix
./mysql.nix
./nginx.nix

View File

@ -38,7 +38,7 @@
in
builtins.listToAttrs (builtins.map mkPackageFor [
../nix/apache-kafka_test.nix
../nix/clickhouse_test.nix
../nix/clickhouse/clickhouse_test.nix
../nix/elasticsearch_test.nix
../nix/mysql_test.nix
../nix/nginx_test.nix