From 5a93846946dddcc99eac92a6278bb202943d4a9f Mon Sep 17 00:00:00 2001 From: Kartik Gokte Date: Mon, 26 Sep 2022 00:16:03 +0530 Subject: [PATCH] nixos/kthxbye: init --- .../from_md/release-notes/rl-2211.section.xml | 8 + .../manual/release-notes/rl-2211.section.md | 2 + nixos/modules/module-list.nix | 1 + nixos/modules/services/monitoring/kthxbye.nix | 166 ++++++++++++++++++ nixos/tests/all-tests.nix | 1 + nixos/tests/kthxbye.nix | 110 ++++++++++++ 6 files changed, 288 insertions(+) create mode 100644 nixos/modules/services/monitoring/kthxbye.nix create mode 100644 nixos/tests/kthxbye.nix diff --git a/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml index 590141c935b4..ac5be1161db1 100644 --- a/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml +++ b/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml @@ -203,6 +203,14 @@ services.infnoise. + + + kthxbye, + an alert acknowledgement management daemon for Prometheus + Alertmanager. Available as + services.kthxbye. + + kanata, diff --git a/nixos/doc/manual/release-notes/rl-2211.section.md b/nixos/doc/manual/release-notes/rl-2211.section.md index 3992dec20476..3c4e2ac31e00 100644 --- a/nixos/doc/manual/release-notes/rl-2211.section.md +++ b/nixos/doc/manual/release-notes/rl-2211.section.md @@ -75,6 +75,8 @@ In addition to numerous new and upgraded packages, this release has the followin - [infnoise](https://github.com/leetronics/infnoise), a hardware True Random Number Generator dongle. Available as [services.infnoise](options.html#opt-services.infnoise.enable). +- [kthxbye](https://github.com/prymitive/kthxbye), an alert acknowledgement management daemon for Prometheus Alertmanager. Available as [services.kthxbye](options.html#opt-services.kthxbye.enable). + - [kanata](https://github.com/jtroo/kanata), a tool to improve keyboard comfort and usability with advanced customization. 
Available as [services.kanata](options.html#opt-services.kanata.enable).
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index e6f077dd5d08..50092357ccca 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -681,6 +681,7 @@
   ./services/monitoring/heapster.nix
   ./services/monitoring/incron.nix
   ./services/monitoring/kapacitor.nix
+  ./services/monitoring/kthxbye.nix
   ./services/monitoring/loki.nix
   ./services/monitoring/longview.nix
   ./services/monitoring/mackerel-agent.nix
diff --git a/nixos/modules/services/monitoring/kthxbye.nix b/nixos/modules/services/monitoring/kthxbye.nix
new file mode 100644
index 000000000000..3f988dcb722f
--- /dev/null
+++ b/nixos/modules/services/monitoring/kthxbye.nix
@@ -0,0 +1,166 @@
+{ config, pkgs, lib, ... }:
+with lib;
+# NixOS module for kthxbye, a daemon that extends Alertmanager silences whose comment starts with a configured prefix.
+let
+  cfg = config.services.kthxbye;
+in
+
+{
+  options.services.kthxbye = {
+    enable = mkEnableOption (mdDoc "kthxbye alert acknowledgement management daemon");
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.kthxbye;
+      defaultText = literalExpression "pkgs.kthxbye";
+      description = mdDoc ''
+        The kthxbye package that should be used.
+      '';
+    };
+
+    openFirewall = mkOption {
+      type = types.bool;
+      default = false;
+      description = mdDoc ''
+        Whether to open ports in the firewall needed for the daemon to function.
+      '';
+    };
+
+    extraOptions = mkOption {
+      type = with types; listOf str;
+      default = [];
+      description = mdDoc ''
+        Extra command line options.
+
+        Documentation can be found [here](https://github.com/prymitive/kthxbye/blob/main/README.md).
+      '';
+      example = literalExpression ''
+        [
+          "-extend-with-prefix 'ACK!'"
+        ];
+      '';
+    };
+
+    alertmanager = {
+      timeout = mkOption {
+        type = types.str;
+        default = "1m0s";
+        description = mdDoc ''
+          Alertmanager request timeout duration in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+        '';
+        example = "30s";
+      };
+      uri = mkOption {
+        type = types.str;
+        default = "http://localhost:9093";
+        description = mdDoc ''
+          Alertmanager URI to use.
+        '';
+        example = "https://alertmanager.example.com";
+      };
+    };
+
+    extendBy = mkOption {
+      type = types.str;
+      default = "15m0s";
+      description = mdDoc ''
+        Extend silences by adding DURATION seconds.
+
+        DURATION should be provided in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+      '';
+      example = "6h0m0s";
+    };
+
+    extendIfExpiringIn = mkOption {
+      type = types.str;
+      default = "5m0s";
+      description = mdDoc ''
+        Extend silences that are about to expire in the next DURATION seconds.
+
+        DURATION should be provided in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+      '';
+      example = "1m0s";
+    };
+
+    extendWithPrefix = mkOption {
+      type = types.str;
+      default = "ACK!";
+      description = mdDoc ''
+        Extend silences with comment starting with PREFIX string.
+      '';
+      example = "!perma-silence";
+    };
+
+    interval = mkOption {
+      type = types.str;
+      default = "45s";
+      description = mdDoc ''
+        Silence check interval duration in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+      '';
+      example = "30s";
+    };
+
+    listenAddress = mkOption {
+      type = types.str;
+      default = "0.0.0.0";
+      description = mdDoc ''
+        The address to listen on for HTTP requests.
+      '';
+      example = "127.0.0.1";
+    };
+
+    port = mkOption {
+      type = types.port;
+      default = 8080;
+      description = mdDoc ''
+        The port to listen on for HTTP requests.
+      '';
+    };
+
+    logJSON = mkOption {
+      type = types.bool;
+      default = false;
+      description = mdDoc ''
+        Format logged messages as JSON.
+      '';
+    };
+
+    maxDuration = mkOption {
+      type = with types; nullOr str;
+      default = null;
+      description = mdDoc ''
+        Maximum duration of a silence, it won't be extended anymore after reaching it.
+
+        Duration should be provided in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+      '';
+      example = "720h"; # time.ParseDuration has no day unit; the largest accepted unit is "h"
+    };
+  };
+
+  config = mkIf cfg.enable {
+    systemd.services.kthxbye = {
+      description = "kthxbye Alertmanager ack management daemon";
+      wantedBy = [ "multi-user.target" ];
+      script = ''
+        ${cfg.package}/bin/kthxbye \
+          -alertmanager.timeout ${escapeShellArg cfg.alertmanager.timeout} \
+          -alertmanager.uri ${escapeShellArg cfg.alertmanager.uri} \
+          -extend-by ${escapeShellArg cfg.extendBy} \
+          -extend-if-expiring-in ${escapeShellArg cfg.extendIfExpiringIn} \
+          -extend-with-prefix ${escapeShellArg cfg.extendWithPrefix} \
+          -interval ${escapeShellArg cfg.interval} \
+          -listen ${escapeShellArg "${cfg.listenAddress}:${toString cfg.port}"} \
+          ${optionalString cfg.logJSON "-log-json"} \
+          ${optionalString (cfg.maxDuration != null) "-max-duration ${escapeShellArg cfg.maxDuration}"} \
+          ${concatStringsSep " " cfg.extraOptions}
+      ''; # option values are shell-escaped; extraOptions entries are spliced in verbatim and may carry their own quoting
+      serviceConfig = {
+        Type = "simple";
+        DynamicUser = true;
+        Restart = "on-failure";
+      };
+    };
+
+    networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [ cfg.port ];
+  };
+}
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 2464ec4d404b..b5a6dada29c2 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -277,6 +277,7 @@ in {
   komga = handleTest ./komga.nix {};
   krb5 = discoverTests (import ./krb5 {});
   ksm = handleTest ./ksm.nix {};
+  kthxbye = handleTest ./kthxbye.nix {};
   kubernetes = handleTestOn ["x86_64-linux"] ./kubernetes {};
   languagetool = handleTest ./languagetool.nix {};
   latestKernel.login = handleTest ./login.nix { latestKernel = true; };
diff --git a/nixos/tests/kthxbye.nix b/nixos/tests/kthxbye.nix
new file mode 100644
index 000000000000..5ca0917ec8e7
--- /dev/null
+++ b/nixos/tests/kthxbye.nix
@@ -0,0 +1,110 @@
+import ./make-test-python.nix ({ lib, pkgs, ... }:
+{
+  name = "kthxbye";
+
+  meta = with lib.maintainers; {
+    maintainers = [ nukaduka ];
+  };
+  # A single VM runs Prometheus, Alertmanager and kthxbye, all configured with 5s intervals.
+  nodes.server = { ... }: {
+    environment.systemPackages = with pkgs; [ prometheus-alertmanager ];
+    services.prometheus = {
+      enable = true;
+
+      globalConfig = {
+        scrape_interval = "5s";
+        scrape_timeout = "5s";
+        evaluation_interval = "5s";
+      };
+
+      scrapeConfigs = [
+        {
+          job_name = "prometheus";
+          scrape_interval = "5s";
+          static_configs = [
+            {
+              targets = [ "localhost:9090" ];
+            }
+          ];
+        }
+      ];
+
+      rules = [
+        ''
+          groups:
+            - name: test
+              rules:
+                - alert: node_up
+                  expr: up != 0
+                  for: 5s
+                  labels:
+                    severity: bottom of the barrel
+                  annotations:
+                    summary: node is fine
+        ''
+      ];
+
+      alertmanagers = [
+        {
+          static_configs = [
+            {
+              targets = [
+                "localhost:9093"
+              ];
+            }
+          ];
+        }
+      ];
+
+      alertmanager = {
+        enable = true;
+        openFirewall = true;
+        configuration.route = {
+          receiver = "test";
+          group_wait = "5s";
+          group_interval = "5s";
+          group_by = [ "..." ];
+        };
+        configuration.receivers = [
+          {
+            name = "test";
+            webhook_configs = [
+              {
+                url = "http://localhost:1234";
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    services.kthxbye = {
+      enable = true;
+      openFirewall = true;
+      extendIfExpiringIn = "30s";
+      logJSON = true;
+      maxDuration = "15m";
+      interval = "5s";
+    };
+  };
+
+  testScript = ''
+    with subtest("start the server"):
+        start_all()
+        server.wait_for_unit("prometheus.service")
+        server.wait_for_unit("alertmanager.service")
+        server.wait_for_unit("kthxbye.service")
+
+        server.sleep(2) # wait for units to settle
+        server.systemctl("restart kthxbye.service") # make sure kthxbye comes up after alertmanager
+        server.sleep(2)
+
+    with subtest("set up test silence which expires in 20s"):
+        server.succeed('amtool --alertmanager.url "http://localhost:9093" silence add alertname="node_up" -a "nixosTest" -d "20s" -c "ACK! this server is fine!!"')
+
+    with subtest("wait for 21 seconds and check if the silence is still active"):
+        server.sleep(21)
+        server.systemctl("status kthxbye.service")
+        server.succeed("amtool --alertmanager.url 'http://localhost:9093' silence | grep 'ACK'")
+  '';
+})