diff --git a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml index 4f4a5a3394e6..2f89e7144da5 100644 --- a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml +++ b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml @@ -280,6 +280,12 @@ with many features. + + + pacemaker + cluster resource manager + +
diff --git a/nixos/doc/manual/release-notes/rl-2205.section.md b/nixos/doc/manual/release-notes/rl-2205.section.md index c4281561f165..68bccce6738c 100644 --- a/nixos/doc/manual/release-notes/rl-2205.section.md +++ b/nixos/doc/manual/release-notes/rl-2205.section.md @@ -81,6 +81,8 @@ In addition to numerous new and upgraded packages, this release has the followin - [blocky](https://0xerr0r.github.io/blocky/), fast and lightweight DNS proxy as ad-blocker for local network with many features. +- [pacemaker](https://clusterlabs.org/pacemaker/) cluster resource manager + ## Backward Incompatibilities {#sec-release-22.05-incompatibilities} diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index 15b54cd9fe1d..4f077a540dde 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -383,6 +383,7 @@ in os-prober = handleTestOn ["x86_64-linux"] ./os-prober.nix {}; osrm-backend = handleTest ./osrm-backend.nix {}; overlayfs = handleTest ./overlayfs.nix {}; + pacemaker = handleTest ./pacemaker.nix {}; packagekit = handleTest ./packagekit.nix {}; pam-file-contents = handleTest ./pam/pam-file-contents.nix {}; pam-oath-login = handleTest ./pam/pam-oath-login.nix {}; diff --git a/nixos/tests/pacemaker.nix b/nixos/tests/pacemaker.nix new file mode 100644 index 000000000000..684557614953 --- /dev/null +++ b/nixos/tests/pacemaker.nix @@ -0,0 +1,113 @@
+import ./make-test-python.nix ({ pkgs, lib, ... }: rec {
+  name = "pacemaker";
+  meta = with lib.maintainers; {
+    maintainers = [ astro ];
+  };
+
+  # Three identical cluster members; `node i` differs only in the last
+  # octet of its eth1 address.
+  nodes =
+    let
+      node = i: {
+        networking.interfaces.eth1.ipv4.addresses = [ {
+          address = "192.168.0.${toString i}";
+          prefixLength = 24;
+        } ];
+
+        services.corosync = {
+          enable = true;
+          clusterName = "zentralwerk-network";
+          # `nodes` is visible here through the enclosing `rec`; imap1
+          # numbers the members from 1, which becomes the corosync nodeid.
+          nodelist = lib.imap1 (i: name: {
+            nodeid = i;
+            inherit name;
+            ring_addrs = [
+              (builtins.head nodes.${name}.networking.interfaces.eth1.ipv4.addresses).address
+            ];
+          }) (builtins.attrNames nodes);
+        };
+        environment.etc."corosync/authkey" = {
+          source = builtins.toFile "authkey"
+            # minimum length: 128 bytes
+            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
+          mode = "0400";
+        };
+
+        services.pacemaker.enable = true;
+
+        # used for pacemaker resource
+        systemd.services.ha-cat = {
+          description = "Highly available netcat";
+          serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
+        };
+      };
+    in {
+      node1 = node 1;
+      node2 = node 2;
+      node3 = node 3;
+    };
+
+  # sets up pacemaker with resources configuration, then crashes a
+  # node and waits for service restart on another node
+  testScript =
+    let
+      # CIB <resources> section: one primitive `cat` backed by the
+      # ha-cat systemd unit defined on every node above.
+      resources = builtins.toFile "cib-resources.xml" ''
+        <resources>
+          <primitive id="cat" class="systemd" type="ha-cat">
+            <operations>
+              <op id="stop-cat" name="stop" interval="0" timeout="1s"/>
+              <op id="start-cat" name="start" interval="0" timeout="1s"/>
+              <op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
+            </operations>
+          </primitive>
+        </resources>
+      '';
+    in ''
+      import re
+      import time
+
+
+      def locate_cat(query_node, exclude=None, attempts=300):
+          """Return the machine pacemaker reports `cat` running on.
+
+          Polls `crm_resource` on query_node about once per second, ignoring
+          a report that still names `exclude`; raises after `attempts` polls.
+          """
+          for _ in range(attempts):
+              output = query_node.succeed("crm_resource -r cat --locate")
+              match = re.search("is running on: (.+)", output)
+              if match and match.group(1) != exclude:
+                  for machine in machines:
+                      if machine.name == match.group(1):
+                          return machine
+              time.sleep(1)
+          raise Exception(f"resource cat not located after {attempts} polls")
+
+
+      start_all()
+
+      ${lib.concatMapStrings (node: ''
+        ${node}.wait_until_succeeds("corosync-quorumtool")
+        ${node}.wait_for_unit("pacemaker.service")
+      '') (builtins.attrNames nodes)}
+
+      # No STONITH device
+      node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
+      # Configure the cat resource
+      node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")
+
+      # wait until the service is started
+      current_node = locate_cat(node1)
+      current_node.log("Service running here!")
+      current_node.crash()
+
+      # pick another node that's still up
+      check_node = next(machine for machine in machines if machine.booted)
+      # output will remain the old current_node until the crash is detected by pacemaker
+      next_node = locate_cat(check_node, exclude=current_node.name)
+      next_node.log("Service migrated here!")
+    '';
+})
diff --git a/pkgs/misc/logging/pacemaker/default.nix b/pkgs/misc/logging/pacemaker/default.nix index 7277ae091e78..07194380d4a7 100644 --- a/pkgs/misc/logging/pacemaker/default.nix +++ b/pkgs/misc/logging/pacemaker/default.nix @@ -17,6 +17,7 @@ , pam , pkg-config , python3 +, nixosTests # Pacemaker is compiled twice, once with forOCF = true to extract its # OCF definitions for use in the ocf-resource-agents derivation, then @@ -87,11 +88,15 @@ stdenv.mkDerivation rec { rm -r $out/nix ''; + passthru.tests = { + inherit (nixosTests) pacemaker; + }; + meta = with lib; { homepage = "https://clusterlabs.org/pacemaker/"; description = "Pacemaker is an open source, high availability resource manager suitable for both small and large clusters."; license = licenses.gpl2Plus; platforms = platforms.linux; - maintainers = with maintainers; [ ryantm ]; + maintainers = with maintainers; [ ryantm astro ]; }; } diff --git a/pkgs/servers/corosync/default.nix b/pkgs/servers/corosync/default.nix index 257837f14146..4df8a547dff5 100644 --- a/pkgs/servers/corosync/default.nix +++ b/pkgs/servers/corosync/default.nix @@ -65,6 +65,10 @@ stdenv.mkDerivation rec { --prefix PATH ":" "$out/sbin:${libqb}/sbin" ''; + passthru.tests = { + inherit (nixosTests) pacemaker; + }; + meta = { homepage = "http://corosync.org/";
description = "A Group Communication System with features for implementing high availability within applications";