From b52372675d75eac74312bb3adfb8fe6d1e4c702b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janne=20He=C3=9F?= Date: Thu, 20 Jan 2022 15:32:32 +0100 Subject: [PATCH] nixos/switch-to-configuration: Clean up lower part of the script - Fully get rid of `parseKeyValues` and use systemctl features for that - Add some regex modifiers recommended by perlcritic - Get rid of a postfix if - Sort units when showing their status - Clean the logic for showing what failed from `elif` to `next` - Switch from `state` to `substate` for `auto-restart` because that's actually where the value is stored - Show status of units with one single systemctl call and get rid of COLUMNS in favor of --full - Add a test for failing units --- .../activation/switch-to-configuration.pl | 39 ++++----- nixos/tests/switch-test.nix | 79 ++++++++++++++++++- 2 files changed, 94 insertions(+), 24 deletions(-) diff --git a/nixos/modules/system/activation/switch-to-configuration.pl b/nixos/modules/system/activation/switch-to-configuration.pl index c51ae5d54fc2..1fe346114e43 100644 --- a/nixos/modules/system/activation/switch-to-configuration.pl +++ b/nixos/modules/system/activation/switch-to-configuration.pl @@ -171,14 +171,6 @@ sub parseUnit { return %unitData; } -sub parseKeyValues { - my $info = shift; - foreach my $line (@_) { - $line =~ /^([^=]+)=(.*)$/ or next; - $info->{$1} = $2; - } -} - # Checks whether a specified boolean in a systemd unit is true # or false, with a default that is applied when the value is not set. sub parseSystemdBool { @@ -606,33 +598,36 @@ my $activeNew = getActiveUnits; while (my ($unit, $state) = each %{$activeNew}) { if ($state->{state} eq "failed") { push @failed, $unit; + next; } - elsif ($state->{state} eq "auto-restart") { - # A unit in auto-restart state is a failure *if* it previously failed to start - my $lines = `@systemd@/bin/systemctl show '$unit'`; - my $info = {}; - parseKeyValues($info, split("\n", $lines)); - if ($info->{ExecMainStatus} ne '0') { + if ($state->{substate} eq "auto-restart") { + # A unit in auto-restart substate is a failure *if* it previously failed to start + my $main_status = `@systemd@/bin/systemctl show --value --property=ExecMainStatus '$unit'`; + chomp($main_status); + + if ($main_status ne "0") { push @failed, $unit; + next; } } + # Ignore scopes since they are not managed by this script but rather # created and managed by third-party services via the systemd dbus API. - elsif ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/) { + # This only lists units that are not failed (including ones that are in auto-restart but have not failed previously) + if ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/msx) { push @new, $unit; } } -print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n" - if scalar @new > 0; +if (scalar @new > 0) { + print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n" +} if (scalar @failed > 0) { - print STDERR "warning: the following units failed: ", join(", ", sort(@failed)), "\n"; - foreach my $unit (@failed) { - print STDERR "\n"; - system("COLUMNS=1000 @systemd@/bin/systemctl status --no-pager '$unit' >&2"); - } + my @failed_sorted = sort @failed; + print STDERR "warning: the following units failed: ", join(", ", @failed_sorted), "\n\n"; + system "@systemd@/bin/systemctl status --no-pager --full '" . join("' '", @failed_sorted) . "' >&2"; $res = 4; } diff --git a/nixos/tests/switch-test.nix b/nixos/tests/switch-test.nix index 1c32bf6beb95..8e425f0f8779 100644 --- a/nixos/tests/switch-test.nix +++ b/nixos/tests/switch-test.nix @@ -45,6 +45,31 @@ import ./make-test-python.nix ({ pkgs, ...} : { systemd.services.test.restartIfChanged = false; }; + simpleServiceFailing.configuration = { + imports = [ simpleServiceModified.configuration ]; + systemd.services.test.serviceConfig.ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false"; + }; + + autorestartService.configuration = { + # A service that immediately goes into restarting (but without failing) + systemd.services.autorestart = { + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + Type = "simple"; + Restart = "always"; + RestartSec = "20y"; # Should be long enough + ExecStart = "${pkgs.coreutils}/bin/true"; + }; + }; + }; + + autorestartServiceFailing.configuration = { + imports = [ autorestartService.configuration ]; + systemd.services.autorestart.serviceConfig = { + ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false"; + }; + }; + restart-and-reload-by-activation-script.configuration = { systemd.services = rec { simple-service = { @@ -189,12 +214,13 @@ import ./make-test-python.nix ({ pkgs, ...} : { exec env -i "$@" | tee /dev/stderr ''; in /* python */ '' - def switch_to_specialisation(system, name, action="test"): + def switch_to_specialisation(system, name, action="test", fail=False): if name == "": stc = f"{system}/bin/switch-to-configuration" else: stc = f"{system}/specialisation/{name}/bin/switch-to-configuration" - out = machine.succeed(f"{stc} {action} 2>&1") + out = machine.fail(f"{stc} {action} 2>&1") if fail \ + else machine.succeed(f"{stc} {action} 2>&1") assert_lacks(out, "switch-to-configuration line") # Perl warnings return out @@ -305,7 +331,56 @@ import ./make-test-python.nix ({ pkgs, ...} : { assert_lacks(out, "as well:") assert_contains(out, "would start the following units: test.service\n") + with subtest("failing units"): + # Let the simple service fail + switch_to_specialisation("${machine}", "simpleServiceModified") + out = switch_to_specialisation("${machine}", "simpleServiceFailing", fail=True) + assert_contains(out, "stopping the following units: test.service\n") + assert_lacks(out, "NOT restarting the following changed units:") + assert_lacks(out, "reloading the following units:") + assert_lacks(out, "\nrestarting the following units:") + assert_contains(out, "\nstarting the following units: test.service\n") + assert_lacks(out, "the following new units were started:") + assert_contains(out, "warning: the following units failed: test.service\n") + assert_contains(out, "Main PID:") # output of systemctl + assert_lacks(out, "as well:") + + # A unit that gets into autorestart without failing is not treated as failed + out = switch_to_specialisation("${machine}", "autorestartService") + assert_lacks(out, "stopping the following units:") + assert_lacks(out, "NOT restarting the following changed units:") + assert_lacks(out, "reloading the following units:") + assert_lacks(out, "\nrestarting the following units:") + assert_lacks(out, "\nstarting the following units:") + assert_contains(out, "the following new units were started: autorestart.service\n") + assert_lacks(out, "as well:") + machine.systemctl('stop autorestart.service') # cancel the 20y timer + + # Switching to the same system should do nothing (especially not treat the unit as failed) + out = switch_to_specialisation("${machine}", "autorestartService") + assert_lacks(out, "stopping the following units:") + assert_lacks(out, "NOT restarting the following changed units:") + assert_lacks(out, "reloading the following units:") + assert_lacks(out, "\nrestarting the following units:") + assert_lacks(out, "\nstarting the following units:") + assert_contains(out, "the following new units were started: autorestart.service\n") + assert_lacks(out, "as well:") + machine.systemctl('stop autorestart.service') # cancel the 20y timer + + # If systemd thinks the unit has failed and is in autorestart, we should show it as failed + out = switch_to_specialisation("${machine}", "autorestartServiceFailing", fail=True) + assert_lacks(out, "stopping the following units:") + assert_lacks(out, "NOT restarting the following changed units:") + assert_lacks(out, "reloading the following units:") + assert_lacks(out, "\nrestarting the following units:") + assert_lacks(out, "\nstarting the following units:") + assert_lacks(out, "the following new units were started:") + assert_contains(out, "warning: the following units failed: autorestart.service\n") + assert_contains(out, "Main PID:") # output of systemctl + assert_lacks(out, "as well:") + with subtest("restart and reload by activation script"): + switch_to_specialisation("${machine}", "simpleServiceNorestart") out = switch_to_specialisation("${machine}", "restart-and-reload-by-activation-script") assert_contains(out, "stopping the following units: test.service\n") assert_lacks(out, "NOT restarting the following changed units:")