Merge pull request #154809 from helsinki-systems/feat/stc-proper-unit-file-parser

nixos/switch-to-configuration: Proper unit file parser and clean/fix lower part of the script
This commit is contained in:
Bernardo Meurer 2022-01-27 09:35:34 -08:00 committed by GitHub
commit 5f9b470ff0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 220 additions and 50 deletions

View File

@ -442,13 +442,49 @@
</listitem>
<listitem>
<para>
The interface that allows activation scripts to restart units
has been reworked. Restarting and reloading is now done by a
single file
<literal>/run/nixos/activation-restart-list</literal> that
honors <literal>restartIfChanged</literal> and
<literal>reloadIfChanged</literal> of the units.
<literal>switch-to-configuration</literal> (the script that is
run when running <literal>nixos-rebuild switch</literal> for
example) has been reworked
</para>
<itemizedlist spacing="compact">
<listitem>
<para>
The interface that allows activation scripts to restart
units has been streamlined. Restarting and reloading is
now done by a single file
<literal>/run/nixos/activation-restart-list</literal> that
honors <literal>restartIfChanged</literal> and
<literal>reloadIfChanged</literal> of the units.
</para>
</listitem>
<listitem>
<para>
The script now uses a proper ini-file parser to parse
systemd units. Some values are now only searched in one
section instead of in the entire unit. This is only
relevant for units that dont use the NixOS systemd moule.
</para>
<itemizedlist spacing="compact">
<listitem>
<para>
<literal>RefuseManualStop</literal>,
<literal>X-OnlyManualStart</literal>,
<literal>X-StopOnRemoval</literal>,
<literal>X-StopOnReconfiguration</literal> are only
searched in the <literal>[Unit]</literal> section
</para>
</listitem>
<listitem>
<para>
<literal>X-ReloadIfChanged</literal>,
<literal>X-RestartIfChanged</literal>,
<literal>X-StopIfChanged</literal> are only searched
in the <literal>[Service]</literal> section
</para>
</listitem>
</itemizedlist>
</listitem>
</itemizedlist>
</listitem>
<listitem>
<para>

View File

@ -141,7 +141,11 @@ In addition to numerous new and upgraded packages, this release has the followin
`pkgs.noto-fonts-cjk` is currently an alias of `pkgs.noto-fonts-cjk-sans` and
doesn't include serif fonts.
- The interface that allows activation scripts to restart units has been reworked. Restarting and reloading is now done by a single file `/run/nixos/activation-restart-list` that honors `restartIfChanged` and `reloadIfChanged` of the units.
- `switch-to-configuration` (the script that is run when running `nixos-rebuild switch` for example) has been reworked
* The interface that allows activation scripts to restart units has been streamlined. Restarting and reloading is now done by a single file `/run/nixos/activation-restart-list` that honors `restartIfChanged` and `reloadIfChanged` of the units.
* The script now uses a proper ini-file parser to parse systemd units. Some values are now only searched in one section instead of in the entire unit. This is only relevant for units that don't use the NixOS systemd moule.
* `RefuseManualStop`, `X-OnlyManualStart`, `X-StopOnRemoval`, `X-StopOnReconfiguration` are only searched in the `[Unit]` section
* `X-ReloadIfChanged`, `X-RestartIfChanged`, `X-StopIfChanged` are only searched in the `[Service]` section
- The `services.bookstack.cacheDir` option has been removed, since the
cache directory is now handled by systemd.

View File

@ -2,6 +2,7 @@
use strict;
use warnings;
use Config::IniFiles;
use File::Path qw(make_path);
use File::Basename;
use File::Slurp;
@ -113,26 +114,77 @@ sub parseFstab {
return ($fss, $swaps);
}
sub parseUnit {
my ($filename) = @_;
my $info = {};
parseKeyValues($info, read_file($filename)) if -f $filename;
parseKeyValues($info, read_file("${filename}.d/overrides.conf")) if -f "${filename}.d/overrides.conf";
return $info;
}
# This subroutine takes a single ini file that specified systemd configuration
# like unit configuration and parses it into a hash where the keys are the sections
# of the unit file and the values are hashes themselves. These hashes have the unit file
# keys as their keys (left side of =) and an array of all values that were set as their
# values. If a value is empty (for example `ExecStart=`), then all current definitions are
# removed.
#
# Instead of returning the hash, this subroutine takes a hashref to return the data in. This
# allows calling the subroutine multiple times with the same hash to parse override files.
sub parseSystemdIni {
my ($unitContents, $path) = @_;
# Tie the ini file to a hash for easier access
my %fileContents;
tie %fileContents, "Config::IniFiles", (-file => $path, -allowempty => 1, -allowcontinue => 1);
sub parseKeyValues {
my $info = shift;
foreach my $line (@_) {
# FIXME: not quite correct.
$line =~ /^([^=]+)=(.*)$/ or next;
$info->{$1} = $2;
# Copy over all sections
foreach my $sectionName (keys %fileContents) {
# Copy over all keys
foreach my $iniKey (keys %{$fileContents{$sectionName}}) {
# Ensure the value is an array so it's easier to work with
my $iniValue = $fileContents{$sectionName}{$iniKey};
my @iniValues;
if (ref($iniValue) eq "ARRAY") {
@iniValues = @{$iniValue};
} else {
@iniValues = $iniValue;
}
# Go over all values
for my $iniValue (@iniValues) {
# If a value is empty, it's an override that tells us to clean the value
if ($iniValue eq "") {
delete $unitContents->{$sectionName}->{$iniKey};
next;
}
push(@{$unitContents->{$sectionName}->{$iniKey}}, $iniValue);
}
}
}
return;
}
sub boolIsTrue {
my ($s) = @_;
return $s eq "yes" || $s eq "true";
# This subroutine takes the path to a systemd configuration file (like a unit configuration),
# parses it, and returns a hash that contains the contents. The contents of this hash are
# explained in the `parseSystemdIni` subroutine. Neither the sections nor the keys inside
# the sections are consistently sorted.
#
# If a directory with the same basename ending in .d exists next to the unit file, it will be
# assumed to contain override files which will be parsed as well and handled properly.
sub parseUnit {
my ($unitPath) = @_;
# Parse the main unit and all overrides
my %unitData;
parseSystemdIni(\%unitData, $_) for glob("${unitPath}{,.d/*.conf}");
return %unitData;
}
# Checks whether a specified boolean in a systemd unit is true
# or false, with a default that is applied when the value is not set.
sub parseSystemdBool {
my ($unitConfig, $sectionName, $boolName, $default) = @_;
my @values = @{$unitConfig->{$sectionName}{$boolName} // []};
# Return default if value is not set
if (scalar @values lt 1 || not defined $values[-1]) {
return $default;
}
# If value is defined multiple times, use the last definition
my $last = $values[-1];
# These are valid values as of systemd.syntax(7)
return $last eq "1" || $last eq "yes" || $last eq "true" || $last eq "on";
}
sub recordUnit {
@ -167,17 +219,17 @@ sub handleModifiedUnit {
# Revert of the attempt: https://github.com/NixOS/nixpkgs/pull/147609
# More details: https://github.com/NixOS/nixpkgs/issues/74899#issuecomment-981142430
} else {
my $unitInfo = parseUnit($newUnitFile);
if (boolIsTrue($unitInfo->{'X-ReloadIfChanged'} // "no")) {
my %unitInfo = parseUnit($newUnitFile);
if (parseSystemdBool(\%unitInfo, "Service", "X-ReloadIfChanged", 0)) {
$unitsToReload->{$unit} = 1;
recordUnit($reloadListFile, $unit);
}
elsif (!boolIsTrue($unitInfo->{'X-RestartIfChanged'} // "yes") || boolIsTrue($unitInfo->{'RefuseManualStop'} // "no") || boolIsTrue($unitInfo->{'X-OnlyManualStart'} // "no")) {
elsif (!parseSystemdBool(\%unitInfo, "Service", "X-RestartIfChanged", 1) || parseSystemdBool(\%unitInfo, "Unit", "RefuseManualStop", 0) || parseSystemdBool(\%unitInfo, "Unit", "X-OnlyManualStart", 0)) {
$unitsToSkip->{$unit} = 1;
} else {
# It doesn't make sense to stop and start non-services because
# they can't have ExecStop=
if (!boolIsTrue($unitInfo->{'X-StopIfChanged'} // "yes") || $unit !~ /\.service$/) {
if (!parseSystemdBool(\%unitInfo, "Service", "X-StopIfChanged", 1) || $unit !~ /\.service$/) {
# This unit should be restarted instead of
# stopped and started.
$unitsToRestart->{$unit} = 1;
@ -188,7 +240,7 @@ sub handleModifiedUnit {
# socket(s) instead of the service.
my $socketActivated = 0;
if ($unit =~ /\.service$/) {
my @sockets = split / /, ($unitInfo->{Sockets} // "");
my @sockets = split(/ /, join(" ", @{$unitInfo{Service}{Sockets} // []}));
if (scalar @sockets == 0) {
@sockets = ("$baseName.socket");
}
@ -254,12 +306,12 @@ while (my ($unit, $state) = each %{$activePrev}) {
if (-e $prevUnitFile && ($state->{state} eq "active" || $state->{state} eq "activating")) {
if (! -e $newUnitFile || abs_path($newUnitFile) eq "/dev/null") {
my $unitInfo = parseUnit($prevUnitFile);
$unitsToStop{$unit} = 1 if boolIsTrue($unitInfo->{'X-StopOnRemoval'} // "yes");
my %unitInfo = parseUnit($prevUnitFile);
$unitsToStop{$unit} = 1 if parseSystemdBool(\%unitInfo, "Unit", "X-StopOnRemoval", 1);
}
elsif ($unit =~ /\.target$/) {
my $unitInfo = parseUnit($newUnitFile);
my %unitInfo = parseUnit($newUnitFile);
# Cause all active target units to be restarted below.
# This should start most changed units we stop here as
@ -268,7 +320,7 @@ while (my ($unit, $state) = each %{$activePrev}) {
# active after the system has resumed, which probably
# should not be the case. Just ignore it.
if ($unit ne "suspend.target" && $unit ne "hibernate.target" && $unit ne "hybrid-sleep.target") {
unless (boolIsTrue($unitInfo->{'RefuseManualStart'} // "no") || boolIsTrue($unitInfo->{'X-OnlyManualStart'} // "no")) {
unless (parseSystemdBool(\%unitInfo, "Unit", "RefuseManualStart", 0) || parseSystemdBool(\%unitInfo, "Unit", "X-OnlyManualStart", 0)) {
$unitsToStart{$unit} = 1;
recordUnit($startListFile, $unit);
# Don't spam the user with target units that always get started.
@ -287,7 +339,7 @@ while (my ($unit, $state) = each %{$activePrev}) {
# Stopping a target generally has no effect on other units
# (unless there is a PartOf dependency), so this is just a
# bookkeeping thing to get systemd to do the right thing.
if (boolIsTrue($unitInfo->{'X-StopOnReconfiguration'} // "no")) {
if (parseSystemdBool(\%unitInfo, "Unit", "X-StopOnReconfiguration", 0)) {
$unitsToStop{$unit} = 1;
}
}
@ -546,33 +598,36 @@ my $activeNew = getActiveUnits;
while (my ($unit, $state) = each %{$activeNew}) {
if ($state->{state} eq "failed") {
push @failed, $unit;
next;
}
elsif ($state->{state} eq "auto-restart") {
# A unit in auto-restart state is a failure *if* it previously failed to start
my $lines = `@systemd@/bin/systemctl show '$unit'`;
my $info = {};
parseKeyValues($info, split("\n", $lines));
if ($info->{ExecMainStatus} ne '0') {
if ($state->{substate} eq "auto-restart") {
# A unit in auto-restart substate is a failure *if* it previously failed to start
my $main_status = `@systemd@/bin/systemctl show --value --property=ExecMainStatus '$unit'`;
chomp($main_status);
if ($main_status ne "0") {
push @failed, $unit;
next;
}
}
# Ignore scopes since they are not managed by this script but rather
# created and managed by third-party services via the systemd dbus API.
elsif ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/) {
# This only lists units that are not failed (including ones that are in auto-restart but have not failed previously)
if ($state->{state} ne "failed" && !defined $activePrev->{$unit} && $unit !~ /\.scope$/msx) {
push @new, $unit;
}
}
print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n"
if scalar @new > 0;
if (scalar @new > 0) {
print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n"
}
if (scalar @failed > 0) {
print STDERR "warning: the following units failed: ", join(", ", sort(@failed)), "\n";
foreach my $unit (@failed) {
print STDERR "\n";
system("COLUMNS=1000 @systemd@/bin/systemctl status --no-pager '$unit' >&2");
}
my @failed_sorted = sort @failed;
print STDERR "warning: the following units failed: ", join(", ", @failed_sorted), "\n\n";
system "@systemd@/bin/systemctl status --no-pager --full '" . join("' '", @failed_sorted) . "' >&2";
$res = 4;
}

View File

@ -117,7 +117,7 @@ let
configurationName = config.boot.loader.grub.configurationName;
# Needed by switch-to-configuration.
perl = pkgs.perl.withPackages (p: with p; [ FileSlurp NetDBus XMLParser XMLTwig ]);
perl = pkgs.perl.withPackages (p: with p; [ FileSlurp NetDBus XMLParser XMLTwig ConfigIniFiles ]);
};
# Handle assertions and warnings

View File

@ -45,6 +45,31 @@ import ./make-test-python.nix ({ pkgs, ...} : {
systemd.services.test.restartIfChanged = false;
};
simpleServiceFailing.configuration = {
imports = [ simpleServiceModified.configuration ];
systemd.services.test.serviceConfig.ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false";
};
autorestartService.configuration = {
# A service that immediately goes into restarting (but without failing)
systemd.services.autorestart = {
wantedBy = [ "multi-user.target" ];
serviceConfig = {
Type = "simple";
Restart = "always";
RestartSec = "20y"; # Should be long enough
ExecStart = "${pkgs.coreutils}/bin/true";
};
};
};
autorestartServiceFailing.configuration = {
imports = [ autorestartService.configuration ];
systemd.services.autorestart.serviceConfig = {
ExecStart = lib.mkForce "${pkgs.coreutils}/bin/false";
};
};
restart-and-reload-by-activation-script.configuration = {
systemd.services = rec {
simple-service = {
@ -189,12 +214,13 @@ import ./make-test-python.nix ({ pkgs, ...} : {
exec env -i "$@" | tee /dev/stderr
'';
in /* python */ ''
def switch_to_specialisation(system, name, action="test"):
def switch_to_specialisation(system, name, action="test", fail=False):
if name == "":
stc = f"{system}/bin/switch-to-configuration"
else:
stc = f"{system}/specialisation/{name}/bin/switch-to-configuration"
out = machine.succeed(f"{stc} {action} 2>&1")
out = machine.fail(f"{stc} {action} 2>&1") if fail \
else machine.succeed(f"{stc} {action} 2>&1")
assert_lacks(out, "switch-to-configuration line") # Perl warnings
return out
@ -305,7 +331,56 @@ import ./make-test-python.nix ({ pkgs, ...} : {
assert_lacks(out, "as well:")
assert_contains(out, "would start the following units: test.service\n")
with subtest("failing units"):
# Let the simple service fail
switch_to_specialisation("${machine}", "simpleServiceModified")
out = switch_to_specialisation("${machine}", "simpleServiceFailing", fail=True)
assert_contains(out, "stopping the following units: test.service\n")
assert_lacks(out, "NOT restarting the following changed units:")
assert_lacks(out, "reloading the following units:")
assert_lacks(out, "\nrestarting the following units:")
assert_contains(out, "\nstarting the following units: test.service\n")
assert_lacks(out, "the following new units were started:")
assert_contains(out, "warning: the following units failed: test.service\n")
assert_contains(out, "Main PID:") # output of systemctl
assert_lacks(out, "as well:")
# A unit that gets into autorestart without failing is not treated as failed
out = switch_to_specialisation("${machine}", "autorestartService")
assert_lacks(out, "stopping the following units:")
assert_lacks(out, "NOT restarting the following changed units:")
assert_lacks(out, "reloading the following units:")
assert_lacks(out, "\nrestarting the following units:")
assert_lacks(out, "\nstarting the following units:")
assert_contains(out, "the following new units were started: autorestart.service\n")
assert_lacks(out, "as well:")
machine.systemctl('stop autorestart.service') # cancel the 20y timer
# Switching to the same system should do nothing (especially not treat the unit as failed)
out = switch_to_specialisation("${machine}", "autorestartService")
assert_lacks(out, "stopping the following units:")
assert_lacks(out, "NOT restarting the following changed units:")
assert_lacks(out, "reloading the following units:")
assert_lacks(out, "\nrestarting the following units:")
assert_lacks(out, "\nstarting the following units:")
assert_contains(out, "the following new units were started: autorestart.service\n")
assert_lacks(out, "as well:")
machine.systemctl('stop autorestart.service') # cancel the 20y timer
# If systemd thinks the unit has failed and is in autorestart, we should show it as failed
out = switch_to_specialisation("${machine}", "autorestartServiceFailing", fail=True)
assert_lacks(out, "stopping the following units:")
assert_lacks(out, "NOT restarting the following changed units:")
assert_lacks(out, "reloading the following units:")
assert_lacks(out, "\nrestarting the following units:")
assert_lacks(out, "\nstarting the following units:")
assert_lacks(out, "the following new units were started:")
assert_contains(out, "warning: the following units failed: autorestart.service\n")
assert_contains(out, "Main PID:") # output of systemctl
assert_lacks(out, "as well:")
with subtest("restart and reload by activation script"):
switch_to_specialisation("${machine}", "simpleServiceNorestart")
out = switch_to_specialisation("${machine}", "restart-and-reload-by-activation-script")
assert_contains(out, "stopping the following units: test.service\n")
assert_lacks(out, "NOT restarting the following changed units:")