nixos/qemu-vm: use persistent block device names

This change removes the bespoke logic around identifying block devices.
Instead of trying to find the right device by iterating over
`qemu.drives` and guessing the right partition number (e.g.
/dev/vda{1,2}), devices are now identified by persistent names provided
by udev in /dev/disk/by-*.

Before this change, the root device was formatted on demand in the
initrd. However, this makes it impossible to use filesystem identifiers
to identify devices. Now, the formatting step is performed before the VM
is started. Because some tests, however, rely on this behaviour, a
utility function to replace this behaviour in added in
/nixos/tests/common/auto-format-root-device.nix.

Devices that contain neither a partition table nor a filesystem are
identified by their hardware serial number which is injecetd via QEMU
(and is thus persistent and predictable). PCI paths are not a reliably
way to identify devices because their availability and numbering depends
on the QEMU machine type.

This change makes the module more robust against changes in QEMU and the
kernel (non-persistent device naming) and by decoupling abstractions
(i.e. rootDevice, bootPartition, and bootLoaderDevice) enables further
improvement down the line.
This commit is contained in:
nikstur 2023-06-08 02:34:10 +02:00
parent aa337560aa
commit 0bdba6c99b
14 changed files with 130 additions and 99 deletions

View File

@ -45,3 +45,17 @@
- A new option was added to the virtualisation module that enables specifying explicitly named network interfaces in QEMU VMs. The existing `virtualisation.vlans` is still supported for cases where the name of the network interface is irrelevant.
- `services.nginx` gained a `defaultListen` option at server-level with support for PROXY protocol listeners, also `proxyProtocol` is now exposed in `services.nginx.virtualHosts.<name>.listen` option. It is now possible to run PROXY listeners and non-PROXY listeners at a server-level, see [#213510](https://github.com/NixOS/nixpkgs/pull/213510/) for more details.
## Nixpkgs internals {#sec-release-23.11-nixpkgs-internals}
- The `qemu-vm.nix` module by default now identifies block devices via
persistent names available in `/dev/disk/by-*`. Because the rootDevice is
identfied by its filesystem label, it needs to be formatted before the VM is
started. The functionality of automatically formatting the rootDevice in the
initrd is removed from the QEMU module. However, for tests that depend on
this functionality, a test utility for the scripted initrd is added
(`nixos/tests/common/auto-format-root-device.nix`). To use this in a NixOS
test, import the module, e.g. `imports = [
./common/auto-format-root-device.nix ];` When you use the systemd initrd, you
can automatically format the root device by setting
`virtualisation.fileSystems."/".autoFormat = true;`.

View File

@ -573,6 +573,7 @@ let format' = format; in let
# In this throwaway resource, we only have /dev/vda, but the actual VM may refer to another disk for bootloader, e.g. /dev/vdb
# Use this option to create a symlink from vda to any arbitrary device you want.
${optionalString (config.boot.loader.grub.device != "/dev/vda") ''
mkdir -p $(dirname ${config.boot.loader.grub.device})
ln -s /dev/vda ${config.boot.loader.grub.device}
''}

View File

@ -81,25 +81,6 @@ let
drivesCmdLine = drives: concatStringsSep "\\\n " (imap1 driveCmdline drives);
# Creates a device name from a 1-based a numerical index, e.g.
# * `driveDeviceName 1` -> `/dev/vda`
# * `driveDeviceName 2` -> `/dev/vdb`
driveDeviceName = idx:
let letter = elemAt lowerChars (idx - 1);
in if cfg.qemu.diskInterface == "scsi" then
"/dev/sd${letter}"
else
"/dev/vd${letter}";
lookupDriveDeviceName = driveName: driveList:
(findSingle (drive: drive.name == driveName)
(throw "Drive ${driveName} not found")
(throw "Multiple drives named ${driveName}") driveList).device;
addDeviceNames =
imap1 (idx: drive: drive // { device = driveDeviceName idx; });
# Shell script to start the VM.
startVM =
''
@ -109,25 +90,41 @@ let
set -e
# Create an empty ext4 filesystem image. A filesystem image does not
# contain a partition table but just a filesystem.
createEmptyFilesystemImage() {
local name=$1
local size=$2
local temp=$(mktemp)
${qemu}/bin/qemu-img create -f raw "$temp" "$size"
${pkgs.e2fsprogs}/bin/mkfs.ext4 -L ${rootFilesystemLabel} "$temp"
${qemu}/bin/qemu-img convert -f raw -O qcow2 "$temp" "$name"
rm "$temp"
}
NIX_DISK_IMAGE=$(readlink -f "''${NIX_DISK_IMAGE:-${toString config.virtualisation.diskImage}}") || test -z "$NIX_DISK_IMAGE"
if test -n "$NIX_DISK_IMAGE" && ! test -e "$NIX_DISK_IMAGE"; then
echo "Disk image do not exist, creating the virtualisation disk image..."
# If we are using a bootloader and default filesystems layout.
# We have to reuse the system image layout as a backing image format (CoW)
# So we can write on the top of it.
# If we are not using the default FS layout, potentially, we are interested into
# performing operations in postDeviceCommands or at early boot on the raw device.
# We can still boot through QEMU direct kernel boot feature.
${if (cfg.useBootLoader && cfg.useDefaultFilesystems) then ''
# Create a writable qcow2 image using the systemImage as a backing
# image.
# CoW prevent size to be attributed to an image.
# FIXME: raise this issue to upstream.
${qemu}/bin/qemu-img create \
${concatStringsSep " \\\n" ([ "-f qcow2" ]
++ optional (cfg.useBootLoader && cfg.useDefaultFilesystems) "-F qcow2 -b ${systemImage}/nixos.qcow2"
++ optional (!(cfg.useBootLoader && cfg.useDefaultFilesystems)) "-o size=${toString config.virtualisation.diskSize}M"
++ [ ''"$NIX_DISK_IMAGE"'' ])}
# CoW prevent size to be attributed to an image.
# FIXME: raise this issue to upstream.
${qemu}/bin/qemu-img create \
-f qcow2 \
-b ${systemImage}/nixos.qcow2 \
-F qcow2 \
"$NIX_DISK_IMAGE"
'' else if cfg.useDefaultFilesystems then ''
createEmptyFilesystemImage "$NIX_DISK_IMAGE" "${toString cfg.diskSize}M"
'' else ''
# Create an empty disk image without a filesystem.
${qemu}/bin/qemu-img create -f qcow2 "$NIX_DISK_IMAGE" "${toString cfg.diskSize}M"
''
}
echo "Virtualisation disk image created."
fi
@ -148,6 +145,7 @@ let
${pkgs.erofs-utils}/bin/mkfs.erofs \
--force-uid=0 \
--force-gid=0 \
-L ${nixStoreFilesystemLabel} \
-U eb176051-bd15-49b7-9e6b-462e0b467019 \
-T 0 \
--exclude-regex="$(
@ -218,6 +216,19 @@ let
regInfo = pkgs.closureInfo { rootPaths = config.virtualisation.additionalPaths; };
# Use well-defined and persistent filesystem labels to identify block devices.
rootFilesystemLabel = "nixos";
espFilesystemLabel = "ESP"; # Hard-coded by make-disk-image.nix
nixStoreFilesystemLabel = "nix-store";
# The root drive is a raw disk which does not necessarily contain a
# filesystem or partition table. It thus cannot be identified via the typical
# persistent naming schemes (e.g. /dev/disk/by-{label, uuid, partlabel,
# partuuid}. Instead, supply a well-defined and persistent serial attribute
# via QEMU. Inside the running system, the disk can then be identified via
# the /dev/disk/by-id scheme.
rootDriveSerialAttr = "root";
# System image is akin to a complete NixOS install with
# a boot partition and root partition.
systemImage = import ../../lib/make-disk-image.nix {
@ -225,6 +236,7 @@ let
additionalPaths = [ regInfo ];
format = "qcow2";
onlyNixStore = false;
label = rootFilesystemLabel;
partitionTableType = selectPartitionTableLayout { inherit (cfg) useDefaultFilesystems useEFIBoot; };
# Bootloader should be installed on the system image only if we are booting through bootloaders.
# Though, if a user is not using our default filesystems, it is possible to not have any ESP
@ -247,6 +259,7 @@ let
additionalPaths = [ regInfo ];
format = "qcow2";
onlyNixStore = true;
label = nixStoreFilesystemLabel;
partitionTableType = "none";
installBootLoader = false;
touchEFIVars = false;
@ -255,28 +268,6 @@ let
copyChannel = false;
};
bootConfiguration =
if cfg.useDefaultFilesystems
then
if cfg.useBootLoader
then
if cfg.useEFIBoot then "efi_bootloading_with_default_fs"
else "legacy_bootloading_with_default_fs"
else
if cfg.directBoot.enable then "direct_boot_with_default_fs"
else "custom"
else
"custom";
suggestedRootDevice = {
"efi_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}2";
"legacy_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}1";
"direct_boot_with_default_fs" = cfg.bootLoaderDevice;
# This will enforce a NixOS module type checking error
# to ask explicitly the user to set a rootDevice.
# As it will look like `rootDevice = lib.mkDefault null;` after
# all "computations".
"custom" = null;
}.${bootConfiguration};
in
{
@ -343,44 +334,39 @@ in
virtualisation.bootLoaderDevice =
mkOption {
type = types.path;
default = lookupDriveDeviceName "root" cfg.qemu.drives;
defaultText = literalExpression ''lookupDriveDeviceName "root" cfg.qemu.drives'';
example = "/dev/vda";
default = "/dev/disk/by-id/virtio-${rootDriveSerialAttr}";
defaultText = literalExpression ''/dev/disk/by-id/virtio-${rootDriveSerialAttr}'';
example = "/dev/disk/by-id/virtio-boot-loader-device";
description =
lib.mdDoc ''
The disk to be used for the boot filesystem.
By default, it is the same disk as the root filesystem.
The path (inside th VM) to the device to boot from when legacy booting.
'';
};
virtualisation.bootPartition =
mkOption {
type = types.nullOr types.path;
default = if cfg.useEFIBoot then "${cfg.bootLoaderDevice}1" else null;
defaultText = literalExpression ''if cfg.useEFIBoot then "''${cfg.bootLoaderDevice}1" else null'';
example = "/dev/vda1";
default = if cfg.useEFIBoot then "/dev/disk/by-label/${espFilesystemLabel}" else null;
defaultText = literalExpression ''if cfg.useEFIBoot then "/dev/disk/by-label/${espFilesystemLabel}" else null'';
example = "/dev/disk/by-label/esp";
description =
lib.mdDoc ''
The boot partition to be used to mount /boot filesystem.
In legacy boots, this should be null.
By default, in EFI boot, it is the first partition of the boot device.
The path (inside the VM) to the device containing the EFI System Partition (ESP).
If you are *not* booting from a UEFI firmware, this value is, by
default, `null`. The ESP is mounted under `/boot`.
'';
};
virtualisation.rootDevice =
mkOption {
type = types.nullOr types.path;
example = "/dev/vda2";
default = "/dev/disk/by-label/${rootFilesystemLabel}";
defaultText = literalExpression ''/dev/disk/by-label/${rootFilesystemLabel}'';
example = "/dev/disk/by-label/nixos";
description =
lib.mdDoc ''
The disk or partition to be used for the root filesystem.
By default (read the source code for more details):
- under EFI with a bootloader: 2nd partition of the boot disk
- in legacy boot with a bootloader: 1st partition of the boot disk
- in direct boot (i.e. without a bootloader): whole disk
In case you are not using a default boot device or a default filesystem, you have to set explicitly your root device.
The path (inside the VM) to the device containing the root filesystem.
'';
};
@ -711,7 +697,6 @@ in
mkOption {
type = types.listOf (types.submodule driveOpts);
description = lib.mdDoc "Drives passed to qemu.";
apply = addDeviceNames;
};
diskInterface =
@ -975,29 +960,11 @@ in
# FIXME: make a sense of this mess wrt to multiple ESP present in the system, probably use boot.efiSysMountpoint?
boot.loader.grub.device = mkVMOverride (if cfg.useEFIBoot then "nodev" else cfg.bootLoaderDevice);
boot.loader.grub.gfxmodeBios = with cfg.resolution; "${toString x}x${toString y}";
virtualisation.rootDevice = mkDefault suggestedRootDevice;
boot.initrd.kernelModules = optionals (cfg.useNixStoreImage && !cfg.writableStore) [ "erofs" ];
boot.loader.supportsInitrdSecrets = mkIf (!cfg.useBootLoader) (mkVMOverride false);
boot.initrd.extraUtilsCommands = lib.mkIf (cfg.useDefaultFilesystems && !config.boot.initrd.systemd.enable)
''
# We need mke2fs in the initrd.
copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs
'';
boot.initrd.postDeviceCommands = lib.mkIf (cfg.useDefaultFilesystems && !config.boot.initrd.systemd.enable)
''
# If the disk image appears to be empty, run mke2fs to
# initialise.
FSTYPE=$(blkid -o value -s TYPE ${cfg.rootDevice} || true)
PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.rootDevice} || true)
if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
mke2fs -t ext4 ${cfg.rootDevice}
fi
'';
boot.initrd.postMountCommands = lib.mkIf (!config.boot.initrd.systemd.enable)
''
# Mark this as a NixOS machine.
@ -1112,6 +1079,7 @@ in
driveExtraOpts.cache = "writeback";
driveExtraOpts.werror = "report";
deviceExtraOpts.bootindex = "1";
deviceExtraOpts.serial = rootDriveSerialAttr;
}])
(mkIf cfg.useNixStoreImage [{
name = "nix-store";
@ -1154,7 +1122,6 @@ in
} else {
device = cfg.rootDevice;
fsType = "ext4";
autoFormat = true;
});
"/tmp" = lib.mkIf config.boot.tmp.useTmpfs {
device = "tmpfs";
@ -1164,7 +1131,7 @@ in
options = [ "mode=1777" "strictatime" "nosuid" "nodev" "size=${toString config.boot.tmp.tmpfsSize}" ];
};
"/nix/${if cfg.writableStore then ".ro-store" else "store"}" = lib.mkIf cfg.useNixStoreImage {
device = "${lookupDriveDeviceName "nix-store" cfg.qemu.drives}";
device = "/dev/disk/by-label/${nixStoreFilesystemLabel}";
neededForBoot = true;
options = [ "ro" ];
};
@ -1174,7 +1141,7 @@ in
neededForBoot = true;
};
"/boot" = lib.mkIf (cfg.useBootLoader && cfg.bootPartition != null) {
device = cfg.bootPartition; # 1 for e.g. `vda1`, as created in `systemImage`
device = cfg.bootPartition;
fsType = "vfat";
noCheck = true; # fsck fails on a r/o filesystem
};

View File

@ -0,0 +1,29 @@
# This is a test utility that automatically formats
# `config.virtualisation.rootDevice` in the initrd.
# Note that when you are using
# `boot.initrd.systemd.enable = true`, you can use
# `virtualisation.fileSystems."/".autoFormat = true;`
# instead.
{ config, pkgs, ... }:
let
rootDevice = config.virtualisation.rootDevice;
in
{
boot.initrd.extraUtilsCommands = ''
# We need mke2fs in the initrd.
copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs
'';
boot.initrd.postDeviceCommands = ''
# If the disk image appears to be empty, run mke2fs to
# initialise.
FSTYPE=$(blkid -o value -s TYPE ${rootDevice} || true)
PARTTYPE=$(blkid -o value -s PTTYPE ${rootDevice} || true)
if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
mke2fs -t ext4 ${rootDevice}
fi
'';
}

View File

@ -21,13 +21,17 @@ import ./make-test-python.nix {
boot.initrd.systemd.enable = systemdStage1;
};
testScript = ''
testScript = { nodes, ...}:
let
rootDevice = nodes.machine.virtualisation.rootDevice;
in
''
machine.wait_for_unit("default.target")
with subtest("root fs is fsckd"):
machine.succeed("journalctl -b | grep '${if systemdStage1
then "fsck.*vda.*clean"
else "fsck.ext4.*/dev/vda"}'")
then "fsck.*${builtins.baseNameOf rootDevice}.*clean"
else "fsck.ext4.*${rootDevice}"}'")
with subtest("mnt fs is fsckd"):
machine.succeed("journalctl -b | grep 'fsck.*vdb.*clean'")

View File

@ -50,6 +50,7 @@ in makeTest {
imports = [
../modules/profiles/installation-device.nix
../modules/profiles/base.nix
./common/auto-format-root-device.nix
];
nix.settings = {

View File

@ -14,6 +14,8 @@ in {
name = "initrd-luks-empty-passphrase";
nodes.machine = { pkgs, ... }: {
imports = lib.optionals (!systemdStage1) [ ./common/auto-format-root-device.nix ];
virtualisation = {
emptyDiskImages = [ 512 ];
useBootLoader = true;
@ -23,6 +25,7 @@ in {
# the new root device is /dev/vdb
# an empty 512MiB drive, containing no Nix store.
mountHostNixStore = true;
fileSystems."/".autoFormat = lib.mkIf systemdStage1 true;
};
boot.loader.systemd-boot.enable = true;

View File

@ -298,8 +298,13 @@ let
../modules/profiles/installation-device.nix
../modules/profiles/base.nix
extraInstallerConfig
./common/auto-format-root-device.nix
];
# In systemdStage1, also automatically format the device backing the
# root filesystem.
virtualisation.fileSystems."/".autoFormat = systemdStage1;
# builds stuff in the VM, needs more juice
virtualisation.diskSize = 8 * 1024;
virtualisation.cores = 8;

View File

@ -2,6 +2,8 @@ import ./make-test-python.nix ({ lib, pkgs, ... }: {
name = "luks";
nodes.machine = { pkgs, ... }: {
imports = [ ./common/auto-format-root-device.nix ];
# Use systemd-boot
virtualisation = {
emptyDiskImages = [ 512 512 ];

View File

@ -26,6 +26,7 @@ import ./make-test-python.nix ({ lib, pkgs, ... }: {
};
};
virtualisation.rootDevice = "/dev/mapper/cryptroot";
virtualisation.fileSystems."/".autoFormat = true;
};
};

View File

@ -34,6 +34,7 @@ in {
};
};
virtualisation.rootDevice = "/dev/mapper/cryptroot";
virtualisation.fileSystems."/".autoFormat = true;
boot.initrd.secrets."/etc/cryptroot.key" = keyfile;
};
};

View File

@ -25,6 +25,7 @@ import ./make-test-python.nix ({ lib, pkgs, ... }: {
cryptroot2.device = "/dev/vdc";
};
virtualisation.rootDevice = "/dev/mapper/cryptroot";
virtualisation.fileSystems."/".autoFormat = true;
# test mounting device unlocked in initrd after switching root
virtualisation.fileSystems."/cryptroot2".device = "/dev/mapper/cryptroot2";
};

View File

@ -28,6 +28,7 @@ import ./make-test-python.nix ({ lib, pkgs, ... }: {
};
};
virtualisation.rootDevice = "/dev/mapper/cryptroot";
virtualisation.fileSystems."/".autoFormat = true;
};
};

View File

@ -17,6 +17,7 @@ import ./make-test-python.nix ({ lib, ... }: {
specialisation.encrypted-root.configuration = {
virtualisation.rootDevice = "/dev/mapper/root";
virtualisation.fileSystems."/".autoFormat = true;
boot.initrd.luks.devices = lib.mkVMOverride {
root.device = "/dev/vdb";
};