2014-04-14 18:26:48 +04:00
|
|
|
{ config, lib, pkgs, ... }:
|
2013-11-27 19:54:20 +04:00
|
|
|
|
2014-04-14 18:26:48 +04:00
|
|
|
with lib;
|
2013-11-27 19:54:20 +04:00
|
|
|
|
2014-03-18 13:49:25 +04:00
|
|
|
let
|
|
|
|
|
2014-05-07 19:00:46 +04:00
|
|
|
# The container's init script: a small wrapper that configures the
# container side of the network and then hands control to the regular
# NixOS stage-2 init script, whose path is passed as "$1".
containerInit = (cfg:
  let
    # Shell fragment that brings up one extra veth inside the container.
    # `veth` holds that interface's own settings; it is deliberately not
    # named `cfg` so the container-level `cfg` is not shadowed.
    renderExtraVeth = name: veth:
      ''
      echo "Bringing ${name} up"
      ip link set dev ${name} up
      ${optionalString (veth.localAddress != null) ''
        echo "Setting ip for ${name}"
        ip addr add ${veth.localAddress} dev ${name}
      ''}
      ${optionalString (veth.localAddress6 != null) ''
        echo "Setting ip6 for ${name}"
        ip -6 addr add ${veth.localAddress6} dev ${name}
      ''}
      ${optionalString (veth.hostAddress != null) ''
        echo "Setting route to host for ${name}"
        ip route add ${veth.hostAddress} dev ${name}
      ''}
      ${optionalString (veth.hostAddress6 != null) ''
        echo "Setting route6 to host for ${name}"
        ip -6 route add ${veth.hostAddress6} dev ${name}
      ''}
      '';
  in
    pkgs.writeScript "container-init"
    ''
      #! ${pkgs.runtimeShell} -e

      # Initialise the container side of the veth pair.
      if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
         [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
         [ -n "$HOST_BRIDGE" ]; then
        ip link set host0 name eth0
        ip link set dev eth0 up

        if [ -n "$LOCAL_ADDRESS" ]; then
          ip addr add $LOCAL_ADDRESS dev eth0
        fi
        if [ -n "$LOCAL_ADDRESS6" ]; then
          ip -6 addr add $LOCAL_ADDRESS6 dev eth0
        fi
        if [ -n "$HOST_ADDRESS" ]; then
          ip route add $HOST_ADDRESS dev eth0
          ip route add default via $HOST_ADDRESS
        fi
        if [ -n "$HOST_ADDRESS6" ]; then
          ip -6 route add $HOST_ADDRESS6 dev eth0
          ip -6 route add default via $HOST_ADDRESS6
        fi

        ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
      fi

      # Start the regular stage 2 script.
      exec "$1"
    ''
);
|
|
|
|
|
|
|
|
# systemd-nspawn flag requesting one extra veth pair; only the attribute
# name is used, the per-veth settings are ignored here.
nspawnExtraVethArgs = vethName: _vethCfg: "--network-veth-extra=${vethName}";
|
2016-09-22 13:58:39 +03:00
|
|
|
|
|
|
|
# Shell script that prepares the container root and then exec's
# systemd-nspawn. It reads $root, $INSTANCE and the per-container
# variables tested below from its environment.
startScript = cfg:
  let
    # One --network-veth-extra flag per entry of cfg.extraVeths.
    extraVethFlags =
      concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths);

    # Only emitted on x86_64 hosts: adds --personality=x86 when the
    # container's system profile reports i686-linux.
    personalityCheck = optionalString (config.nixpkgs.localSystem.system == "x86_64-linux") ''
      if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
        extraFlags+=" --personality=x86"
      fi
    '';

    journalFlag = optionalString (!cfg.ephemeral) "--link-journal=try-guest";

    ephemeralFlag = optionalString cfg.ephemeral "--ephemeral";

    capabilityFlag =
      optionalString (cfg.additionalCapabilities != null && cfg.additionalCapabilities != [])
        ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"'';

    tmpfsFlags =
      optionalString (cfg.tmpfs != null && cfg.tmpfs != [])
        ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}'';
  in
  ''
    mkdir -p -m 0755 "$root/etc" "$root/var/lib"
    mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
    if ! [ -e "$root/etc/os-release" ]; then
      touch "$root/etc/os-release"
    fi

    if ! [ -e "$root/etc/machine-id" ]; then
      touch "$root/etc/machine-id"
    fi

    mkdir -p -m 0755 \
      "/nix/var/nix/profiles/per-container/$INSTANCE" \
      "/nix/var/nix/gcroots/per-container/$INSTANCE"

    cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

    if [ "$PRIVATE_NETWORK" = 1 ]; then
      extraFlags+=" --private-network"
    fi

    if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
       [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
      extraFlags+=" --network-veth"
    fi

    if [ -n "$HOST_PORT" ]; then
      OIFS=$IFS
      IFS=","
      for i in $HOST_PORT
      do
          extraFlags+=" --port=$i"
      done
      IFS=$OIFS
    fi

    if [ -n "$HOST_BRIDGE" ]; then
      extraFlags+=" --network-bridge=$HOST_BRIDGE"
    fi

    extraFlags+=" ${extraVethFlags}"

    for iface in $INTERFACES; do
      extraFlags+=" --network-interface=$iface"
    done

    for iface in $MACVLANS; do
      extraFlags+=" --network-macvlan=$iface"
    done

    # If the host is 64-bit and the container is 32-bit, add a
    # --personality flag.
    ${personalityCheck}

    # Run systemd-nspawn without startup notification (we'll
    # wait for the container systemd to signal readiness).
    exec ${config.systemd.package}/bin/systemd-nspawn \
      --keep-unit \
      -M "$INSTANCE" -D "$root" $extraFlags \
      $EXTRA_NSPAWN_FLAGS \
      --notify-ready=yes \
      --bind-ro=/nix/store \
      --bind-ro=/nix/var/nix/db \
      --bind-ro=/nix/var/nix/daemon-socket \
      --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
      --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
      ${journalFlag} \
      --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
      --setenv HOST_BRIDGE="$HOST_BRIDGE" \
      --setenv HOST_ADDRESS="$HOST_ADDRESS" \
      --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
      --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
      --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
      --setenv HOST_PORT="$HOST_PORT" \
      --setenv PATH="$PATH" \
      ${ephemeralFlag} \
      ${capabilityFlag} \
      ${tmpfsFlags} \
      ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
  '';
|
2016-05-07 01:04:28 +03:00
|
|
|
|
2016-09-22 13:58:39 +03:00
|
|
|
# Shell fragment run before the container starts: terminates any stale
# machined registration and deletes leftover host-side interfaces.
# Every command is best-effort (errors are discarded).
preStartScript = cfg:
  let
    # One "ip link del" line per configured extra veth.
    extraVethCleanup =
      mapAttrsToList
        (vethName: _: ''ip link del dev ${vethName} 2> /dev/null || true '')
        cfg.extraVeths;
  in
  ''
    # Clean up existing machined registration and interfaces.
    machinectl terminate "$INSTANCE" 2> /dev/null || true

    if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
       [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
      ip link del dev "ve-$INSTANCE" 2> /dev/null || true
      ip link del dev "vb-$INSTANCE" 2> /dev/null || true
    fi

    ${concatStringsSep "\n" extraVethCleanup}
  '';
|
|
|
|
|
2016-05-07 01:04:28 +03:00
|
|
|
# Shell fragment run on the host after the container has started: brings
# up the host side of the veth pair(s) and adds addresses and routes.
postStartScript = (cfg:
  let
    # Emits "<ipcmd> add <value> dev $ifaceHost". If cfg.<attribute> is
    # set, its value is baked into the script; otherwise the shell
    # variable named by `variable` is used, guarded by a non-empty test.
    ipcall = ipcmd: variable: attribute:
      let
        fixed = cfg.${attribute};
      in
      if fixed != null then
        ''${ipcmd} add ${fixed} dev $ifaceHost''
      else
        ''
          if [ -n "${variable}" ]; then
            ${ipcmd} add ${variable} dev $ifaceHost
          fi
        '';

    # Host-side setup for one extra veth: either attach it to its bridge,
    # or bring it up and add the configured addresses and routes.
    renderExtraVeth = name: veth:
      if veth.hostBridge == null then
        ''
          echo "Bring ${name} up"
          ip link set dev ${name} up
          # Set IPs and routes for ${name}
          ${optionalString (veth.hostAddress != null) ''
            ip addr add ${veth.hostAddress} dev ${name}
          ''}
          ${optionalString (veth.hostAddress6 != null) ''
            ip -6 addr add ${veth.hostAddress6} dev ${name}
          ''}
          ${optionalString (veth.localAddress != null) ''
            ip route add ${veth.localAddress} dev ${name}
          ''}
          ${optionalString (veth.localAddress6 != null) ''
            ip -6 route add ${veth.localAddress6} dev ${name}
          ''}
        ''
      else
        ''
          # Add ${name} to bridge ${veth.hostBridge}
          ip link set dev ${name} master ${veth.hostBridge} up
        '';
  in
    ''
      if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
         [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
        if [ -z "$HOST_BRIDGE" ]; then
          ifaceHost=ve-$INSTANCE
          ip link set dev $ifaceHost up

          ${ipcall "ip addr" "$HOST_ADDRESS" "hostAddress"}
          ${ipcall "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
          ${ipcall "ip route" "$LOCAL_ADDRESS" "localAddress"}
          ${ipcall "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
        fi
        ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
      fi
    ''
);
|
2014-05-07 19:00:46 +04:00
|
|
|
|
2016-09-25 18:33:01 +03:00
|
|
|
# systemd [Service] settings for a container unit, derived from `cfg`.
serviceDirectives = cfg:
  let
    # Reload = run "switch-to-configuration test" inside the container.
    reloadScript = pkgs.writeScript "reload-container"
      ''
        #! ${pkgs.runtimeShell} -e
        ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
          bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
      '';
  in
  {
    Type = "notify";
    ExecReload = reloadScript;

    SyslogIdentifier = "container %i";
    EnvironmentFile = "-/etc/containers/%i.conf";

    # Only ephemeral containers get a runtime directory.
    RuntimeDirectory = optional cfg.ephemeral "containers/%i";

    # systemd-nspawn exits with 133 when the container reboots, so that
    # status forces a restart of this unit. A poweroff exits with 0 and
    # therefore does not restart it.
    Restart = "on-failure";
    RestartForceExitStatus = "133";
    SuccessExitStatus = "133";

    # Containers can be slow to start, especially when many are started
    # automatically at once.
    TimeoutStartSec = cfg.timeoutStartSec;

    # Run in machine.slice and delegate the cgroup subtree, so that the
    # systemd instance inside the container manages the hierarchy below
    # its own cgroup.
    Slice = "machine.slice";
    Delegate = true;

    # Hack: systemd-nspawn must not be killed directly, because
    # "machinectl poweroff" in preStop shuts the container down cleanly.
    # systemd still requires a signal to be sent (at least if remaining
    # processes are to be killed after the timeout), so send one that is
    # ignored.
    KillMode = "mixed";
    KillSignal = "WINCH";

    DevicePolicy = "closed";
    DeviceAllow = map (dev: "${dev.node} ${dev.modifier}") cfg.allowedDevices;
  };
|
|
|
|
|
|
|
|
|
2018-04-05 22:22:45 +03:00
|
|
|
# Platform string of the host's local system.
inherit (config.nixpkgs.localSystem) system;
|
2014-04-15 14:58:28 +04:00
|
|
|
|
2018-07-20 23:56:59 +03:00
|
|
|
# Submodule describing one bind mount. The attribute name of the entry
# is used as the default mount point.
bindMountOpts = { name, ... }: {

  config.mountPoint = mkDefault name;

  options = {
    mountPoint = mkOption {
      type = types.str;
      example = "/mnt/usb";
      description = "Mount point on the container file system.";
    };

    hostPath = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "/home/alice";
      description = "Location of the host path to be mounted.";
    };

    isReadOnly = mkOption {
      type = types.bool;
      default = true;
      description = "Determine whether the mounted path will be accessed in read-only mode.";
    };
  };

};
|
2016-01-31 01:00:39 +03:00
|
|
|
|
2018-07-20 23:56:59 +03:00
|
|
|
# Submodule describing one device node the container may access,
# together with its access modifier.
allowedDeviceOpts = { ... }: {
  options = {
    node = mkOption {
      type = types.str;
      example = "/dev/net/tun";
      description = "Path to device node";
    };

    modifier = mkOption {
      type = types.str;
      example = "rw";
      description = ''
        Device node access modifier. Takes a combination
        <literal>r</literal> (read), <literal>w</literal> (write), and
        <literal>m</literal> (mknod). See the
        <literal>systemd.resource-control(5)</literal> man page for more
        information.'';
    };
  };
};
|
|
|
|
|
|
|
|
|
2015-05-26 14:56:42 +03:00
|
|
|
# Renders one bind-mount entry as a systemd-nspawn flag with a leading
# space: " --bind-ro=..." for read-only mounts, " --bind=..." otherwise.
# The target is "hostPath:mountPoint", or just the mount point when no
# host path is given.
mkBindFlag = d:
  let
    target =
      if d.hostPath == null
      then "${d.mountPoint}"
      else "${d.hostPath}:${d.mountPoint}";
  in
  (if d.isReadOnly then " --bind-ro=" else " --bind=") + target;
|
|
|
|
|
|
|
|
# Concatenates the flags of every bind mount in the attribute set `bs`.
mkBindFlags = bs: concatStrings (map mkBindFlag (attrValues bs));
|
2015-05-25 22:09:53 +03:00
|
|
|
|
2016-05-07 01:04:28 +03:00
|
|
|
# Network-related options. They are merged into every container's
# options and also describe each entry of `extraVeths`.
networkOptions =
  let
    # All address/bridge options are optional strings.
    optionalStr = types.nullOr types.str;

    # One host-to-container port forwarding rule.
    forwardPortOpts = {
      options = {
        protocol = mkOption {
          type = types.str;
          default = "tcp";
          description = "The protocol specifier for port forwarding between host and container";
        };
        hostPort = mkOption {
          type = types.int;
          description = "Source port of the external interface on host";
        };
        containerPort = mkOption {
          type = types.nullOr types.int;
          default = null;
          description = "Target port of container";
        };
      };
    };
  in
  {
    hostBridge = mkOption {
      description = ''
        Put the host-side of the veth-pair into the named bridge.
        Only one of hostAddress* or hostBridge can be given.
      '';
      type = optionalStr;
      default = null;
      example = "br0";
    };

    forwardPorts = mkOption {
      description = ''
        List of forwarded ports from host to container. Each forwarded port
        is specified by protocol, hostPort and containerPort. By default,
        protocol is tcp and hostPort and containerPort are assumed to be
        the same if containerPort is not explicitly given.
      '';
      type = types.listOf (types.submodule forwardPortOpts);
      default = [];
      example = [ { protocol = "tcp"; hostPort = 8080; containerPort = 80; } ];
    };

    hostAddress = mkOption {
      description = ''
        The IPv4 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
      type = optionalStr;
      default = null;
      example = "10.231.136.1";
    };

    hostAddress6 = mkOption {
      description = ''
        The IPv6 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
      type = optionalStr;
      default = null;
      example = "fc00::1";
    };

    localAddress = mkOption {
      description = ''
        The IPv4 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /32 and routing is
        set up from localAddress to hostAddress and back.
      '';
      type = optionalStr;
      default = null;
      example = "10.231.136.2";
    };

    localAddress6 = mkOption {
      description = ''
        The IPv6 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /128 and routing is
        set up from localAddress6 to hostAddress6 and back.
      '';
      type = optionalStr;
      default = null;
      example = "fc00::2";
    };
  };
|
|
|
|
|
2016-09-22 13:58:39 +03:00
|
|
|
# Stand-in container configuration carrying "nothing configured"
# values for the attributes read by the script and service helpers
# above.
# NOTE(review): presumably used where no declarative container entry is
# available — confirm against the call sites outside this excerpt.
dummyConfig =
  {
    extraVeths = {};
    additionalCapabilities = [];
    ephemeral = false;
    # Kept in sync with the default of the `timeoutStartSec` option, so
    # that this fallback does not impose a shorter start timeout than a
    # declared container gets.
    timeoutStartSec = "1min";
    allowedDevices = [];
    hostAddress = null;
    hostAddress6 = null;
    localAddress = null;
    localAddress6 = null;
    tmpfs = null;
  };
|
|
|
|
|
2014-03-18 13:49:25 +04:00
|
|
|
in
|
|
|
|
|
2013-11-27 19:54:20 +04:00
|
|
|
{
|
|
|
|
options = {
|
|
|
|
|
|
|
|
# Marks this system as a container guest. It is set to true for the
# configurations evaluated by the `containers.<name>.config` option.
boot.isContainer = mkOption {
  description = ''
    Whether this NixOS machine is a lightweight container running
    in another NixOS system. If set to true, support for nested
    containers is disabled by default, but can be reenabled by
    setting <option>boot.enableContainers</option> to true.
  '';
  type = types.bool;
  default = false;
};
|
|
|
|
|
2015-01-25 01:06:00 +03:00
|
|
|
# Master switch for container support; the default is the negation of
# boot.isContainer.
boot.enableContainers = mkOption {
  description = ''
    Whether to enable support for NixOS containers. Defaults to true
    (at no cost if containers are not actually used), but only if the
    system is not itself a lightweight container of a host.
    To enable support for nested containers, this option has to be
    explicitly set to true (in the outer container).
  '';
  type = types.bool;
  default = !config.boot.isContainer;
};
|
|
|
|
|
2014-03-18 21:18:35 +04:00
|
|
|
containers = mkOption {
|
2013-11-27 19:54:20 +04:00
|
|
|
type = types.attrsOf (types.submodule (
|
|
|
|
{ config, options, name, ... }:
|
|
|
|
{
|
|
|
|
options = {
|
|
|
|
|
|
|
|
# The container's NixOS configuration. The option type's merge function
# evaluates all definitions as one NixOS system, using either the
# container's own `pkgs` or the host's, and returns the resulting config.
config = mkOption {
  type =
    let
      # Package set used to evaluate the container.
      confPkgs = if config.pkgs != null then config.pkgs else pkgs;
      nixosDir = confPkgs.path + "/nixos";

      # Module injected into every container evaluation.
      extraConfig = {
        _file = "module at ${__curPos.file}:${toString __curPos.line}";
        config = {
          boot.isContainer = true;
          networking.hostName = mkDefault name;
          networking.useDHCP = false;
          assertions = [
            {
              assertion = config.privateNetwork -> stringLength name < 12;
              message = ''
                Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
                not be longer than 11 characters, because the container's interface name is derived from it.
                This might be fixed in the future. See https://github.com/NixOS/nixpkgs/issues/38509
              '';
            }
          ];
        };
      };
    in
    lib.mkOptionType {
      name = "Toplevel NixOS config";
      merge = loc: defs:
        (import (nixosDir + "/lib/eval-config.nix") {
          inherit system;
          inherit (confPkgs) lib;
          pkgs = confPkgs;
          baseModules = import (nixosDir + "/modules/module-list.nix");
          modules = [ extraConfig ] ++ map (def: def.value) defs;
          prefix = [ "containers" name ];
        }).config;
    };
  description = ''
    A specification of the desired configuration of this
    container, as a NixOS module.
  '';
};
|
|
|
|
|
|
|
|
# Path of an already-evaluated system; no default is declared here.
path = mkOption {
  description = ''
    As an alternative to specifying
    <option>config</option>, you can specify the path to
    the evaluated NixOS system configuration, typically a
    symlink to a system profile.
  '';
  type = types.path;
  example = "/nix/var/nix/profiles/containers/webserver";
};
|
|
|
|
|
2016-09-25 18:33:01 +03:00
|
|
|
# Extra capabilities, passed to systemd-nspawn as --capability=....
additionalCapabilities = mkOption {
  description = ''
    Grant additional capabilities to the container. See the
    capabilities(7) and systemd-nspawn(1) man pages for more
    information.
  '';
  type = types.listOf types.str;
  default = [];
  example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
};
|
2019-08-18 22:37:38 +03:00
|
|
|
|
2020-04-19 16:41:18 +03:00
|
|
|
# Package set for evaluating this container; when null, the `config`
# option's type falls back to the host's pkgs.
pkgs = mkOption {
  description = ''
    Customise which nixpkgs to use for this container.
  '';
  type = types.nullOr types.attrs;
  default = null;
  example = literalExample "pkgs";
};
|
|
|
|
|
2019-08-18 22:37:38 +03:00
|
|
|
# Ephemeral mode: selects --ephemeral and omits --link-journal in the
# start script.
ephemeral = mkOption {
  description = ''
    Runs container in ephemeral mode with the empty root filesystem at boot.
    This way container will be bootstrapped from scratch on each boot
    and will be cleaned up on shutdown leaving no traces behind.
    Useful for completely stateless, reproducible containers.

    Note that this option might require to do some adjustments to the container configuration,
    e.g. you might want to set
    <varname>systemd.network.networks.$interface.dhcpV4Config.ClientIdentifier</varname> to "mac"
    if you use <varname>macvlans</varname> option.
    This way dhcp client identifier will be stable between the container restarts.

    Note that the container journal will not be linked to the host if this option is enabled.
  '';
  type = types.bool;
  default = false;
};
|
|
|
|
|
2016-09-25 18:33:01 +03:00
|
|
|
# Convenience switch for tunnel interfaces. This block only declares
# the option.
enableTun = mkOption {
  description = ''
    Allows the container to create and setup tunnel interfaces
    by granting the <literal>NET_ADMIN</literal> capability and
    enabling access to <literal>/dev/net/tun</literal>.
  '';
  type = types.bool;
  default = false;
};
|
|
|
|
|
2014-03-18 13:49:25 +04:00
|
|
|
# Private networking. When enabled, the container name is limited to 11
# characters by an assertion in the `config` option.
privateNetwork = mkOption {
  description = ''
    Whether to give the container its own private virtual
    Ethernet interface. The interface is called
    <literal>eth0</literal>, and is hooked up to the interface
    <literal>ve-<replaceable>container-name</replaceable></literal>
    on the host. If this option is not set, then the
    container shares the network interfaces of the host,
    and can bind to any port on any interface.
  '';
  type = types.bool;
  default = false;
};
|
|
|
|
|
2015-08-22 13:01:09 +03:00
|
|
|
# Host interfaces handed over to the container.
interfaces = mkOption {
  description = ''
    The list of interfaces to be moved into the container.
  '';
  type = types.listOf types.str;
  default = [];
  example = [ "eth1" "eth2" ];
};
|
|
|
|
|
2016-12-06 02:11:49 +03:00
|
|
|
# Host interfaces to derive macvlan interfaces from.
macvlans = mkOption {
  description = ''
    The list of host interfaces from which macvlans will be
    created. For each interface specified, a macvlan interface
    will be created and moved to the container.
  '';
  type = types.listOf types.str;
  default = [];
  example = [ "eth1" "eth2" ];
};
|
|
|
|
|
2016-05-07 01:04:28 +03:00
|
|
|
# Additional veth pairs, keyed by interface name; each entry takes the
# same options as `networkOptions`.
extraVeths = mkOption {
  description = ''
    Extra veth-pairs to be created for the container.
  '';
  type = types.attrsOf (types.submodule { options = networkOptions; });
  default = {};
};
|
|
|
|
|
2015-01-12 20:12:33 +03:00
|
|
|
# Off by default: containers start only when asked to.
autoStart = mkOption {
  description = ''
    Whether the container is automatically started at boot-time.
  '';
  type = types.bool;
  default = false;
};
|
2015-05-24 19:31:59 +03:00
|
|
|
|
2019-07-31 17:19:18 +03:00
|
|
|
# Start timeout; ends up as TimeoutStartSec in the unit's service
# directives.
timeoutStartSec = mkOption {
  description = ''
    Time for the container to start. In case of a timeout,
    the container processes get killed.
    See <citerefentry><refentrytitle>systemd.time</refentrytitle>
    <manvolnum>7</manvolnum></citerefentry>
    for more information about the format.
  '';
  type = types.str;
  default = "1min";
};
|
|
|
|
|
2015-05-26 14:56:42 +03:00
|
|
|
# Extra bind mounts, keyed by name; the name is the default mount point
# (see bindMountOpts).
bindMounts = mkOption {
  description =
    ''
      An extra list of directories that is bound to the container.
    '';
  type = types.attrsOf (types.submodule bindMountOpts);
  default = {};
  example = literalExample ''
    { "/home" = { hostPath = "/home/alice";
                  isReadOnly = false; };
    }
  '';
};
|
|
|
|
|
2016-09-25 18:33:01 +03:00
|
|
|
# Device nodes the container may use; each entry becomes a DeviceAllow
# line ("<node> <modifier>") in the unit's service directives.
allowedDevices = mkOption {
  type = with types; listOf (submodule allowedDeviceOpts);
  default = [];
  example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
  # Wording fixed: the old text read "the containers has access to".
  description = ''
    A list of device nodes to which the container has access.
  '';
};
|
|
|
|
|
2016-11-22 04:11:33 +03:00
|
|
|
# tmpfs mounts; each entry is passed to systemd-nspawn as --tmpfs=....
tmpfs = mkOption {
  description = ''
    Mounts a set of tmpfs file systems into the container.
    Multiple paths can be specified.
    Valid items must conform to the --tmpfs argument
    of systemd-nspawn. See systemd-nspawn(1) for details.
  '';
  type = types.listOf types.str;
  default = [];
  example = [ "/var" ];
};
|
|
|
|
|
2018-02-25 16:22:23 +03:00
|
|
|
# Free-form additional systemd-nspawn arguments.
extraFlags = mkOption {
  description = ''
    Extra flags passed to the systemd-nspawn command.
    See systemd-nspawn(1) for details.
  '';
  type = types.listOf types.str;
  default = [];
  example = [ "--drop-capability=CAP_SYS_CHROOT" ];
};
|
|
|
|
|
2016-05-07 01:04:28 +03:00
|
|
|
} // networkOptions;
|
2013-11-27 19:54:20 +04:00
|
|
|
|
|
|
|
# Per-container defaults. `path` is only derived from the container's own
# NixOS configuration when the `config` option has actually been defined.
config = mkMerge [
  (mkIf options.config.isDefined {
    path = config.config.system.build.toplevel;
  })
];
|
|
|
|
}));
|
|
|
|
|
|
|
|
default = {};
|
|
|
|
example = literalExample
|
|
|
|
''
|
|
|
|
{ webserver =
|
2014-03-19 22:55:05 +04:00
|
|
|
{ path = "/nix/var/nix/profiles/webserver";
|
2013-11-27 19:54:20 +04:00
|
|
|
};
|
|
|
|
database =
|
2014-03-19 22:55:05 +04:00
|
|
|
{ config =
|
2013-11-27 19:54:20 +04:00
|
|
|
{ config, pkgs, ... }:
|
|
|
|
{ services.postgresql.enable = true;
|
2018-10-23 19:22:14 +03:00
|
|
|
services.postgresql.package = pkgs.postgresql_9_6;
|
2018-04-06 04:19:06 +03:00
|
|
|
|
2018-07-25 23:22:54 +03:00
|
|
|
system.stateVersion = "17.03";
|
2013-11-27 19:54:20 +04:00
|
|
|
};
|
|
|
|
};
|
|
|
|
}
|
|
|
|
'';
|
|
|
|
description = ''
|
|
|
|
A set of NixOS system configurations to be run as lightweight
|
|
|
|
containers. Each container appears as a service
|
|
|
|
<literal>container-<replaceable>name</replaceable></literal>
|
|
|
|
on the host system, allowing it to be started and stopped via
|
2016-09-22 13:58:39 +03:00
|
|
|
<command>systemctl</command>.
|
2013-11-27 19:54:20 +04:00
|
|
|
'';
|
|
|
|
};
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2016-04-02 18:03:30 +03:00
|
|
|
config = mkIf (config.boot.enableContainers) (let
|
2013-11-27 19:54:20 +04:00
|
|
|
|
2016-04-02 18:03:30 +03:00
|
|
|
# Template for the `container@` service. It is installed as-is for
# imperative containers and used as the base that declarative containers
# override. All scripts and service directives are rendered from
# `dummyConfig` here.
unit = {
  description = "Container '%i'";

  unitConfig = {
    RequiresMountsFor = "/var/lib/containers/%i";
  };

  # Tools made available to the unit's scripts.
  path = [ pkgs.iproute ];

  environment.root = "/var/lib/containers/%i";
  environment.INSTANCE = "%i";

  # Start-up phases.
  preStart = preStartScript dummyConfig;
  script = startScript dummyConfig;
  postStart = postStartScript dummyConfig;

  # Shut the container down through machinectl before the unit stops.
  preStop = "machinectl poweroff $INSTANCE";

  serviceConfig = serviceDirectives dummyConfig;

  restartIfChanged = false;
};
|
|
|
|
in {
|
2019-08-14 00:52:01 +03:00
|
|
|
# Make multi-user.target want machines.target, the target that auto-started
# containers are attached to (see `wantedBy` in the declarative units).
systemd.targets.multi-user.wants = [ "machines.target" ];
|
nixos/containers: Introduce several tweaks to systemd-nspawn from upstream systemd
* Lets container@.service be activated by machines.target instead of
multi-user.target
According to the systemd manpages, all containers that are registered
by machinectl, should be inside machines.target for easy stopping
and starting container units altogether
* make sure container@.service and container.slice instances are
actually located in machine.slice
https://plus.google.com/112206451048767236518/posts/SYAueyXHeEX
See original commit: https://github.com/NixOS/systemd/commit/45d383a3b8
* Enable Cgroup delegation for nixos-containers
Delegate=yes should be set for container scopes where a systemd instance
inside the container shall manage the hierarchies below its own cgroup
and have access to all controllers.
This is equivalent to enabling all accounting options on the systemd
process inside the system container. This means that systemd inside
the container is responsible for managing Cgroup resources for
unit files that enable accounting options inside. Without this
option, units that make use of cgroup features within system
containers might misbehave
See original commit: https://github.com/NixOS/systemd/commit/a931ad47a8
from the manpage:
Turns on delegation of further resource control partitioning to
processes of the unit. Units where this is enabled may create and
manage their own private subhierarchy of control groups below the
control group of the unit itself. For unprivileged services (i.e.
those using the User= setting) the unit's control group will be made
accessible to the relevant user. When enabled the service manager
will refrain from manipulating control groups or moving processes
below the unit's control group, so that a clear concept of ownership
is established: the control group tree above the unit's control
group (i.e. towards the root control group) is owned and managed by
the service manager of the host, while the control group tree below
the unit's control group is owned and managed by the unit itself.
Takes either a boolean argument or a list of control group
controller names. If true, delegation is turned on, and all
supported controllers are enabled for the unit, making them
available to the unit's processes for management. If false,
delegation is turned off entirely (and no additional controllers are
enabled). If set to a list of controllers, delegation is turned on,
and the specified controllers are enabled for the unit. Note that
additional controllers than the ones specified might be made
available as well, depending on configuration of the containing
slice unit or other units contained in it. Note that assigning the
empty string will enable delegation, but reset the list of
controllers, all assignments prior to this will have no effect.
Defaults to false.
Note that controller delegation to less privileged code is only safe
on the unified control group hierarchy. Accordingly, access to the
specified controllers will not be granted to unprivileged services
on the legacy hierarchy, even when requested.
The following controller names may be specified: cpu, cpuacct, io,
blkio, memory, devices, pids. Not all of these controllers are
available on all kernels however, and some are specific to the
unified hierarchy while others are specific to the legacy hierarchy.
Also note that the kernel might support further controllers, which
aren't covered here yet as delegation is either not supported at all
for them or not defined cleanly.
2018-10-21 12:40:20 +03:00
|
|
|
|
2016-04-02 18:03:30 +03:00
|
|
|
# One `container@` template service for imperative containers, plus one
# `container@<name>` service per entry in `config.containers`.
systemd.services =
  let
    # Grants access to /dev/net/tun and CAP_NET_ADMIN when `enableTun` is set;
    # otherwise returns the container configuration unchanged.
    withTun = cfg: cfg // optionalAttrs cfg.enableTun {
      allowedDevices = cfg.allowedDevices
        ++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
      additionalCapabilities = cfg.additionalCapabilities
        ++ [ "CAP_NET_ADMIN" ];
    };

    # Builds the name/value pair for a single declarative container.
    mkDeclarativeUnit = name: cfg:
      let
        containerConfig = withTun cfg;

        # Ephemeral containers live under /run and need no persistent mount.
        rootDir =
          if containerConfig.ephemeral
          then "/run/containers/%i"
          else "/var/lib/containers/%i";

        # The shared template with scripts and directives re-rendered for
        # this container's configuration.
        specialised = recursiveUpdate unit {
          preStart = preStartScript containerConfig;
          script = startScript containerConfig;
          postStart = postStartScript containerConfig;
          serviceConfig = serviceDirectives containerConfig;
          unitConfig.RequiresMountsFor = lib.optional (!containerConfig.ephemeral) "/var/lib/containers/%i";
          environment.root = rootDir;
        };

        # Only auto-started containers are hooked into machines.target and
        # restarted when their system path or generated config file changes.
        autoStartSettings = optionalAttrs containerConfig.autoStart {
          wantedBy = [ "machines.target" ];
          wants = [ "network.target" ];
          after = [ "network.target" ];
          restartTriggers = [
            containerConfig.path
            config.environment.etc."containers/${name}.conf".source
          ];
          restartIfChanged = true;
        };
      in
        nameValuePair "container@${name}" (specialised // autoStartSettings);

    # The generic container template used by imperative containers
    templateUnit = nameValuePair "container@" unit;

    # declarative containers
    declarativeUnits = mapAttrsToList mkDeclarativeUnit config.containers;
  in
    listToAttrs (filter (entry: entry.value != null) ([ templateUnit ] ++ declarativeUnits));
|
2014-03-17 18:03:29 +04:00
|
|
|
|
2014-03-19 22:55:05 +04:00
|
|
|
# Generate a configuration file in /etc/containers for each
# container so that container@.service can get the container
# configuration.
environment.etc =
  let
    # Renders one forwarded port as "<protocol>:<hostPort>:<containerPort>";
    # the container port falls back to the host port when it is null.
    mkPortStr = p:
      let
        hostPort = toString p.hostPort;
        containerPort =
          if p.containerPort == null then hostPort else toString p.containerPort;
      in
        p.protocol + ":" + hostPort + ":" + containerPort;

    # Produces the /etc entry "containers/<name>.conf" for one container.
    mkConfFile = name: cfg:
      let
        forwardedPorts = concatStringsSep "," (map mkPortStr cfg.forwardPorts);
        extraFlags = optionalString (cfg.extraFlags != [])
          (" " + concatStringsSep " " cfg.extraFlags);
        nspawnFlags = mkBindFlags cfg.bindMounts + extraFlags;
      in
        nameValuePair "containers/${name}.conf" {
          text = ''
            SYSTEM_PATH=${cfg.path}
            ${optionalString cfg.privateNetwork ''
              PRIVATE_NETWORK=1
              ${optionalString (cfg.hostBridge != null) ''
                HOST_BRIDGE=${cfg.hostBridge}
              ''}
              ${optionalString (cfg.forwardPorts != []) ''
                HOST_PORT=${forwardedPorts}
              ''}
              ${optionalString (cfg.hostAddress != null) ''
                HOST_ADDRESS=${cfg.hostAddress}
              ''}
              ${optionalString (cfg.hostAddress6 != null) ''
                HOST_ADDRESS6=${cfg.hostAddress6}
              ''}
              ${optionalString (cfg.localAddress != null) ''
                LOCAL_ADDRESS=${cfg.localAddress}
              ''}
              ${optionalString (cfg.localAddress6 != null) ''
                LOCAL_ADDRESS6=${cfg.localAddress6}
              ''}
            ''}
            INTERFACES="${toString cfg.interfaces}"
            MACVLANS="${toString cfg.macvlans}"
            ${optionalString cfg.autoStart ''
              AUTO_START=1
            ''}
            EXTRA_NSPAWN_FLAGS="${nspawnFlags}"
          '';
        };
  in
    mapAttrs' mkConfFile config.containers;
|
2013-11-27 19:54:20 +04:00
|
|
|
|
2014-03-18 13:49:25 +04:00
|
|
|
# Generate /etc/hosts entries for the containers.
networking.extraHosts =
  let
    # One "<address> <name>.containers" line per container that has a
    # `localAddress`; any "/<suffix>" part of the address is dropped.
    hostsLine = name: cfg:
      optionalString (cfg.localAddress != null) ''
        ${head (splitString "/" cfg.localAddress)} ${name}.containers
      '';
  in
    concatStrings (mapAttrsToList hostsLine config.containers);
|
2014-03-18 13:49:25 +04:00
|
|
|
|
2016-12-02 14:51:00 +03:00
|
|
|
# Exclude interfaces named ve-* / vb-* from dhcpcd; the udev rule in this
# module uses the same prefixes for interfaces created by nixos-container.
networking.dhcpcd.denyInterfaces = [ "ve-*" "vb-*" ];
|
2014-05-07 19:00:46 +04:00
|
|
|
|
2017-11-21 10:41:41 +03:00
|
|
|
# The udev rule is only emitted when NetworkManager is enabled on the host.
services.udev.extraRules =
  let
    usesNetworkManager = config.networking.networkmanager.enable;
  in
    optionalString usesNetworkManager ''
      # Don't manage interfaces created by nixos-container.
      ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
    '';
|
|
|
|
|
2016-07-19 09:13:06 +03:00
|
|
|
# Add the nixos-container package to the system-wide package list.
environment.systemPackages = [ pkgs.nixos-container ];
|
2019-10-03 10:16:53 +03:00
|
|
|
|
|
|
|
# Kernel modules requested on the host whenever containers are enabled.
boot.kernelModules = [ "bridge" "macvlan" "tap" "tun" ];
|
2016-04-02 18:03:30 +03:00
|
|
|
});
|
2014-02-24 21:05:26 +04:00
|
|
|
}
|