Linux: Run all child processes in their own systemd scope to prevent the OOM killer from harvesting kitty when a child process misbehaves

Fixes #7427
This commit is contained in:
Kovid Goyal 2024-05-16 15:43:25 +05:30
parent 3345e40bdb
commit 39ea084be9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
7 changed files with 180 additions and 0 deletions

View File

@ -82,6 +82,8 @@ Detailed list of changes
- Add some more box-drawing characters from the "Geometric shapes" Unicode block (:iss:`7433`)
- Linux: Run all child processes in their own systemd scope to prevent the OOM killer from harvesting kitty when a child process misbehaves (:iss:`7427`)
0.34.1 [2024-04-19]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -5,6 +5,7 @@
import sys
from collections import defaultdict
from contextlib import contextmanager, suppress
from itertools import count
from typing import TYPE_CHECKING, DefaultDict, Dict, Generator, List, Optional, Sequence, Tuple
import kitty.fast_data_types as fast_data_types
@ -190,6 +191,9 @@ class ProcessDesc(TypedDict):
cmdline: Optional[Sequence[str]]
child_counter = count()
class Child:
child_fd: Optional[int] = None
@ -208,6 +212,7 @@ def __init__(
hold: bool = False,
):
self.is_clone_launch = is_clone_launch
self.id = next(child_counter)
self.add_listen_on_env_var = add_listen_on_env_var
self.argv = list(argv)
if cwd_from:
@ -338,6 +343,14 @@ def fork(self) -> Optional[int]:
self.terminal_ready_fd = ready_write_fd
if self.child_fd is not None:
os.set_blocking(self.child_fd, False)
if not is_macos:
ppid = getpid()
try:
fast_data_types.systemd_move_pid_into_new_scope(pid, f'kitty-{ppid}-{self.id}.scope', f'kitty child process: {pid} launched by: {ppid}')
except NotImplementedError:
pass
except (RuntimeError, OSError) as err:
log_error("Could not move child process into a systemd scope: " + str(err))
return pid
def __del__(self) -> None:

View File

@ -17,6 +17,7 @@ typedef enum {
COCOA_CLEANUP_FUNC,
PNG_READER_CLEANUP_FUNC,
FONTCONFIG_CLEANUP_FUNC,
SYSTEMD_CLEANUP_FUNC,
NUM_CLEANUP_FUNCS
} AtExitCleanupFunc;

View File

@ -499,6 +499,7 @@ extern bool init_logging(PyObject *module);
extern bool init_png_reader(PyObject *module);
extern bool init_utmp(PyObject *module);
extern bool init_loop_utils(PyObject *module);
extern bool init_systemd_module(PyObject *module);
#ifdef __APPLE__
extern int init_CoreText(PyObject *);
extern bool init_cocoa(PyObject *module);
@ -570,6 +571,7 @@ PyInit_fast_data_types(void) {
if (!init_utmp(m)) return NULL;
if (!init_loop_utils(m)) return NULL;
if (!init_crypto_library(m)) return NULL;
if (!init_systemd_module(m)) return NULL;
CellAttrs a;
#define s(name, attr) { a.val = 0; a.attr = 1; PyModule_AddIntConstant(m, #name, shift_to_first_set_bit(a)); }

View File

@ -1581,3 +1581,4 @@ def wayland_compositor_data() -> Tuple[int, Optional[str]]:...
def monotonic() -> float: ...
def timed_debug_print(x: str) -> None: ...
def opengl_version_string() -> str: ...
def systemd_move_pid_into_new_scope(pid: int, scope_name: str, description: str) -> str: ...

155
kitty/systemd.c Normal file
View File

@ -0,0 +1,155 @@
/*
* systemd.c
* Copyright (C) 2024 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define _GNU_SOURCE
#include "data-types.h"
#include "cleanup.h"
#ifdef KITTY_HAS_SYSTEMD
#include <systemd/sd-login.h>
#include <systemd/sd-bus.h>
static struct {
sd_bus *user_bus;
bool initialized;
} systemd = {0};
static void
ensure_initialized(void) {
if (!systemd.initialized) {
systemd.initialized = true;
int ret = sd_bus_default_user(&systemd.user_bus);
if (ret < 0) { log_error("Failed to open systemd user bus with error: %s", strerror(-ret)); }
}
}
#define RAII_bus_error(name) __attribute__((cleanup(sd_bus_error_free))) sd_bus_error name = SD_BUS_ERROR_NULL;
#define RAII_message(name) __attribute__((cleanup(sd_bus_message_unrefp))) sd_bus_message *name = NULL;
#define SYSTEMD_DESTINATION "org.freedesktop.systemd1"
#define SYSTEMD_PATH "/org/freedesktop/systemd1"
#define SYSTEMD_INTERFACE "org.freedesktop.systemd1.Manager"
static bool
set_systemd_error(int r, const char *msg) {
RAII_PyObject(m, PyUnicode_FromFormat("Failed to %s: %s", msg, strerror(-r)));
if (m) {
RAII_PyObject(e, Py_BuildValue("(iO)", -r, m));
if (e) PyErr_SetObject(PyExc_OSError, e);
}
return false;
}
static bool
set_reply_error(const char* func_name, int r, const sd_bus_error *err) {
RAII_PyObject(m, PyUnicode_FromFormat("Failed to call %s: %s: %s", func_name, err->name, err->message));
if (m) {
RAII_PyObject(e, Py_BuildValue("(iO)", -r, m));
if (e) PyErr_SetObject(PyExc_OSError, e);
}
return false;
}
static bool
move_pid_into_new_scope(pid_t pid, const char* scope_name, const char *description) {
ensure_initialized();
if (!systemd.user_bus) {
PyErr_SetString(PyExc_RuntimeError, "Could not connect to systemd user bus");
return false;
}
pid_t parent_pid = getpid();
RAII_bus_error(err); RAII_message(m); RAII_message(reply);
int r;
#define checked_call(func, ...) if ((r = func(__VA_ARGS__)) < 0) { return set_systemd_error(r, #func); }
checked_call(sd_bus_message_new_method_call, systemd.user_bus, &m, SYSTEMD_DESTINATION, SYSTEMD_PATH, SYSTEMD_INTERFACE, "StartTransientUnit");
// mode is "fail" which means it will fail if a unit with scope_name already exists
checked_call(sd_bus_message_append, m, "ss", scope_name, "fail");
checked_call(sd_bus_message_open_container, m, 'a', "(sv)");
if (description && description[0]) {
checked_call(sd_bus_message_append, m, "(sv)", "Description", "s", description);
}
RAII_ALLOC(char, slice, NULL);
if (sd_pid_get_user_slice(parent_pid, &slice) >= 0) {
checked_call(sd_bus_message_append, m, "(sv)", "Slice", "s", slice);
} else {
// Fallback
checked_call(sd_bus_message_append, m, "(sv)", "Slice", "s", "kitty.slice");
}
// Add the PID to this scope
checked_call(sd_bus_message_open_container, m, 'r', "sv");
checked_call(sd_bus_message_append, m, "s", "PIDs");
checked_call(sd_bus_message_open_container, m, 'v', "au");
checked_call(sd_bus_message_open_container, m, 'a', "u");
checked_call(sd_bus_message_append, m, "u", pid);
checked_call(sd_bus_message_close_container, m); // au
checked_call(sd_bus_message_close_container, m); // v
checked_call(sd_bus_message_close_container, m); // (sv)
// If something in this process group is OOMkilled dont kill the rest of
// the process group. Since typically the shell is not causing the OOM
// something being run inside it is.
checked_call(sd_bus_message_append, m, "(sv)", "OOMPolicy", "s", "continue");
// Make sure shells are terminated with SIGHUP not just SIGTERM
checked_call(sd_bus_message_append, m, "(sv)", "SendSIGHUP", "b", true);
// Unload this unit in failed state as well
checked_call(sd_bus_message_append, m, "(sv)", "CollectMode", "s", "inactive-or-failed");
// Only kill the main process on stop
checked_call(sd_bus_message_append, m, "(sv)", "KillMode", "s", "process");
checked_call(sd_bus_message_close_container, m); // End properties a(sv)
//
checked_call(sd_bus_message_append, m, "a(sa(sv))", 0); // No auxiliary units
//
if ((r=sd_bus_call(systemd.user_bus, m, 0 /* timeout default */, &err, &reply)) < 0) return set_reply_error("StartTransientUnit", r, &err);
return true;
#undef checked_call
}
static void
finalize(void) {
if (systemd.user_bus) {
sd_bus_unref(systemd.user_bus);
}
memset(&systemd, 0, sizeof(systemd));
}
#endif
static PyObject*
systemd_move_pid_into_new_scope(PyObject *self UNUSED, PyObject *args) {
long pid; const char *scope_name, *description;
if (!PyArg_ParseTuple(args, "lss", &pid, &scope_name, &description)) return NULL;
#ifdef KITTY_HAS_SYSTEMD
move_pid_into_new_scope(pid, scope_name, description);
#else
PyErr_SetString(PyExc_NotImplementedError, "not supported on this platform");
#endif
if (PyErr_Occurred()) return NULL;
Py_RETURN_NONE;
}
static PyMethodDef module_methods[] = {
METHODB(systemd_move_pid_into_new_scope, METH_VARARGS),
{NULL, NULL, 0, NULL} /* Sentinel */
};
bool
init_systemd_module(PyObject *module) {
#ifdef KITTY_HAS_SYSTEMD
register_at_exit_cleanup_func(SYSTEMD_CLEANUP_FUNC, finalize);
#endif
if (PyModule_AddFunctions(module, module_methods) != 0) return false;
return true;
}

View File

@ -610,6 +610,7 @@ def kitty_env(args: Options) -> Env:
# them and on macOS when building with homebrew it is required
with suppress(SystemExit, subprocess.CalledProcessError):
cflags.extend(pkg_config('simde', '--cflags-only-I', fatal=False))
has_systemd = 0
libcrypto_cflags, libcrypto_ldflags = libcrypto_flags()
cflags.extend(libcrypto_cflags)
if is_macos:
@ -630,6 +631,11 @@ def kitty_env(args: Options) -> Env:
else:
cflags.extend(pkg_config('fontconfig', '--cflags-only-I'))
platform_libs = []
with suppress(SystemExit, subprocess.CalledProcessError):
cflags.extend(pkg_config('libsystemd', '--cflags-only-I', fatal=False))
has_systemd = 1
platform_libs.extend(pkg_config('libsystemd', '--libs'))
cppflags.append(f'-DKITTY_HAS_SYSTEMD={has_systemd}')
cflags.extend(pkg_config('harfbuzz', '--cflags-only-I'))
platform_libs.extend(pkg_config('harfbuzz', '--libs'))
pylib = get_python_flags(args, cflags)