From 18f28a6413e33416576f632367f0a4816c74c188 Mon Sep 17 00:00:00 2001 From: Austin Seipp Date: Mon, 23 Apr 2018 01:02:31 -0500 Subject: [PATCH] nixos: add foundationdb module, documentation Signed-off-by: Austin Seipp --- nixos/modules/module-list.nix | 1 + .../services/databases/foundationdb.nix | 360 ++++++++++++++++++ .../services/databases/foundationdb.xml | 279 ++++++++++++++ 3 files changed, 640 insertions(+) create mode 100644 nixos/modules/services/databases/foundationdb.nix create mode 100644 nixos/modules/services/databases/foundationdb.xml diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index 5a56554dc98b..1261fe950928 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -195,6 +195,7 @@ ./services/databases/clickhouse.nix ./services/databases/couchdb.nix ./services/databases/firebird.nix + ./services/databases/foundationdb.nix ./services/databases/hbase.nix ./services/databases/influxdb.nix ./services/databases/memcached.nix diff --git a/nixos/modules/services/databases/foundationdb.nix b/nixos/modules/services/databases/foundationdb.nix new file mode 100644 index 000000000000..ba921a9c1521 --- /dev/null +++ b/nixos/modules/services/databases/foundationdb.nix @@ -0,0 +1,360 @@ +{ config, lib, pkgs, ... 
}: + +with lib; + +let + cfg = config.services.foundationdb; + + # used for initial cluster configuration + initialIpAddr = if (cfg.publicAddress != "auto") then cfg.publicAddress else "127.0.0.1"; + + fdbServers = n: + concatStringsSep "\n" (map (x: "[fdbserver.${toString (x+cfg.listenPortStart)}]") (range 0 (n - 1))); + + backupAgents = n: + concatStringsSep "\n" (map (x: "[backup_agent.${toString x}]") (range 1 n)); + + configFile = pkgs.writeText "foundationdb.conf" '' + [general] + cluster_file = /etc/foundationdb/fdb.cluster + + [fdbmonitor] + restart_delay = ${toString cfg.restartDelay} + user = ${cfg.user} + group = ${cfg.group} + + [fdbserver] + command = ${pkgs.foundationdb}/bin/fdbserver + public_address = ${cfg.publicAddress}:$ID + listen_address = ${cfg.listenAddress} + datadir = ${cfg.dataDir}/$ID + logdir = ${cfg.logDir} + logsize = ${cfg.logSize} + maxlogssize = ${cfg.maxLogSize} + ${optionalString (cfg.class != null) "class = ${cfg.class}"} + memory = ${cfg.memory} + storage_memory = ${cfg.storageMemory} + + ${optionalString (cfg.locality.machineId != null) "locality_machineid=${cfg.locality.machineId}"} + ${optionalString (cfg.locality.zoneId != null) "locality_zoneid=${cfg.locality.zoneId}"} + ${optionalString (cfg.locality.datacenterId != null) "locality_dcid=${cfg.locality.datacenterId}"} + ${optionalString (cfg.locality.dataHall != null) "locality_data_hall=${cfg.locality.dataHall}"} + + ${fdbServers cfg.serverProcesses} + + [backup_agent] + command = ${pkgs.foundationdb}/libexec/backup_agent + ${backupAgents cfg.backupProcesses} + ''; +in +{ + options.services.foundationdb = { + + enable = mkEnableOption "FoundationDB Server"; + + publicAddress = mkOption { + type = types.str; + default = "auto"; + description = "Publicly visible IP address of the process. Port is determined by process ID"; + }; + + listenAddress = mkOption { + type = types.str; + default = "public"; + description = "IP address the process binds its listening socket to; the default value public reuses publicAddress. 
Port is determined by process ID"; + }; + + listenPortStart = mkOption { + type = types.int; + default = 4500; + description = '' + Starting port number for database listening sockets. Every FDB process binds to a + subsequent port, so this number reflects the start of the overall range. e.g. having + 8 server processes will use all ports between 4500 and 4507. + ''; + }; + + openFirewall = mkOption { + type = types.bool; + default = false; + description = '' + Open the firewall ports corresponding to FoundationDB processes and coordinators + using networking.firewall.allowedTCPPortRanges. + ''; + }; + + dataDir = mkOption { + type = types.path; + default = "/var/lib/foundationdb"; + description = "Data directory. All cluster data will be put under here."; + }; + + logDir = mkOption { + type = types.path; + default = "/var/log/foundationdb"; + description = "Log directory."; + }; + + user = mkOption { + type = types.str; + default = "foundationdb"; + description = "User account under which FoundationDB runs."; + }; + + group = mkOption { + type = types.str; + default = "foundationdb"; + description = "Group account under which FoundationDB runs."; + }; + + class = mkOption { + type = types.nullOr (types.enum [ "storage" "transaction" "stateless" ]); + default = null; + description = "Process class"; + }; + + restartDelay = mkOption { + type = types.int; + default = 10; + description = "Number of seconds to wait before restarting servers."; + }; + + logSize = mkOption { + type = types.string; + default = "10MiB"; + description = '' + Roll over to a new log file after the current log file + reaches the specified size. + ''; + }; + + maxLogSize = mkOption { + type = types.string; + default = "100MiB"; + description = '' + Delete the oldest log file when the total size of all log + files exceeds the specified size. If set to 0, old log files + will not be deleted. 
+ ''; + }; + + serverProcesses = mkOption { + type = types.int; + default = 1; + description = "Number of fdbserver processes to run."; + }; + + backupProcesses = mkOption { + type = types.int; + default = 1; + description = "Number of backup_agent processes to run for snapshots."; + }; + + memory = mkOption { + type = types.string; + default = "8GiB"; + description = '' + Maximum memory used by the process. The default value is + 8GiB. When specified without a unit, + MiB is assumed. This parameter does not + change the memory allocation of the program. Rather, it sets + a hard limit beyond which the process will kill itself and + be restarted. The default value of 8GiB + is double the intended memory usage in the default + configuration (providing an emergency buffer to deal with + memory leaks or similar problems). It is not recommended to + decrease the value of this parameter below its default + value. It may be increased if you wish to allocate a very + large amount of storage engine memory or cache. In + particular, when the storageMemory + parameter is increased, the memory + parameter should be increased by an equal amount. + ''; + }; + + storageMemory = mkOption { + type = types.string; + default = "1GiB"; + description = '' + Maximum memory used for data storage. The default value is + 1GiB. When specified without a unit, + MB is assumed. Clusters using the memory + storage engine will be restricted to using this amount of + memory per process for purposes of data storage. Memory + overhead associated with storing the data is counted against + this total. If you increase the + storageMemory, you should also increase + the memory parameter by the same amount. + ''; + }; + + locality = mkOption { + default = { + machineId = null; + zoneId = null; + datacenterId = null; + dataHall = null; + }; + + description = '' + FoundationDB locality settings. 
+ ''; + + type = types.submodule ({ + options = { + machineId = mkOption { + default = null; + type = types.nullOr types.str; + description = '' + Machine identifier key. All processes on a machine should share a + unique id. By default, processes on a machine determine a unique id to share. + This does not generally need to be set. + ''; + }; + + zoneId = mkOption { + default = null; + type = types.nullOr types.str; + description = '' + Zone identifier key. Processes that share a zone id are + considered non-unique for the purposes of data replication. + If unset, defaults to machine id. + ''; + }; + + datacenterId = mkOption { + default = null; + type = types.nullOr types.str; + description = '' + Data center identifier key. All processes physically located in a + data center should share the id. If you are depending on data + center based replication this must be set on all processes. + ''; + }; + + dataHall = mkOption { + default = null; + type = types.nullOr types.str; + description = '' + Data hall identifier key. All processes physically located in a + data hall should share the id. If you are depending on data + hall based replication this must be set on all processes. + ''; + }; + }; + }); + }; + + extraReadWritePaths = mkOption { + default = [ ]; + type = types.listOf types.path; + description = '' + An extra set of filesystem paths that FoundationDB can read from + and write to. By default, FoundationDB runs under a heavily + namespaced systemd environment without write access to most of + the filesystem outside of its data and log directories. By + adding paths to this list, the set of writeable paths will be + expanded. This is useful for allowing e.g. backups to local files, + which must be performed on behalf of the foundationdb service. 
+ ''; + }; + + pidfile = mkOption { + type = types.path; + default = "/run/foundationdb.pid"; + description = "Path to pidfile for fdbmonitor."; + }; + }; + + config = mkIf cfg.enable { + meta.doc = ./foundationdb.xml; + meta.maintainers = with lib.maintainers; [ thoughtpolice ]; + + environment.systemPackages = [ pkgs.foundationdb ]; + + users.extraUsers = optionalAttrs (cfg.user == "foundationdb") (singleton + { name = "foundationdb"; + description = "FoundationDB User"; + uid = config.ids.uids.foundationdb; + group = cfg.group; + }); + + users.extraGroups = optionalAttrs (cfg.group == "foundationdb") (singleton + { name = "foundationdb"; + gid = config.ids.gids.foundationdb; + }); + + networking.firewall.allowedTCPPortRanges = mkIf cfg.openFirewall + [ { from = cfg.listenPortStart; + to = (cfg.listenPortStart + cfg.serverProcesses) - 1; + } + ]; + + systemd.services.foundationdb = { + description = "FoundationDB Service"; + + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + unitConfig = + { RequiresMountsFor = "${cfg.dataDir} ${cfg.logDir}"; + }; + + serviceConfig = + let rwpaths = [ cfg.dataDir cfg.logDir cfg.pidfile "/etc/foundationdb" ] + ++ cfg.extraReadWritePaths; + in + { Type = "simple"; + Restart = "always"; + RestartSec = 5; + User = cfg.user; + Group = cfg.group; + PIDFile = "${cfg.pidfile}"; + + PermissionsStartOnly = true; # setup needs root perms + TimeoutSec = 120; # give reasonable time to shut down + + # Security options + NoNewPrivileges = true; + ProtectHome = true; + ProtectSystem = "strict"; + ProtectKernelTunables = true; + ProtectControlGroups = true; + PrivateTmp = true; + PrivateDevices = true; + ReadWritePaths = lib.concatStringsSep " " (map (x: "-" + x) rwpaths); + }; + + path = [ pkgs.foundationdb pkgs.coreutils ]; + + preStart = '' + rm -f ${cfg.pidfile} && \ + touch ${cfg.pidfile} && \ + chown -R ${cfg.user}:${cfg.group} ${cfg.pidfile} + + for x in "${cfg.logDir}" "${cfg.dataDir}" /etc/foundationdb; do + [ ! 
-d "$x" ] && mkdir -m 0700 -vp "$x" && chown -R ${cfg.user}:${cfg.group} "$x"; + done + + if [ ! -f /etc/foundationdb/fdb.cluster ]; then + cf=/etc/foundationdb/fdb.cluster + desc=$(tr -dc A-Za-z0-9 </dev/urandom 2>/dev/null | head -c8) + rand=$(tr -dc A-Za-z0-9 </dev/urandom 2>/dev/null | head -c8) + echo ''${desc}:''${rand}@${initialIpAddr}:${builtins.toString cfg.listenPortStart} > $cf + chmod 0660 $cf && chown -R ${cfg.user}:${cfg.group} $cf + touch "${cfg.dataDir}/.first_startup" + fi + ''; + + script = '' + exec fdbmonitor --lockfile ${cfg.pidfile} --conffile ${configFile}; + ''; + + postStart = '' + if [ -e "${cfg.dataDir}/.first_startup" ]; then + fdbcli --exec "configure new single ssd" + rm -f "${cfg.dataDir}/.first_startup"; + fi + ''; + }; + }; +} diff --git a/nixos/modules/services/databases/foundationdb.xml b/nixos/modules/services/databases/foundationdb.xml new file mode 100644 index 000000000000..d10a5cfe836e --- /dev/null +++ b/nixos/modules/services/databases/foundationdb.xml @@ -0,0 +1,279 @@ + + +FoundationDB + +Source: modules/services/databases/foundationdb.nix + +Upstream documentation: + +Maintainer: Austin Seipp + +Default version: 5.1.x + +FoundationDB (or "FDB") is a distributed, open source, high performance, +transactional key-value store. It can store petabytes of data and deliver +exceptional performance while maintaining consistency and ACID semantics over a +large cluster. + +
Configuring and basic setup + +To enable FoundationDB, add the following to your +configuration.nix: + + +services.foundationdb.enable = true; + + + +After running nixos-rebuild, you can verify whether +FoundationDB is running by executing fdbcli (which is added +to environment.systemPackages): + + +$ sudo -u foundationdb fdbcli +Using cluster file `/etc/foundationdb/fdb.cluster'. + +The database is available. + +Welcome to the fdbcli. For help, type `help'. +fdb> status + +Using cluster file `/etc/foundationdb/fdb.cluster'. + +Configuration: + Redundancy mode - single + Storage engine - memory + Coordinators - 1 + +Cluster: + FoundationDB processes - 1 + Machines - 1 + Memory availability - 5.4 GB per process on machine with least available + Fault Tolerance - 0 machines + Server time - 04/20/18 15:21:14 + +... + +fdb> + + + +FoundationDB is run under the foundationdb user and +group by default, but this may be changed in the NixOS configuration. The +systemd unit foundationdb.service controls the +fdbmonitor process. + +By default, the NixOS module for FoundationDB creates a single +SSD-storage based database for development and basic usage. This storage engine +is designed for SSDs and will perform poorly on HDDs; however it can handle far +more data than the alternative "memory" engine and is a better default choice +for most deployments. (Note that you can change the storage backend on-the-fly +for a given FoundationDB cluster using fdbcli.) + +Furthermore, only 1 server process and 1 backup agent are started in the +default configuration. See below for more on scaling to increase this. + +FoundationDB stores all data for all server processes under +/var/lib/foundationdb. You can override this using +services.foundationdb.dataDir, e.g. + + +services.foundationdb.dataDir = "/data/fdb"; + + + + +Similarly, logs are stored under +/var/log/foundationdb by default, and there is a +corresponding services.foundationdb.logDir as well. + +
+ +
Scaling processes and backup agents + +Scaling the number of server processes is quite easy; simply specify +services.foundationdb.serverProcesses to be the number of +FoundationDB worker processes that should be started on the machine. + +FoundationDB worker processes typically require 4GB of RAM per-process at +minimum for good performance, so this option is set to 1 by default since the +maximum amount of RAM is unknown. You're advised to abide by this restriction, +so pick a number of processes so that each has 4GB or more. + +A similar option exists in order to scale backup agent processes, +services.foundationdb.backupProcesses. Backup agents are not +as performance/RAM sensitive, so feel free to experiment with the number of +available backup processes. + +
+ +
Clustering + +FoundationDB on NixOS works similarly to other Linux systems, so this +section will be brief. Please refer to the full FoundationDB documentation for +more on clustering. + +FoundationDB organizes clusters using a set of +coordinators, which are just specially-designated worker +processes. By default, every installation of FoundationDB on NixOS will start +as its own individual cluster, with a single coordinator: the first worker +process on localhost. + +Coordinators are specified globally using the +/etc/foundationdb/fdb.cluster file, which all servers and +client applications will use to find and join coordinators. Note that this file +can not be managed by NixOS so easily: FoundationDB is +designed so that it will rewrite the file at runtime for all clients and nodes +when cluster coordinators change, with clients transparently handling this +without intervention. + +When dealing with a cluster, there are two main things you want to +do: + + + Add a node to the cluster for storage/compute. + Promote an ordinary worker to a coordinator. + + +A node must already be a member of the cluster in order to properly be +promoted to a coordinator, so you must always add it first if you wish to +promote it. + +To add a machine to a FoundationDB cluster: + + + Choose one of the servers to start as the initial coordinator. + + Copy the /etc/foundationdb/fdb.cluster file + from this server to all the other servers. Restart FoundationDB on all of + these other servers, so they join the cluster. + All of these servers are now connected and working together + in the cluster, under the chosen coordinator. + + +At this point, you can add as many nodes as you want by just repeating +the above steps. By default there will still be a single coordinator: you can +use fdbcli to change this and add new coordinators. + +As a convenience, FoundationDB can automatically assign coordinators +based on the redundancy mode you wish to achieve for the cluster. 
Once all the +nodes have been joined, simply set the replication policy, and then issue the +coordinators auto command. + +For example, assuming we have 3 nodes available, we can enable double +redundancy mode, then auto-select coordinators. For double redundancy, 3 +coordinators is ideal: therefore FoundationDB will make +every node a coordinator automatically: + + +fdbcli> configure double ssd +fdbcli> coordinators auto + + +This will transparently update all the servers within seconds, and +appropriately rewrite the fdb.cluster file, as well as +informing all client processes to do the same. + +
+ +
Client connectivity + +By default, all clients must use the current +fdb.cluster file to access a given FoundationDB cluster. +This file is located by default in +/etc/foundationdb/fdb.cluster on all machines with the +FoundationDB service enabled, so you may copy the active one from your cluster +to a new node in order to connect, if it is not part of the cluster. + +
+ +
Backups and Disaster Recovery + +The usual rules for doing FoundationDB backups apply on NixOS as written +in the FoundationDB manual. However, one important difference is the security +profile for NixOS: by default, the foundationdb systemd unit +uses Linux namespaces to restrict write access to the +system, except for the log directory, data directory, and the +/etc/foundationdb/ directory. This is enforced by default +and cannot be disabled. + +However, a side effect of this is that the fdbbackup +command doesn't work properly for local filesystem backups: FoundationDB uses a +server process alongside the database processes to perform backups and copy the +backups to the filesystem. As a result, this process is put under the +restricted namespaces above: the backup process can only write to a limited +number of paths. + +In order to allow flexible backup locations on local disks, the +FoundationDB NixOS module supports a +services.foundationdb.extraReadWritePaths option. This option +takes a list of paths, and adds them to the systemd unit, allowing the +processes inside the service to write (and read) the specified +directories. + +For example, to create backups in /opt/fdb-backups, +first set up the paths in the module options: + + +services.foundationdb.extraReadWritePaths = [ "/opt/fdb-backups" ]; + + +Restart the FoundationDB service, and it will now be able to write to +this directory. The unit still starts if the path is missing, but this path +must exist before restarting the unit for the access to apply. Otherwise, systemd +will not include it in the private FoundationDB namespace (and it will not add +it dynamically at runtime). + +You can now perform a backup: + + +$ sudo -u foundationdb fdbbackup start -t default -d file:///opt/fdb-backups +$ sudo -u foundationdb fdbbackup status -t default + + +
+ +
Known limitations + +The FoundationDB setup for NixOS should currently be considered beta. +FoundationDB is not new software, but the NixOS compilation and integration has +only undergone fairly basic testing of all the available functionality. + + + TLS plugin support is compiled in, but it's currently not + possible to specify the set of TLS certificate options in + services.foundationdb + There is no way to specify individual parameters for + individual fdbserver processes. Currently, all server + processes inherit all the global fdbmonitor settings. + + Python bindings are not currently installed. + Ruby bindings are not currently installed. + Java bindings are not currently installed. + Go bindings are not currently installed. + + +
+ +
Options + +NixOS's FoundationDB module allows you to configure all of the most +relevant configuration options for fdbmonitor, matching it +quite closely. For a complete list of all options, check man +configuration.nix. + +
+ +
Full documentation + +FoundationDB is a complex piece of software, and requires careful +administration to properly use. Full documentation for administration can be +found here: https://apple.github.io/foundationdb/. + +
+ +