summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2023-12-04 18:43:31 -0800
committerKent Overstreet <kent.overstreet@linux.dev>2023-12-07 11:57:36 -0500
commitfbfdd05ac5b5b748cf9c7c0ea72be5268917c5d6 (patch)
tree478e759ed430c56b4a7f02c2ec26457efd3134e0
parent5fa7db9806b1f7a082a0bcda8ba7b7beeb03bc61 (diff)
fsck: add systemd service definitions for automatic online service
Add some systemd service files so that bcachefs can automatically fsck mounted filesystems in the background. Hopefully with minimal disruption to frontend operations. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--Makefile37
-rw-r--r--debian/bcachefs-tools.postinst2
-rw-r--r--debian/bcachefs-tools.postrm2
-rw-r--r--fsck/bcachefsck@.service.in98
-rwxr-xr-xfsck/bcachefsck_fail63
-rw-r--r--fsck/bcachefsck_fail@.service.in75
-rw-r--r--fsck/system-bcachefsck.slice30
7 files changed, 305 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index aa7341e0..fa9f2ff2 100644
--- a/Makefile
+++ b/Makefile
@@ -91,9 +91,36 @@ else
ROOT_SBINDIR?=$(PREFIX)/sbin
INITRAMFS_DIR=/etc/initramfs-tools
endif
+LIBDIR=$(PREFIX)/lib
+
+PKGCONFIG_SERVICEDIR:=$(shell $(PKG_CONFIG) --variable=systemdsystemunitdir systemd)
+ifeq (,$(PKGCONFIG_SERVICEDIR))
+ $(warning skipping systemd integration)
+else
+BCACHEFSCK_ARGS=-f -n
+systemd_libfiles=\
+ fsck/bcachefsck_fail
+
+systemd_services=\
+ fsck/bcachefsck_fail@.service \
+ fsck/bcachefsck@.service \
+ fsck/system-bcachefsck.slice
+
+built_scripts+=\
+ fsck/bcachefsck_fail@.service \
+ fsck/bcachefsck@.service
+
+%.service: %.service.in
+ @echo " [SED] $@"
+ $(Q)sed -e "s|@libdir@|$(LIBDIR)|g" \
+ -e "s|@bcachefsck_args@|$(BCACHEFSCK_ARGS)|g" < $< > $@
+
+optional_build+=$(systemd_libfiles) $(systemd_services)
+optional_install+=install_systemd
+endif # PKGCONFIG_SERVICEDIR
.PHONY: all
-all: bcachefs
+all: bcachefs $(optional_build)
.PHONY: debug
debug: CFLAGS+=-Werror -DCONFIG_BCACHEFS_DEBUG=y -DCONFIG_VALGRIND=y
@@ -157,7 +184,7 @@ cmd_version.o : .version
.PHONY: install
install: INITRAMFS_HOOK=$(INITRAMFS_DIR)/hooks/bcachefs
install: INITRAMFS_SCRIPT=$(INITRAMFS_DIR)/scripts/local-premount/bcachefs
-install: bcachefs
+install: bcachefs $(optional_install)
$(INSTALL) -m0755 -D bcachefs -t $(DESTDIR)$(ROOT_SBINDIR)
$(INSTALL) -m0644 -D bcachefs.8 -t $(DESTDIR)$(PREFIX)/share/man/man8/
$(INSTALL) -m0755 -D initramfs/script $(DESTDIR)$(INITRAMFS_SCRIPT)
@@ -173,11 +200,17 @@ install: bcachefs
sed -i '/^# Note: make install replaces/,$$d' $(DESTDIR)$(INITRAMFS_HOOK)
echo "copy_exec $(ROOT_SBINDIR)/bcachefs /sbin/bcachefs" >> $(DESTDIR)$(INITRAMFS_HOOK)
+.PHONY: install_systemd
+install_systemd: $(systemd_services) $(systemd_libfiles)
+ $(INSTALL) -m0755 -D $(systemd_libfiles) -t $(DESTDIR)$(LIBDIR)
+ $(INSTALL) -m0644 -D $(systemd_services) -t $(DESTDIR)$(PKGCONFIG_SERVICEDIR)
+
.PHONY: clean
clean:
@echo "Cleaning all"
$(Q)$(RM) bcachefs libbcachefs.a tests/test_helper .version *.tar.xz $(OBJS) $(DEPS) $(DOCGENERATED)
$(Q)$(RM) -rf rust-src/*/target
+ $(Q)$(RM) -f $(built_scripts)
.PHONY: deb
deb: all
diff --git a/debian/bcachefs-tools.postinst b/debian/bcachefs-tools.postinst
index 483b9619..56dd8905 100644
--- a/debian/bcachefs-tools.postinst
+++ b/debian/bcachefs-tools.postinst
@@ -2,6 +2,8 @@
set -e
+#DEBHELPER#
+
case "$1" in
configure)
if which update-initramfs >/dev/null; then
diff --git a/debian/bcachefs-tools.postrm b/debian/bcachefs-tools.postrm
index 6b6fe8ac..2d913367 100644
--- a/debian/bcachefs-tools.postrm
+++ b/debian/bcachefs-tools.postrm
@@ -2,6 +2,8 @@
set -e
+#DEBHELPER#
+
case "$1" in
remove)
if which update-initramfs >/dev/null; then
diff --git a/fsck/bcachefsck@.service.in b/fsck/bcachefsck@.service.in
new file mode 100644
index 00000000..86c1824c
--- /dev/null
+++ b/fsck/bcachefsck@.service.in
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Online bcachefsck for %f
+OnFailure=bcachefsck_fail@%i.service
+Documentation=man:bcachefs(8)
+
+# Explicitly require the capabilities that this program needs
+ConditionCapability=CAP_SYS_ADMIN
+ConditionCapability=CAP_FOWNER
+ConditionCapability=CAP_DAC_OVERRIDE
+ConditionCapability=CAP_DAC_READ_SEARCH
+ConditionCapability=CAP_SYS_RAWIO
+
+# Must be a mountpoint
+ConditionPathIsMountPoint=%f
+RequiresMountsFor=%f
+
+[Service]
+Type=oneshot
+Environment=SERVICE_MODE=1
+ExecStart=bcachefs fsck --real-mountpoint /tmp/scrub/ @bcachefsck_args@ %f
+SyslogIdentifier=%N
+
+# Run scrub with minimal CPU and IO priority so that nothing else will starve.
+IOSchedulingClass=idle
+CPUSchedulingPolicy=idle
+CPUAccounting=true
+Nice=19
+
+# Create the service underneath the background service slice so that we can
+# control resource usage.
+Slice=system-bcachefsck.slice
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# Dynamically create a user that isn't root
+DynamicUser=true
+
+# Make the entire filesystem readonly and /home inaccessible, then bind mount
+# the filesystem we're supposed to be checking into our private /tmp dir.
+# 'norbind' means that we don't bind anything under that original mount.
+# This enables checking filesystems mounted under /tmp in the global mount
+# namespace.
+ProtectSystem=strict
+ProtectHome=yes
+PrivateTmp=true
+BindPaths=%f:/tmp/scrub:norbind
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Only allow the default personality Linux
+LockPersonality=true
+
+# No writable memory pages
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# bcachefsck needs these privileges to run, and no others
+CapabilityBoundingSet=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+AmbientCapabilities=CAP_SYS_ADMIN CAP_FOWNER CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_SYS_RAWIO
+NoNewPrivileges=true
+
+# bcachefsck doesn't create files
+UMask=7777
+
+# No access to hardware /dev files except for block devices
+ProtectClock=true
+DevicePolicy=closed
+DeviceAllow=block-*
diff --git a/fsck/bcachefsck_fail b/fsck/bcachefsck_fail
new file mode 100755
index 00000000..283cee70
--- /dev/null
+++ b/fsck/bcachefsck_fail
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+# Email logs of failed bcachefsck and bcachefsck_all unit runs
+
+recipient="$1"
+test -z "${recipient}" && exit 0
+service="$2"
+test -z "${service}" && exit 0
+mntpoint="$3"
+
+hostname="$(hostname -f 2>/dev/null)"
+test -z "${hostname}" && hostname="${HOSTNAME}"
+
+mailer="$(command -v sendmail)"
+if [ ! -x "${mailer}" ]; then
+ echo "${mailer}: Mailer program not found."
+ exit 1
+fi
+
+fail_mail_mntpoint() {
+ local scrub_svc
+
+ # Turn the mountpoint into a properly escaped systemd instance name
+ scrub_svc="$(systemd-escape --template "${service}@.service" --path "${mntpoint}")"
+ cat << ENDL
+To: ${recipient}
+From: <${service}@${hostname}>
+Subject: ${service} failure on ${mntpoint}
+Content-Transfer-Encoding: 8bit
+Content-Type: text/plain; charset=UTF-8
+
+So sorry, the automatic ${service} of ${mntpoint} on ${hostname} failed.
+Please do not reply to this message.
+
+A log of what happened follows:
+ENDL
+ systemctl status --full --lines 4294967295 "${scrub_svc}"
+}
+
+fail_mail() {
+ cat << ENDL
+To: ${recipient}
+From: <${service}@${hostname}>
+Subject: ${service} failure
+
+So sorry, the automatic ${service} on ${hostname} failed.
+
+A log of what happened follows:
+ENDL
+ systemctl status --full --lines 4294967295 "${service}"
+}
+
+if [ -n "${mntpoint}" ]; then
+ fail_mail_mntpoint | "${mailer}" -t -i
+else
+ fail_mail | "${mailer}" -t -i
+fi
+exit "${PIPESTATUS[1]}"
diff --git a/fsck/bcachefsck_fail@.service.in b/fsck/bcachefsck_fail@.service.in
new file mode 100644
index 00000000..369a809a
--- /dev/null
+++ b/fsck/bcachefsck_fail@.service.in
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=Online bcachefsck Failure Reporting for %f
+Documentation=man:bcachefs(8)
+
+[Service]
+Type=oneshot
+Environment=EMAIL_ADDR=root
+ExecStart=@libdir@/bcachefsck_fail "${EMAIL_ADDR}" bcachefsck %f
+User=mail
+Group=mail
+SupplementaryGroups=systemd-journal
+
+# Create the service underneath the background service slice so that we can
+# control resource usage.
+Slice=system-bcachefsck.slice
+
+# No realtime scheduling
+RestrictRealtime=true
+
+# Make /usr, /boot, /efi, and /etc readonly and /home inaccessible.
+ProtectSystem=full
+ProtectHome=yes
+PrivateTmp=true
+RestrictSUIDSGID=true
+
+# Emailing reports requires network access, but not the ability to change the
+# hostname.
+ProtectHostname=true
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Can't hide /proc because journalctl needs it to find various pieces of log
+# information
+#ProcSubset=pid
+
+# Only allow the default personality Linux
+LockPersonality=true
+
+# No writable memory pages
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# bcachefsck_fail does not need any capabilities to run
+CapabilityBoundingSet=
+NoNewPrivileges=true
+
+# Failure reporting shouldn't create world-readable files
+UMask=0077
+
+# Clean up any IPC objects when this unit stops
+RemoveIPC=true
+
+# No access to hardware device files
+PrivateDevices=true
+ProtectClock=true
diff --git a/fsck/system-bcachefsck.slice b/fsck/system-bcachefsck.slice
new file mode 100644
index 00000000..ea368032
--- /dev/null
+++ b/fsck/system-bcachefsck.slice
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@kernel.org>
+
+[Unit]
+Description=bcachefsck background service slice
+Before=slices.target
+
+[Slice]
+
+# If the CPU usage cgroup controller is available, don't use more than 60% of a
+# single core for all background processes.
+CPUQuota=60%
+CPUAccounting=true
+
+[Install]
+# As of systemd 249, the systemd cgroupv2 configuration code will drop resource
+# controllers from the root and system.slice cgroups at startup if it doesn't
+# find any direct dependencies that require a given controller. Newly
+# activated units with resource control directives are created under the system
+# slice but do not cause a reconfiguration of the slice's resource controllers.
+# Hence we cannot put CPUQuota= into the bcachefsck service units directly.
+#
+# For the CPUQuota directive to have any effect, we must therefore create an
+# explicit definition file for the slice that systemd creates to contain the
+# bcachefsck instance units (e.g. bcachefsck@.service) and we must configure this
+# slice as a dependency of the system slice to establish the direct dependency
+# relation.
+WantedBy=system.slice