From 9b4306677ef69d8b863ba94d94da8c97aa09fc89 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 28 Oct 2022 11:36:32 +0200 Subject: [PATCH] tmpdir-setup: Avoid flush of TMPDIR Use a dm-device (linear target) between the filesystem and the loop device and then use this sequence for teardown: - ioctl EXT4_IOC_SHUTDOWN with EXT4_GOING_FLAGS_NOLOGFLUSH - dmsetup reload $dmname --table "0 $sectors zero" - dmsetup resume $dmname --noflush - umount $mountpoint - dmsetup remove --deferred $dmname - rmdir $mountpoint The zero target prevents any real writes to the block device. However, if the filesystem reads back some data, it will get zeros, which could lead to all kinds of random behaviour. For this reason, we shut down the filesystem, which has the additional advantage that some I/O is prevented in an even earlier stage. Shutdown alone, however, would not prevent all I/O (e.g. not cache writeback or superblock write), so we still need the zero target. Even with this setting, ext4 sometimes logs some errors ("ext4_writepages: jbd2_start: XXX pages, ino YYY; err -5"). We've patched our kernel to avoid that message if the filesystem is shut down. This goes on top of the patches which avoid the usual "mounted" and "unmounted" messages for ext4. To support rolling upgrades of mxqd, keep support to clean up mounts created the old way, which is to mount a loop device directly. 
--- helper/tmpdir-setup | 56 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/helper/tmpdir-setup b/helper/tmpdir-setup index d79c1a53..384d9bec 100755 --- a/helper/tmpdir-setup +++ b/helper/tmpdir-setup @@ -20,6 +20,8 @@ cmd_create() { filename=$tmpdir/$MXQ_JOBID.tmp mountpoint=$mntdir/$MXQ_JOBID + dmname=mxq.$MXQ_JOBID + dmdevice=/dev/mapper/$dmname umask 006 mkdir -p $tmpdir @@ -29,15 +31,19 @@ cmd_create() { if fallocate -l ${MXQ_SIZE}G $filename; then if loopdevice=$(losetup --find --show $filename); then - if mkfs.ext4 \ - -q \ - -m 0 \ - -E nodiscard,mmp_update_interval=300,lazy_journal_init=1,root_owner=$MXQ_UID:0 \ - -O '64bit,ext_attr,filetype,^has_journal,huge_file,inline_data,^mmp,^quota,sparse_super2' \ - $loopdevice \ - && mkdir -p $mountpoint && mount -Odata=writeback,barrier=0 $loopdevice $mountpoint; then - rmdir $mountpoint/lost+found - status=0 + if dmsetup create $dmname --table "0 $((MXQ_SIZE*1024*1024*2)) linear $loopdevice 0"; then + if mkfs.ext4 \ + -q \ + -m 0 \ + -E nodiscard,mmp_update_interval=300,lazy_journal_init=1,root_owner=$MXQ_UID:0 \ + -O '64bit,ext_attr,filetype,^has_journal,huge_file,inline_data,^mmp,^quota,sparse_super2' \ + $dmdevice \ + && mkdir -p $mountpoint && mount -Odata=writeback,barrier=0 $dmdevice $mountpoint; then + rmdir $mountpoint/lost+found + status=0 + else + dmsetup remove --force --deferred $dmname + fi fi losetup -d $loopdevice fi @@ -52,10 +58,34 @@ cmd_cleanup() { (( $# == 1 )) || usage MXQ_JOBID=$1 - shopt -s dotglob; - rm -rf /dev/shm/mxqd/mnt/job/$MXQ_JOBID/* - umount /dev/shm/mxqd/mnt/job/$MXQ_JOBID - rmdir /dev/shm/mxqd/mnt/job/$MXQ_JOBID + # Support rolling upgrade: Use old code if loop device is directly mounted without a dm device in between. 
+ # + if [[ $(findmnt --output SOURCE --noheadings /dev/shm/mxqd/mnt/job/$MXQ_JOBID) =~ ^/dev/loop ]]; then + shopt -s dotglob + rm -rf /dev/shm/mxqd/mnt/job/$MXQ_JOBID/* + umount /dev/shm/mxqd/mnt/job/$MXQ_JOBID + rmdir /dev/shm/mxqd/mnt/job/$MXQ_JOBID + exit + fi + + mountpoint=$mntdir/$MXQ_JOBID + dmname=mxq.$MXQ_JOBID + dmdevice=/dev/mapper/$dmname + minor=$(( 0x$(stat -c%T $dmdevice) )) + sectors=$(cat /sys/block/dm-$minor/size) + + # fs/ext4/ext4.h: #define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32) + # (_IOC_READ << 30) + (sizeof(_u32) << 16) + ( 'X' << 8 ) + 125 + # ( 2 << 30 ) + ( 4 << 16 ) + ( 88 << 8 ) + 125 = 2147768445 + # fs/ext4/ext4.h: #define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ + + /usr/local/system/perl/bin/perl -e "open F, '<', '$mountpoint/.' ; \$d=pack('L', 2) ; ioctl (F, 2147768445, \$d)" + + dmsetup reload $dmname --table "0 $sectors zero" + dmsetup resume $dmname --noflush + umount --lazy $mountpoint + dmsetup remove $dmname + rmdir $mountpoint } (( $# > 0 )) || usage