From 2640b23ffd3ebc28950b9b751f2f41d3f235925b Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 12 May 2022 16:42:15 +0200 Subject: [PATCH 1/4] Makefile: Update version to 0.30.8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4894831c..33d9586d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ MXQ_VERSION_MAJOR = 0 MXQ_VERSION_MINOR = 30 -MXQ_VERSION_PATCH = 7 +MXQ_VERSION_PATCH = 8 MXQ_VERSION_EXTRA = "beta" MXQ_VERSIONDATE = 2022 From 4285d95893b73ce696b5fe7da28e0e18c76bfd7a Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 12 May 2022 16:42:42 +0200 Subject: [PATCH 2/4] Remove create_job_tmpdir helper This helper has been replaced by tmpdir-setup. Remove it. --- helper/create_job_tmpdir | 42 ---------------------------------------- 1 file changed, 42 deletions(-) delete mode 100755 helper/create_job_tmpdir diff --git a/helper/create_job_tmpdir b/helper/create_job_tmpdir deleted file mode 100755 index 01207427..00000000 --- a/helper/create_job_tmpdir +++ /dev/null @@ -1,42 +0,0 @@ -#! 
/usr/bin/bash - -# Input (environment): -# -# MXQ_JOBID : job ident -# MXQ_SIZE : size in GB -# MXQ_UID : uid - -# Output: -# -# /dev/shm/mxqd/tmp/$JOBID mounted, space from /scratch/local2 - -tmpdir=/scratch/local2/mxqd/tmp -mntdir=/dev/shm/mxqd/mnt/job -filename=$tmpdir/$MXQ_JOBID.tmp -mountpoint=$mntdir/$MXQ_JOBID - -umask 006 -mkdir -p $tmpdir -mkdir -p $mntdir - -status=1; - -if fallocate -l ${MXQ_SIZE}G $filename; then - if loopdevice=$(losetup --find --show $filename); then - if mkfs.ext4 \ - -q \ - -m 0 \ - -E nodiscard,mmp_update_interval=300,lazy_journal_init=1,root_owner=$MXQ_UID:0 \ - -O '64bit,ext_attr,filetype,^has_journal,huge_file,inline_data,^mmp,^quota,sparse_super2' \ - $loopdevice \ - && mkdir -p $mountpoint && mount -Odata=writeback,barrier=0 $loopdevice $mountpoint; then - rmdir $mountpoint/lost+found - status=0 - fi - losetup -d $loopdevice - fi - rm $filename -else - test -e $filename && rm $filename -fi -exit $status From 0b1a29f579fd637312e9e4ac9acd4ae3104a0031 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 28 Oct 2022 11:31:44 +0200 Subject: [PATCH 3/4] tmpdir-setup: Don't cleanup asynchronously When mxqd restarts and finds finished jobs, it calls the tmpdir cleanup code for these jobs. As part of the recovery procedure, it later scans the system for any leftover mounts. When the regular tmpdir cleanup is done asynchronously, mxqd might discover a directory which is in the process of being unmounted but still exists, in which case it calls the tmpdir cleanup code a second time. There is no harm done; the jobs completed normally. The second attempted cleanup just produces some error messages in the logfile. This bug is only triggered when jobs complete while mxqd is stopped. As the "old style" tmpdir setup is going away anyway, don't invent something complicated here and just do the cleanup synchronously. 
--- helper/tmpdir-setup | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/helper/tmpdir-setup b/helper/tmpdir-setup index fe47dc79..d79c1a53 100755 --- a/helper/tmpdir-setup +++ b/helper/tmpdir-setup @@ -52,12 +52,10 @@ cmd_cleanup() { (( $# == 1 )) || usage MXQ_JOBID=$1 - ( - shopt -s dotglob; - rm -rf /dev/shm/mxqd/mnt/job/$MXQ_JOBID/* - umount /dev/shm/mxqd/mnt/job/$MXQ_JOBID - rmdir /dev/shm/mxqd/mnt/job/$MXQ_JOBID - ) & + shopt -s dotglob; + rm -rf /dev/shm/mxqd/mnt/job/$MXQ_JOBID/* + umount /dev/shm/mxqd/mnt/job/$MXQ_JOBID + rmdir /dev/shm/mxqd/mnt/job/$MXQ_JOBID } (( $# > 0 )) || usage From 9b4306677ef69d8b863ba94d94da8c97aa09fc89 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 28 Oct 2022 11:36:32 +0200 Subject: [PATCH 4/4] tmpdir-setup: Avoid flush of TMPDIR Use a dm-device (linear target) between the filesystem and the loop device and then use this sequence for teardown: - ioctl EXT4_IOC_SHUTDOWN with EXT4_GOING_FLAGS_NOLOGFLUSH - dmsetup reload $dmname --table "0 $sectors zero" - dmsetup resume $dmname --noflush - umount $mountpoint - dmsetup remove --deferred $dmname - rmdir $mountpoint The zero target prevents any real writes to the block device. However, if the filesystem reads back some data, it will get zeros, which could lead to all kinds of random behaviour. For this reason, we shut down the filesystem, which has the additional advantage that some I/O is prevented at an even earlier stage. Shutdown alone, however, would not prevent all I/O (e.g. not cache writeback or superblock write), so we still need the zero target. Even with this setting, ext4 sometimes logs some errors ("ext4_writepages: jbd2_start: XXX pages, ino YYY; err -5"). We've patched our kernel to avoid that message if the filesystem is shut down. This goes on top of the patches which avoid the usual "mounted" and "unmounted" messages for ext4. 
To support rolling upgrades of mxqd, keep support to clean up mounts created the old way, which is to mount a loop device directly. --- helper/tmpdir-setup | 56 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/helper/tmpdir-setup b/helper/tmpdir-setup index d79c1a53..384d9bec 100755 --- a/helper/tmpdir-setup +++ b/helper/tmpdir-setup @@ -20,6 +20,8 @@ cmd_create() { filename=$tmpdir/$MXQ_JOBID.tmp mountpoint=$mntdir/$MXQ_JOBID + dmname=mxq.$MXQ_JOBID + dmdevice=/dev/mapper/$dmname umask 006 mkdir -p $tmpdir @@ -29,15 +31,19 @@ cmd_create() { if fallocate -l ${MXQ_SIZE}G $filename; then if loopdevice=$(losetup --find --show $filename); then - if mkfs.ext4 \ - -q \ - -m 0 \ - -E nodiscard,mmp_update_interval=300,lazy_journal_init=1,root_owner=$MXQ_UID:0 \ - -O '64bit,ext_attr,filetype,^has_journal,huge_file,inline_data,^mmp,^quota,sparse_super2' \ - $loopdevice \ - && mkdir -p $mountpoint && mount -Odata=writeback,barrier=0 $loopdevice $mountpoint; then - rmdir $mountpoint/lost+found - status=0 + if dmsetup create $dmname --table "0 $((MXQ_SIZE*1024*1024*2)) linear $loopdevice 0"; then + if mkfs.ext4 \ + -q \ + -m 0 \ + -E nodiscard,mmp_update_interval=300,lazy_journal_init=1,root_owner=$MXQ_UID:0 \ + -O '64bit,ext_attr,filetype,^has_journal,huge_file,inline_data,^mmp,^quota,sparse_super2' \ + $dmdevice \ + && mkdir -p $mountpoint && mount -Odata=writeback,barrier=0 $dmdevice $mountpoint; then + rmdir $mountpoint/lost+found + status=0 + else + dmsetup remove --force --deferred $dmname + fi fi losetup -d $loopdevice fi @@ -52,10 +58,34 @@ cmd_cleanup() { (( $# == 1 )) || usage MXQ_JOBID=$1 - shopt -s dotglob; - rm -rf /dev/shm/mxqd/mnt/job/$MXQ_JOBID/* - umount /dev/shm/mxqd/mnt/job/$MXQ_JOBID - rmdir /dev/shm/mxqd/mnt/job/$MXQ_JOBID + # Support rolling upgrade: Use old code if loop device is directly mounted without a dm device in between. 
+ # + if [[ $(findmnt --output SOURCE --noheadings /dev/shm/mxqd/mnt/job/$MXQ_JOBID) =~ ^/dev/loop ]]; then + shopt -s dotglob + rm -rf /dev/shm/mxqd/mnt/job/$MXQ_JOBID/* + umount /dev/shm/mxqd/mnt/job/$MXQ_JOBID + rmdir /dev/shm/mxqd/mnt/job/$MXQ_JOBID + exit + fi + + mountpoint=$mntdir/$MXQ_JOBID + dmname=mxq.$MXQ_JOBID + dmdevice=/dev/mapper/$dmname + minor=$(( 0x$(stat -c%T $dmdevice) )) + sectors=$(cat /sys/block/dm-$minor/size) + + # fs/ext4/ext4.h: #define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32) + # (_IOC_READ << 30) + (sizeof(_u32) << 16) + ( 'X' << 8 ) + 125 + # ( 2 << 30 ) + ( 4 << 16 ) + ( 88 << 8 ) + 125 = 2147768445 + # fs/ext4/ext4.h: #define EXT4_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ + + /usr/local/system/perl/bin/perl -e "open F, '<', '$mountpoint/.' ; \$d=pack('L', 2) ; ioctl (F, 2147768445, \$d)" + + dmsetup reload $dmname --table "0 $sectors zero" + dmsetup resume $dmname --noflush + umount --lazy $mountpoint + dmsetup remove $dmname + rmdir $mountpoint } (( $# > 0 )) || usage