Skip to content

Commit

Permalink
Merge pull request #159 from mariux64/mdcheck-workaround
Browse files Browse the repository at this point in the history
mdcheck workaround
  • Loading branch information
donald authored Mar 12, 2021
2 parents 9899ad0 + a3d1307 commit 3187a78
Show file tree
Hide file tree
Showing 3 changed files with 177 additions and 2 deletions.
3 changes: 3 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ function install_if()
local dst="$2"
shift 2

true < "$src" || exit 1

if need_update "$src" "$dst"; then
dir="$(dirname "$dst")"
test -d "$dir" || mkdir -p "$dir"
Expand Down Expand Up @@ -148,6 +150,7 @@ install_exec mxraid/mxraid "$DESTDIR$usr_sbindir/m
install_exec mxraid/mxraid_assemble "$DESTDIR$usr_sbindir/mxraid_assemble"
install_data mxraid/mxraid.startup.service "$DESTDIR$systemdunitdir/mxraid.startup.service"
install_data mxraid/mxraid.shutdown.service "$DESTDIR$systemdunitdir/mxraid.shutdown.service"
install_exec mxraid/mdcheck.safe "$DESTDIR$usr_bindir/mdcheck.safe"
install_cron mxraid/crond-mdcheck "$DESTDIR$crond_dir/mdcheck"
install_exec kvm_monitor/kvm_monitor.pl "$DESTDIR$udev_helperdir/kvm_monitor.pl"
install_data kvm_monitor/51-raritan-kvm.rules "$DESTDIR$udev_rulesdir/51-raritan-kvm.rules"
Expand Down
4 changes: 2 additions & 2 deletions mxraid/crond-mdcheck
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# 0 2 23 * * root /usr/bin/mdcheck --duration "06:00"
# 0 2 1-22,24-31 * * root /usr/bin/mdcheck --continue --duration "06:00"
0 2 13 * * root /usr/bin/mdcheck.safe --duration "06:00"
0 2 1-12,14-31 * * root /usr/bin/mdcheck.safe --continue --duration "06:00"
172 changes: 172 additions & 0 deletions mxraid/mdcheck.safe
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#!/bin/bash

# Copyright (C) 2014-2017 Neil Brown <neilb@suse.de>
#
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# Author: Neil Brown
# Email: <neilb@suse.com>

# This script should be run periodically to automatically
# perform a 'check' on any md arrays.
#
# It supports a 'time budget' such that any incomplete 'check'
# will be checkpointed when that time has expired.
# A subsequent invocation can allow the 'check' to continue.
#
# Options are:
# --continue Don't start new checks, only continue old ones.
# --duration This is passed to "date --date=$duration" to find out
# when to finish
#
# To support '--continue', arrays are identified by UUID and the 'sync_completed'
# value is stored in /var/lib/mdcheck/$UUID

# Convert a /dev/md name into its /sys/.../md equivalent.
#
# Arguments: $1 - path to a device node (symlinks are followed by ls -L)
# Outputs:   the canonical path of /sys/dev/block/<major>:<minor> on stdout
# Returns:   the exit status of readlink
sysname() {
    local maj min _

    # For a device node `ls -l` prints "major, minor" in place of the file
    # size, so the device numbers are the 5th and 6th fields.  Reading the
    # fields with `read` (instead of `set $(ls ...)`) avoids glob expansion
    # of the listing and avoids a mode string that starts with '-' being
    # taken as options to `set`.
    read -r _ _ _ _ maj min _ < <(ls -lLd -- "$1")
    maj=${maj%,}    # drop the comma that follows the major number

    readlink -f "/sys/dev/block/$maj:$min"
}

# Parse the command line.
#
# Globals written:
#   cont    - "yes" when -c/--continue was given, otherwise empty
#   endtime - epoch seconds derived from -d/--duration, otherwise empty
# Exits:
#   0 after printing the usage text for -h/--help
#   getopt's status when the command line is malformed
#   2 when "date --date" cannot interpret the duration
parse_args() {
    local args dur

    args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@") || exit $?

    # $args must be quoted so that eval sees getopt's own quoting untouched;
    # unquoted, whitespace inside an argument would be collapsed.
    eval set -- "$args"

    cont=
    endtime=
    while [ $# -gt 0 ]; do
        case "$1" in
            -h | --help)
                echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
                echo >&2 ' time-offset must be understood by "date --date"'
                exit 0
                ;;
            -c | --continue)
                cont=yes
                ;;
            -d | --duration)
                shift
                dur=$1
                # An unparsable duration would otherwise leave endtime empty
                # and the checks would be started without any time budget.
                if ! endtime=$(date --date "$dur" "+%s"); then
                    echo >&2 "mdcheck: cannot interpret duration '$dur'"
                    exit 2
                fi
                ;;
            --)
                # getopt always terminates the option list with "--"
                shift
                break
                ;;
        esac
        shift
    done
}

parse_args "$@"

# Scratch file for intermediate output.  It is named after this
# process and removed again on exit or on INT/QUIT/TERM.
tmp="/var/lib/mdcheck/.md-check-$$"
trap 'rm -f "$tmp"' EXIT INT QUIT TERM


# Ensure the state directory exists, then drop per-array state files
# that have not been modified for more than 180 days.
mkdir -p /var/lib/mdcheck
find /var/lib/mdcheck -type f -name "MD_UUID*" -mtime +180 -exec rm {} \;

# Now look at each md device.
#
# Every idle array either gets a fresh check started (default) or, with
# --continue, resumes from the position saved in its state file.  Each array
# that was started is recorded in MD_<n>_fl / MD_<n>_sys / MD_<n>_dev, and
# cnt holds the number of arrays recorded.
cnt=0
for dev in /dev/md?*; do
    # An unmatched glob stays as the literal pattern; skip it.
    [ -e "$dev" ] || continue
    sys=$(sysname "$dev")
    if [ ! -f "$sys/md/sync_action" ]; then
        # cannot check this array
        continue
    fi
    if [ "$(cat "$sys/md/sync_action")" != 'idle' ]; then
        # This array is busy
        continue
    fi

    # Keep only the MD_UUID=... assignment from mdadm's export output and
    # source it to set MD_UUID for this array.
    mdadm --detail --export "$dev" | grep '^MD_UUID=' > "$tmp" || continue
    source "$tmp"
    fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
    if [ -z "$cont" ]; then
        start=0
        logger -p daemon.info "mdcheck start checking $dev"
    elif [ -z "$MD_UUID" ] || [ ! -f "$fl" ]; then
        # Nothing to continue here
        continue
    else
        start=$(cat "$fl")
        logger -p daemon.info "mdcheck continue checking $dev from $start"
    fi

    cnt=$((cnt + 1))
    # printf -v assigns to a computed variable name without eval.
    printf -v "MD_${cnt}_fl" '%s' "$fl"
    printf -v "MD_${cnt}_sys" '%s' "$sys"
    printf -v "MD_${cnt}_dev" '%s' "$dev"
    echo "$start" > "$fl"
    echo "$start" > "$sys/md/sync_min"
    echo check > "$sys/md/sync_action"
done

# Without an end time there is nothing to supervise: the checks that were
# started are left running.
if [ -z "$endtime" ]; then
    exit 0
fi

# Poll the recorded arrays every 120 seconds until the end time.  On each
# pass the first field of sync_completed is saved to the array's state file;
# an array that is no longer in 'check' is logged as finished, its state file
# is removed and its MD_<n>_fl slot is cleared.  Exits 0 as soon as no array
# is still being checked.
while [ "$(date +%s)" -lt "$endtime" ]; do
    any=
    # A C-style loop runs zero times when cnt is 0; the brace expansion
    # {1..0} would instead yield the two bogus indices "1 0".
    for ((i = 1; i <= cnt; i++)); do
        fl_ref="MD_${i}_fl"
        sys_ref="MD_${i}_sys"
        dev_ref="MD_${i}_dev"
        fl=${!fl_ref}
        sys=${!sys_ref}
        dev=${!dev_ref}

        # An empty slot means this array was already handled.
        if [ -z "$fl" ]; then continue; fi

        if [ "$(cat "$sys/md/sync_action")" != 'check' ]; then
            logger -p daemon.info "mdcheck finished checking $dev"
            printf -v "MD_${i}_fl" '%s' ''
            rm -f "$fl"
            continue
        fi
        read -r a rest < "$sys/md/sync_completed"
        echo "$a" > "$fl"
        any=yes
    done
    if [ -z "$any" ]; then exit 0; fi
    sleep 120
done

# We've waited, and there are still checks running.
# Time to stop them.
#
# For every array still in 'check' the action is set back to 'idle' and the
# resulting sync_min is saved to the state file so that a later --continue
# run can pick up from there.
# A C-style loop is used because {1..$cnt} expands to "1 0" when cnt is 0.
for ((i = 1; i <= cnt; i++)); do
    fl_ref="MD_${i}_fl"
    sys_ref="MD_${i}_sys"
    dev_ref="MD_${i}_dev"
    fl=${!fl_ref}
    sys=${!sys_ref}
    dev=${!dev_ref}

    # An empty slot means this array already finished.
    if [ -z "$fl" ]; then continue; fi

    if [ "$(cat "$sys/md/sync_action")" != 'check' ]; then
        printf -v "MD_${i}_fl" '%s' ''
        rm -f "$fl"
        continue
    fi

    # The write is done in the background and given 10 seconds.
    # NOTE(review): presumably the write of "idle" can hang; confirm against
    # the kernel issue this workaround targets.
    echo idle > "$sys/md/sync_action" &
    pid=$!
    sleep 10
    # Writer still alive and the array reports write-pending: log it and
    # write "active" to array_state.
    if [ -d "/proc/$pid" ] && [ "$(cat "$sys/md/array_state")" = write-pending ]; then
        logger -p daemon.info "restart hanging $dev"
        echo active > "$sys/md/array_state"
    fi
    cat "$sys/md/sync_min" > "$fl"
    logger -p daemon.info "pause checking $dev at $(cat "$fl")"
done

0 comments on commit 3187a78

Please sign in to comment.