diff --git a/install.sh b/install.sh
index 957c553..110ca0d 100755
--- a/install.sh
+++ b/install.sh
@@ -64,6 +64,8 @@ function install_if()
     local dst="$2"
     shift 2
 
+    true < "$src" || exit 1
+
     if need_update "$src" "$dst"; then
         dir="$(dirname "$dst")"
         test -d "$dir" || mkdir -p "$dir"
@@ -148,6 +150,7 @@ install_exec mxraid/mxraid "$DESTDIR$usr_sbindir/m
 install_exec mxraid/mxraid_assemble "$DESTDIR$usr_sbindir/mxraid_assemble"
 install_data mxraid/mxraid.startup.service "$DESTDIR$systemdunitdir/mxraid.startup.service"
 install_data mxraid/mxraid.shutdown.service "$DESTDIR$systemdunitdir/mxraid.shutdown.service"
+install_exec mxraid/mdcheck.safe "$DESTDIR$usr_bindir/mdcheck.safe"
 install_cron mxraid/crond-mdcheck "$DESTDIR$crond_dir/mdcheck"
 install_exec kvm_monitor/kvm_monitor.pl "$DESTDIR$udev_helperdir/kvm_monitor.pl"
 install_data kvm_monitor/51-raritan-kvm.rules "$DESTDIR$udev_rulesdir/51-raritan-kvm.rules"
diff --git a/mxraid/crond-mdcheck b/mxraid/crond-mdcheck
index 09099b3..1d1a194 100644
--- a/mxraid/crond-mdcheck
+++ b/mxraid/crond-mdcheck
@@ -1,2 +1,2 @@
-# 0 2 23 * * root /usr/bin/mdcheck --duration "06:00"
-# 0 2 1-22,24-31 * * root /usr/bin/mdcheck --continue --duration "06:00"
+0 2 13 * * root /usr/bin/mdcheck.safe --duration "06:00"
+0 2 1-12,14-31 * * root /usr/bin/mdcheck.safe --continue --duration "06:00"
diff --git a/mxraid/mdcheck.safe b/mxraid/mdcheck.safe
new file mode 100755
index 0000000..01eda55
--- /dev/null
+++ b/mxraid/mdcheck.safe
@@ -0,0 +1,190 @@
+#!/bin/bash
+
+# Copyright (C) 2014-2017 Neil Brown
+#
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author: Neil Brown
+# Email:
+
+# This script should be run periodically to automatically
+# perform a 'check' on any md arrays.
+#
+# It supports a 'time budget' such that any incomplete 'check'
+# will be checkpointed when that time has expired.
+# A subsequent invocation can allow the 'check' to continue.
+#
+# Options are:
+#   -c, --continue    Don't start new checks, only continue old ones.
+#   -d, --duration    This is passed to "date --date=$duration" to find out
+#                     when to finish. If "date" rejects the value the script
+#                     exits with status 1 before touching any array.
+#   -h, --help        Print a usage summary and exit.
+#
+# To support '--continue', arrays are identified by UUID and the 'sync_completed'
+# value is stored in /var/lib/mdcheck/MD_UUID_$UUID
+
+# convert a /dev/md name into /sys/.../md equivalent
+#   $1      device node; symbolic links are followed
+#   stdout  resolved /sys/dev/block/MAJOR:MINOR path
+#   status  non-zero if the node cannot be examined
+sysname() {
+    local nums maj min
+    # stat reports device numbers in hex, sysfs names them in decimal
+    nums=$(stat -L -c '%t %T' "$1") || return
+    read -r maj min <<< "$nums"
+    readlink -f "/sys/dev/block/$((16#$maj)):$((16#$min))"
+}
+
+args=$(getopt -o hcd: -l help,continue,duration: -n mdcheck -- "$@")
+rv=$?
+if [ $rv -ne 0 ]; then exit $rv; fi
+
+eval set -- "$args"
+
+cont=
+endtime=
+while [ " $1" != " --" ]
+do
+    case "$1" in
+        # getopt accepts the short spellings too, so honour them here
+        -h | --help )
+            echo >&2 'Usage: mdcheck [--continue] [--duration time-offset]'
+            echo >&2 ' time-offset must be understood by "date --date"'
+            exit 0
+            ;;
+        -c | --continue ) cont=yes ;;
+        -d | --duration ) shift; dur=$1
+            # without a valid end time the checks would never be paused
+            endtime=$(date --date "$dur" "+%s") || exit 1
+            ;;
+    esac
+    shift
+done
+shift
+
+# We need a temp file occasionally...
+# NOTE: the handler only removes the file; after signals 2, 3 or 15 the
+# script keeps running.
+tmp=/var/lib/mdcheck/.md-check-$$
+trap 'rm -f "$tmp"' 0 2 3 15
+
+
+# firstly, clean out really old state files
+mkdir -p /var/lib/mdcheck
+find /var/lib/mdcheck -name "MD_UUID*" -type f -mtime +180 -exec rm {} \;
+
+# Now look at each md device.
+# fls/syss/devs are parallel arrays: state file, sysfs directory and device
+# node of every array on which this run started a check.
+fls=()
+syss=()
+devs=()
+for dev in /dev/md?*
+do
+    [ -e "$dev" ] || continue
+    sys=$(sysname "$dev")
+    if [ ! -f "$sys/md/sync_action" ]
+    then # cannot check this array
+        continue
+    fi
+    if [ "$(cat "$sys/md/sync_action")" != 'idle' ]
+    then # This array is busy
+        continue
+    fi
+
+    mdadm --detail --export "$dev" | grep '^MD_UUID=' > "$tmp" || continue
+    source "$tmp"
+    fl="/var/lib/mdcheck/MD_UUID_$MD_UUID"
+    if [ -z "$cont" ]
+    then
+        start=0
+        logger -p daemon.info "mdcheck start checking $dev"
+    elif [ -z "$MD_UUID" ] || [ ! -f "$fl" ]
+    then
+        # Nothing to continue here
+        continue
+    else
+        start=$(cat "$fl")
+        logger -p daemon.info "mdcheck continue checking $dev from $start"
+    fi
+
+    fls+=("$fl")
+    syss+=("$sys")
+    devs+=("$dev")
+    echo "$start" > "$fl"
+    echo "$start" > "$sys/md/sync_min"
+    echo check > "$sys/md/sync_action"
+done
+
+if [ -z "$endtime" ]
+then
+    exit 0
+fi
+
+while [ "$(date +%s)" -lt "$endtime" ]
+do
+    any=
+    for i in "${!fls[@]}"
+    do
+        fl=${fls[i]}
+        sys=${syss[i]}
+        dev=${devs[i]}
+
+        # an empty entry marks a check that already finished
+        if [ -z "$fl" ]; then continue; fi
+
+        if [ "$(cat "$sys/md/sync_action")" != 'check' ]
+        then
+            logger -p daemon.info "mdcheck finished checking $dev"
+            fls[i]=
+            rm -f "$fl"
+            continue
+        fi
+        # record the first field of sync_completed as the checkpoint
+        read -r a rest < "$sys/md/sync_completed"
+        echo "$a" > "$fl"
+        any=yes
+    done
+    if [ -z "$any" ]; then exit 0; fi
+    sleep 120
+done
+
+# We've waited, and there are still checks running.
+# Time to stop them.
+for i in "${!fls[@]}"
+do
+    fl=${fls[i]}
+    sys=${syss[i]}
+    dev=${devs[i]}
+
+    if [ -z "$fl" ]; then continue; fi
+
+    if [ "$(cat "$sys/md/sync_action")" != 'check' ]
+    then
+        fls[i]=
+        rm -f "$fl"
+        continue
+    fi
+    # Request the pause from a background job. If that job still exists
+    # after 10 seconds and array_state reads write-pending, log it and
+    # write 'active' to array_state.
+    echo idle > "$sys/md/sync_action" &
+    pid=$!
+    sleep 10
+    if [ -d "/proc/$pid" ] && [ "$(cat "$sys/md/array_state")" = write-pending ]; then
+        logger -p daemon.info "restart hanging $dev"
+        echo active > "$sys/md/array_state"
+    fi
+    cat "$sys/md/sync_min" > "$fl"
+    logger -p daemon.info "pause checking $dev at $(cat "$fl")"
+done