diff --git a/install.sh b/install.sh index 226015ae..b48fc8c6 100755 --- a/install.sh +++ b/install.sh @@ -162,6 +162,7 @@ install_data pkgadmin/pkgadmin-update.service "$DESTDIR$systemdunitdi install_data pkgadmin/pkgadmin-update.timer "$DESTDIR$systemdunitdir/pkgadmin-update.timer" install_exec mxraid/mxraid "$DESTDIR$usr_sbindir/mxraid" install_exec mxraid/mxraid_assemble "$DESTDIR$usr_sbindir/mxraid_assemble" +install_exec mxraid/mxraid_journal "$DESTDIR$usr_sbindir/mxraid_journal" install_data mxraid/mxraid.service "$DESTDIR$systemdunitdir/mxraid.service" install_data mxraid/mxraid.shutdown.service "$DESTDIR$systemdunitdir/mxraid.shutdown.service" install_exec mxraid/mdcheck.safe "$DESTDIR$usr_bindir/mdcheck.safe" diff --git a/mxraid/mxraid_assemble b/mxraid/mxraid_assemble index 3d5bc18f..b869cc96 100755 --- a/mxraid/mxraid_assemble +++ b/mxraid/mxraid_assemble @@ -14,7 +14,7 @@ import MxRaid::HostData; import MxRaid::Utils; import MxRaid::Color; -my $VERSION = '1.2'; +my $VERSION = '1.3.0'; sub exec_usage { @@ -112,11 +112,15 @@ if ($opts{d}) { my $MDADM_CONF_BASE = '/dev/shm/mdadm.conf'; # config for mdadm, created with information from above +if ($ENV{MDADM_CONF_BASE}) { # option to override when debugging + $MDADM_CONF_BASE = $ENV{MDADM_CONF_BASE}; +} + my $MDADM_ASSEMBLE_OPTIONS = ''; my $cd = MxRaid::ConfData->new($MDADMCONF_DB); $cd->verbose($VERBOSE); $cd->load(); check_enclosures(); # idle a bit if enclosures are attached ... 
-my $hd = MxRaid::HostData->new($cd); $hd->verbose($VERBOSE); +my $hd = MxRaid::HostData->new($cd); $hd->verbose($VERBOSE); $hd->load(); my $utils = MxRaid::Utils->new(); my $do_default_action = 1; @@ -139,25 +143,26 @@ if ($opts{l}) { for my $label (@labels) { my %reg; print "$label:\n"; - for my $rec (@{$hd->configured_raids()->{$label}}) { + for my $rec (@{$hd->member_dev_recs($label)}) { my $md = $hd->mddev_by_member()->{$rec->[0]}; $md = 'n/a' unless defined $md; $reg{$md}+=1; - printf " %-5s %-9s %-16s '%s'\n", $md, @$rec[0..2]; + printf " %-5s %-10s %-20s '%s'\n", $md, @$rec[0..2]; } print " Note: this doesn't look like a standard configuration.\n" if scalar keys %reg != 1; print "\n"; } if ($VERBOSE >= 2) { my @mounts = split m/\n/, `cat /proc/self/mounts`; - print "Disks not part of a configured SW-RAID:\n"; + print "Disks/partitions not part of a configured SW-RAID:\n"; for my $rec (@{$hd->non_raid_disks()}) { my $hint = ''; if ($hd->mddev_by_member()->{$rec->[0]}) { - $hint .= sprintf " ** part of %s, yet not in mxmd.conf", $hd->mddev_by_member()->{$rec->[0]}; + $hint = sprintf " ***\n%22s- ", ''; + $hint .= sprintf 'part of %s, yet not in mxmd.conf -', $hd->mddev_by_member()->{$rec->[0]}; } $hint .= ' (mounted)' if scalar(grep {m|^/dev/$rec->[0]\d*\b|} @mounts); - printf " %-9s %-16s '%s'%s\n", @$rec[0..2], $hint; + printf " %-10s %-20s '%s'%s\n", @$rec[0..2], $hint; } print "\n"; } @@ -253,6 +258,7 @@ if ($opts{n}) { if ($do_default_action) { create_mdadm_configs(); } + exit; # prepare to assemble for the complete host @@ -285,7 +291,6 @@ sub create_mdadm_configs { } my $res = $utils->list_differences( $cd->members($rlabel), $hd->member_serials($rlabel)); if ($res) { - # print Dumper $res; # second array in $res must be empty, first array contains missing one(s), make shure ;) die "# ERROR: $rlabel, possible software error, stopped" if scalar(@{$res->[1]}); warn sprintf "# NOTE: $rlabel, disk(s) missing: %s.\n", join(', ', @{$res->[0]}) if $VERBOSE >= 
1; @@ -295,10 +300,19 @@ sub create_mdadm_configs { # some more checks if some/all arrays run already if (scalar @{$hd->get_md_devices()}) { my %running; - for my $dev (@{$hd->member_devs($rlabel)}) { - my @sys_info = glob("/sys/block/$dev/holders/md*"); + for my $dev_rec (@{$hd->member_dev_recs($rlabel)}) { + my $dev = $dev_rec->[0]; + my $parent = $dev_rec->[4]; + + my @sys_info = (); + if (!$parent) { + @sys_info = glob("/sys/block/$dev/holders/md*"); + } else { + @sys_info = glob("/sys/block/$parent/$dev/holders/md*"); + } + next if scalar(@sys_info) == 0; - die '# Fuck! Stopped' if scalar(@sys_info) > 1; # this will never happen ;) + die '# Ieeck! Stopped, duplicate entry' if scalar(@sys_info) > 1; # this will never happen ;) $sys_info[0] =~ m/(md(\d+))$/; my $md = $1; $active_dev_no = $2; @@ -397,13 +411,13 @@ sub create_mdadm_configs { } else { printf "mdadm -A /dev/md%d -c %s\n", $dev_no, $conf_fn; } - + } else { printf "sudo mdadm -A /dev/md%d -c %s %s\n", $dev_no, $conf_fn, $MDADM_ASSEMBLE_OPTIONS; } } } else { - warn "# NOTE: No SW-RAIDS configured on this host.\n" if $VERBOSE >= 1; + warn "# NOTE: No properly configured SW-RAIDS on this host.\n" if $VERBOSE >= 1; } } @@ -457,8 +471,11 @@ exit; $array_name = shift @rec; chop $array_name; # remove the ':' - next unless @rec; + + # accept sn#.0 for raw disks, but discard it + @rec = map { $_ =~ m/(.*)\.0$/ ? $1 : $_} @rec; + if (exists $self->{db}{$array_name}) { warn "# WARN: database contains same array name more than once ($array_name)!\n" if $self->{verbose} >= 1; push @faulty_names, $array_name; @@ -466,11 +483,12 @@ exit; $self->{db}{$array_name} = [ @rec ]; } close DB; + for (@faulty_names) { delete $self->{db}{$_}; - warn "# NOTE: removed badly configured array '$_' from configuration.\n"; + warn "# NOTE: removed badly defined array '$_' from configuration.\n"; } - + my %tmp; # see if this gets expensive ... 
for my $label (keys %{$self->{db}}) { for (@{$self->{db}{$label}}) { @@ -539,7 +557,7 @@ exit; package MxRaid::HostData; BEGIN { - $MxRaid::HostData::VERSION = '0.01'; + $MxRaid::HostData::VERSION = '0.01'; %MxRaid::HostData::BAD_MODELS = ( # and the cure... ST8000NM0065 => sub {substr $_[0], 0, 8}, # silly coding of serial ST8000NM0075 => sub {substr $_[0], 0, 8}, @@ -561,19 +579,24 @@ exit; $self->{config} = $conf; $self->{verbose} = 0; $self->{root_priv} = $<==0?1:0; + return $self; + } + + sub load { + my $self = shift; $self->discover_basic(); $self->discover_running(); - return $self; } sub discover_basic { my $self = shift; - # take only whole disks, no partitions + # only disks like 'sd', 'nvme' my @disks = grep {m:/(sd[a-z]+|nvme\d+n\d+)$:} ( - glob('/sys/block/sd[a-z]*'), glob('/sys/block/nvme[0-9]*') + glob('/sys/block/sd[a-z]*'), + glob('/sys/block/nvme[0-9]*') ); - my $diskinfo = $self->get_hd_info(\@disks); + my $diskinfo = $self->get_hd_info_extended_to_partitions(\@disks); my @non_raid; my %configured_raids; my %raid_labels; @@ -599,7 +622,7 @@ exit; sub discover_running { my $self = shift; - my @mds_found = ( + my @mds_found = ( glob('/sys/block/md[0-9]'), # upper limit 128, and no leading zeroes glob('/sys/block/md[0-9][0-9]'), # but who knows glob('/sys/block/md[0-9][0-9][0-9]')); # @@ -611,22 +634,23 @@ exit; for my $m (@mds_found) { $m =~ m,([^/]+)$,; - my $md = $1; - # alternative : glob("/sys/block/$md/slaves/*"); - my @tmp = glob("/sys/block/*/holders/$md"); + my $md_tmp = $1; + # alternative : glob("/sys/block/$md/slaves/*"); # hmm, guess this will be renamed sometime + my @tmp = ( glob("/sys/block/*/holders/$md_tmp"), glob("/sys/block/*/*/holders/$md_tmp") ); for (@tmp) { $_ =~ m,/sys/block/(.+)/holders/(.+),; - push @{$md2hd{$2}}, $1; - die "# what, duplicate device?" 
if exists $hd2md{$1}; - $hd2md{$1} = $2; + $md_tmp = $2; + my $dev_tmp = $1; # can be 'sda/sda1' + $dev_tmp = ( split(m|/|, $dev_tmp) )[-1]; + push @{$md2hd{$md_tmp}}, $dev_tmp; + die "# what, duplicate device?" if exists $hd2md{$dev_tmp}; # %hd2md is keyed by member device; this should never happen ... + $hd2md{$dev_tmp} = $md_tmp; } } for my $k (keys %md2hd) { my @tmp = @{$md2hd{$k}}; - @tmp = map { '/sys/block/' . $_ } @tmp; # redo ... - my $hd_info = $self->get_hd_info(\@tmp); - # printf "%s: %s\n", $k, join(' ', @$hd_info); + my $hd_info = $self->get_hd_info_from_used_devices(\@tmp); $md2info{$k} = [ @$hd_info ]; for (@$hd_info) { @@ -636,7 +660,6 @@ exit; } - $self->member_info_by_mddev(\%md2info); $self->mddev_by_member(\%hd2md); $self->member_dev_serial_cross_lookup(\%cross_lookup); @@ -661,11 +684,10 @@ exit; $self->{configured_raids}; } - sub member_serials { + sub member_dev_recs { my $self = shift; my $label = shift; - my $di = $self->{configured_raids}{$label}; - [ map {$_->[1]} @$di ]; + $self->{configured_raids}{$label}; } sub member_devs { @@ -675,6 +697,13 @@ exit; [ map {$_->[0]} @$di ]; } + sub member_serials { + my $self = shift; + my $label = shift; + my $di = $self->{configured_raids}{$label}; + [ map {$_->[1]} @$di ]; + } + sub member_info_by_mddev { my $self = shift; $self->{member_info_by_mddev} = shift if @_; @@ -698,54 +727,81 @@ exit; return [ sort keys %{$self->{member_info_by_mddev}} ]; } - # 'shortcut' - sub get_hd_serial { + # Careful here: The device list consists of device names as keys sda, sdb, ... , + # ------------- and possibly sday3, sday4, ... So, either raw or partitioned. 
+ # Raw devices are found at /sys/block/sda, partitions under /sys/block/sdb/sdb1 + + sub get_hd_info_extended_to_partitions { + my $self = shift; + my $dev_list = shift; + return $self->get_hd_info($dev_list, 1); + } + + sub get_hd_info_from_used_devices { my $self = shift; - $self->get_hd_info($_[0], 1) + my $dev_list = shift; + return $self->get_hd_info($dev_list, 0); } sub get_hd_info { my $self = shift; my $dev_list = shift; - my $short_info = shift; + my $lookup_partitions = shift; my @ret; my $num = scalar @$dev_list; for (my $i=0; $i<$num; $i++) { - my ($dk) = $dev_list->[$i] =~ m|([^/]+)$|; # '/sys/block/sda' -> 'sda' + my $dk = $dev_list->[$i]; + $dk =~ s|^/sys/block/||; # '/sys/block/sda' -> 'sda' or 'sda/sda2' + + my $sysprefix = '/sys/block/'.$dk; + my $sysprefix_part = $sysprefix; + + if (not -d $sysprefix) { # then it is a partition-key, not a raw device + my @chk = glob('/sys/block/*/'.$dk); + if (not @chk or scalar(@chk) > 1) { die '# Error: Expected an uniqe parent device for '.$dk; } + my @tmp = split m|/|, $chk[0]; + $sysprefix_part = '/sys/block/'.$tmp[3].'/'.$dk; + $sysprefix = '/sys/block/'.$tmp[3]; + } - my $model = sys_fs_get_prop('/sys/block/'.$dk.'/device/model'); - my $sizeb = sys_fs_get_prop('/sys/block/'.$dk.'/size') * 512; + my $model = sys_fs_get_prop($sysprefix.'/device/model'); + my $sizeb = sys_fs_get_prop($sysprefix_part.'/size') * 512; - # if (! -e '/sys/block/'.$dk.'/device/vpd_pg80') { - # warn "# Note failed to read serial via sysfs ($dk).\n" if $self->{verbose}; - # next; - # } + my @partitions; + if ($lookup_partitions) { + @partitions = glob('/sys/block/'.$dk.'/'.$dk.'*'); # find /sys/block/sda/sda1 + @partitions = map { m|$dk/$dk(.+)$| } @partitions; # get the partition id (Mind: NVMEs use 'pX') + # NB: if it has partitions, then the whole disk should never appear as disk key ... 
8) + } my $serial_number; - if (-e '/sys/block/'.$dk.'/device/vpd_pg80') { # exists even for ahci attached disks + if (-e $sysprefix.'/device/vpd_pg80') { # exists even for ahci attached disks # Vital Product Data, page xyz - my $vpd_pg80 = parse_vpd_pg80(sys_fs_get_prop('/sys/block/'.$dk.'/device/vpd_pg80')); + my $vpd_pg80 = parse_vpd_pg80(sys_fs_get_prop($sysprefix.'/device/vpd_pg80')); if (defined $MxRaid::HostData::BAD_MODELS{uc($model)}) { $vpd_pg80 = $MxRaid::HostData::BAD_MODELS{uc($model)}($vpd_pg80); } $serial_number = $vpd_pg80; - } elsif (-e '/sys/block/'.$dk.'/device/serial') { # nvme disks are supposed to have this - $serial_number = sys_fs_get_prop('/sys/block/'.$dk.'/device/serial'); + } elsif (-e $sysprefix.'/device/serial') { # nvme disks are supposed to have this + $serial_number = sys_fs_get_prop($sysprefix.'/device/serial'); $serial_number =~ s/^\s+//; } else { if ($self->{root_priv}) { $serial_number = smartctl_info('/dev/'.$dk, 'Serial_Number'); } else { - warn "# You must be root to query '$dk'.\n" if $self->{verbose} >= 1; + warn "# NOTE: You must be root to query '$dk'.\n" if $self->{verbose} >= 1; } } if (defined $serial_number) { - if ($short_info) { - push @ret, $serial_number; + if (scalar @partitions) { + for my $part (@partitions) { + my $psize = sys_fs_get_prop($sysprefix.'/'.$dk.$part.'/size') * 512; # size of this partition in bytes, read from its own sysfs entry + push @ret, [$dk.$part, $serial_number.'.'.$part, $model, $psize, $dk]; + } } else { - push @ret, [$dk, $serial_number, $model, $sizeb]; + push @ret, [$dk, $serial_number, $model, $sizeb, '']; } } } @@ -787,7 +843,7 @@ exit; # alternative if no vpd_pg80 avail sub smartctl_info { my $dev = shift; - my @keys = @_; + my @keys = @_; # smartctl -i /dev/sda | grep 'Serial Number:' my @res = split m/\n/, `smartctl -i $dev`; @@ -821,7 +877,7 @@ exit; sub verbose { my $self = shift; $self->{verbose} = shift if @_; - $self->{verbose} + $self->{verbose}; } } diff --git a/mxraid/mxraid_journal b/mxraid/mxraid_journal new file mode 100755 index 
00000000..74161ca9
--- /dev/null
+++ b/mxraid/mxraid_journal
@@ -0,0 +1,282 @@
+#! /bin/bash
+
+# Switch an md SW-RAID between internal bitmap and external journal, or show
+# its current consistency setup.  Run without arguments for usage.
+
+# globally in use: DEVMD JOURNALDEV JOURNALDEV_NEW LABEL, thus:
+set -u
+set -e
+
+# Print usage and the manual recovery steps to stdout.
+function help() {
+
+cat << __EOF
+
+
+  ${0##*/} usage:
+
+    ${0##*/} COMMAND mdX [journal-device]
+
+  COMMANDS:
+
+    bitmap     switch RAID into bitmap mode
+    journal    switch RAID into journal configuration using given journal-device
+    show       terse information about the selected RAID
+
+
+  --- === ### === ---
+
+  If the script got stuck, here are the manual steps to bring the RAID into
+  shape again (depending on where something failed ...):
+
+  # *** journal to bitmap ***
+  bash
+  DEVMD=mdX
+  JOURNALDEV=nvme0nXpY
+
+  ${0##*/} show \$DEVMD
+
+  mdadm --readonly /dev/\$DEVMD
+  mdadm --fail /dev/\$DEVMD -v \$JOURNALDEV
+  mdadm --remove /dev/\$DEVMD -v \$JOURNALDEV
+
+  echo resync > /sys/block/\$DEVMD/md/consistency_policy
+  mdadm --readwrite /dev/\$DEVMD
+  mdadm -G --bitmap=internal /dev/\$DEVMD
+
+  # *** bitmap to journal ***
+  bash
+  DEVMD=mdX
+  JOURNALDEV=nvme0nXpY
+
+  ${0##*/} show \$DEVMD
+
+  mdadm --grow --bitmap=none /dev/\$DEVMD
+  mdadm --readonly /dev/\$DEVMD
+
+  mdadm /dev/\$DEVMD --add-journal /dev/\$JOURNALDEV
+  echo write-through > /sys/block/\$DEVMD/md/journal_mode
+
+__EOF
+
+}
+
+# ------------------------------------------------------ helper functions -
+
+# Print all arguments to stderr and exit with status 1.
+function die() { echo "$@" >&2 ; exit 1; }
+
+# Set LABEL from the MD_NAME line of 'mdadm -DY /dev/$DEVMD'.
+function get_raid_label() {
+    LABEL=$( mdadm -DY "/dev/$DEVMD" | grep MD_NAME | cut -d= -f2 )
+}
+
+# Set JOURNALDEV to the member of $DEVMD whose sysfs state is 'journal'
+# (empty if there is none).  $1 = 'verbose' prints a note in that case.
+function get_journal_device() {
+    local Verbose=$1; shift
+    local P state
+    JOURNALDEV=
+    for P in "/sys/block/$DEVMD/md"/dev-*; do
+        [ -e "$P/state" ] || continue    # an unmatched glob stays literal
+        state=$(cat "$P/state")
+        if [ "$state" = 'journal' ]; then
+            JOURNALDEV=${P##*/dev-}
+            break
+        fi
+    done
+    if [ -z "$JOURNALDEV" ] && [ "$Verbose" = 'verbose' ]; then
+        echo "# NOTE: no journaldevice found on $DEVMD."
+    fi
+}
+
+# Print label and consistency policy of $DEVMD; in journal mode also the
+# active journal mode and the journal device.
+function show() {
+    local Cp Jmode
+    echo "# --- Info for $DEVMD ---"
+    echo "# label: $LABEL"
+    Cp=$(cat "/sys/block/$DEVMD/md/consistency_policy")
+    echo "# consistency policy: $Cp"
+    if [ "$Cp" = 'journal' ]; then
+        Jmode=$(cat "/sys/block/$DEVMD/md/journal_mode")
+        # '[write-through] write-back' or 'write-through [write-back]' - super ...
+        Jmode=${Jmode##*[}; Jmode=${Jmode%%]*}
+        echo "# journal mode: $Jmode"
+        JOURNALDEV=
+        get_journal_device 'quiet'
+        echo "# journal device: $JOURNALDEV"
+    fi
+}
+
+# Exit with status 2 if 'lsof -bw' lists anything matching "/$LABEL".
+function check_mounts() {
+    local Count
+    echo -n "# INFO: running 'lsof -bw'"
+    Count=$(lsof -bw | grep "/$LABEL" | wc -l)
+    if [ "$Count" != '0' ]; then
+        echo ", found $Count open entities, stopping here. ($LABEL, $DEVMD)"
+        exit 2
+    fi
+    echo ' [passed]'
+}
+
+# Run 'exportfs -u' for every host the NFS etab lists for "/$LABEL".
+function unexport_volume() {
+    local Export_List=()
+    local Exports=/var/lib/nfs/etab
+    local Epath Host Ex i
+    Epath=$( grep -m 1 "/$LABEL" "$Exports" | sed -e 's|^\(\S*\).*|\1|' )   # -> /amd/wayofthedodo/M/ME002
+    # /amd/wayofthedodo/M/MG002 zoophobie.molgen.mpg.de(sync,wdelay, ... ,no_all_squash)
+    for Host in $( grep "/$LABEL" "$Exports" | sed -e 's|^\S*\s\s*\(.*\)(.*|\1|' ) ; do
+        Export_List+=("$Host")
+    done
+
+    echo -n "# NOTE: $LABEL, ending exports for ${#Export_List[@]} hosts .."
+    # Expanding an empty array trips 'set -u' on bash before 4.4, so guard it.
+    if [ "${#Export_List[@]}" -gt 0 ]; then
+        i=0
+        for Ex in "${Export_List[@]}"; do
+            exportfs -u "$Ex:$Epath"
+            if [ $(( i % 12 )) = 0 ]; then echo -n '.'; fi
+            i=$((i+1))
+        done
+    fi
+    echo ' [done]'
+}
+
+# Unmount /dev/$DEVMD if /proc/self/mounts lists it as a mount source.
+function unmount_volume() {
+    # The trailing blank keeps md1 from matching the entry of md10.
+    if grep -qe "^/dev/$DEVMD " /proc/self/mounts; then
+        umount -v "/dev/$DEVMD"
+    fi
+}
+
+# Call mxmount, framed by separator lines.
+function run_mxmount() {
+    echo '# INFO: calling mxmount ...'
+    echo ' ---------------------------------------------'
+    mxmount
+    echo ' ---------------------------------------------'
+
+}
+
+# ---------------------------------------------------------- the switches -
+
+# Drop the journal device of $DEVMD and enable an internal bitmap instead.
+function switch_to_bitmap() {
+    local Cp
+    Cp=$(cat "/sys/block/$DEVMD/md/consistency_policy")
+    if [ "$Cp" = 'bitmap' ]; then
+        die "# ERROR: $DEVMD is already in bitmap mode."
+    fi
+
+    get_journal_device 'verbose'
+    if [ -z "$JOURNALDEV" ]; then
+        die "# ERROR: can not remove journal device from $DEVMD."
+    fi
+
+    check_mounts
+    unexport_volume
+    unmount_volume
+
+    mdadm --readonly "/dev/$DEVMD"
+    mdadm --fail "/dev/$DEVMD" -v "$JOURNALDEV"
+    mdadm --remove "/dev/$DEVMD" -v "$JOURNALDEV"
+    sleep 1
+
+    # the following two are needed ...
+    echo resync > "/sys/block/$DEVMD/md/consistency_policy"
+    mdadm --readwrite "/dev/$DEVMD"
+    mdadm -G --bitmap=internal "/dev/$DEVMD"
+    run_mxmount
+    echo '# INFO: done, internal bitmap enabled ...'
+}
+
+# Drop the bitmap of $DEVMD and attach /dev/$JOURNALDEV_NEW as its journal.
+function switch_to_journal() {
+    local Cp
+    Cp=$(cat "/sys/block/$DEVMD/md/consistency_policy")
+    if [ "$Cp" = 'journal' ]; then
+        die "# ERROR: $DEVMD is already in journal mode."
+    fi
+
+    check_mounts
+    unexport_volume
+    unmount_volume
+
+    mdadm --grow --bitmap=none "/dev/$DEVMD"
+    mdadm --readonly "/dev/$DEVMD"
+    sleep 1
+    mdadm "/dev/$DEVMD" --add-journal "/dev/$JOURNALDEV_NEW"
+    # write-back (or 'write-behind') is not wanted, so just make sure
+    echo write-through > "/sys/block/$DEVMD/md/journal_mode"
+    run_mxmount
+    echo '# INFO: done, external journal enabled ...'
+}
+
+# ------------------------------------------------------------ the script -
+
+CMD=
+case ${1:-h} in
+    b|bi|bit|bitm|bitma|bitmap)
+        CMD='bitmap'
+        ;;
+    j|jo|jou|jour|journ|journa|journal)
+        CMD='journal'
+        ;;
+    s|sh|sho|show)
+        CMD='show'
+        ;;
+    -h|--help|h|he|hel|help)
+        help
+        exit 0
+        ;;
+    *)
+        help
+        exit 1
+        ;;
+esac; shift
+
+if [ "$UID" != '0' ]; then
+    die '# ERROR: you must be root to continue.'
+fi
+
+if [ $# = 0 ] ; then
+    echo -e '# ERROR: need an md device to proceed.\n' >&2
+    echo '# Available devices:'
+    ls /sys/block/ | grep '^md[0-9]' | nl
+    echo ''
+    exit 1
+fi
+
+DEVMD=$1; shift
+
+if [ ! -e "/dev/$DEVMD" ] || [ ! -e "/sys/block/$DEVMD" ] ; then
+    die "# ERROR: md device /dev/$DEVMD (/sys/block/$DEVMD) not found."
+fi
+
+get_raid_label $DEVMD    # -> LABEL
+
+if [ "$CMD" = 'show' ]; then
+    show
+elif [ "$CMD" = 'bitmap' ]; then
+    switch_to_bitmap
+elif [ "$CMD" = 'journal' ]; then
+    if [ $# = 0 ]; then
+        die '# ERROR: Need a journal device!'
+    fi
+    JOURNALDEV_NEW=$1; shift
+
+    if [ ! -e "/dev/$JOURNALDEV_NEW" ] ; then
+        die "# ERROR: journal device /dev/$JOURNALDEV_NEW does not exist."
+    fi
+
+    # A glob inside '[ -e ... ]' breaks once it matches more than one path.
+    if compgen -G "/sys/block/md*/md/dev-$JOURNALDEV_NEW" > /dev/null; then
+        die "# ERROR: journal device $JOURNALDEV_NEW is already in use."
+    fi
+
+    switch_to_journal
+fi