From c62f14e16d76970939bdb534f8af233f047c9863 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 21 Jan 2019 12:11:55 +0100 Subject: [PATCH 1/7] mxraid_assemble: Add support for nvme devices --- mxraid/mxraid_assemble | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mxraid/mxraid_assemble b/mxraid/mxraid_assemble index 54bb440..de9f5d0 100755 --- a/mxraid/mxraid_assemble +++ b/mxraid/mxraid_assemble @@ -134,7 +134,7 @@ if ($opts{l}) { my $md = $hd->mddev_by_member()->{$rec->[0]}; $md = 'n/a' unless defined $md; $reg{$md}+=1; - printf " %-5s %5s %-16s '%s'\n", $md, @$rec; + printf " %-5s %-9s %-16s '%s'\n", $md, @$rec; } print " Note: this doesn't look like a standard configuration.\n" if scalar keys %reg != 1; print "\n"; @@ -142,7 +142,7 @@ if ($opts{l}) { if ($VERBOSE >= 2) { print "Disks not part of a configured SW-RAID:\n"; for my $rec (@{$hd->non_raid_disks()}) { - printf " %5s %-16s '%s'\n", @$rec; + printf " %-9s %-16s '%s'\n", @$rec; } print "\n"; } @@ -159,7 +159,7 @@ if ($opts{n}) { $entry .= $opts{n}.':'; for my $rec (@{$hd->non_raid_disks()}) { my $dev = $rec->[0]; - next unless $dev =~ m/sd[a-z]+/; + next unless $dev =~ m/(sd[a-z]+|nvme\d+n\d+)/; if (grep {m|^/dev/$dev\d*\b|} @mounts) { warn "# NOTE: /dev/$dev is mounted, skipping.\n" if $VERBOSE >= 2; next; @@ -520,7 +520,10 @@ exit; sub discover_basic { my $self = shift; - my @disks = ( glob('/sys/block/sd[a-z]'), glob('/sys/block/sd[a-z][a-z]') ); # 676 disks max + # take only whole disks, no partitions + my @disks = grep {m:/(sd[a-z]+|nvme\d+n\d+)$:} ( + glob('/sys/block/sd[a-z]*'), glob('/sys/block/nvme[0-9]*') + ); my $diskinfo = $self->get_hd_info(\@disks); my @non_raid; my %configured_raids; @@ -670,13 +673,16 @@ exit; # } my $serial_number; - if (-e '/sys/block/'.$dk.'/device/vpd_pg80') { + if (-e '/sys/block/'.$dk.'/device/vpd_pg80') { # exists even for ahci attached disks # Vital Product Data, page xyz my $vpd_pg80 = 
parse_vpd_pg80(sys_fs_get_prop('/sys/block/'.$dk.'/device/vpd_pg80')); if (defined $MxRaid::HostData::BAD_MODELS{uc($model)}) { $vpd_pg80 = $MxRaid::HostData::BAD_MODELS{uc($model)}($vpd_pg80); } $serial_number = $vpd_pg80; + } elsif (-e '/sys/block/'.$dk.'/device/serial') { # nvme disks are supposed to have this + $serial_number = sys_fs_get_prop('/sys/block/'.$dk.'/device/serial'); + $serial_number =~ s/^\s+//; } else { if ($self->{root_priv}) { $serial_number = smartctl_info($dev_list->[$i], 'Serial_Number'); From 1e5e2f68bd65d3563f864065298d6e3693496043 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 21 Jan 2019 12:50:53 +0100 Subject: [PATCH 2/7] mxraid_assemble: Indicate disks that are mounted Verbose listing mode now adds a hint when a disk (or a partition on it) is mounted. --- mxraid/mxraid_assemble | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mxraid/mxraid_assemble b/mxraid/mxraid_assemble index de9f5d0..0005f0c 100755 --- a/mxraid/mxraid_assemble +++ b/mxraid/mxraid_assemble @@ -140,9 +140,11 @@ if ($opts{l}) { print "\n"; } if ($VERBOSE >= 2) { + my @mounts = split m/\n/, `cat /proc/self/mounts`; print "Disks not part of a configured SW-RAID:\n"; for my $rec (@{$hd->non_raid_disks()}) { - printf " %-9s %-16s '%s'\n", @$rec; + my $mnt = scalar(grep {m|^/dev/$rec->[0]\d*\b|} @mounts) ? 
' (mounted)':''; + printf " %-9s %-16s '%s'%s\n", @$rec, $mnt; } print "\n"; } From 5524a320ba14b97423a126f8225af8e18f10ec0a Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 21 Jan 2019 13:33:14 +0100 Subject: [PATCH 3/7] mxraid_assemble: Switch logic of get_hd_info/get_hd_serial --- mxraid/mxraid_assemble | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mxraid/mxraid_assemble b/mxraid/mxraid_assemble index 0005f0c..d9461d3 100755 --- a/mxraid/mxraid_assemble +++ b/mxraid/mxraid_assemble @@ -652,15 +652,15 @@ exit; } # 'shortcut' - sub get_hd_info { + sub get_hd_serial { my $self = shift; - $self->get_hd_serial($_[0], 1) + $self->get_hd_info($_[0], 1) } - sub get_hd_serial { + sub get_hd_info { my $self = shift; my $dev_list = shift; - my $full_info = shift; + my $short_info = shift; my @ret; my $num = scalar @$dev_list; @@ -694,10 +694,10 @@ exit; } if (defined $serial_number) { - if ($full_info) { - push @ret, [$dk, $serial_number, $model]; - } else { + if ($short_info) { push @ret, $serial_number; + } else { + push @ret, [$dk, $serial_number, $model]; } } } From 413b9c6c514ae9bd1d35566677b6eb0b740122ca Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 21 Jan 2019 13:49:30 +0100 Subject: [PATCH 4/7] mxraid_assemble: Store size of device --- mxraid/mxraid_assemble | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mxraid/mxraid_assemble b/mxraid/mxraid_assemble index d9461d3..82150ac 100755 --- a/mxraid/mxraid_assemble +++ b/mxraid/mxraid_assemble @@ -134,7 +134,7 @@ if ($opts{l}) { my $md = $hd->mddev_by_member()->{$rec->[0]}; $md = 'n/a' unless defined $md; $reg{$md}+=1; - printf " %-5s %-9s %-16s '%s'\n", $md, @$rec; + printf " %-5s %-9s %-16s '%s'\n", $md, @$rec[0..2]; } print " Note: this doesn't look like a standard configuration.\n" if scalar keys %reg != 1; print "\n"; @@ -144,7 +144,7 @@ if ($opts{l}) { print "Disks not part of a configured SW-RAID:\n"; for my $rec (@{$hd->non_raid_disks()}) { my $mnt = 
scalar(grep {m|^/dev/$rec->[0]\d*\b|} @mounts) ? ' (mounted)':''; - printf " %-9s %-16s '%s'%s\n", @$rec, $mnt; + printf " %-9s %-16s '%s'%s\n", @$rec[0..2], $mnt; } print "\n"; } @@ -668,6 +668,7 @@ exit; my ($dk) = $dev_list->[$i] =~ m|([^/]+)$|; my $model = sys_fs_get_prop('/sys/block/'.$dk.'/device/model'); + my $sizeb = sys_fs_get_prop('/sys/block/'.$dk.'/size') * 512; # if (! -e '/sys/block/'.$dk.'/device/vpd_pg80') { # warn "# Note failed to read serial via sysfs ($dk).\n" if $self->{verbose}; @@ -697,7 +698,7 @@ exit; if ($short_info) { push @ret, $serial_number; } else { - push @ret, [$dk, $serial_number, $model]; + push @ret, [$dk, $serial_number, $model, $sizeb]; } } } From f94893c674b2bc98a8b916152ca5dc5f58a5dd84 Mon Sep 17 00:00:00 2001 From: thomas Date: Tue, 22 Jan 2019 11:33:32 +0100 Subject: [PATCH 5/7] mxraid_assemble: Improve usability Creation hint is now complete; the mkfs command is also given. (bonus!) Since the array size is calculated, one can infer a reasonable chunk size. (100TB arrays use 512k, 50TB get 256k, and the lower limit is 64k) The new -r option allows specifying non-default RAID types; this might come in handy when assembling these scratch RAIDs. 
--- mxraid/mxraid_assemble | 96 +++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 30 deletions(-) diff --git a/mxraid/mxraid_assemble b/mxraid/mxraid_assemble index 82150ac..1746d91 100755 --- a/mxraid/mxraid_assemble +++ b/mxraid/mxraid_assemble @@ -30,17 +30,23 @@ sub exec_usage { options: - -a assemble arrays when run as root - -c check database - -d file alternative database file - -h print this help and exit - -l list arrays on host, with -v will also show candidates - -m monochrome warnings (for the purists) - -n label print config records for new assemblies on host - -q be quiet - -v be more verbose - -V print Version information and exit - + -a assemble arrays when run as root + -c check database + -d file alternative database file + -h print this help and exit + -l list arrays on host, with -v will also show candidates + -m monochrome warnings (for the purists) + -n label print config records for new assemblies on host + -q be quiet + -r l:d(:m) config hints, if not a RAID 6 with 16 disks (level, disks, match) + -v be more verbose + -V print Version information and exit + + example (list state and assist in assembling a RAID1): + + mxraid_assemble -lv -n D0014 -r 1:2:flash + + __HELP =things to come @@ -81,7 +87,7 @@ sub check_enclosures { my $ROOT=$<==0?1:0; my %opts; -getopts('acd:hlmn:qvV', \%opts) or die "# ERROR: getopts failed, try -h.\n"; # Values in %opts +getopts('acd:hlmn:qr:vV', \%opts) or die "# ERROR: getopts failed, try -h.\n"; # Values in %opts exec_usage if $opts{h}; exec_version if $opts{V}; @@ -154,10 +160,16 @@ if ($opts{n}) { my $color = MxRaid::Color->new(); $do_default_action = 0; - my ($entry, $cnt) = ('',0); - my @candidates; + my ($entry, $cnt, $size) = ('', 0, 0); + my ($level, $num_wanted, $match) = (6, 16, undef); + my $chunk_size = 512; + my $dev_no = 0; my @mounts = split m/\n/, `cat /proc/self/mounts`; + my @candidates; + if ($opts{r}) { + ($level,$num_wanted,$match) = split m/:/, $opts{r}; + } 
$entry .= $opts{n}.':'; for my $rec (@{$hd->non_raid_disks()}) { my $dev = $rec->[0]; @@ -166,44 +178,68 @@ if ($opts{n}) { warn "# NOTE: /dev/$dev is mounted, skipping.\n" if $VERBOSE >= 2; next; } + if (defined $match) { + next unless ($rec->[1] =~ m/$match/i or $rec->[2] =~ m/$match/i); + } push @candidates, $rec->[0]; $entry .= ' '.$rec->[1]; + $size += $rec->[3]; $cnt++; } - # warn "# NOTE: got $cnt disk(s), default is 16.\n" if $cnt != 16; - # warn "# NOTE: label '$opts{n}' doesn't look good.\n" unless $opts{n} =~ m/^[CDM][\da-f]\d{3}$/; - if ($cnt != 16) { - my $msg = "got $cnt disk(s), default is 16."; + if ($cnt != $num_wanted) { + my $msg = "got $cnt disk(s), expected $num_wanted."; $msg = $color->t_red($msg) unless $opts{m}; print "# NOTE: $msg\n"; } - if ($opts{n} !~ m/^[CDM][\da-f]\d{3}$/) { - my $msg = "label '$opts{n}' doesn't look good."; + my $msg = "label '$opts{n}' doesn't follow the CDM scheme."; $msg = $color->t_red($msg) unless $opts{m}; print "# NOTE: $msg\n"; } - - my $level=6; - my $chunk_size=512; - my $dev='md127'; if ($cnt) { - print "# Record for '$MDADMCONF_DB'\n"; + if ($level == 0) { + # do nothing + } elsif ($level == 1) { + $size /= 2; + } elsif ($level == 5 and $cnt>1) { + $size = int ((($cnt-1)/$cnt) * $size); + } elsif ($level == 6 and $cnt>2) { + $size = int ((($cnt-2)/$cnt) * $size); + } else { + print "# RAID level $level not handled ($cnt disks)\n"; + } + $size /= 1024**4; # TB + + + # 100TB -> 512k, 50TB -> 256k + while ( $size/$chunk_size < 0.115 and $chunk_size > 64) { + $chunk_size/=2; + } + + while (-e "/dev/md$dev_no") { + $dev_no++; + die "# ERROR: out of devices ($dev_no). 
Stopped", if $dev_no >= 128; + } + + my $dev='md'.$dev_no; + print "# Record for '$MDADMCONF_DB'\n\n"; print $entry; - print "\n\n"; - print "# Hint for creation with mdadm:\n\n"; + print "\n\n\n"; + print "# Hint for creation with mdadm & mkfs.xfs:\n\n"; printf "mdadm -C /dev/%s -l %d -n %d -N %s -c %s %s\n\n", $dev, $level, $cnt, $opts{n}, - '0x0', + $chunk_size, '/dev/' . join(' /dev/', @candidates); - my $msg = 'always check the chunk-size (-c)!'; - printf "# HINT: %s (8TB:512/4TB:256/2TB:128)\n\n", $opts{m}?$msg:$color->t_red($msg); + printf "mkfs.xfs -L %s /dev/%s\n\n", lc($opts{n}), $dev; + + + printf "# Note: size of array will be %.1f TB\n\n", $size; } } From 1f5d973c43a4c143eb3d4c5e66082e89a6f549ee Mon Sep 17 00:00:00 2001 From: thomas Date: Tue, 22 Jan 2019 11:57:36 +0100 Subject: [PATCH 6/7] mxraid_assemble: Wait for lazy enclosures --- mxraid/mxraid_assemble | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mxraid/mxraid_assemble b/mxraid/mxraid_assemble index 1746d91..2092328 100755 --- a/mxraid/mxraid_assemble +++ b/mxraid/mxraid_assemble @@ -69,6 +69,13 @@ sub exec_version { } sub check_enclosures { + + # Slow down if known HBAs are in use, possibly there is a lazy enclosure attached. + # This might be skipped if /proc/uptime is large enough? 
+ if (-e '/sys/module/aacraid/version' or -e '/sys/module/smartpqi/version') { + usleep(0.3 * 1e6); + } + glob('/sys/class/enclosure/*') or return; my $cnt_0 = () = (glob('/sys/block/sd[a-z]'), glob('/sys/block/sd[a-z][a-z]')); From 560509f807621dd66f431718163755c5bee5e5b3 Mon Sep 17 00:00:00 2001 From: thomas Date: Tue, 22 Jan 2019 12:06:59 +0100 Subject: [PATCH 7/7] mxraid_assemble: Polish and increase version --- mxraid/mxraid_assemble | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/mxraid/mxraid_assemble b/mxraid/mxraid_assemble index 2092328..529891b 100755 --- a/mxraid/mxraid_assemble +++ b/mxraid/mxraid_assemble @@ -14,17 +14,17 @@ import MxRaid::HostData; import MxRaid::Utils; import MxRaid::Color; -my $VERSION = '1.0'; +my $VERSION = '1.1'; sub exec_usage { my ($prog) = $0 =~ m|([^/]+)$|; - + print <<"__HELP"; $prog usage: - - $prog [options...] + + $prog [options...] default is to print assembly commands when run w/o options. @@ -48,10 +48,6 @@ sub exec_usage { __HELP - -=things to come - -1 run script even if only 1 disk is present, well ... -=cut exit 0; } @@ -342,7 +338,7 @@ sub create_mdadm_configs { $conf_dev_no=$dev_no; $dev_no++; } - push @mdadm_conf, [ + push @mdadm_conf, [ $conf_dev_no , sprintf("ARRAY /dev/md%d devices=%s", $conf_dev_no, join ',', @tmp), $rlabel , $condition_active, $condition_incomplete ]; # -2- -3- -4-