diff --git a/install.sh b/install.sh
index 9837ab5..f949615 100755
--- a/install.sh
+++ b/install.sh
@@ -131,4 +131,5 @@ install_exec mxmount/mxmount "$DESTDIR$usr_bindir/mx
 install_data mxmount/mxmount.service "$DESTDIR$systemdunitdir/mxmount.service"
 install_exec hostconfig/hostconfig "$DESTDIR$usr_sbindir/hostconfig"
 install_exec fon/fon.sh "$DESTDIR$usr_bindir/fon"
+install_exec nfsdtop/nfsdtop "$DESTDIR$usr_sbindir/nfsdtop"
 exit
diff --git a/nfsdtop/nfsdtop b/nfsdtop/nfsdtop
new file mode 100755
index 0000000..1a2619f
--- /dev/null
+++ b/nfsdtop/nfsdtop
@@ -0,0 +1,244 @@
+#! /usr/local/system/perl/bin/perl
+use strict;
+use warnings;
+use Socket;
+use Getopt::Long;
+
+our $USAGE=<<"_EOF_";
+usage: $0 [options]
+ --continuous : run continuously until interrupted (default: single shot)
+ --accumulate : add up values when running continuously
+ --interval seconds : sample for n seconds (default: 1)
+ --top n : display only top n ops/users (default: 5) - 0 means no limit
+ --top2 n : display only top n files for each user (default: 0) - 0 means no limit
+_EOF_
+
+our %opt;
+
+# Private ftrace instance of this tool: created at startup, removed in END.
+our $INSTANCE='/sys/kernel/debug/tracing/instances/nfsdtop';
+
+# writefile($path,$data): overwrite $path with $data, dying on any failure.
+# Opening with '>' truncates the target even when $data is empty. Buffered
+# write errors may only surface at close, so close is checked as well.
+sub writefile {
+    my ($path,$data)=@_;
+    open my $fh,'>',$path or die "$path: $!\n";
+    print $fh $data or die "$path: $!\n";
+    close $fh or die "$path: $!\n";
+}
+
+# Statistics filled by analyze_trace():
+our %STAT_BY_IP_UID; # ( "ip user" => { OPS=>5, O=>{read=>3, write=>4, ...}, B=>{read=>123, write=>123123} }, ... )
+our %STAT_BY_FILE;   # ( "dev inode" => { NAME=>$name, read=>0, write=>0, U=>{"ip user"=>{read=>0, write=>0}, ...} }, ... )
+our %STAT_USER;      # ( "ip user" => { read=>0, write=>0, FILES=>{"dev inode"=>{NAME=>'', read=>0, write=>0}, ...} }, ... )
+
+{
+    my %UID_CACHE;
+
+    # getuser($uid): login name for a numeric uid, or the uid itself when
+    # there is no passwd entry. Lookups are cached.
+    sub getuser {
+        my ($uid)=@_;
+        unless (exists $UID_CACHE{$uid}) {
+            my $name=getpwuid($uid);
+            $UID_CACHE{$uid}=defined $name ? $name : "$uid";
+        }
+        return $UID_CACHE{$uid};
+    }
+}
+
+{
+    my %HOSTNAME_CACHE;
+
+    # hostname_by_u32($u32): host name for an IPv4 address given as a 32 bit
+    # integer, or its dotted-quad form when reverse lookup fails. Cached.
+    # NOTE(review): pack 'L' uses native byte order; presumably that matches
+    # how the trace event prints the address - confirm on big endian hosts.
+    sub hostname_by_u32 {
+        my ($u32)=@_;
+        my $addr=pack 'L',$u32;
+        unless (exists $HOSTNAME_CACHE{$addr}) {
+            $HOSTNAME_CACHE{$addr}=gethostbyaddr($addr,AF_INET) || inet_ntoa($addr);
+        }
+        return $HOSTNAME_CACHE{$addr};
+    }
+}
+
+# devinodename($dev,$inode,$name): "name [major,minor:inode]" label for a file.
+# $dev is split into a 12 bit major above a 20 bit minor.
+sub devinodename {
+    my ($dev,$inode,$name)=@_;
+    my $major=($dev & 0xfff00000)>>20;
+    my $minor=($dev & 0x000fffff);
+    return sprintf "%s [%d,%d:%d]",$name,$major,$minor,$inode;
+}
+
+# ipuser($ip,$user): display form "user@host".
+sub ipuser {
+    my ($ip,$user)=@_;
+    return "$user\@$ip";
+}
+
+# dump_user_bytes($op,$arrow,$top,$top2): print the byte totals of $op
+# ('read' or 'write') per user, largest first, each followed by that user's
+# files. $top limits the users, $top2 the files per user; a false value
+# means no limit. $arrow ('<--' or '-->') marks the direction in the output.
+sub dump_user_bytes {
+    my ($op,$arrow,$top,$top2)=@_;
+
+    print $top ? "Top $top user $op:\n\n" : "User $op\n\n";
+    my $count=0;
+    for my $ip_user (sort {$STAT_USER{$b}{$op} <=> $STAT_USER{$a}{$op}} keys %STAT_USER) {
+        $STAT_USER{$ip_user}{$op} or next;
+        my ($ip,$user)=split " ",$ip_user;
+        printf "%10d bytes %s %s\n",$STAT_USER{$ip_user}{$op},ipuser($ip,$user),$arrow;
+        my $s=$STAT_USER{$ip_user}{'FILES'};
+        my $count2=0;
+        for my $dev_inode (sort {$s->{$b}{$op} <=> $s->{$a}{$op}} keys %$s) {
+            $s->{$dev_inode}{$op} or next;
+            my ($dev,$inode)=split " ",$dev_inode;
+            my $name=$s->{$dev_inode}{'NAME'};
+            printf " %10d bytes %s %s\n",$s->{$dev_inode}{$op},$arrow,devinodename($dev,$inode,$name);
+            last if $top2 and ++$count2>=$top2;
+        }
+        last if $top and ++$count>=$top;
+    }
+    print "\n";
+}
+
+# dump_stat($top,$top2): print the report: operation counts per host and
+# user, then bytes read and bytes written per user and file.
+sub dump_stat {
+    my ($top,$top2)=@_;
+
+    print $top ? "Top $top operation count:\n\n" : "Operation count\n\n";
+    my $count=0;
+    for my $ip_user (sort {$STAT_BY_IP_UID{$b}{'OPS'} <=> $STAT_BY_IP_UID{$a}{'OPS'}} keys %STAT_BY_IP_UID) {
+        $STAT_BY_IP_UID{$ip_user}{'OPS'} or next;
+        my ($ip,$user)=split " ",$ip_user;
+        printf "%10d ops by %s (",$STAT_BY_IP_UID{$ip_user}{'OPS'},ipuser($ip,$user);
+
+        my $s=$STAT_BY_IP_UID{$ip_user}{'O'};
+        for my $op (sort {$s->{$b} <=> $s->{$a}} keys %$s) {
+            printf "%d %s ",$s->{$op},$op;
+        }
+        print ")\n";
+        # Pre-increment so that exactly $top entries are printed, like the
+        # byte lists below.
+        last if $top and ++$count>=$top;
+    }
+    print "\n";
+
+    dump_user_bytes('read','<--',$top,$top2);
+    dump_user_bytes('write','-->',$top,$top2);
+}
+
+# analyze_trace(): parse the trace buffer of our instance and add every
+# nfsd4 event to the statistics hashes. Dies on a line it cannot parse.
+sub analyze_trace {
+    my $path="$INSTANCE/trace";
+    open my $p,'<',$path or die "$path: $!\n";
+    while (<$p>) {
+        /^#/ and next;
+        my ($taskpid,$cpu,$flags,$time,$op,$addr,$uid,$rest) = /^\s*(\S+)\s+\[(\d+)\]\s+(....)\s+(\d+\.\d*):\s*nfsd4_(\S+): ip=0x(........) uid=(\d+)\s*(.*)/ or die "format: $_";
+        my $ip=hostname_by_u32(hex($addr));
+        my $user=getuser($uid);
+        my $ip_user="$ip $user";
+        $STAT_BY_IP_UID{$ip_user} ||= {'OPS'=>0,B=>{'read'=>0,'write'=>0}};
+        $STAT_BY_IP_UID{$ip_user}{'OPS'}++;
+        $STAT_BY_IP_UID{$ip_user}{'O'}{$op}++;
+
+        # Byte counts are taken from read and write events only.
+        if ($op eq 'read' || $op eq 'write') {
+            my ($dev,$inode,$offset,$length,$name) = $rest =~ /dev=(\d+) inode=(\d+) offset=(\d+) length=(\d+) name=\"(.*)\"$/ or die "format?? $_\n";
+
+            my $dev_inode="$dev $inode";
+
+            $STAT_BY_IP_UID{$ip_user}{'B'}{$op} += $length;
+            $STAT_USER{$ip_user} ||= { read=>0, write=>0, FILES=>{} };
+            $STAT_USER{$ip_user}{'FILES'}{$dev_inode} ||= { read=>0, write=>0 };
+            $STAT_USER{$ip_user}{$op}+=$length;
+            $STAT_USER{$ip_user}{'FILES'}{$dev_inode}{$op}+=$length;
+            $STAT_USER{$ip_user}{'FILES'}{$dev_inode}{'NAME'}=$name;
+
+            $STAT_BY_FILE{$dev_inode} ||= {NAME=>$name,read=>0,write=>0,U=>{}};
+            $STAT_BY_FILE{$dev_inode}{$op} += $length;
+
+            $STAT_BY_FILE{$dev_inode}{U}{$ip_user} ||= {read=>0,write=>0};
+            $STAT_BY_FILE{$dev_inode}{U}{$ip_user}{$op} += $length;
+        }
+    }
+}
+
+# enable_events(): enable every nfsd:nfsd4_* trace event the kernel offers in
+# our instance. Dies when the kernel provides none or a write is rejected.
+sub enable_events {
+    my $available='/sys/kernel/debug/tracing/available_events';
+    my $set_event="$INSTANCE/set_event";
+    open my $in,'<',$available or die "$available: $!\n";
+    open my $out,'>',$set_event or die "$set_event: $!\n";
+    my $i=0;
+    while (<$in>) {
+        /^nfsd:nfsd4_/ or next;
+        print $out $_ or die "$set_event: $!\n";
+        $i++;
+    }
+    close $out or die "$set_event: $!\n";
+    $i or die "I don't think this kernel has the required infrastructure. nfsd trace patch missing?\n";
+}
+
+# disable_events(): stop tracing by emptying the event list of our instance.
+sub disable_events {
+    writefile("$INSTANCE/set_event",'');
+}
+
+our $mydir;
+# Remove our trace instance on exit, but only if this process created it.
+END {
+    $mydir and rmdir $INSTANCE;
+}
+# exit() instead of the default signal death, so that the END block runs.
+$SIG{$_}=sub { exit(0) } for qw(INT TERM);
+
+GetOptions (
+    'continuous' => \$opt{'continuous'},
+    'accumulate' => \$opt{'accumulate'},
+    'interval=i' => \$opt{'interval'},
+    'top=i' => \$opt{'top'},
+    'top2=i' => \$opt{'top2'},
+) or die $USAGE;
+
+$opt{'interval'} ||= 1;
+defined $opt{'top'} or $opt{'top'}=5;
+
+unless (mkdir $INSTANCE) {
+    # Test for EEXIST before warn() gets a chance to disturb $!.
+    my $exists=$!{EEXIST};
+    warn "$INSTANCE: $!\n";
+    if ($exists) {
+        warn "maybe another admin is running this command\n";
+    }
+    exit 1;
+}
+$mydir=1;
+
+$|=1;
+
+while (1) {
+    printf ("%s %d second%s trace\n\n",'*'x20,$opt{'interval'},$opt{'accumulate'}?' accumulated':'');
+    # Clear the buffer of our own instance, which is the one analyze_trace()
+    # reads; the global trace buffer is not ours to wipe.
+    writefile("$INSTANCE/trace",'');
+    enable_events();
+    sleep ($opt{'interval'});
+    disable_events();
+    analyze_trace();
+    dump_stat($opt{'top'},$opt{'top2'});
+    $opt{'continuous'} or last;
+    unless ($opt{'accumulate'}) {
+        %STAT_BY_IP_UID=();
+        %STAT_BY_FILE=();
+        %STAT_USER=();
+    }
+}