Skip to content

Commit

Permalink
Add nfsdtop
Browse files Browse the repository at this point in the history
nfsdtop is a rudimentary tool to make use of our nfsd tracing
feature added to the kernel by [1].

It can be used to trace nfsd usage by remote clients on overloaded nfs
servers.

    usage: /usr/sbin/nfsdtop [options]
      --continuous       : run continuously until interrupted (default: single shot)
      --accumulate       : add up values when running continuously
      --interval seconds : sample for n seconds (default: 1)
      --top n            : display only top n ops/users (default: 5) - 0 means no limit
      --top2 n           : display only top n files for each user (default: 0) - 0 means no limit

[1] /src/mariux/patches/linux-4.14-nfsd-Add-trace-events-for-all-v4-operations.patch
  • Loading branch information
donald committed Jan 9, 2019
1 parent a11ea89 commit 435f7d9
Show file tree
Hide file tree
Showing 2 changed files with 222 additions and 0 deletions.
1 change: 1 addition & 0 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,5 @@ install_exec mxmount/mxmount "$DESTDIR$usr_bindir/mx
install_data mxmount/mxmount.service "$DESTDIR$systemdunitdir/mxmount.service"
install_exec hostconfig/hostconfig "$DESTDIR$usr_sbindir/hostconfig"
install_exec fon/fon.sh "$DESTDIR$usr_bindir/fon"
install_exec nfsdtop/nfsdtop "$DESTDIR$usr_sbindir/nfsdtop"
exit
221 changes: 221 additions & 0 deletions nfsdtop/nfsdtop
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
#! /usr/local/system/perl/bin/perl
use strict;
use warnings;
use Socket;
use Getopt::Long;

# Usage text shown when command line parsing fails (see the GetOptions call
# further down). The heredoc interpolates, so $0 expands to the name this
# script was invoked as.
our $USAGE=<<"_EOF_";
usage: $0 [options]
--continuous : run continuously until interrupted (default: single shot)
--accumulate : add up values when running continuously
--interval seconds : sample for n seconds (default: 1)
--top n : display only top n ops/users (default: 5) - 0 means no limit
--top2 n : display only top n files for each user (default: 0) - 0 means no limit
_EOF_

# Parsed command line options, keyed by option name
# (continuous, accumulate, interval, top, top2).
our %opt;

# writefile($path,$data)
#
# Truncate $path and write $data to it.
#
# Dies with "$path: <error>" when the file cannot be opened, written or
# closed. The close is checked explicitly: output is buffered, so an error
# reported by the kernel for the actual write (e.g. a rejected value written
# to a tracefs control file) only becomes visible when the handle is flushed.
#
# Returns a true value on success.
sub writefile {
    my ($path,$data)=@_;
    open my $fh,'>',$path or die "$path: $!\n";
    print {$fh} $data or die "$path: $!\n";
    close $fh or die "$path: $!\n";
    return 1;
}

# Per client/user operation statistics, keyed by "ip user":
#   ( "ip user" => { OPS=>5, O=>{read=>3, write=>4, ...}, B=>{read=>123, write=>123123} }, ... )
# OPS is the total number of operations, O the count per operation name and
# B the number of bytes read/written.
our %STAT_BY_IP_UID;
# Per file byte counts, keyed by "dev inode":
#   ( "dev inode" => { NAME=>$name, read=>0, write=>0, U=>{ "ip user" => {read=>0, write=>0}, ... } }, ... )
our %STAT_BY_FILE;
# Per client/user byte counts with a per file breakdown, keyed by "ip user":
#   ( "ip user" => { read=>0, write=>0, FILES=>{ "dev inode" => {NAME=>'', read=>0, write=>0}, ... } }, ... )
our %STAT_USER;

{
    # Memo of uid => display name, so every uid is looked up in the
    # password database only once per run.
    my %UID_CACHE;

    # getuser($uid)
    #
    # Return the login name for the numeric $uid, or the uid itself (as a
    # string) when the password database has no entry for it.
    sub getuser {
        my ($uid)=@_;
        return $UID_CACHE{$uid} if exists $UID_CACHE{$uid};

        my $login=getpwuid($uid);
        $login="$uid" unless defined $login;
        $UID_CACHE{$uid}=$login;
        return $login;
    }
}

{
    # Memo of packed address => display name, so every client address is
    # resolved only once per run.
    my %HOSTNAME_CACHE;

    # hostname_by_u32($u32)
    #
    # Turn an IPv4 address given as a 32 bit integer into a host name.
    # The integer is packed in native byte order ('L') to obtain the four
    # address bytes. When the reverse lookup yields nothing, the dotted quad
    # representation is returned instead.
    sub hostname_by_u32 {
        my ($u32)=@_;
        my $addr=pack 'L',$u32;
        unless (exists $HOSTNAME_CACHE{$addr}) {
            # AF_INET comes from Socket; use it instead of a hard-coded 2.
            $HOSTNAME_CACHE{$addr}=gethostbyaddr($addr,AF_INET) || inet_ntoa($addr);
        }
        return $HOSTNAME_CACHE{$addr};
    }
}

# devinodename($dev,$inode,$name)
#
# Format a file for display as "name [major,minor:inode]". The device
# number is split into its upper 12 bits (major) and lower 20 bits (minor).
sub devinodename {
    my ($dev,$inode,$name)=@_;
    my @major_minor=(
        ($dev & 0xfff00000)>>20,
        $dev & 0x000fffff,
    );
    return sprintf("%s [%d,%d:%d]",$name,@major_minor,$inode);
}

# ipuser($ip,$user)
#
# Build the "user@host" label used in the report.
sub ipuser {
    my ($ip,$user)=@_;
    return join('@',$user,$ip);
}

# _dump_user_bytes($top,$top2,$op,$arrow)
#
# Print one "bytes per user" section for $op ('read' or 'write') from
# %STAT_USER: users in descending order of bytes transferred, each followed
# by the files they accessed, again in descending order. $arrow ('<--' or
# '-->') marks the direction in the output. At most $top users and $top2
# files per user are printed; 0 or undef means no limit. Entries with a
# byte count of zero are skipped.
sub _dump_user_bytes {
    my ($top,$top2,$op,$arrow)=@_;
    my $count=0;
    for my $ip_user (sort {$STAT_USER{$b}{$op} <=> $STAT_USER{$a}{$op}} keys %STAT_USER) {
        $STAT_USER{$ip_user}{$op} or next;
        my ($ip,$user)=split " ",$ip_user;
        printf "%10d bytes %s %s\n",$STAT_USER{$ip_user}{$op},ipuser($ip,$user),$arrow;

        my $files=$STAT_USER{$ip_user}{'FILES'};
        my $count2=0;
        for my $dev_inode (sort {$files->{$b}{$op} <=> $files->{$a}{$op}} keys %$files) {
            $files->{$dev_inode}{$op} or next;
            my ($dev,$inode)=split " ",$dev_inode;
            my $name=$files->{$dev_inode}{'NAME'};
            printf " %10d bytes %s %s\n",$files->{$dev_inode}{$op},$arrow,devinodename($dev,$inode,$name);
            last if $top2 and ++$count2>=$top2;
        }
        last if $top and ++$count>=$top;
    }
    return;
}

# dump_stat($top,$top2)
#
# Print the collected statistics to STDOUT in three sections:
#   1. operation count per client/user, with a per operation breakdown
#      (from %STAT_BY_IP_UID),
#   2. bytes read per client/user and file (from %STAT_USER),
#   3. bytes written per client/user and file (from %STAT_USER).
#
# $top  limits the number of users shown per section (0/undef: no limit).
# $top2 limits the number of files shown per user (0/undef: no limit).
sub dump_stat {
    my ($top,$top2)=@_;

    print $top ? "Top $top operation count:\n\n" : "Operation count\n\n";
    my $count=0;
    for my $ip_user (sort {$STAT_BY_IP_UID{$b}{'OPS'} <=> $STAT_BY_IP_UID{$a}{'OPS'}} keys %STAT_BY_IP_UID) {
        $STAT_BY_IP_UID{$ip_user}{'OPS'} or next;
        my ($ip,$user)=split " ",$ip_user;
        printf "%10d ops by %s (",$STAT_BY_IP_UID{$ip_user}{'OPS'},ipuser($ip,$user);

        my $ops=$STAT_BY_IP_UID{$ip_user}{'O'};
        for my $op (sort {$ops->{$b} <=> $ops->{$a}} keys %$ops) {
            printf "%d %s ",$ops->{$op},$op;
        }
        print ")\n";
        # Pre-increment so that exactly $top rows are printed, like in the
        # read/write sections (a post-increment would print $top+1 rows).
        last if $top and ++$count>=$top;
    }
    print "\n";

    print $top ? "Top $top user read:\n\n" : "User read\n\n";
    _dump_user_bytes($top,$top2,'read','<--');
    print "\n";

    print $top ? "Top $top user write:\n\n" : "User write\n\n";
    _dump_user_bytes($top,$top2,'write','-->');
    print "\n";
}

# analyze_trace()
#
# Read the trace buffer of the nfsdtop tracing instance and add every
# nfsd4_* event found there to the global statistics (%STAT_BY_IP_UID,
# %STAT_USER, %STAT_BY_FILE).
#
# Every event counts as one operation for its "host user" pair. For read
# and write events the transferred length is additionally accounted per
# user and per file.
#
# Dies when the trace file cannot be opened or when an event line does not
# have the expected format.
sub analyze_trace {
    my $trace_path='/sys/kernel/debug/tracing/instances/nfsdtop/trace';
    open my $p,'<',$trace_path or die "$trace_path: $!\n";
    while (my $line=<$p>) {
        $line =~ /^#/ and next;    # header comment lines

        # When the ring buffer overflowed, the kernel inserts marker lines
        # like "CPU:3 [LOST 42 EVENTS]" between the events. They are not
        # events, so report and skip them instead of failing the format check.
        if ($line =~ /^CPU:\d+ \[LOST /) {
            warn "trace buffer overflow: $line";
            next;
        }

        my ($taskpid,$cpu,$flags,$time,$op,$addr,$uid,$rest) = $line =~ /^\s*(\S+)\s+\[(\d+)\]\s+(....)\s+(\d+\.\d*):\s*nfsd4_(\S+): ip=0x(........) uid=(\d+)\s*(.*)/ or die "format: $line";
        my $ip=hostname_by_u32(hex($addr));
        my $user=getuser($uid);
        my $ip_user="$ip $user";

        $STAT_BY_IP_UID{$ip_user} ||= {'OPS'=>0,B=>{'read'=>0,'write'=>0}};
        $STAT_BY_IP_UID{$ip_user}{'OPS'}++;
        $STAT_BY_IP_UID{$ip_user}{'O'}{$op} ++;

        if ($op eq 'read' || $op eq 'write') {
            my ($dev,$inode,$offset,$length,$name) = $rest =~ /dev=(\d+) inode=(\d+) offset=(\d+) length=(\d+) name=\"(.*)\"$/ or die "format?? $line\n";
            my $dev_inode="$dev $inode";

            $STAT_BY_IP_UID{$ip_user}{'B'}{$op} += $length;

            $STAT_USER{$ip_user} ||= { read=>0, write=>0, FILES=>{} };
            $STAT_USER{$ip_user}{'FILES'}{$dev_inode} ||= { read=>0, write=>0 };
            $STAT_USER{$ip_user}{$op}+=$length;
            $STAT_USER{$ip_user}{'FILES'}{$dev_inode}{$op}+=$length;
            # the most recently seen name wins
            $STAT_USER{$ip_user}{'FILES'}{$dev_inode}{'NAME'}=$name;

            $STAT_BY_FILE{$dev_inode} ||= {NAME=>$name,read=>0,write=>0,U=>{}};
            $STAT_BY_FILE{$dev_inode}{$op} += $length;
            $STAT_BY_FILE{$dev_inode}{U}{$ip_user} ||= {read=>0,write=>0};
            $STAT_BY_FILE{$dev_inode}{U}{$ip_user}{$op} += $length;
        }
    }
}
# enable_events()
#
# Enable all nfsd:nfsd4_* trace events in the nfsdtop tracing instance by
# copying the matching lines from available_events to the instance's
# set_event file.
#
# Dies when either file cannot be opened, when no matching event exists
# (kernel without the nfsd trace patch) or when writing the event list
# fails. The close is checked explicitly because the output is buffered and
# an error from the kernel only shows up when the data is flushed.
sub enable_events {
    my $available='/sys/kernel/debug/tracing/available_events';
    my $set_event='/sys/kernel/debug/tracing/instances/nfsdtop/set_event';
    open my $in,'<',$available or die "$available: $!\n";
    open my $out,'>',$set_event or die "$set_event: $!\n";
    my $i=0;
    while (my $event=<$in>) {
        $event =~ /^nfsd:nfsd4_/ or next;
        print {$out} $event or die "$set_event: $!\n";
        $i++;
    }
    $i or die "I don't think this kernel has the required infrastructure. nfsd trace patch missing?\n";
    close $out or die "$set_event: $!\n";
}

# disable_events()
#
# Turn all events of the nfsdtop tracing instance off again by writing an
# empty string to its set_event file.
sub disable_events {
    my $set_event='/sys/kernel/debug/tracing/instances/nfsdtop/set_event';
    writefile($set_event,'');
}

# True once this process has created the tracing instance directory, so
# the END block only removes a directory that belongs to this run.
our $mydir;

# Remove our tracing instance on exit. END blocks run on normal exit, on
# die and on exit() called from a signal handler, but not when the process
# is killed by the default action of a signal.
END {
    if ($mydir) {
        rmdir "/sys/kernel/debug/tracing/instances/nfsdtop"
            or warn "/sys/kernel/debug/tracing/instances/nfsdtop: $!\n";
    }
}

# Turn the usual termination signals into a regular exit so that the END
# block above gets a chance to run. Without this a plain `kill`, a hangup
# or a closed output pipe would leave the instance directory behind and
# block every later run.
$SIG{$_}=sub { exit(0) } for qw(INT TERM HUP PIPE);

# Parse the command line into %opt; print the usage text and exit on
# unknown or malformed options.
GetOptions(
    \%opt,
    'continuous',
    'accumulate',
    'interval=i',
    'top=i',
    'top2=i',
) or die $USAGE;

# Defaults: sample for one second (also used when 0 is given) and show the
# top 5 entries. An explicit --top 0 is kept and means "no limit".
$opt{'interval'}=1 unless $opt{'interval'};
$opt{'top'}=5 unless defined $opt{'top'};

# Create a private tracing instance. The directory also serves as a lock:
# mkdir fails when it already exists.
unless (mkdir ("/sys/kernel/debug/tracing/instances/nfsdtop")) {
    # Evaluate the error condition before doing any output, so that $!
    # cannot be changed in between. %! provides symbolic errno names.
    my $already_exists=$!{EEXIST};
    warn "/sys/kernel/debug/tracing/instances/nfsdtop: $!\n";
    if ($already_exists) {
        warn "maybe another admin is running this command\n";
    }
    exit 1;
}
# From here on the END block is responsible for removing the instance.
$mydir=1;

%STAT_BY_IP_UID=();
%STAT_BY_FILE=();
%STAT_USER=();

# Unbuffered output, so each report shows up immediately.
$|=1;

# Main loop: sample for the configured interval, then analyze and report.
# Runs once unless --continuous was given.
while (1) {
    printf ("%s %d second%s trace\n\n",'*'x20,$opt{'interval'},$opt{'accumulate'}?' accumulated':'');
    # Clear the buffer of OUR instance, which is the one analyze_trace()
    # reads. Reading the trace file does not consume events, so without
    # this every interval would count the events of all earlier intervals
    # again. The global trace buffer is left alone.
    writefile('/sys/kernel/debug/tracing/instances/nfsdtop/trace','');
    enable_events();
    sleep ($opt{'interval'});
    disable_events();
    analyze_trace();
    dump_stat($opt{'top'},$opt{'top2'});
    $opt{'continuous'} or last;
    # Start from scratch for the next interval unless values are to be
    # added up.
    unless ($opt{'accumulate'}) {
        %STAT_BY_IP_UID=();
        %STAT_BY_FILE=();
        %STAT_USER=();
    }
}


0 comments on commit 435f7d9

Please sign in to comment.