Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 1185 lines (942 sloc) 34 KB
#! /usr/bin/perl -w
=head1 NAME
sousage.pl
=head1 SYNOPSIS
sousage.pl [options...] [query...]
sousage.pl libxslt.so.1
sousage.pl -h
=head1 DESCRIPTION
sousage.pl is at tool to analyse the usage and the dependencies of shared libraries.
To shorten the runtime for queries the program generates a cache file.
Environment and cache location:
Because there is no perfect place for the cache file (named 'sou_store') there is
a harcoded default (/var/cache/sousage). But the user might also utilize environment
variables and command line options to define the cache file.
Variables:
SOU_STORE full name of the cache file, overridden by -c switch.
- or the DIR/sou_store case -
PKGDB_CACHE_HOME proposed default in the molgen domain (/scratch/tmp/pkgdb_caches)
Examples:
create an own cache for the whole disk as root ignoring defaults (takes approx 2-20 min):
sousage.pl -c/dev/shm/i_ve_got_time -C/
query the product from above
sousage.pl -c/dev/shm/i_ve_got_time libxml2
get 'users' of /usr/lib/libgtop-2.0.so.7
sousage.pl /usr/lib/libgtop-2.0.so.7
find suspicious bee packages
sousage.pl -t lsbee /usr/bin | grep ,
see which bee packages are involved
sousage.pl -t ldd_bee /usr/lib/libgtop-2.0.so.7
see 32-bit binaries:
sousage.pl -DELVES_32BIT | less
=cut
use strict;
use Data::Dumper; $Data::Dumper::Sortkeys=1;
use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION=1;
sub HELP_MESSAGE{}
sub VERSION_MESSAGE{}
use Storable;
use File::Find;
use File::Basename;
use Text::Soundex;
use Carp;
use warnings;
# use TinyProgressBar; # see below!
# flags for lib queries
use constant {
VALID_PATH => 1,
EXISTS => 2,
DECENT_SO => 4,
};
use constant {
FILENAME => 0,
PATH => 1,
LIBNAME => 2,
LIB_CLASS => 3,
};
my $VERSION = '2017.07.07';
my $CACHEDIR = $ENV{PKGDB_CACHE_HOME} || '/var/cache/sousage';
my $SOU_STORE_DEFAULT = "$CACHEDIR/sou_store";
my $PBAR_1 = undef;
sub exec_usage {
my ($prog) = $0 =~ m|([^/]+)$|;
print <<"__HELP";
PROTOTYPE!
$prog usage:
$prog [options...] [query...]
options:
-- cache location and maintenance --
-c file name of cache file (default: $SOU_STORE_DEFAULT)
-s print cache status information
-C dir create cache for dir (dir1:dir2:... works too)
-D key Dump database (list keys w '?')
-- common usage and queries --
-h print this help and exit
-i case insensitive lookups
-l dirs alternative ld path string (use colons, or +path to just add)
-q be quiet
-t type query type
-v be verbose
-V show version (default: $VERSION)
-H prints more usage information (unless you are root)
-X devel switch (use the source)
query types (-t option) are:
sodep shared object dependencies (default)
altsodep shared object dependencies (alternative strategy)
ldd_bee prints bee references and info
so_tgt shows known symlinks
lsbee list directories with corresponding bee packages
ldd for comparing with 'real' ldd output (debug feature)
more info: perldoc $prog (this is what the -H switch does)
__HELP
exit 0;
}
# obsolete ? -A lib show alternative locations/spellings for given lib
# obsolete ? -g use glob expansion when searching libnames (default is soundalike)
my %opts;
exec_usage unless @ARGV;
getopts('c:l:hiqst:v:C:D:HX:V3', \%opts) or die "# ERROR: getopts failed, try -h.\n"; # Values in %opts
exec_usage if $opts{h};
$opts{3}=1; # enforce 32 bit lookup
my $SOU_STORE = $SOU_STORE_DEFAULT; # see above!
$SOU_STORE = $ENV{SOU_STORE} if $ENV{SOU_STORE};
$SOU_STORE = $opts{c} if $opts{c};
my $SOU_DB;
my $TIME_STAMP = time;
my $DB_VERSION = '0.1a';
# XXX fix need devnode:inode as key, (host:dev:inode huhhhh)
my %ELVES; # { '/usr/local/openmotif-2.3/lib/libXm.so.4.0.3' => inode }
my %SEVLE_tmp; # the reverse
my %SYML_ELVES; # { '/usr/lib/libudf.so.0' => 1++ }
my %SO_LINKS; # { '/usr/local/qt5/lib/libQt5Widgets.so.5.4.1' => '/usr/local/qt5/lib/libQt5Widgets.so.5'}, real -> link/alias
my %SO_SOUNDEX; # { 'C120' => ['/usr/lib/libcups.so', '/usr/lib/libcupscgi.so.1', ...] }
my %EDGES_FORW; # { '/usr/local/qt5/bin/qtpaths' => {'/usr/local/qt5/lib/libQt5Core.so.5'=>1, ...} } -- 1:n
my %EDGES_REV; # { '/usr/local/qt5/lib/libQt5Core.so.5' => ['/usr/local/qt5/bin/qtpaths', ...] } -- 1:n
# ldd_bee
my %BEECONTENT; # { '/usr/bin/bsdcpio' => ['libarchive-2.8.4-0.x86_64', 'libarchive-3.2.1-0.x86_64', ...] } -- usually 1:1
my %BEE_TO_LIBS; # { 'glibc-2.19_p13_p2-3.x86_64' => { '/lib/ld-linux-x86-64.so.2' => 1, '/lib/libc.so.6' => 1, ...} }
# for the stats
my @ELVES_32BIT;
my %HARDLINKS;
my %DEADLINKS;
my @UNREADABLE;
my %LIBS_MISSING;
my %RPATH_ENTRIES; # see where other crap might be found
my @AMBIGUOUS_ONES; # [$elf_file ,@lib_candidates]
# my %_READELF_CACHE; # devel only
# mind the order
my @LD_SO_CONF = qw(
/lib
/usr/lib
/usr/local/lib
);
if ($opts{l}) {
my $path = $opts{l};
if ( $path =~ m/^\+/ ) {
$path =~ s/^\+//;
push @LD_SO_CONF, split m/:/, $path;
} else {
@LD_SO_CONF = split m/:/, $path;
}
}
my %LD_SO_CONF_register = map { $_ , 1 } @LD_SO_CONF;
# my %_READLINK_CACHE;
# my %BLACKLIST = ();
if ($opts{H}) {
if ($< == 0) { # $UID
warn "# NOTE: will call 'perldoc' utility as root, which in turn might fail due to a privilege drop.\n";
}
exec 'perldoc', $0;
exit;
}
if ($opts{V}) {
my ($prog) = $0 =~ m|([^/]+)$|;
print "# $prog $VERSION\n";
exit;
}
if ($opts{C}) {
# my $dir = `pwd`;
# chomp $dir;
my $dir = $opts{C};
for my $d (split m/:/, $dir) {
-d $d || die "# ERROR: '$d' ($!)\n";
}
if (@ARGV) {
warn "# NOTE: found extra args on commandline (ignored).\n".
"# NOTE: creating cache '$SOU_STORE' for directory '$dir'\n";
}
create_cache($dir);
exit;
}
if (-e $SOU_STORE) {
$SOU_DB = retrieve($SOU_STORE) || die "# cache load failed ($SOU_STORE)\n";
# check version
my $ver = $SOU_DB->{DB_VERSION} || 'undef';
warn "# VERSION MISMATCH: read '$ver' wanted '$DB_VERSION'\n" if $ver ne $DB_VERSION;
}
if ($opts{D}) {
check_cache_presence();
my $key = $opts{D};
if (exists $SOU_DB->{$key}) {
print Dumper $SOU_DB->{$key};
} else {
print Dumper sort keys %{$SOU_DB};
}
exit;
}
if ($opts{A}) {
check_cache_presence();
my $query = $opts{A};
# my $ret = infer_libname($query,'guess_by_sound') || 'not found!';
# my $ret = infer_libname($query,'guess_by_glob') || 'not found!';
infer_libnames($query,'grep');
# infer_libnames($query,$opts{g}?'guess_by_glob':'guess_by_sound','lib');
# printf "# ret: %s\n", $ret;
exit;
}
if ($opts{X}) {
check_cache_presence();
print Dumper $SOU_DB->{BEECONTENT};
exit;
}
if ($opts{s}) {
check_cache_presence();
printf "# cache file: '%s'\n", $SOU_STORE;
printf "# start dir(s): '%s'\n", $SOU_DB->{start_dirs};
printf "# lib entries: %d\n", scalar keys %{$SOU_DB->{EDGES_FORW}};
printf "# db version: '%s'\n", $SOU_DB->{DB_VERSION};
printf "# age: %s\n", years(time-$SOU_DB->{x_created}[1],2);
}
while (@ARGV) {
my $q = shift @ARGV;
query($q);
}
exit;
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# main subroutines
sub check_cache_presence {
return 1 if $SOU_DB;
my ($prog) = $0 =~ m|([^/]+)$|;
print <<__MSG; # be a bit more verbose to help the impatient :)
The expected cache file is missing ('$SOU_STORE').
You may create one with '$prog -c my_cache -C /'.
A look at the output of '$prog -h' might be usefull too.
__MSG
exit 1;
}
sub query {
check_cache_presence();
my $query = shift || die "need a query. (try -h)\n";
my $type = $opts{t} || 'sodep';
if (0) {
} elsif ( $type eq 'sodep') {
my $libname = $query;
my $libname_esc = $libname;
# :xxx:tk: use a more bulletproof way
$libname_esc =~ s/\+/\\\+/g;
$libname_esc =~ s/\./\\\./g;
my @findings;
if ($opts{i}) {
@findings = sort grep { /$libname_esc/i } keys %{$SOU_DB->{EDGES_REV}};
} else {
@findings = sort grep { /$libname_esc/ } keys %{$SOU_DB->{EDGES_REV}};
}
print "# NOTE: '$libname' is unknown.\n" unless @findings;
print "# NOTE: '$libname' found multiple matches.\n" if scalar @findings > 1;
for my $f (@findings) {
my @refs = sort @{$SOU_DB->{EDGES_REV}{$f}};
my $width = 0;
for my $r (@refs) { $width = length($r) if length($r) > $width }
$width++;
print "\n";
printf "# '%s' dependencies:\n", $f;
printf "# %s\n", '-'x ($width+42);
for my $r (@refs) {
my $beepkg = join ', ', @{$SOU_DB->{BEECONTENT}{$r}} if exists $SOU_DB->{BEECONTENT}{$r};
printf " %-${width}s %s\n", $r, $beepkg || 'N/A';
}
printf "# %d hit(s)\n", scalar @refs;
}
print "\n";
} elsif ( $type eq 'altsodep') {
# my $mangled_query = infer_libname($query,'strict');
# (my $filename, my $path, my $libname, my $lib_class) = classify_libquery($query);
my $librec = classify_libquery($query);
my $feel_happy = -1;
my $msg='';
if ($librec->[LIB_CLASS] & EXISTS) {
my $lib = "$librec->[PATH]/$librec->[FILENAME]";
$feel_happy = report_libusage($lib); # or print "# NOTE: '$query' -- the file exists, but no known binary/lib uses it.\n";
} else {
# if ($librec->[LIB_CLASS] & VALID_PATH) {
if ($librec->[LIB_CLASS] & VALID_PATH) {
# $msg .= sprintf "# NOTE: file '%s' isn't located in '%s'.", $librec->[FILENAME], $librec->[PATH];
} else {
# $msg .= sprintf "# NOTE: file '%s' not found, trying some heuristics.", $librec->[FILENAME];
}
my @cand;
# if ($librec->[LIB_CLASS] & DECENT_SO) {
for my $p (@LD_SO_CONF) { # + all other locations from rpath?
if ( -e "$p/$librec->[FILENAME]" ) {
push @cand, "$p/$librec->[FILENAME]";
}
}
if (@cand) {
# print Dumper @cand;
if (scalar @cand == 1 or (scalar @cand == 2 and samefile(@cand))) {
my $lib = shift @cand;
$feel_happy = report_libusage($lib);# or $msg .= "\n# NOTE: '$lib' -- the file exists, but no known binary/lib uses it.";
# $msg .= ' (but the identical lib occurs in different places)' if scalar @cand > 1; # sym/hard-links
} else {
print "# Holla die Waldfee...\n# multiple libs under the same name...\n# right now we bail out here (sorry)\n";
print Dumper @cand; exit;
}
} else {
# $msg .= " (The file wasn't found in current search path.)";
# try to infer
if ($librec->[LIB_CLASS] & DECENT_SO) {
print "# '$query' wasn't found, looking for things like '$librec->[LIBNAME]'.\n";
$feel_happy = infer_libnames($librec->[LIBNAME],$opts{g}?'guess_by_glob':'guess_by_sound'); # 'guess_by_sound', 'guess_by_glob'
} else {
print "# '$query' wasn't found, looking for things like '$query'.\n";
$feel_happy = infer_libnames($query,$opts{g}?'guess_by_glob':'guess_by_sound','lib');
}
}
}
# print "\n# happy? $feel_happy\n\n" if $feel_happy < 1;
# confident but ! existing
if ($feel_happy >= 1 and !($librec->[LIB_CLASS] & EXISTS)) {
printf "# NOTE: '%s' doesn't exist, results came from searching the library path.\n", $query;
printf "# (sousage distinguishes between /lib/libfoo.so and /usr/lib/libfoo.so)\n", $query;
} elsif ($feel_happy <= 0 and 0) {
$msg .= sprintf "# NOTE: query for '%s' gave no or unexpected results,\n", $query;
$msg .= sprintf " it does%s look like a decent shared object\n", ($librec->[LIB_CLASS] & DECENT_SO)?'':"n't";
$msg .= sprintf " it does%s have a valid path\n", ($librec->[LIB_CLASS] & VALID_PATH)?'':"n't";
$msg .= sprintf " it does%s exist (as a file)\n", ($librec->[LIB_CLASS] & EXISTS)?'':"n't";
}
print $msg."\n";
exit;
} elsif ( $type eq 'ldd') {
# ldd walks the dep chain down, so we do ...
my $res = dependency_walk($query);
print Dumper $res;
} elsif ( $type eq 'ldd_bee') {
my @tgts = ($query); # init with single item, beeing compatible to ldd_bee
my $res = dependency_walk($query);
push @tgts, keys %{$res};
ldd_bee_prepare(\@tgts);
ldd_bee_report();
} elsif ( $type eq 'so_tgt') {
print "# dumping known symlinks for '$query'\n";
print Dumper $SOU_DB->{SO_LINKS}{$query};
} elsif ( $type eq 'lsbee') {
# print "lsbee got query '$query'\n";
$query =~ s{/+$}{};
# hmmm, really re-implement a ls like thing?
# handle quotes or not?
# its just a dir
if (-d $query) {
my @listing = `ls -bola '$query' 2>&1`; die "# Error: ls failed on '$query'" if $?;
chomp @listing;
shift @listing; # dump the total
for my $rec (@listing) {
my @tmp = split m/\s+/, $rec, 8;
my $filename = '';
my $tgtname = '';
$filename = $tmp[7] if defined $tmp[7];
$filename =~ m/^(.+)\s+->\s+(.+)$/ and $filename = $1 and $tgtname = $2;
my $beepkg = '';
$beepkg = join ', ', @{$SOU_DB->{BEECONTENT}{"$query/$filename"}} if exists $SOU_DB->{BEECONTENT}{"$query/$filename"};
$rec =~ s/\s+->\s+.+$// if defined $tgtname;
# ls did some nice preformatting, keep it
$rec =~ s/(^\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+)(.*)$/$1/;
#printf "%s %-42s %s%s\n", $rec, $beepkg?$beepkg:'N/A', $filename, $tgtname?" -> $tgtname":'';
printf "%s %-42s %s\n", $rec, $beepkg?$beepkg:'N/A', $tmp[7];
}
} else {
my @listing = `ls -bola '$query' 2>&1`; die "# Error: ls failed on '$query'" if $?;
chomp @listing;
for my $rec (@listing) {
my @tmp = split m/\s+/, $rec, 8;
my $filename = '';
my $tgtname = '';
# readjust sizes
$tmp[3] = sprintf "%12d", $tmp[3] if defined $tmp[3];
$tmp[5] = sprintf "%2d", $tmp[5] if defined $tmp[5];
$tmp[6] = sprintf "%5s", $tmp[6] if defined $tmp[6];
$filename = $tmp[7] if defined $tmp[7];
$filename =~ m/^(.+)\s+->\s+(.+)$/ and $filename = $1 and $tgtname = $2;
$tmp[7] = $filename;
my $beepkg = '';
$beepkg = join ', ', @{$SOU_DB->{BEECONTENT}{$query}} if exists $SOU_DB->{BEECONTENT}{$query};
# print Dumper \@tmp, $beepkg;
# $rec =~ s/\s+->\s+$tgtname// if defined $tgtname; # fails on libFLAC++.so.6.3.0 and friends
$rec =~ s/\s+->\s+.+$// if defined $tgtname;
printf "%-82s %s%s\n", join(' ',@tmp) , $beepkg?$beepkg:'N/A', $tgtname?" (link $tgtname)":'';
}
}
} else {
print "# unkown query type ($type).\n";
}
}
sub report_libusage {
my $lib = shift; defined $lib or die 'need an arg';
if (defined $SOU_DB->{EDGES_REV}{$lib}) {
# print Dumper $SOU_DB->{EDGES_REV}{$lib};
print "\n";
printf "# '%s' dependencies:\n", $lib;
my $width = 0;
for my $e ( @{$SOU_DB->{EDGES_REV}{$lib}} ) { $width = length($e) if length($e) > $width }
$width++;
printf "# %s\n", '-'x ($width+42);
map {
printf " %-${width}s %s\n",
$_, exists $SOU_DB->{BEECONTENT}{$_}?join(', ', @{$SOU_DB->{BEECONTENT}{$_}}):'no bee reference found'
} sort @{$SOU_DB->{EDGES_REV}{$lib}};
printf "# %d hit(s)\n", scalar @{$SOU_DB->{EDGES_REV}{$lib}};
print "\n";
return 1;
}
return 0;
}
sub dependency_walk { # not walking yet, rather blunt hash-cramming
my $entity = shift || die '';
my %res;
# map {$res{$_} = 1} @{$SOU_DB->{EDGES_FORW}{$query}}; # bad array
map {$res{$_} = 1} keys %{$SOU_DB->{EDGES_FORW}{$entity}};
# print Dumper $SOU_DB->{EDGES_FORW}{$query};
my $limit=12;
my $old_size = scalar keys %res;
while (0 < $limit--) {
for my $k (keys %res) {
# map {$res{$_}++} @{$SOU_DB->{EDGES_FORW}{$k}}; # bad array
map {$res{$_}++} keys %{$SOU_DB->{EDGES_FORW}{$k}};
}
my $s = scalar keys %res;
last if $s == $old_size;
$old_size = $s;
}
warn "# library search limit exceeded.\n" if $limit<=0;
return \%res;
}
sub classify_libquery {
my $q = shift;
my $filename = ''; # libpthread.so.0
my $path = ''; # /usr/lib
my $libname = ''; # pthread
my $lib_class = 0;
$lib_class |= EXISTS if -e $q;
if ($q =~ m{/}) {
($path,$filename) = $q =~ m{(.*)/([^/]+)$};
die 'wass denn?' unless defined $path and defined $filename;
} else {
$filename = $q;
}
$lib_class |= VALID_PATH if -d $path;
if ( $filename =~ m/^lib(.+)\.so.*?$/ ) {
$libname = $1;
$lib_class |= DECENT_SO;
}
wantarray ?
($filename, $path, $libname, $lib_class):
[$filename, $path, $libname, $lib_class];
}
sub infer_libnames { # resolve libfrzt.so -> /usr/lib/libfrzt.so
my $libname = shift;
my $heuristic = shift || 'guess_by_sound';
my $chopaway = shift;
if ($chopaway) {
$libname =~ s/^lib//;
}
my $result = -1;
if ($heuristic eq 'guess_by_sound') {
# go fuzzy and tell user.
my ($name) = $libname =~ m/^([^\.]+)/;
my $soundex = soundex_nara($name);
if ($soundex) {
print "# showing similiar sounding libs ($soundex):\n\n";
my $libref = $SOU_DB->{SO_SOUNDEX}{$soundex};
if ($libref) {
for my $i (@$libref) { # ['/usr/lib/libgstapp-1.0.so.0.502.0', 'gstapp-1.0'];
# $SOU_DB->{EDGES_REV}{$mangled_query}
my $num_refs = 0;
$num_refs = scalar @{$SOU_DB->{EDGES_REV}{$i->[0]}} if defined $SOU_DB->{EDGES_REV}{$i->[0]};
printf " %-36s %5d refs, %s\n", $i->[0], $num_refs, $i->[1];
map {
$num_refs = 0;
$num_refs = scalar @{$SOU_DB->{EDGES_REV}{$_}} if defined $SOU_DB->{EDGES_REV}{$_};
printf " %-36s %5d refs, symlink\n", $_, $num_refs;
} @{$SOU_DB->{SO_LINKS}{$i->[0]}};
print "\n";
}
}
} else {
die "# soundex failed on '$libname' \n";
}
} elsif ($heuristic eq 'guess_by_glob') {
my $pq;
map { $pq .= "$_/lib$libname* " } @LD_SO_CONF; # mind the space at the end
my @res = glob($pq);
print "# showing results from path search:\n\n";
if (@res) {
for my $file (@res) { # ['/usr/lib/libgstapp-1.0.so.0.502.0', 'gstapp-1.0'];
if (! -f $file and ! -l $file) {
print "# skipping '$file' (neither file nor symlink)\n" if $opts{v};
next;
}
if (defined $SOU_DB->{ELVES}{$file} or defined $SOU_DB->{SYML_ELVES}{$file}) {
my $num_refs = 0;
$num_refs = scalar @{$SOU_DB->{EDGES_REV}{$file}} if defined $SOU_DB->{EDGES_REV}{$file};
if (defined $SOU_DB->{ELVES}{$file}) {
printf " %-36s %5d refs, %s\n", $file, $num_refs, 'elf-file'; # use libname if avail?
} elsif (defined $SOU_DB->{SYML_ELVES}{$file}) {
printf " %-36s %5d refs, symlink\n", $file, $num_refs; # $SOU_DB->{SYML_ELVES}{$file}
} else { die "fuck!\n"; }
} else {
print "# skipping '$file' (unknown, or unreffed)\n" if $opts{v}; # xxx fixme
next;
}
}
}
} elsif ($heuristic eq 'grep') { # other experiments, drop it
# print Dumper keys %{$SOU_DB->{ELVES}}; exit;
my @hits = sort
grep { /$libname/ } (keys %{$SOU_DB->{ELVES}}, keys %{$SOU_DB->{SYML_ELVES}});
print Dumper \@hits;
# =nerve
# /usr/lib/libaspell.so.15.1.5 0 refs, aspell the real thing, is in ELVES
# /usr/lib/libaspell.so 0 refs, symlink not in SYML_ELVES, caus of zero count
# /usr/lib/libaspell.so.15 13 refs, symlink is in SYML_ELVES
# =cut
print Dumper 'syml', $SOU_DB->{SYML_ELVES}{'/usr/lib/libaspell.so.15.1.5'},
$SOU_DB->{SYML_ELVES}{'/usr/lib/libaspell.so.15'},
$SOU_DB->{SYML_ELVES}{'/usr/lib/libaspell.so'};
print Dumper 'elf', $SOU_DB->{ELVES}{'/usr/lib/libaspell.so.15.1.5'},
$SOU_DB->{ELVES}{'/usr/lib/libaspell.so.15'},
$SOU_DB->{ELVES}{'/usr/lib/libaspell.so'};
} else {
die "# can't use '$heuristic' for libname search\n";
}
return $result;
}
sub create_cache {
my $start_dirs = shift || die '# Error: need a dir, or a list separated with :';
for my $dir (split m/:/, $start_dirs) {
-d $dir || die "# Error: not a directory '$dir'";
}
printf "### looking for ELF files (might take a little while).\n" if ! $opts{q};
find_elves($start_dirs);
# print Dumper \%ELVES;die;
# XXX this will turn into a show stopper, when checking different filesystems
%SEVLE_tmp = reverse %ELVES; # hardlinks? what hardlinks ....
# die 'mist!' if (scalar keys %ELVES != scalar keys %SEVLE_tmp);
printf "### checking symlinks.\n" if $opts{v};
find_so_symlinks($start_dirs);
# print Dumper \%SO_LINKS;
printf "### soundex on libnames.\n" if $opts{v};
build_so_soundex();
# print Dumper \%SO_SOUNDEX;
my $num_elves = scalar keys %ELVES;
if (! $opts{q}) {
printf "### building dependency graph with %d files.\n", $num_elves;
$PBAR_1 = TinyProgressBar->new(60,'working',' DONE! ');
$PBAR_1->adjust($num_elves);
$| = 1;
$PBAR_1->print_bar();
}
my $linked_so_prev = -1;
my $linked_so = grow_graph(\%ELVES,1);
$PBAR_1->print_finalbar if $PBAR_1; $PBAR_1 = undef;
printf "### checking symlinks.\n" unless $opts{q};
for (my $i=0; $i<10; $i++) {
my $syml_cnt = scalar keys %SYML_ELVES;
if (! $opts{q} and $syml_cnt>100) {
$PBAR_1 = TinyProgressBar->new(60,'working',' DONE! ');
$PBAR_1->adjust($syml_cnt);
$| = 1;
$PBAR_1->print_bar();
}
$linked_so = grow_graph(\%SYML_ELVES,1);
$PBAR_1->print_finalbar if $PBAR_1; $PBAR_1 = undef;
$|=0;
last if $linked_so == $linked_so_prev;
$linked_so_prev = $linked_so;
}
$|=0;
# die 'aus die maus';
printf "### creating reverse dependency graph with %d files\n", (scalar(keys %SYML_ELVES) + scalar( keys %ELVES) ) if $opts{v};
finish_graph(\%SYML_ELVES);
printf "### reading bee_content.\n" if $opts{v};
read_bee_content();
printf "### writing cache\n" if $opts{v};
$SOU_DB->{start_dirs} = $start_dirs;
$SOU_DB->{ELVES} = \%ELVES;
$SOU_DB->{SYML_ELVES} = \%SYML_ELVES;
$SOU_DB->{SO_LINKS} = \%SO_LINKS;
$SOU_DB->{SO_SOUNDEX} = \%SO_SOUNDEX;
$SOU_DB->{EDGES_FORW} = \%EDGES_FORW;
$SOU_DB->{EDGES_REV} = \%EDGES_REV;
$SOU_DB->{ELVES_32BIT} = \@ELVES_32BIT;
$SOU_DB->{HARDLINKS} = \%HARDLINKS;
$SOU_DB->{DEADLINKS} = \%DEADLINKS;
$SOU_DB->{UNREADABLE} = \@UNREADABLE;
$SOU_DB->{LIBS_MISSING} = \%LIBS_MISSING;
$SOU_DB->{BEECONTENT} = \%BEECONTENT; # hmmm ~ 60 meg, when taken as is, ~4meg when pruned
$SOU_DB->{LD_SO_CONF} = \@LD_SO_CONF;
$SOU_DB->{RPATH_ENTRIES} = \%RPATH_ENTRIES;
$SOU_DB->{AMBIGUOUS_ONES} = \@AMBIGUOUS_ONES;
$SOU_DB->{x_created} = [$TIME_STAMP, time];
$SOU_DB->{DB_VERSION} = $DB_VERSION;
# $SOU_DB->{_READELF_CACHE} = \%_READELF_CACHE; # use with caution
store $SOU_DB, $SOU_STORE if $SOU_DB;
printf "### done.\n" if $opts{v};
}
sub build_so_soundex {
for my $k (keys %SO_LINKS) {
my $name = '';
$k =~ m|/lib([^/]+)\.so| and $name = $1;
next if ! $name; # only 'libfoobar' at the moment, *not* libfoobar.so
my $soundex = soundex_nara($name);
push @{$SO_SOUNDEX{$soundex}}, [$k,$name];
}
}
sub grow_graph {
my $elf_in = shift || die 'hashref needed';
ref $elf_in || die 'hashref needed';
my $gather = shift || 0; # collect symlinked libs
my $cnt=0;
for my $elf_file (sort keys %{$elf_in}) {
my @libs;
my @rpath_add;
my %rpath_register;
$PBAR_1->do_progress() if $PBAR_1;
# if (($cnt++)%5e2 == 0) {
# printf "# %d done\n",$cnt-1 unless $opts{q}; # xxx ugly
# }
# # HACK to avoid re-scanning
# ######################
# if ( defined $SOU_DB->{_READELF_CACHE} and defined $SOU_DB->{_READELF_CACHE}{$f} and (@libs=@{$SOU_DB->{_READELF_CACHE}{$f}}) ) {
# $_READELF_CACHE{$f} = [ @libs ] if @libs;
# } else {
# ######################
# 0x0000000000000001 (NEEDED) Shared library: [librt.so.1]
# 0x000000000000000f (RPATH) Library rpath: [/usr/local/openmotif-2.3/lib]
# 0x000000000000001d (RUNPATH) Library runpath: [/usr/local/lib]
my @readelf = `readelf -d '$elf_file' 2>&1`; die "# Error: readelf failed on '$elf_file'" if $?;
chomp @readelf;
# map { /Shared library:\s+\[([^]]+)\]/ && push @libs, $1 } @_; # beware of libs w ']' inda name...
for my $line (@readelf) {
if ($line =~ m/Shared library:\s+\[([^]]+)\]/ ) {
push @libs, $1;
} elsif ($line =~ m/Library rpath:\s+\[([^]]+)\]/) {
# print "rpath $1\n"; # exit;
# $RPATH_ENTRIES{$1}++;
push @{$RPATH_ENTRIES{$1}}, $elf_file;
map {
if (! exists $LD_SO_CONF_register{$_} && $_ !~ '$ORIGIN' ) {
$rpath_register{$_}++;
}
} split(m/:/, $1);
} elsif ($line =~ m/Library runpath:\s+\[([^]]+)\]/) {
}
}
# treat this one too
# 0x000000000000000f (RPATH) Library rpath: [/usr/local/openmotif-2.3/lib]
# ######################
# $_READELF_CACHE{$f} = [ @libs ] if @libs;
# }
# ######################
@rpath_add = keys %rpath_register;
# print Dumper 'rpath_add', $elf_file, \@rpath_add if @rpath_add;
# test with ld library paths
for my $lib (@libs) {
my @lib_candidates;
for my $p (@LD_SO_CONF,@rpath_add) {
my $i = (stat "$p/$lib")[1];
if (defined $i) {
push @lib_candidates, ["$p/$lib", $i];
}
}
my $cnt= scalar @lib_candidates;
if (0) {
} elsif ($cnt == 0) { # error
$LIBS_MISSING{$lib}+=1;
} elsif ($cnt == 1) { # lovely
my $cand = $lib_candidates[0]->[0];
if ( -l $cand && $gather) { $SYML_ELVES{$cand}++; }
# push @{$EDGES_FORW{$f}}, $cand;
$EDGES_FORW{$elf_file}->{$cand}++;
} elsif ($cnt > 1) { # all the same? what if not? die, or add all entries?
my $inode = $lib_candidates[0]->[1];
my $ok=1;
for (my $i=1; $i<$cnt; $i++) {
if ($inode != $lib_candidates[$i]->[1]) {
$ok=0;
# last;
}
}
if (! $ok) {
# print Dumper 'ERROR: AMBIGUOUS lib entries', $elf_file ,\@lib_candidates;
# die '# bad, look';
push @AMBIGUOUS_ONES, [$elf_file ,@lib_candidates];
# printf "# using first entry '%s'.\n", $lib_candidates[0]->[0];
}
my $cand = $lib_candidates[0]->[0];
if ( -l $cand && $gather) { $SYML_ELVES{$cand}++; }
# push @{$EDGES_FORW{$f}}, $cand;
$EDGES_FORW{$elf_file}->{$cand}++;
}
}
} # for my $elf_file
return scalar keys %SYML_ELVES;
}
sub finish_graph {
for my $k (keys %EDGES_FORW) {
# for my $dep ( @{$EDGES_FORW{$k}} ) {
for my $dep ( keys %{$EDGES_FORW{$k}} ) {
push @{$EDGES_REV{$dep}}, $k;
}
}
}
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# subroutines related to find
sub find_elves {
my $dirs = shift || '/';
# avoid clutter
no warnings; # see /usr/local/lib/perl5/5.12.1/File/Find.pm lines 881 +/-
for my $dir (split m/:/, $dirs) {
warn "# on '$dir'\n"; # xxx debug
find({wanted => \&elves_wanted}, $dir);
}
}
# what links to a real so ?
sub find_so_symlinks {
my $dirs = shift || '/';
# avoid clutter
no warnings;
for my $dir (split m/:/, $dirs) {
find({wanted => \&so_symlinks_wanted}, $dir);
}
}
sub elves_wanted {
my ($dev,$ino,$mode,$nlink,$uid,$gid);
(($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
!($File::Find::prune |= ($dev != $File::Find::topdev)) && # restrict to local fs
# (($mode & 0111) > 0) && # any executable
-f _ && isElf_le64($_) &&
($ELVES{$File::Find::name}="$ino"); # hopefully never 0, and just used for lookup purposes during build
}
sub isElf_le64 {
my $f = shift;
defined $f or croak "# need a filename (isElf_le64)\n";
return 0 unless -e $f;
if (! -r _) {
print "# can not read '$f' ($File::Find::name)\n" if $opts{v};
push @UNREADABLE, $File::Find::name;
return 0;
}
my $probe;
open(F, '<', $f) || die "# $! ($f)";
my $size = read F, $probe, 16;
close F;
return 0 if $size < 16; # too short for a candidate, the 64-bit header is 64 bytes long.
# https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
# 0: 7f45 4c46 0201 0100 0000 0000 0000 0000 .ELF............
# 0: 7f45 4c46 0201 0103 0000 0000 0000 0000 .ELF............
my ($p1,$p2,$p3,$p4) = unpack('N4', $probe);
return 0 if $p3 || $p4;
if ($p1 == 0x7f454c46 && ($p2 == 0x02010100 || $p2 == 0x02010103)) { # System V or Linux
return 1; # looks like a 64bit elf.
}
# 32bit elves?
if ($p1 == 0x7f454c46 && ($p2 & 0x01000000) && $opts{3}) {
push @ELVES_32BIT, $File::Find::name;
return 0;
}
return 0; # bad luck
}
sub so_symlinks_wanted {
my ($dev,$ino,$mode,$nlink,$uid,$gid);
(($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
!($File::Find::prune |= ($dev != $File::Find::topdev)) && # restrict to local fs
-l _ && so_readlink()
# push @LINKS, $File::Find::name;
}
# we enter with any symlink
sub so_readlink {
my ($dev,$ino,$mode,$nlink,$uid,$gid) = stat($File::Find::name); # stat again ...
die '# oh my god...' if $?;
if (! defined $ino) {
$DEADLINKS{$File::Find::name}++;
return 0;
}
return 0 unless defined $SEVLE_tmp{$ino};
die 'hmmm' if exists $SO_LINKS{$File::Find::name}; # should never happen
# $SO_LINKS{$File::Find::name} = $full;
push @{$SO_LINKS{$SEVLE_tmp{$ino}}}, $File::Find::name;
1;
}
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# utility subroutines
sub same_file {
(stat shift)[1] == (stat shift)[1];
}
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# peters ldd_bee slaughtered
# slurp all bee content
sub read_bee_content {
for my $bee (</usr/share/bee/*>) {
next unless -e "$bee/CONTENT";
$bee=~/.*\/(.*)/;
my $b = $1;
open I,'<',"$bee/CONTENT" or die;
while (<I>) {
next if m/^type=directory/;
if (m{:file=(.*)}) {
$_=$1;
### XXX option wanted
# don't store every file
# $_ =~ m{\.so[0-9\.]*$} || exists $ELVES{$_} || next;
s{//.*}{}; # unsymlink, unhardlink
push @{$BEECONTENT{$_}},$b;
}
}
close I;
}
return 1;
}
sub ldd_bee_prepare {
my $libs = shift;
for my $l (@$libs) {
print "# $l\n";
if ( exists( $SOU_DB->{BEECONTENT}{$l}) ) {
map { $BEE_TO_LIBS{$_}{$l}++ } @{$SOU_DB->{BEECONTENT}{$l}};
}
else {
warn "## $l not found in any bee file\n";
}
}
return 1;
}
# xxx place a time summary ?
sub ldd_bee_report {
my $b = 0;
for my $bee ( sort keys %BEE_TO_LIBS ) {
print "\n";
my ($beefile) = </usr/share/bee/$bee/*.bee>;
my $mt = (stat($beefile))[9];
print "PKGALLPKG[$b]=$bee\n";
print " BEEFILE[$b]=$beefile\n";
printf " BEETIME[$b]=%d # %s (%s)\n",$mt,years(time-$mt,2),scalar(localtime($mt));
my $bc = 0;
for my $lib ( sort keys %{$BEE_TO_LIBS{$bee}} ) {
print " BEEF[$b][$bc]=$lib\n";
$bc++;
}
$b++;
}
}
# duration in pretty format
sub years {
my $s = shift;
my $n = shift;
if ($s == 0) {
return '0 secs';
}
my @T=();
my $t = int($s/(365*60*60*24));
push @T, ($t > 0) ? $t : 0; # months
$s -= $t*int(365.25*60*60*24);
$t = int($s/(30.42*60*60*24));
push @T, ($t > 0) ? $t : 0; # months
$s -= $t*int(30.42*60*60*24);
$t = int($s/(60*60*24));
push @T, ($t > 0) ? $t : 0; # days
$s -= $t*(60*60*24);
$t = int($s/(60*60));
push @T, ($t > 0) ? $t : 0; # hrs
$s -= $t*(60*60);
$t = int($s/(60));
push @T, ($t > 0) ? $t : 0; # min
$s -= $t*60;
$t = int($s);
push @T, ($t > 0) ? $t : 0; # sec
my @L;
for my $x ('yrs','mon','days','hrs','min','secs') {
my $y = shift @T;
if ($y != 0) {
if (defined($n)) {
$n--;
last if ($n < 0);
}
push @L,"$y $x";
}
}
return join(' ',@L);
}
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# tiny gadget
{ # hmmm do the '$|=1' here ?
package TinyProgressBar;
use strict;
sub new {
my $type = shift;
my $len = shift || 70;
my $label = shift || 'working';
my $label_end = shift || uc($label);
my $self = {
BARCHARS => ["\b/", "\b|", "\b\\", "\b-", "\b/", "\b|", "\b\\", "\b-", "\b.\b"], # 9 items
STEPS => 0,
BAR => '',
FINALBAR => '',
ITEMS => 0,
PSTEP_0 => 0,
PSTEP_CUR => 0,
PSTEPS_DONE => 0,
_LEN => $len,
_LABEL_LEN => length($label)+2,
_N_BARCHARS => 0,
};
# '`~"
# $self->{BARCHARS} = ["'\b", "`\b", "~\b", "\"\b", "'\b", "`\b", "~\b", "\"\b", ".\b\b"];
# +x*X o0@O
$self->{BARCHARS} = ["+\b", "x\b", "*\b", "X\b", "+\b", "x\b", "*\b", "X\b", ".\b\b"];
$self->{_N_BARCHARS} = scalar @{$self->{BARCHARS}};
$self->{BAR} = sprintf "[%s]%s", $label, '-' x $len;
$self->{FINALBAR} = sprintf "[%s]%s", $label_end, '.' x $len;
bless $self, $type;
}
sub print_bar {
my $self = shift;
print STDOUT $self->{BAR};
}
sub print_finalbar {
my $self = shift;
my $leftovers = $self->{_LEN} - int($self->{PSTEPS_DONE}/$self->{_N_BARCHARS});
$leftovers=0 if $leftovers < 0;
print STDOUT "\b" x ($leftovers + $self->{_LABEL_LEN} + 1);
# print "\n<< Debug >>\n";
print STDOUT $self->{FINALBAR} . "\n";
#printf "# stepped %d -- PSTEP_0: %d\n", $self->{PSTEPS_DONE}, $self->{PSTEP_0};
#printf "# distance: %f leftovers $leftovers\n", $self->{PSTEPS_DONE}/$self->{_N_BARCHARS};
}
sub adjust {
my $self = shift;
my $items = shift;
$self->{PSTEP_0} = int($items/($self->{_N_BARCHARS}*$self->{_LEN}) ) - 1;
$self->{PSTEP_0} = 1 if $self->{PSTEP_0}<=0;
}
sub do_progress {
my $self = shift;
if ($self->{PSTEP}++ >= $self->{PSTEP_0}) {
$self->bar_step();
$self->{PSTEP}=0;
$self->{PSTEPS_DONE} += 1;
}
}
sub bar_step {
my $self = shift;
print STDOUT $self->{BARCHARS}[$self->{STEPS} % $self->{_N_BARCHARS}];
$self->{STEPS}++;
}
}