Skip to content

Commit

Permalink
quickcheck: Cosmetics
Browse files Browse the repository at this point in the history
  • Loading branch information
donald committed Apr 7, 2025
1 parent 1030ca3 commit 52308a5
Showing 1 changed file with 104 additions and 108 deletions.
212 changes: 104 additions & 108 deletions bin/quickcheck.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,147 +2,143 @@
use strict;
use warnings;

# Short host name of this machine.  It selects the per-host pbackup project
# directory below; the script exits with status 1 if hostname(1) fails.
our $hostname = `/bin/hostname -s`;
$? and exit 1;
chomp($hostname);

# SQLite database file of the pbackup instance on this host.
our $DB = "/project/pbackup_$hostname/db/pbackup.db";

# Number of failed checks so far; incremented by fail().
my $fail_count = 0;

# Report a passed check: print "OK <message>" on stdout.
sub ok {
    my ($message) = @_;
    print "OK $message\n";
}
# Report a failed check: print "FAIL <message>" on stdout and count the
# failure in the file-level $fail_count.
sub fail {
    my ($message) = @_;
    print "FAIL $message\n";
    $fail_count++;
}

# Run the sqlite3 command-line shell on database file $db with the single
# statement $sql and return everything it wrote to stdout as one string.
#
# The command is started with a list-form pipe open, i.e. without a shell,
# so $sql is handed over as one argument.  Exit status 5 is treated as
# "database locked": the call sleeps one second and tries again, and dies
# with "database locked timeout" after 60 failed attempts.  Any other
# non-zero status dies with "sqlite3 failed".
sub sqlite3 {
    my ($db, $sql) = @_;
    my $retry = 0;
    while (1) {
        # A lexical handle keeps this pipe private to the sub instead of
        # sharing one global bareword handle with the other checks.
        open my $pipe, '-|', 'sqlite3', $db, $sql or die "$!\n";
        my $ret = join('', <$pipe>);
        close $pipe;    # waits for the child and sets $?
        $? or return $ret;
        $? >> 8 == 5 or die "sqlite3 failed\n";
        ++$retry < 60 or die "database locked timeout\n";
        warn "(sleep and retry)\n";
        sleep 1;
    }
}

# Check that the pbackup worker processes are running.
#
# Scans the command lines printed by `ps -Aoargs` for perl processes running
# "pbackup balance", "pbackup expire" or "pbackup do_jobs" and reports
# OK/FAIL: at least one expire, at least one balance, at least two do_jobs.
sub check_processes {
    my %running = (expire => 0, balance => 0, do_jobs => 0);

    open my $ps, '-|', 'ps -Aoargs' or die "$!\n";
    while (my $args = <$ps>) {
        # The capture is the pbackup sub-command; use it right away as the
        # counter key.
        $args =~ m{^\S*/perl \S*pbackup (balance|do_jobs|expire)}
            and $running{$1}++;
    }
    close $ps;

    my $backup = $running{do_jobs};
    $running{expire}  >= 1 ? ok("expire running once")      : fail("expire not running");
    $running{balance} >= 1 ? ok("balance running once")     : fail("balance not running");
    $backup           >= 2 ? ok("backup running two times") : fail("backup not running two times (no: $backup)");
}

# Return the available space of the filesystem containing $path, in units
# of 1024 bytes, as reported by df(1).
#
# df is run with -k -P: -P requests the POSIX output format, which keeps
# each filesystem on a single line even when the device name is long, so
# the "available" value is always the fourth field of the second line.
# If df prints no usable data line (e.g. the path does not exist) a warning
# is issued and 0 is returned, so callers can keep doing arithmetic on the
# result.
sub df {
    my ($path) = @_;

    # List-form pipe open: no shell involved, $path is passed verbatim.
    open my $pipe, '-|', 'df', '-k', '-P', $path or die "$0: $!\n";
    my @lines = <$pipe>;    # header line, data line; reads df's output to EOF
    close $pipe;

    my $avail;
    $avail = (split ' ', $lines[1])[3] if defined $lines[1];
    unless (defined $avail && $avail =~ /^\d+$/) {
        warn "$0: no usable df output for $path\n";
        return 0;
    }
    return $avail;
}


# Check the free space on the backup data volumes.
#
# Every directory matching /project/pbackup_<host>/data/C* is treated as one
# volume.  df() reports 1024-byte blocks, so dividing by 1024 three times
# gives the value that the messages label "TB".  Reports FAIL when
#   - a volume has less than 2 TB free,
#   - no volume has more than 5 TB free,
#   - the free space summed over all volumes is not above 20 TB.
sub check_space {
    my $sum          = 0;
    my $max_free_vol = 0;

    for my $vol (glob("/project/pbackup_$hostname/data/C*")) {
        my $free = df("$vol/.") / 1024 / 1024 / 1024;
        my $dfp  = sprintf('%5.2f', $free);
        $free >= 2 ? ok("$vol over 2 TB ($dfp TB)") : fail("$vol below 2 TB ($dfp TB)");
        $max_free_vol = $free if $free > $max_free_vol;
        $sum += $free;
    }

    my $dfp = sprintf('%5.2f', $max_free_vol);
    $max_free_vol > 5
        ? ok("more than 5 TB free on a volume (max $dfp TB free)")
        : fail("no volume has over 5 TB free (max: $dfp TB free)");

    $dfp = sprintf('%5.2f', $sum);
    $sum > 20
        ? ok("total free space over 20 TB ($dfp TB)")
        : fail("total free space below 20 TB ($dfp TB)");
}

# Check how many enabled jobs are currently marked as failed.
#
# Counts the rows of table "job" with job_enabled=1 and job_ok=0 whose name
# does not start with "sys_" and reports FAIL when there are 25 or more.
sub check_failed_jobs {
    # The LIKE pattern is a single-quoted SQL string literal: double-quoted
    # literals are a legacy SQLite extension that newer sqlite3 shells may
    # reject as an unknown column name.
    my $failed = sqlite3($DB, q{select count(*) from job where job_enabled=1 and job_ok=0 and job_name not like 'sys_%'});
    chomp($failed);
    $failed < 25
        ? ok("less then 25 active jobs failed ($failed failed)")
        : fail("over 25 active jobs failed ($failed failed)");
}


# Check the most recent run of the backup job $job_name.
#
# Looks up the newest row (by stat_started) in table "stat" for the job and
# reports FAIL when
#   - that run started more than 36 hours ago, or
#   - it transferred fewer than $expected_files files.
# A job without any stat row is reported as one FAIL.
sub check_job {
    my ($job_name, $expected_files) = @_;

    # Embed the name as a single-quoted SQL literal, doubling any quote in
    # it.  Double-quoted literals are a legacy SQLite extension that newer
    # sqlite3 shells may reject.
    (my $sql_name = $job_name) =~ s/'/''/g;
    my $line = sqlite3($DB, qq{select stat_started,stat_files_transferred from stat where stat_job_id in (select job_id from job where job_name='$sql_name') order by stat_started desc limit 1});
    chomp($line);

    # No row at all: say so instead of computing an age from undef.
    if ($line eq '') {
        fail("backup $job_name has no recorded run");
        return;
    }

    # sqlite3 prints the columns separated by "|".
    my ($started, $files_transferred) = split /\|/, $line;

    my $hours = (time - $started) / 60 / 60;
    my $p     = sprintf('%5.2f', $hours);
    $hours <= 36
        ? ok("backup $job_name is not older than 36 hours ($p hours)")
        : fail("backup $job_name is older than 36 hours ($p hours)");
    $files_transferred >= $expected_files
        ? ok("last backup $job_name transferred over $expected_files files ($files_transferred files)")
        : fail("last backup $job_name transferred less than $expected_files files ($files_transferred files)");
}


# Check that no job listed in table "upid" has been running for too long.
#
# Each row gives a status text and a start time (upid_since, epoch seconds).
# Rows whose text contains "idle, waiting" are ignored.  Limits by text
# prefix:
#   "BACKUP I" (incremental), "EXPIRE"               : 10 hours
#   "BACKUP F" (full), "BALANCE", "BACKUP %" (refresh): 72 hours
# Every job over its limit is reported with fail(); the summary OK line is
# printed only when none was found.
sub check_progress {
    my $lines = sqlite3($DB, 'SELECT upid_pid,upid_text,upid_since FROM upid');

    # Set as soon as one overdue job has been reported.  It has to be
    # recorded here, otherwise the final ok() would be printed even after
    # failures.
    my $fail = 0;
    my $overdue = sub {
        my ($message, $since) = @_;
        fail("$message since " . localtime($since));
        $fail = 1;
    };

    for my $line (split "\n", $lines) {
        # sqlite3 prints the columns separated by "|"; the pid is not used.
        my (undef, $text, $since) = split /\|/, $line;
        (defined $text && defined $since) or next;    # skip incomplete rows
        $text =~ /idle, waiting/ and next;

        my $hours = (time - $since) / 60 / 60;
        $hours > 10 or next;
        if ($text =~ /^BACKUP I/) {
            $overdue->("INCREMENTAL job running for over 10 hours: $text", $since);
        } elsif ($text =~ /^EXPIRE/) {
            $overdue->("EXPIRE job running for over 10 hours: $text", $since);
        }

        $hours > 72 or next;
        if ($text =~ /^BACKUP F/) {
            $overdue->("FULL job running for over 3 days $text", $since);
        } elsif ($text =~ /^BALANCE/) {
            $overdue->("BALANCE job running for over 3 days : $text", $since);
        } elsif ($text =~ /^BACKUP %/) {
            $overdue->("REFRESH job running for over 3 days $text", $since);
        }
    }

    $fail or ok('all known jobs in their time limits');
}

# Run all checks in order.  Each check prints its own OK/FAIL lines.
check_processes();
check_space();

# Per-host job checks: job name and the minimum number of files the last
# run must have transferred.
check_job('cfdl_imapspool_2', 1600) if $hostname eq 'done';
check_job('home_vingron', 0)        if $hostname eq 'gone';
check_job('sys_void', 10)           if $hostname eq 'null';
check_job('sys_null', 10)           if $hostname eq 'void';
check_job('sys_gone', 10)           if $hostname eq 'done';
check_job('sys_done', 10)           if $hostname eq 'gone';

check_failed_jobs();
check_progress();

0 comments on commit 52308a5

Please sign in to comment.