From 96bdc489f25371385af0329cb3e9d0379aae0c2f Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 4 Apr 2025 09:27:54 +0200 Subject: [PATCH] pbackup: Only dedup with jobs with same id Only dedup with jobs with the same jobid, not with jobs with a different jobid but the same name or the same distmaster family. While there is a win in used space from these extra deduplications, there is also a reason to keep the additional redundancy, because data can be corrupted when a project is moved or during dist. Additionally, the multiple database queries significantly slow down the "search for the next job to do" loop. This is more relevant for the search for FULL backups, because typically there are no FULL backups to be done during normal operation and the process has to walk over all jobs. --- bin/pbackup | 51 +-------------------------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/bin/pbackup b/bin/pbackup index 121441b..35e077e 100755 --- a/bin/pbackup +++ b/bin/pbackup @@ -875,12 +875,8 @@ sub distmaster { } sub find_linkfrom { - my ($volume_id, $job_id, $job_name, $stat_started, $min_generation) = @_; my $linkfrom_stat_rowid; - - # 1 : same job_id - ($linkfrom_stat_rowid) = $dbh->selectrow_array(<<'EOF', undef, $job_id, $volume_id, $min_generation, $stat_started); SELECT stat.rowid FROM stat WHERE stat_job_id = ? @@ -889,52 +885,7 @@ AND stat_generation >= ? ORDER BY ABS(stat_started - ?) EOF - return $linkfrom_stat_rowid if defined $linkfrom_stat_rowid; - - # 2 : same job_name - - ($linkfrom_stat_rowid) = $dbh->selectrow_array(<<'EOF', undef, $job_name, $volume_id, $min_generation, $stat_started); -SELECT stat.rowid FROM stat,job -WHERE stat_job_id = job_id -AND job_name = ? -AND stat_volume_id = ? -AND stat_generation >= ? -ORDER BY ABS(stat_started - ?) -EOF - - return $linkfrom_stat_rowid if defined $linkfrom_stat_rowid; - - return undef; # when distmaster() ist working.... 
- - # 3 : try same sys_ or usr_ family - - $job_name =~ /^(sys|usr)_(.+)/ or return undef; - my ($prefix, $hostname) = ($1, $2); - my $distmaster = distmaster($hostname); - - my $sth = $dbh->prepare(<<'EOF'); -SELECT stat.rowid, job_name FROM stat,job -WHERE - stat_job_id = job_id - AND job_name GLOB ? - AND stat_volume_id = ? -AND stat_generation >= ? -ORDER BY ABS(stat_started - ?) -EOF - $sth->execute($prefix . '_*', $volume_id, $min_generation, $stat_started); - while (my $row = $sth->fetchrow_arrayref) { - my $linkfrom_job_name; - ($linkfrom_stat_rowid, $linkfrom_job_name) = @$row; - $linkfrom_job_name =~ /^(sys|usr)_(.+)/ or die; - my ($linkfrom_prefix, $linkfrom_hostname) = ($1, $2); - my $linkfrom_distmaster = distmaster($linkfrom_hostname); - if ($linkfrom_distmaster eq $distmaster) { - $sth->finish(); - return $linkfrom_stat_rowid; - } - } - - return undef; + return $linkfrom_stat_rowid; } sub cmd_disable {