From 4439dadb1e3b4a1feda0df825fc5052cb78f78ec Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 29 Jan 2025 12:20:11 +0100 Subject: [PATCH 1/5] clusterd: Reap children Currently we leave zombies after udp_rx_push2() and possibly in other places. Reap children in the main loop. --- clusterd/clusterd | 1 + 1 file changed, 1 insertion(+) diff --git a/clusterd/clusterd b/clusterd/clusterd index afcb828..2ce4626 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -283,6 +283,7 @@ sub run { for (@WRITER) { vec($wvec,$_->[0]->fileno,1)=1 } ; for (@EXCEPT) { vec($evec,$_->[0]->fileno,1)=1 } ; + wait; my $ready=select($rvec,$wvec,$evec,1); if ($ready>0) { for (my $i=0;$i<@READER;$i++) { From afb494681e2056d0a869af2c561c8260d91eb430 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 29 Jan 2025 12:29:37 +0100 Subject: [PATCH 2/5] clusterd: close STDIN in daemon When in daemon mode, close stdin right away. This avoids the need to close stdin in various places after a fork. --- clusterd/clusterd | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 2ce4626..021e292 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -884,7 +884,6 @@ sub udp_rx_exec { defined $pid or exit 1; $pid and exit; - open STDIN,'<','/dev/null'; open STDOUT,'>','/dev/null'; open STDERR,'>','/dev/null'; alarm(60); @@ -903,7 +902,6 @@ sub udp_rx_exec2 { return; } if ($pid == 0) { - open STDIN,'<','/dev/null'; alarm(60); chdir '/'; for my $cmd (@cmd) { @@ -1412,7 +1410,6 @@ sub run_cmd { warn "exec ".join(' ',@cmd)."\n"; $opipe->writer(); $epipe->writer(); - open STDIN,'<','/dev/null'; open STDOUT,'>&',$opipe; open STDERR,'>&',$epipe; exec @cmd; @@ -2021,7 +2018,6 @@ FILE: } } } - open STDIN, '<', '/dev/null'; chdir '/'; alarm(60); for my $cmd (@$post_ary) { @@ -2167,6 +2163,7 @@ if (defined $options{'push'}) { udp_broadcast_message($donald_s,'reexport'); } elsif (defined $options{'daemon'}) { $SIG{PIPE}='IGNORE'; + open 
STDIN,'<','/dev/null'; $donald_s=new My::Select::INET(Proto=>'udp',Broadcast=>1,LocalPort=>$UDP_PORT) or die "$!\n"; $donald_s->receive_data(\&udp_message,$donald_s); From 3e1cff57b2dc5a4f8cd0245346751685e7ba7f97 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 29 Jan 2025 12:34:57 +0100 Subject: [PATCH 3/5] clusterd: Do not return ignored values from clp_rx_ functions --- clusterd/clusterd | 2 -- 1 file changed, 2 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 021e292..7d33865 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1391,7 +1391,6 @@ sub clp_rx_LSOF { } close $socket; wait; - return 1; } sub run_cmd { @@ -1538,7 +1537,6 @@ sub clp_rx_CMD { my ($socket,@args)=@_; run_cmd($socket,@args); close $socket; - return 1; } # send_tcp_cp($socket,$cb,$timeout,@args) From 1264eb272f8f2d03d80c4aefc69cff34b035a2af Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 29 Jan 2025 12:51:35 +0100 Subject: [PATCH 4/5] clusterd: Wait for specific pids Now that we accept random children to exist and exit at any time, use waitpid() instead of wait() when we wait for a specific child somewhere else in the code. This change revealed an error in udp_rx_exec2() which might have executed /sbin/make-automaps redundantly. Fix that. This is not tested or analyzed very much, but udp_rx_exec2() is obsolete and should go away soon anyway. --- clusterd/clusterd | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 7d33865..5d82389 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -783,7 +783,7 @@ sub push_amd_tar { exec 'tar','cf',$filename,'.'; die "$!\n"; } - wait; + waitpid $pid, 0; $? and return; my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; @@ -796,7 +796,7 @@ sub push_amd_tar { exec 'gzip','-f',$filename; die "$!\n"; } - wait; + waitpid $pid, 0; $?
and return; $filename='/tmp/amd.tar.gz'; @@ -891,7 +891,6 @@ sub udp_rx_exec { exec '/bin/sh','-c',$CMD{$cmd}; exit 1; } - wait; } sub udp_rx_exec2 { @@ -973,13 +972,13 @@ sub udp_rx_amdtardata { exec 'tar','xzf',$st_want->name; die "$!\n"; } - } - wait; - $? and return; + waitpid $pid, 0; + $? and return; - warn "installed /etc/amd - ",Digest::MD5::md5_hex($digest),"\n"; - $INSTALLED_DIGEST=$digest; - system '/sbin/make-automaps'; + warn "installed /etc/amd - ",Digest::MD5::md5_hex($digest),"\n"; + $INSTALLED_DIGEST=$digest; + system '/sbin/make-automaps'; + } } our ($machine,$SYS_lchown,$SYS_mknod); @@ -1390,7 +1389,7 @@ sub clp_rx_LSOF { exit; } close $socket; - wait; + waitpid $pid, 0; } sub run_cmd { From 9a2eaf985ebc8566bb15ff9a3e50c908c8a69fe1 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 29 Jan 2025 12:59:43 +0100 Subject: [PATCH 5/5] clusterd: Remove double-forks In two places we used double forks, probably so that we don't need to reap. Now children are reaped in the main loop, so remove the extra forks. --- clusterd/clusterd | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 5d82389..8e53815 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -880,10 +880,6 @@ sub udp_rx_exec { return; } unless ($pid) { - $pid=fork; - defined $pid or exit 1; - $pid and exit; - open STDOUT,'>','/dev/null'; open STDERR,'>','/dev/null'; alarm(60); @@ -1366,30 +1362,24 @@ sub clp_rx_LSOF { return; } unless ($pid) { - my $pid=fork; - defined $pid or die "$!\n"; - unless ($pid) { - $socket->blocking(1); - # -n inhibits the conversion of network numbers to host names for network files. - # -b causes lsof to avoid kernel functions that might block - lstat(2), readlink(2), and stat(2). - # -w disables warning messages. - open P,'timeout -k 92s 90s lsof -n -b -w|' or die "$!\n"; - while (

<P>) { - next if defined $pattern && index($_,$pattern)<0; - $socket->send(pack('n',length($_)).$_,0); - } - close P; - if ($?) { - $_=sprintf("** lsof timout/error on %s\n",$my_hostname); - $socket->send(pack('n',length($_)).$_,0); - } - close $socket; - exit; + $socket->blocking(1); + # -n inhibits the conversion of network numbers to host names for network files. + # -b causes lsof to avoid kernel functions that might block - lstat(2), readlink(2), and stat(2). + # -w disables warning messages. + open P,'timeout -k 92s 90s lsof -n -b -w|' or die "$!\n"; + while (

<P>) { + next if defined $pattern && index($_,$pattern)<0; + $socket->send(pack('n',length($_)).$_,0); + } + close P; + if ($?) { + $_=sprintf("** lsof timout/error on %s\n",$my_hostname); + $socket->send(pack('n',length($_)).$_,0); } + close $socket; exit; } close $socket; - waitpid $pid, 0; } sub run_cmd {