From 9137fb9b7147ad0c7ad710b1e3521b11ae2c817f Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 5 Oct 2015 15:45:27 +0200 Subject: [PATCH 01/55] clusterd: initial --- clusterd/clusterd | 1456 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1456 insertions(+) create mode 100755 clusterd/clusterd diff --git a/clusterd/clusterd b/clusterd/clusterd new file mode 100755 index 0000000..df9c731 --- /dev/null +++ b/clusterd/clusterd @@ -0,0 +1,1456 @@ +#! /usr/local/bin/perl + +use warnings; +use strict; + +# $Header: /home/buczek/cluster/clusterd,v 1.101 2013/11/20 19:55:35 root Exp $ + +our ($RCS_REVISION)='$Revision: 1.101 $'=~/([\d.]+)/; + +#use lib ('/home/buczek/cluster/Donald/blib/lib'); + +use Donald::Tools qw(encode sign check_sign decode); +use Donald::Select; +use Donald::FileInfo; +use Donald::Select::INET; +use POSIX; +use IO::Pipe; + + +#-------------------------------------- +package My::Cluster::Updown; + +# +# monitor nodes +# + + +our %H; # ( name=> [state , last_seen , expect_seq , @data ] , ... ) +our $MONITOR_STARTING=1; + +our %DUPLICATES; + +# data currently: +# 0: float load-average +# 1: clusterd version +# 2: processor load +# 3: processor capacity +# 4: string unix version + + +use Storable; + +sub save_state { + store \%H,'/root/clusterd.monitor.state'; +} + +sub restore_state { + + -e '/root/clusterd.monitor.state' and %H=%{retrieve '/root/clusterd.monitor.state'}; + for (values %H) { + $_->[1]=0; + } +} + + +sub status { + my ($f)=@_; + my (@up,@down); + for (sort keys %H) { push @up, $_ if $H{$_}->[0] eq 'UP' } + for (sort keys %H) { push @down,$_ if $H{$_}->[0] eq 'DOWN' } + + $f->printf("UP (%d): %s\n\n",scalar(@up),join(' ',@up)); + $f->printf("DOWN (%d): %s\n\n",scalar(@down),join(' ',@down)); + + my (@k,$max); + + @k=sort({$H{$b}->[3] <=> $H{$a}->[3]} grep({$H{$_}->[0] eq 'UP'} (keys %H))); + $max=0; + $f->print("TOP 10: "); + for (@k) { + $f->print(sprintf "%s (%4.2f) ",$_,$H{$_}->[3]); + last if $max++>=10; + } + $f->print("\n\n"); + + @k=sort({($H{$b}->[5]||0) <=> ($H{$a}->[5]||0)} grep({$H{$_}->[0] eq 'UP'} (keys %H))); + $max=0; + $f->print("TOP 10 CPU load : "); + for (@k) { + $f->print(sprintf "%s (%3.1f%%) ",$_,($H{$_}->[5]||0)*100); + last if $max++>=10; + } + $f->print("\n\n"); + + @k=sort({($H{$b}->[6]||0) <=> ($H{$a}->[6]||0)} grep({$H{$_}->[0] eq 'UP'} (keys %H))); + $max=0; + my $total_bogomips=0; + $f->print("TOP 10 free capacity : "); + for (@k) { + my $bogo=$H{$_}->[6]||0; + $f->print(sprintf "%s (%3.1f) ",$_,$bogo) if $max++<10; + $total_bogomips+=$bogo + } + $f->printf("\ntotal available bogomips: %d\n\n",$total_bogomips); +} + +sub msg_text { + my ($text)=@_; + warn($text."\n"); + my $time=scalar(localtime); + main::mgmt_print_all($time.': '.$text."\n"); +} + +sub msg_state { + my ($state_old,$state_new,$hostname,$extra)=@_; + msg_text(sprintf "%-4s -> %-4s %-20s %s",$state_old,$state_new,$hostname,$extra); +} + +sub init { + restore_state; + msg_text('node monitor: started. (recovery mode)'); + Donald::Select::timeout(630,sub{$MONITOR_STARTING=0;msg_text('node monitor: recovery finished');}); + Donald::Select::timeout(630,\&timeout_hosts); +} + +sub timeout_hosts { + Donald::Select::timeout_requeue(60); + my $timeout=Donald::Select::time()-1230; # 2x10 minutes + 30 seconds + + for (keys %H) { + my $h=$H{$_}; + if ($h->[0] eq 'UP' && $h->[1]<=$timeout) { + msg_state('UP','DOWN',$_,"timeout!"); + $h->[0]='DOWN'; + } + } + save_state(); +} + + +sub rx_hostannounce { + + # ($hostname,$seq,$load_avg,$opt_version,$opt_pload,$opt_pcapacity,$opt_unixrev) + + my ($host,$seq,@more)=@_; + my ($load_average,$version,$pload,$pcapacity,$unixrev)=@more; + $unixrev ||= '?'; + unless (exists $H{$host}) { + if ($seq==0) { + msg_state('NEW','UP',$host,"discovered new node $version - $unixrev"); + } else { + $MONITOR_STARTING or msg_state('NEW','UP',$host,"discovered running node $version - $unixrev"); + } + $H{$host}=['UP',Donald::Select::time,$seq+1,@more]; + } else { + my $h=$H{$host}; + if ($h->[0] eq 'UP') { + if ($seq == $h->[2]) { + ; + } else { + if ($seq==0) { + # msg_state('UP','UP',$host,"node rebootet $version - $unixrev"); + + } elsif ($seq>$h->[2]) { + $MONITOR_STARTING or msg_state('UP','UP',$host,$seq-$h->[2]." packet(s) lost!"); + } elsif ($seq+1==$h->[2]) { + warn "DUPLICATE from $host\n"; + $DUPLICATES{$host}=($DUPLICATES{$host}||0)+1; + } else { + msg_state('UP','UP',$host,"node rebooted and ".$seq." packet(s) lost!"); + } + } + } else { + if ($seq==0) { + msg_state('DOWN','UP',$host,"node rebooted $version - $unixrev"); + } elsif ($seq==$h->[2]) { + msg_state('DOWN','UP',$host,"sequence error. Node to slow? (seq=$seq,exp=".$h->[2].")"); + } else { + msg_state('DOWN','UP',$host,$seq-$h->[2]." packet(s) lost"); + } + } + @$h=('UP',Donald::Select::time,$seq+1,@more); + } +} + +sub delete_host { + my ($host)=@_; + my $h=delete $H{$host} or return; + msg_text("host $host removed from monitor"); +} + + +#----------------------------------------------------------------------- + +package My::NetlogReceiver; + +our $listen_socket; +our $TCP_MAX=1024; + +our $DAY_LAST_MSG; + +sub day { + my @f=localtime; + return sprintf "%04d%02d%02d",$f[5]+1900,$f[4]+1,$f[3]; +} + + +sub bigben { + my $day=day(); + $day le $DAY_LAST_MSG and return; + warn "NETLOG ==================================================== morning has broken ====\n"; + $DAY_LAST_MSG=$day; +} + +sub bigben_timer { + bigben(); + Donald::Select::timeout_requeue(30); +} + + +sub bigben_init { + $DAY_LAST_MSG=day(); + Donald::Select::timeout(30,\&bigben_timer); + +} + + +sub receive { + my ($socket,$peernode,$bufref)=@_; + my $data; + + bigben(); + + defined $socket->recv($data,$TCP_MAX) or return; + # length $data or warn "$peernode: disconnect\n"; + length $data or return; + + $$bufref.=$data; + + while (1) { + last if length($$bufref)<2; + my $l=unpack('n',$$bufref); + # warn "wait for $l+2 bytes got ".length($$bufref)."\n"; + last if length($$bufref)<2+$l; + my $msg=substr($$bufref,2,$l); + $$bufref=substr($$bufref,2+$l); + $|=1; + warn "NETLOG $msg\n" unless $msg=~/NETLOG/; + } + Donald::Select::reader_requeue(); +} + + +sub connect_request { + Donald::Select::reader_requeue(); + + my $socket=$listen_socket->accept(); + $socket->blocking(0); + + my $peernode=$socket->peerhost; + my $buffer=''; + Donald::Select::reader($socket,\&receive,$socket,$peernode,\$buffer); + # warn "$peernode: connect\n"; +} + + +sub init { + $listen_socket=new IO::Socket::INET(Proto=>'tcp',LocalPort=>1028,Listen=>1,ReuseAddr=>1) or die "$!\n"; + Donald::Select::reader($listen_socket,\&connect_request); + bigben_init(); +} + + + +#------------------------------------------------------------------------ +package main; +use strict; +use IO::File; +use Sys::Syslog; +use IO::Socket::INET; +use Data::Dumper; + +our $UDP_MAX=1472; # for broadcast on alphas +our $UDP_PORT=234; +our $BC_RATE=8; # packets per second broadcast + +our (%options); # RUN OPTIONS + +our $donald_s; # Donald::Select::INET udp socket + +our $my_hostname; +our $my_ip; # '141.14.12.12' +$my_hostname=lc `/bin/hostname`; +chomp($my_hostname); +$my_hostname =~ s/\.molgen\.mpg\.de$//; + + +while (1) { + my $addr=inet_aton($my_hostname); + if(defined $addr) { + $my_ip=inet_ntoa(inet_aton($my_hostname)); + } + last if defined $my_ip; + my $once; + unless ($once) { + warn "no IP (yet)= - waiting\n"; + $once++; + } + sleep 30; +} + + +our $my_unixrev; +$my_unixrev=`uname -r`; +chomp($my_unixrev); + +our $CLUSTER_PW; +our $CLUSTER_PW_FILE='/etc/clusterd.password'; +our $OLD_CLUSTER_PW_FILE='/root/clusterd.password'; +our $CLUSTER_PW_TIMESTAMP=0; + + +$ENV{'PATH'} = '/usr/local/bin:/sbin:/usr/sbin:/bin'.($ENV{PATH}?':'.$ENV{PATH}:''); # for amq , ps , tar (gnu!) + + +#---------------------------------------------------------- UDP + +our ($udp_peer_addr,$udp_peer_port); # ('141.14.12.12',1234) + +our %UDP_HANDLER= +( + 'filedata' => \&udp_rx_filedata, + 'amdtardata' => \&udp_rx_amdtardata, + 'loadavg.2' => \&udp_rx_loadavg2, + 'restart' => \&udp_rx_restart, + 'log' => \&udp_rx_log, + 'exec' => \&udp_rx_exec, +); + + +sub udp_message { + my ($data,$x_udp_peer_addr,$x_udp_peer_port,$donald_s)=@_; + + ($udp_peer_addr,$udp_peer_port)=($x_udp_peer_addr,$x_udp_peer_port); + + defined $CLUSTER_PW or return; + + my ($handler_name,@args)=decode(check_sign($CLUSTER_PW,$data)) or return; + $UDP_HANDLER{$handler_name}->(@args) if exists $UDP_HANDLER{$handler_name}; +} + + +sub udp_send_message { # udp_send_message( dst, @args) # dst='141.14.31.255' 'zork' '141.14.16.1' etc. data is anything + my ($ip,@args)=@_; + defined $CLUSTER_PW or return; + $donald_s->send_data($ip,$UDP_PORT,sign($CLUSTER_PW,encode(@args))); +} + + + +#---------------------------------------------------------- + +sub push_amd_tar { + my ($donald_s)=@_; + my $filename='/tmp/amd.tar'; + + my $pid=fork; + defined $pid or return warn "$!\n"; + unless($pid) { + chdir '/etc/amd' or die "/etc/amd: $!\n"; + exec 'tar','cf',$filename,'.'; + die "$!\n"; + } + wait; + $? and return; + + + my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; + my $digest=Digest::MD5->new->addfile($fh)->digest; + warn "tar digest is ",Digest::MD5::md5_hex($digest),"\n"; + + $pid=fork; + defined $pid or return warn "$!\n"; + unless($pid) { + exec 'gzip','-f',$filename; + die "$!\n"; + } + wait; + $? and return; + + $filename='/tmp/amd.tar.gz'; + + my $st=Donald::FileInfo->lstat($filename); + defined $st or return warn "$filename: $!\n"; + $st->type eq 'F' or return warn "$filename: not a plain file\n"; + + $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; + + my $i=0; + for (my $pos=0;$pos<$st->size;$pos+=1024) { + my $data; + defined $fh->sysread($data,1024) or return warn "$filename: $!\n"; + # warn "send bytes $pos to ",$pos+length($data),"\n"; + udp_broadcast_message($donald_s,'amdtardata',$st,$pos,$data,$digest); + ++$i % $BC_RATE or sleep 1; + } +} + + + +sub push_file { + my ($donald_s,$filename)=@_; + + $filename =~ m"^/" or return warn "$filename: please use absolute path\n"; + + my $st=Donald::FileInfo->lstat($filename); + defined $st or return warn "$filename: $!\n"; + $st->type eq 'F' or return warn "$filename: not a plain file\n"; + $st->size<=40960 or return warn "$filename: to big for broadcast (max 40960 bytes)\n"; + + my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; + my $i=0; + for (my $pos=0;$pos<$st->size;$pos+=1024) { + my $data; + defined $fh->sysread($data,1024) or return warn "$filename: $!\n"; + # warn "send bytes $pos to ",$pos+length($data),"\n"; + udp_broadcast_message($donald_s,'filedata',$st,$pos,$data); + ++$i % $BC_RATE or sleep 1; + } +} + + +our %CMD=( + 'mkmotd'=>'/usr/sbin/mkmotd.pl', +); + + +sub send_exec { + my ($donald_s,$cmd)=@_; + unless (exists $CMD{$cmd}) { + die "available commands: ",join(' , ',keys %CMD),"\n"; + } + udp_broadcast_message($donald_s,'exec',$cmd); +} + + +sub udp_rx_exec { + my ($cmd)=@_; + + warn "exec $cmd\n"; + exists $CMD{$cmd} or return; + + my $pid; + $pid=fork; + unless (defined $pid) { + warn "$!\n"; + return; + } + unless ($pid) { + $pid=fork; + defined $pid or exit 1; + $pid and exit; + + open STDIN,'<','/dev/null'; + open STDOUT,'>','/dev/null'; + open STDERR,'>','/dev/null'; + alarm(60); + chdir '/'; + exec '/bin/sh','-c',$CMD{$cmd}; + exit 1; + } + wait; +} + + + +#------------------------------------------------------------- + + + +sub normalize_seg { # [pos,len],[pos,len],... + my @s=sort {$a->[0] <=> $b->[0]} @_; + + my $i=0; + while ($i<$#s) { + # is element $i joinable with next element + + my $end_0=$s[$i]->[0]+$s[$i]->[1]; + if ($end_0 >= $s[$i+1]->[0] ) { + my $end_1=$s[$i+1]->[0]+$s[$i+1]->[1]; + $s[$i]->[1] = ($end_0>$end_1 ? $end_0 : $end_1)-$s[$i]->[0]; + splice @s,$i+1,1; + } else { + $i++; + } + } + return @s; +} + + +my %RECEIVER; # ( filename => $receiver, .... ) + + +# $receiver : [ st_want , last_rx , io_handle , [ [pos,len] , [pos,len] , ... ] ] + + +sub purge_old_receiver { + while (my ($n,$v)=each %RECEIVER) { + if ($v->[1]+10[0]->name,"\n"; + log_to_stat_target('timeout receiving ',$v->[0]->name); + delete $RECEIVER{$n}; + } + } + Donald::Select::timeout_requeue(60); +} + +#------------------------------------------------------------- + +our $INSTALLED_DIGEST=''; + +our $rx_filedata_done; + +sub udp_rx_amdtardata { + my ($st_want,$pos,$data,$digest)=@_; + + ref($st_want) eq 'My::FileInfo' and bless $st_want,'Donald::FileInfo'; + + ### $digest eq $INSTALLED_DIGEST and $pos==0 and warn "/etc/amd - ",Digest::MD5::md5_hex($digest)," already installed\n"; + $digest eq $INSTALLED_DIGEST and return; + + #### $pos==0 and warn "receiving /etc/amd - ",Digest::MD5::md5_hex($digest),"\n"; + + udp_rx_filedata($st_want,$pos,$data); + if ($rx_filedata_done) { + my $pid=fork; + defined $pid or return warn "$!\n"; + unless($pid) { + chdir '/etc/amd' or die "/etc/amd: $!\n"; + exec 'tar','xzf',$st_want->name; + die "$!\n"; + } + } + wait; + $? and return; + + warn "installed /etc/amd - ",Digest::MD5::md5_hex($digest),"\n"; + $INSTALLED_DIGEST=$digest; + system 'amq','-f'; + system '/sbin/make-automaps'; +} + +sub udp_rx_filedata { + +# set rx_filedata_done as a side effect + + my ($st_want,$pos,$data)=@_; + + ref($st_want) eq 'My::FileInfo' and bless $st_want,'Donald::FileInfo'; + + my $filename=$st_want->name; + my $tmp_filename="$filename.tmp"; + + $rx_filedata_done=0; + + my $st_is=Donald::FileInfo->lstat($st_want->name); + if ($st_is && $st_is->type eq 'F' && $st_is->size==$st_want->size && $st_is->mtime==$st_want->mtime) { + #### $pos==0 and warn " $filename seems to be current\n"; + return; + } + + if (length($data) == $st_want->size) { + # complete file in one broadcast + my $fh=IO::File->new($tmp_filename,O_WRONLY|O_CREAT,0); + defined $fh or return warn "$tmp_filename: $!\n"; + $fh->syswrite($data); + $fh->close; + chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; + chmod $st_want->perm,$tmp_filename or return warn "$tmp_filename: $!\n"; + $st_is and unlink($filename); + rename($tmp_filename,$filename); + utime($st_want->mtime,$st_want->mtime,$filename); + warn "installed $filename\n"; + $rx_filedata_done=1; + return; + } + + length($data) or return; # shouldn't happen. + + + my $receiver=$RECEIVER{$st_want->name}; + + if (defined $receiver) { + if ( $receiver->[0]->size != $st_want->size or $receiver->[0]->mtime != $st_want->mtime ) { + $receiver=undef; + } + } + + unless (defined $receiver) { + # create new receiver + ## warn "start receiving $filename from $udp_peer_addr\n"; + -e $tmp_filename and unlink($tmp_filename); + my $fh=IO::File->new($tmp_filename,O_WRONLY|O_CREAT,0); + defined $fh or return warn "$tmp_filename: $!\n"; + $receiver = [$st_want,Donald::Select::time,$fh,[]]; + $RECEIVER{$filename}=$receiver; + } + + { + # warn "$filename: receive $pos length ",length($data),"\n"; + + # write data ( size cant be 0 here ) + $receiver->[2]->seek($pos,0) or return warn "$tmp_filename: $!\n"; + $receiver->[2]->syswrite($data) or return warn "$tmp_filename: $!\n";; + $receiver->[1]=Donald::Select::time; + my $s=$receiver->[3]=[normalize_seg(@{$receiver->[3]},[$pos,length($data)])]; + + #warn "$filename: receive $pos length ",length($data)," segments now: ",join(",",map( {'['.$_->[0].','.$_->[1].']'} @{$receiver->[3]})),"\n"; + + # all there ? + + if (@$s == 1 && $s->[0]->[0]==0 && $s->[0]->[1]==$st_want->size) { + $receiver->[2]->close; + chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; + chmod $st_want->perm,$tmp_filename or return warn "$tmp_filename: $!\n"; + $st_is and unlink($filename); + rename($tmp_filename,$filename); + utime($st_want->mtime,$st_want->mtime,$filename); + warn "installed $filename\n"; + delete $RECEIVER{$filename}; + $rx_filedata_done=1; + } + } +} + + + +#----------------------------------------------------------- sample running proc every 10 seconds + +our @proc_running=(0)x12; # 12 samples every 5 seconds -> minute average +our @proc_running_10=(0)x10; # 10 samples every minute -> 10 minute average +our $SAMPLE_TICK=0; + +sub xget_cur_running_proc { + open L,'<','/proc/stat' or return 0; + while () { + # warn $_; + return $1 if /^procs_running\s*(\d+)/; + } + return 0; +} + +sub get_cur_running_proc { + our $CPUS; + my $r=xget_cur_running_proc(); + if ($CPUS && $r>$CPUS+1) { + pload_debug("more than $CPUS+1 proc"); + } + return $r; +} + +sub running_proc { + my $ret=0; + $ret+=$_ for @proc_running; + return $ret/@proc_running; +} + +sub running_proc_10 { + my $ret=0; + $ret+=$_ for @proc_running_10; + return $ret/@proc_running_10; +} + + +sub sample_rproc { # every 5 seconds + @proc_running=(@proc_running[1..@proc_running-1],get_cur_running_proc()-1); + if ($SAMPLE_TICK<12) { + $SAMPLE_TICK++; + } else { + @proc_running_10=(@proc_running_10[1..@proc_running_10-1],running_proc()); + $SAMPLE_TICK=0; + } + Donald::Select::timeout_requeue(5); +} + + + + +#----------------------------------------------------------- stat + + +#----------------------------------------------------------- + +our ($CPUS,$BOGOMIPS); + +sub init_cpuinfo { + Donald::Tools::is_alpha and return; + open L,'<','/proc/cpuinfo' or return; + while () { + if (/^bogomips\s*:\s*([\d.]+)/) {$CPUS++;$BOGOMIPS+=$1;} + } +} + +sub loadavg { # AXP : (system load average) , LINUX: (system load average, pload, freebogo) + if (Donald::Tools::is_alpha) { + my $data=pack 'l!3lx4l!3'; + my $i=syscall(85,3,0,$data,1,length($data)); # table(id=TBL_LOADAVG,index=0,addr=data,nel=1,lel=56) + my ($l0,$l1,$l2,$scale,$mach0,$mach1,$mach2)=unpack 'l!3lx4l!3',$data; + $scale or return undef; + return $l0/$scale; + } else { + $CPUS or init_cpuinfo(); + my $running_proc=running_proc(); + open L,'<','/proc/loadavg' or return undef; + my $data; + sysread(L,$data,1024); + close L; + $data =~ /^(\d+\.?\d*)/ or return undef; # 5 min loadavg + if ($CPUS) { + return ($1+0,($running_proc)/$CPUS,($running_proc>=$CPUS?0:($CPUS-$running_proc)*$BOGOMIPS)); + } else { + return $1+0; + } + } +} + +our $STAT_TARGET='lol'; +our $STAT_SEQ=0; +sub send_stat { + Donald::Select::timeout_requeue(600); + my ($load_avg,$pload,$pcapacity)=loadavg(); + defined $load_avg or return; + udp_send_message($STAT_TARGET,'loadavg.2',$my_hostname,$STAT_SEQ++,$load_avg,version_info(),$pload,$pcapacity,$my_unixrev); +} + +sub udp_rx_loadavg2 { + my ($hostname,$seq,$load_avg,$opt_version,$opt_pload,$opt_pcapacity,$opt_unixrev)=@_; + $my_hostname eq $STAT_TARGET and My::Cluster::Updown::rx_hostannounce($hostname,$seq,$load_avg,$opt_version,$opt_pload,$opt_pcapacity,$opt_unixrev) +} + + + + +sub udp_rx_log { + my ($msg)=@_; + My::Cluster::Updown::msg_text($msg); +} + +sub log_to_stat_target { + my ($msg)=join '',@_; + udp_send_message($STAT_TARGET,'log',"$my_hostname: $msg"); +} + + +# ---------------------------------------------------------- + +sub udp_rx_restart { + # double-fork, because kill_previous_server() won't kill its parent + my $pid=fork; + if (defined $pid && $pid==0) { + my $pid2=fork; + if (defined $pid2 && $pid==0) { + exec '/sbin/clusterd','--kill','--daemon'; + die "exec failed: $!\n"; + } + } +} + + +#----------- tcp mgmt console ----------------------------- + +our $MGMT_PORT=234; +our $mgmt_listen_socket; + +our %mgmt_sockets; + +sub mgmt_init { + $mgmt_listen_socket=new IO::Socket::INET(LocalPort=>$MGMT_PORT,Proto=>'tcp',Listen=>1,ReuseAddr=>1); + defined $mgmt_listen_socket or die "$!\n"; + Donald::Select::reader($mgmt_listen_socket,\&mgmt_connect_request); +} + +sub mgmt_connect_request { + + Donald::Select::reader_requeue(); + + # listen socket ready + + my $socket=$mgmt_listen_socket->accept(); + $socket->blocking(0); + my $peernode=$socket->peerhost; + + ### warn "accepted mgmt connection from $peernode\n"; + + Donald::Select::reader($socket,\&mgmt_receive,$socket); + $mgmt_sockets{$socket}=$socket; + $socket->print("clusterd ".version_info()." stupid console\n"); + $socket->print("For historical messages, grep \"clusterd\" from /var/log/messages on $STAT_TARGET (or \"tail -f /var/log/messages |grep cluster\")\n"); +} + +sub mgmt_receive { + my ($s)=@_; + my $data; + my $len=$s->sysread($data,1024); + if (!defined $len or $len==0 or $len==1 && $data eq "\cD") { + delete $mgmt_sockets{$s}; + $s->close; + ###warn "lost connection to mgmt port\n"; + return; + } + + Donald::Select::reader_requeue(); + $data=~s/\r?\n$//; + length $data or return; + if ($data eq 'l') { + $my_hostname eq $STAT_TARGET and My::Cluster::Updown::status($s); + $s->print("AREA: ",area_config_as_string(),"\n"); + $s->print("STAT TARGET: ",$STAT_TARGET,"\n"); + $s->print(' (',scalar(localtime),')',"\n"); + } + elsif ($data eq 'r') { + for my $host (sort keys %H) { + if ($H{$host}->[0] eq 'UP') { + $s->printf("%-40s : %s\n",$host,$H{$host}->[7]); + } + } + } elsif ($data eq 'v') { + $Data::Dumper::Terse=1; + $Data::Dumper::Indent=0; + for (sort keys %H) { + $s->printf("%15s : %s\n",$_,Dumper($H{$_})); + } + } elsif ($data eq 'd') { + my $running_proc=running_proc(); + my $running_proc_10=running_proc_10(); + $s->printf("RPROC : %s : %.2f\n",join(',',@proc_running),$running_proc); + $s->printf("RPROC_10 : %s : %.2f\n",join(',',map({sprintf '%.2f',$_} @proc_running_10)),$running_proc_10); + $CPUS or init_cpuinfo; + $CPUS and $s->printf("run: %.2f , CPUs: %d , bogo %.2f , load %.2f , capacity %.1f\n",$running_proc, $CPUS,$BOGOMIPS,$running_proc/$CPUS, $running_proc>=$CPUS?0:($CPUS-$running_proc)*$BOGOMIPS); + $CPUS and $s->printf("run10: %.2f , CPUs: %d , bogo %.2f , load %.2f , capacity %.1f\n",$running_proc_10,$CPUS,$BOGOMIPS,$running_proc_10/$CPUS,$running_proc_10>=$CPUS?0:($CPUS-$running_proc_10)*$BOGOMIPS); + } elsif ($data =~ /^delete (\S+)$/) { + My::Cluster::Updown::delete_host($1); + } elsif ($data eq 'dup show') { + for my $h (sort {$DUPLICATES{$b} <=> $DUPLICATES{$a} } keys %DUPLICATES) { + $s->printf("%-10s : %5d\n",$h,$DUPLICATES{$h}); + } + } elsif ($data eq 'dup clear') { + %DUPLICATES=(); + } else { + $s->print(<<"_EOF_"); +unknown command: $data + +l : list status +v : dump status array +d : debug (cpu speed calc) +r : dump unix revisions +delete HOST : forget about HOST + +dup show : show duplicates stat +dup clear : clear duplicates stat + +to exit use ^D + +_EOF_ + } +} + + +sub mgmt_print_all { + my ($msg)=@_; + + $options{'foreground'} and not $options{'syslog'} and print $msg; + + for my $s (values(%mgmt_sockets)) { + $s->print($msg); + } +} + +#----------------------------------------------------------- + + + + +sub clp_rx_LSOF { + my ($socket,$pattern)=@_; + + my $pid=fork; + unless (defined $pid) { + warn"$!\n"; + return; + } + unless ($pid) { + my $pid=fork; + defined $pid or die "$!\n"; + unless ($pid) { + $socket->blocking(1); + open P,'lsof|' or die "$!\n"; + while (

) { + next if defined $pattern && index($_,$pattern)<0; + $socket->send(pack('n',length($_)).$_,0); + } + close P; + close $socket; + exit; + } + exit; + } + close $socket; + wait; + return 1; +} + +sub run_cmd { + my ($socket,@cmd)=@_; + my $pid=fork; + unless (defined $pid) { + warn"$!\n"; + return; + } + unless ($pid) { + my $opipe=new IO::Pipe; + my $epipe=new IO::Pipe; + my $cpid=fork; + defined $cpid or die "$!\n"; + unless ($cpid) { + warn "exec ".join(' ',@cmd)."\n"; + $opipe->writer(); + $epipe->writer(); + open STDIN,'<','/dev/null'; + open STDOUT,'>&',$opipe; + open STDERR,'>&',$epipe; + exec @cmd; + die "$!\n"; + } +# $::SIG{'CHLD'}=sub { +# my $pid=wait; +# my $buffer="X$?"; +# $socket->send(pack('n',length($buffer)).$buffer,0); +# exit; +# }; + $opipe->reader(); + $epipe->reader(); + my $ofn=$opipe->fileno; + my $efn=$epipe->fileno; + my ($rvec_in,$wvec_in,$evec_in)=('','',''); + vec($rvec_in,$ofn,1)=1; + vec($rvec_in,$efn,1)=1; + my $channel=2; + my $buffer; + $socket->blocking(1); + while (1) { + my ($rvec,$wvec,$evec)=($rvec_in,$wvec_in,$evec_in); + my $ready=select($rvec,$wvec,$evec,60); + $ready or die "timeout\n"; + if (vec($rvec,$ofn,1)) { + my $len=$opipe->sysread($buffer,1024-1-2); + defined $len or die "$!\n"; + if ($len) { + $socket->send(pack('n',$len+1)."O$buffer",0); + } else { + vec($rvec_in,$ofn,1)=0; + $opipe->close; + $channel--; + } + } + if (vec($rvec,$efn,1)) { + my $len=$epipe->sysread($buffer,1024-1-2); + defined $len or die "$!\n"; + if ($len) { + $socket->send(pack('n',$len+1)."E$buffer",0); + } else { + vec($rvec_in,$efn,1)=0; + $epipe->close; + $channel--; + } + } + if ($channel==0) { + my $pid=wait; + my $buffer="X$?"; + $socket->send(pack('n',length($buffer)).$buffer,0); + exit; + } + } + + } +} + + + +#----------- CLP cluster protocol ----------------------------- + +our $CLP_PORT=235; +our $clp_listen_socket; + +our %clp_sockets; + +our %CLP_HANDLER=('CMD'=>\&clp_rx_CMD,'LSOF'=>\&clp_rx_LSOF); + +sub clp_init { + $clp_listen_socket=new IO::Socket::INET(LocalPort=>$CLP_PORT,Proto=>'tcp',Listen=>1,ReuseAddr=>1); + defined $clp_listen_socket or die "$!\n"; + Donald::Select::reader($clp_listen_socket,\&clp_connect_request); +} + +sub clp_connect_request { + + Donald::Select::reader_requeue(); + + # listen socket ready + + my $socket=$clp_listen_socket->accept(); + $socket->blocking(0); + my $peernode=$socket->peerhost; + + my $buffer=''; + + Donald::Select::reader($socket,\&clp_receive,$socket,\$buffer); + $clp_sockets{$socket}=$socket; +} + +sub clp_receive { + my ($s,$bufref)=@_; + my $data; + defined $s->recv($data,$TCP_MAX) or return; + if (!length($data) ) { + delete $mgmt_sockets{$s}; + $s->close; + return; + } + $$bufref.=$data; + while (1) { + last if length($$bufref)<2; + my $l=unpack('n',$$bufref); + last if length($$bufref)<2+$l; + my $msg=substr($$bufref,2,$l); + $$bufref=substr($$bufref,2+$l); + clp_message($s,$msg) and return; + } + Donald::Select::reader_requeue(); +} + +sub clp_message { + my ($socket,$data)=@_; + + defined $CLUSTER_PW or return; + + my ($handler_name,@args)=decode(check_sign($CLUSTER_PW,$data)) or return; + $CLP_HANDLER{$handler_name}->($socket,@args) if exists $CLP_HANDLER{$handler_name}; +} + + +sub clp_send_message { # clp_send_message($socket, @args) + my ($s,@args)=@_; + defined $CLUSTER_PW or return; + my $data=sign($CLUSTER_PW,encode(@args)); + unless ($s->peername) { + return 0; + } + $s->send(pack('n',length($data)).$data); +} + + +sub clp_rx_CMD { + my ($socket,@args)=@_; + run_cmd($socket,@args); + close $socket; + return 1; +} + + +#---------------------------------------------------------- + +#our $CLUSTER_PW; +#our $CLUSTER_PW_FILE='/etc/clusterd.password'; +#our $OLD_CLUSTER_PW_FILE='/root/clusterd.password'; +#our $CLUSTER_PW_TIMESTAMP=0; + + +sub sync_cluster_pw { + my $st=Donald::FileInfo->lstat($CLUSTER_PW_FILE); + + # upgrade : move cluster password file from /root to /etc + + if (!$st && -e $OLD_CLUSTER_PW_FILE) { + warn "upgrading cluster password file location $OLD_CLUSTER_PW_FILE -> $CLUSTER_PW_FILE\n"; + my $in=new IO::File $OLD_CLUSTER_PW_FILE,'<'; + unless (defined $in) {warn "$OLD_CLUSTER_PW_FILE: $!\n";return undef;} + my $out=new IO::File $CLUSTER_PW_FILE,O_WRONLY|O_CREAT,0600; + unless (defined $out) {warn "$CLUSTER_PW_FILE: $!\n";return undef;} + my $data; + $in->read($data,1024); + $out->write($data); + $in->close; + $out->close; + $st=Donald::FileInfo->lstat($CLUSTER_PW_FILE); + defined $st or die "$CLUSTER_PW_FILE: $!\n"; + unlink $OLD_CLUSTER_PW_FILE; + } + + if ($st) { + if (!defined $CLUSTER_PW or $CLUSTER_PW_TIMESTAMP != $st->mtime) { + my $fh=new IO::File $CLUSTER_PW_FILE,'<'; + if (defined ($fh)) { + defined $CLUSTER_PW and warn "update cluster password\n"; + $fh->read($CLUSTER_PW,1024); + $CLUSTER_PW_TIMESTAMP=$st->mtime; + } else { + defined $CLUSTER_PW and warn "$CLUSTER_PW_FILE: $!\n"; + $CLUSTER_PW=undef; + } + } + } else { + defined $CLUSTER_PW and warn "$CLUSTER_PW_FILE: $!\n"; + $CLUSTER_PW=undef; + } + Donald::Select::timeout(60,\&sync_cluster_pw); + return defined $CLUSTER_PW; +} + + +#------------------------------------------------------------ + +# area routing + + +our %AREA_ROUTER= +( +lol => '141.14.31.255', +# orkrist=> '10.14.0.255', +); + + +our $area_socket; + + +sub init_area { + exists $AREA_ROUTER{$my_hostname} or return; + warn "I am area router for $AREA_ROUTER{$my_hostname}\n"; + + $area_socket=new IO::Socket::INET (Proto=>'udp',LocalPort=>$UDP_PORT+1) or die "$!\n"; + Donald::Select::reader($area_socket,\&area_message,$area_socket); +} + +sub area_message { + my ($area_socket)=@_; + my $data; + my $peer = $area_socket->recv($data,$UDP_MAX); + my ($udp_peer_port,$peer_iaddr)=unpack_sockaddr_in($peer); + my $udp_peer_addr=inet_ntoa($peer_iaddr); + + $donald_s->send_data($AREA_ROUTER{$my_hostname},$UDP_PORT,$data); # broadcast to our network + Donald::Select::reader_requeue(); +} + + +sub udp_broadcast_message { + my ($donald_s,@args)=@_; + + defined $CLUSTER_PW or return; + my $data=sign($CLUSTER_PW,encode(@args)); + for my $ip (keys %AREA_ROUTER) { + $donald_s->send_data($ip,$UDP_PORT+1,$data); + } +} + +sub area_config_as_string { + return join(',',map({"$_ => $AREA_ROUTER{$_}"} keys %AREA_ROUTER)) +} + +#------------------------------------------------------------ + +our $PROG_FILE; # saved $0 - may be relative path +our $PROG_MTIME; + +sub check_progfile_status { + defined $PROG_FILE or $PROG_FILE=$0; + my @f=lstat $PROG_FILE or return; + if (defined $PROG_MTIME) { + if ($f[9] != $PROG_MTIME) { + warn "progfile $PROG_FILE has changed - upgrade restart from version ".version_info()."\n"; + exec $PROG_FILE,'--daemon',($options{'foreground'}?'--foreground':()),($options{'syslog'}?'--syslog':()); + } + } + else { + $PROG_MTIME=$f[9]; + } + Donald::Select::timeout(60,\&check_progfile_status); +} + +sub version_info { # 'V1.31 - 20090617-155314' + my $t; + if (defined $PROG_MTIME) { + my @f=localtime($PROG_MTIME); + $t=sprintf '%4d%02d%02d-%02d%02d%02d',$f[5]+1900,$f[4]+1,$f[3],$f[2],$f[1],$f[0]; + } else { + $t='?'; + } + return "V$RCS_REVISION - ".$t; +} + +#------------------------------------------------------------ + +sub pload_debug { + my ($why)=@_; + open T,'>','/var/log/pload_debug.txt' or return; + print T "$why at ".scalar(localtime),"\n"; + my $running_proc=running_proc(); + my $running_proc_10=running_proc_10(); + printf T ("RPROC : %s : %.2f\n",join(',',@proc_running),$running_proc); + printf T ("RPROC_10 : %s : %.2f\n",join(',',map({sprintf '%.2f',$_} @proc_running_10)),$running_proc_10); + $CPUS or init_cpuinfo; + $CPUS and printf T ("run: %.2f , CPUs: %d , bogo %.2f , load %.2f , capacity %.1f\n",$running_proc, $CPUS,$BOGOMIPS,$running_proc/$CPUS, $running_proc>=$CPUS?0:($CPUS-$running_proc)*$BOGOMIPS); + $CPUS and printf T ("run10: %.2f , CPUs: %d , bogo %.2f , load %.2f , capacity %.1f\n",$running_proc_10,$CPUS,$BOGOMIPS,$running_proc_10/$CPUS,$running_proc_10>=$CPUS?0:($CPUS-$running_proc_10)*$BOGOMIPS); + system "uptime>>/var/log/pload_debug.txt"; + system "ps -AlfT>>/var/log/pload_debug.txt"; + system "cat /proc/stat>>/var/log/pload_debug.txt"; + system "cat /proc/swaps>>/var/log/pload_debug.txt"; + close T; +} + +sub check_overload() { + Donald::Select::timeout_requeue(600); + if ($CPUS) { + my $running_proc_10=running_proc_10(); + my $pload_10=running_proc_10()/$CPUS; + $pload_10>1.5 and warn (sprintf("pload>150%% (%3.1f%%)\n",$pload_10*100)); + } +} + + +##################################################### + +my $slave=0; +my $exit_value=0; + +sub expand_netgroup_hosts { + my (@netgroups)=@_; + my %DID=(); + my @out=(); + + while (@netgroups) { + my $ng=pop @netgroups; + for my $entry (split ' ',`ypmatch $ng netgroup`) { + if ($entry=~/^\((\S+),.*,.*\)$/) { + push @out,$1; + } elsif ($entry=~/^[a-z0-9_]+$/i) { + push @netgroups,$entry; + } else { + warn "ignored entry $entry\n"; + } + } + } + return @out; +} + +sub lsof { + my ($pattern)=@_; + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + #for my $host ('theinternet') { + for my $host (sort(expand_netgroup_hosts('AMD_SHORT'))) { + next if $host eq 'tux'; + my $s=new IO::Socket::INET (PeerAddr=>$host,PeerPort=>$CLP_PORT); + unless (defined $s) { + warn "$host: $!\n"; + next; + } + clp_send_message($s,'LSOF',$pattern); + my $pbuffer=''; + my $olbuffer=''; + Donald::Select::reader($s,\&lsof_rx,$host,$s,\$pbuffer,\$olbuffer); + $slave++; + } + Donald::Select::run() if $slave;; +} + + +sub lsof_rx { + my ($host,$s,$pbufref,$olbufref)=@_; + my $data; + defined $s->recv($data,$TCP_MAX*2) or return; + #print "$host : received ",length($data),"\n"; + if (!length($data)) { + close $s; + --$slave; + exit $exit_value unless $slave; + return; + } + $$pbufref.=$data; + while (1) { + last if length($$pbufref)<2; + my $l=unpack('n',$$pbufref); + #print "$host : unpacked length $l\n"; + last if length($$pbufref)<2+$l; + my $msg=substr($$pbufref,2,$l); + $$pbufref=substr($$pbufref,2+$l); + #print "$host : remaining pbuf length ",length($$pbufref),"\n"; + #print "$host : message length ",length($msg),"\n"; + lsof_msg($host,$s,$msg,$olbufref); + } + Donald::Select::reader_requeue(); +} + +sub lsof_msg { + my ($host,$s,$data,$olbufref)=@_; + my $msg=$$olbufref.$data; + for ($msg=~/([^\n]*\n)/gs) { + print "$host $_"; + } + ($$olbufref)=$msg=~/([^\n]*)\z/; +} + + +sub cmd_rx { + my ($host,$s,$pbufref,$olbufref,$elbufref)=@_; + my $data; + defined $s->recv($data,$TCP_MAX*2) or return; + if (!length($data)) { + close $s; + --$slave; + exit $exit_value unless $slave; + return; + } + #print "$host received ".length($data),"\n"; + $$pbufref.=$data; + while (1) { + last if length($$pbufref)<2; + my $l=unpack('n',$$pbufref); + #print "$host : unpacked length $l\n"; + #die "$host: to long" if $l>1024; + last if length($$pbufref)<2+$l; + my $msg=substr($$pbufref,2,$l); + $$pbufref=substr($$pbufref,2+$l); + #print "$host : remaining pbuf length ",length($$pbufref),"\n"; + #print "$host : message length ",length($msg),"\n"; + cmd_msg($host,$s,$msg,$olbufref,$elbufref); + } + Donald::Select::reader_requeue(); +} + +sub cmd_msg { + my ($host,$s,$data,$olbufref,$elbufref)=@_; + + my $channel=substr($data,0,1); + if ($channel eq 'X') { + my $rdata=substr($data,1); + $rdata!=0 and $exit_value=1; + return; + } elsif ($channel eq 'O') { + my $msg=$$olbufref.substr($data,1); + for ($msg=~/([^\n]*\n)/gs) { + print "$host $_"; + } + ($$olbufref)=$msg=~/([^\n]*)\z/; + } elsif ($channel eq 'E') { + my $msg=$$elbufref.substr($data,1); + for ($msg=~/([^\n]*\n)/gs) { + print STDERR "$host $_"; + } + ($$elbufref)=$msg=~/([^\n]*)\z/; + } +} + + +#------------------------------------------------------------ + + +use constant USAGE => <<'__EOF__'; + +usage: $0 [options] + + --push file # broadcast this file + --push-amd-tar # broadcast /etc/amd + --send-restart # broadcast a restart request to all nodes + --exec mkmotd # execute /usr/sbin/mkmotd.pl on all nodes + + --lsof=pattern + + --kill # try to kill a running server + + --daemon # start a daemon + --kill # try to kill previous server first + --foreground # stay in foreground, log to stderr + --syslog # log to syslog instead of stderr + +__EOF__ + + +use Getopt::Long; +GetOptions +( + 'kill' => \$options{'kill'}, + 'daemon' => \$options{'daemon'}, + 'push=s' => \$options{'push'}, + 'exec=s' => \$options{'exec'}, + 'foreground' => \$options{'foreground'}, + 'syslog' => \$options{'syslog'}, + 'push-amd-tar' => \$options{'push_amd_tar'}, + 'send-restart' => \$options{'send-restart'}, + 'lsof=s' => \$options{'lsof'}, + +) or die USAGE; + + +if (defined $options{'push'}) { + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + push_file($donald_s,$options{'push'}); +} elsif (defined $options{'exec'}) { + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + send_exec($donald_s,$options{'exec'}); +} elsif (defined $options{'push_amd_tar'}) { + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + push_amd_tar($donald_s); +} elsif (defined $options{'send-restart'}) { + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + udp_broadcast_message($donald_s,'restart'); +} elsif (defined $options{'daemon'}) { + $options{'kill'} and Donald::Tools::kill_previous_server('clusterd') and sleep 2; + + $SIG{PIPE}='IGNORE'; + + $donald_s=new Donald::Select::INET(Proto=>'udp',Broadcast=>1,LocalPort=>$UDP_PORT) or die "$!\n"; + $donald_s->receive_data(\&udp_message,$donald_s); + + unless ($options{'foreground'}) { + my $pid=fork; + defined $pid or die "$!\n"; + $pid and exit 0; + } + + if ($options{'syslog'} or not $options{'foreground'}) { + openlog('clusterd','pid','daemon'); + $SIG{__WARN__} = sub { syslog('warning',@_); }; + $SIG{__DIE__} = sub { syslog('crit',@_);syslog('crit','exiting');exit 1;}; + open (STDOUT,'>','/dev/null'); + open (STDERR,'>','/dev/null'); + open (STDIN,'<','/dev/null'); + } + + check_progfile_status(); + warn "server started - ".version_info()."\n"; + init_area(); + mgmt_init(); + clp_init(); + + sync_cluster_pw() or warn "$CLUSTER_PW_FILE: $!\n"; + + Donald::Select::timeout(60,\&purge_old_receiver); + Donald::Select::timeout(rand(60),\&send_stat); + Donald::Select::timeout(0,\&sample_rproc) unless Donald::Tools::is_alpha; + $my_hostname eq $STAT_TARGET and My::Cluster::Updown::init(); + $my_hostname eq 'lol' and My::NetlogReceiver::init(); + Donald::Select::timeout(600,\&check_overload); + + + Donald::Select::run(); +} elsif ($options{'lsof'}) { + lsof($options{'lsof'}); +} elsif ($options{'kill'}) { + Donald::Tools::kill_previous_server('clusterd'); +} else { + die USAGE; +} From 5bfbdcd67ae98f1a7d29a40700d8f282720dfc65 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 5 Oct 2015 15:54:52 +0200 Subject: [PATCH 02/55] clusterd: add primitive Makefile --- clusterd/Makefile | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 clusterd/Makefile diff --git a/clusterd/Makefile b/clusterd/Makefile new file mode 100644 index 0000000..0817b7b --- /dev/null +++ b/clusterd/Makefile @@ -0,0 +1,13 @@ +PREFIX=/usr +BINDIR=${PREFIX}/bin +SBINDIR=${PREFIX}/sbin +DESTDIR= + +all: + +install: clusterd + install -d ${DESTDIR}${SBINDIR} + install -m 755 clusterd ${DESTDIR}${SBINDIR}/clusterd + +restart: install + systemctl restart clusterd.service From 915aed90af7e281227b5e58bfdac34de7517e37f Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 5 Oct 2015 16:15:53 +0200 Subject: [PATCH 03/55] clusterd: add --flush-gidcache --- clusterd/clusterd | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index df9c731..e32da2a 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -323,6 +323,7 @@ our %UDP_HANDLER= 'amdtardata' => \&udp_rx_amdtardata, 'loadavg.2' => \&udp_rx_loadavg2, 'restart' => \&udp_rx_restart, + 'flush-gidcache' => \&udp_rx_flush_gidcache, 'log' => \&udp_rx_log, 'exec' => \&udp_rx_exec, ); @@ -756,6 +757,13 @@ sub udp_rx_restart { } } +sub udp_rx_flush_gidcache { + if (open my $out,'>','/proc/net/rpc/auth.unix.gid/flush') { + print $out time(); + } else { + warn "proc/net/rpc/auth.unix.gid/flush: $!\n"; + } +} #----------- tcp mgmt console ----------------------------- @@ -1358,10 +1366,11 @@ use constant USAGE => <<'__EOF__'; usage: $0 [options] - --push file # broadcast this file - --push-amd-tar # broadcast /etc/amd - --send-restart # broadcast a restart request to all nodes - --exec mkmotd # execute /usr/sbin/mkmotd.pl on all nodes + --push file # broadcast this file + --push-amd-tar # broadcast /etc/amd + --send-restart # broadcast a restart request to all nodes + --exec mkmotd # execute /usr/sbin/mkmotd.pl on all nodes + --flush-gidcache # flush rpc auth.unix.gid cache on all nodes --lsof=pattern @@ -1386,6 +1395,7 @@ GetOptions 'syslog' => \$options{'syslog'}, 'push-amd-tar' => \$options{'push_amd_tar'}, 'send-restart' => \$options{'send-restart'}, + 'flush-gidcache' => \$options{'flush-gidcache'}, 'lsof=s' => \$options{'lsof'}, ) or die USAGE; @@ -1407,6 +1417,10 @@ if (defined $options{'push'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; udp_broadcast_message($donald_s,'restart'); +} elsif (defined $options{'flush-gidcache'}) { + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + udp_broadcast_message($donald_s,'flush-gidcache'); } elsif (defined $options{'daemon'}) { $options{'kill'} and Donald::Tools::kill_previous_server('clusterd') and sleep 2; From 1fb094e62e7beb96d08868977174be3f60ecf354 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Mon, 5 Oct 2015 16:25:27 +0200 Subject: [PATCH 04/55] clusterd: add clusterd.service --- clusterd/Makefile | 3 ++- clusterd/clusterd.service | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 clusterd/clusterd.service diff --git a/clusterd/Makefile b/clusterd/Makefile index 0817b7b..e130d3a 100644 --- a/clusterd/Makefile +++ b/clusterd/Makefile @@ -5,9 +5,10 @@ DESTDIR= all: -install: clusterd +install: clusterd clusterd.service install -d ${DESTDIR}${SBINDIR} install -m 755 clusterd ${DESTDIR}${SBINDIR}/clusterd + install -m 644 clusterd.service ${DESTDIR}/etc/systemd/system/clusterd.service restart: install systemctl restart clusterd.service diff --git a/clusterd/clusterd.service b/clusterd/clusterd.service new file mode 100644 index 0000000..79f0beb --- /dev/null +++ b/clusterd/clusterd.service @@ -0,0 +1,14 @@ +[Unit] +Description=ClusterDonald +Requires=network.target +After=network.target + +[Service] +ExecStart=/usr/sbin/clusterd --daemon --foreground --kill --syslog +StandardOutput=syslog +Restart=always +RestartSec=10s + +[Install] +WantedBy=multi-user.target + From 396c2dd8456e90c5da5e50856d75b41809cc4a2a Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 3 Mar 2016 16:45:46 +0100 Subject: [PATCH 05/55] clusterd: we live on github now --- clusterd/clusterd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index e32da2a..974a980 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -3,9 +3,9 @@ use warnings; use strict; -# $Header: /home/buczek/cluster/clusterd,v 1.101 2013/11/20 19:55:35 root Exp $ +# https://github.molgen.mpg.de/donald/clusterd -our ($RCS_REVISION)='$Revision: 1.101 $'=~/([\d.]+)/; +our $REVISION='1.102'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); @@ -1193,7 +1193,7 @@ sub version_info { # 'V1.31 - 20090617-155314' } else { $t='?'; } - return "V$RCS_REVISION - ".$t; + return "V$REVISION - ".$t; } #------------------------------------------------------------ From dad756ec5231e18a4f1fdc763ffd1658ae0e2696 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 3 Mar 2016 17:28:29 +0100 Subject: [PATCH 06/55] clusterd: use unix socket for syslog native seems to be broken and delivers newlines into the logile --- clusterd/clusterd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 974a980..909ea33 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -5,7 +5,7 @@ use strict; # https://github.molgen.mpg.de/donald/clusterd -our $REVISION='1.102'; +our $REVISION='1.103'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); @@ -1437,6 +1437,7 @@ if (defined $options{'push'}) { if ($options{'syslog'} or not $options{'foreground'}) { openlog('clusterd','pid','daemon'); + Sys::Syslog::setlogsock('unix'); # with 'native' we get EOLs in the logfile, option "noeol" doesn't work $SIG{__WARN__} = sub { syslog('warning',@_); }; $SIG{__DIE__} = sub { syslog('crit',@_);syslog('crit','exiting');exit 1;}; open (STDOUT,'>','/dev/null'); From 7bd4edeebbf00d5a9e526f8031186926205bdbb3 Mon Sep 17 00:00:00 2001 From: "root@eldersoftheinternet /dev/pts/25 141.14.28.170" Date: Tue, 19 Apr 2016 16:41:47 +0200 Subject: [PATCH 07/55] clusterd: unter die knute des bee --- clusterd/Makefile | 3 ++- clusterd/mkbee.sh | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100755 clusterd/mkbee.sh diff --git a/clusterd/Makefile b/clusterd/Makefile index e130d3a..33a1fee 100644 --- a/clusterd/Makefile +++ b/clusterd/Makefile @@ -8,7 +8,8 @@ all: install: clusterd clusterd.service install -d ${DESTDIR}${SBINDIR} install -m 755 clusterd ${DESTDIR}${SBINDIR}/clusterd + install -d ${DESTDIR}/etc/systemd/system/ install -m 644 clusterd.service ${DESTDIR}/etc/systemd/system/clusterd.service -restart: install +restart: systemctl restart clusterd.service diff --git a/clusterd/mkbee.sh b/clusterd/mkbee.sh new file mode 100755 index 0000000..dab3a2e --- /dev/null +++ b/clusterd/mkbee.sh @@ -0,0 +1,8 @@ +#!/bin/bash +VERSION=`grep -Po "(?<=Revision:\s)(\S+)" clusterd` +PACKAGE=clusterd-${VERSION} +DOWNLOADS=/src/mariux/beeroot/downloads +cd .. +tar zcvf ${DOWNLOADS}/${PACKAGE}.tgz clusterd +bee init ${DOWNLOADS}/${PACKAGE}.tgz -f +./${PACKAGE}-0.bee -c From ddc8cef24d9986e9398e628647aa039973c84cb3 Mon Sep 17 00:00:00 2001 From: "AUTHOR root@afk /dev/pts/33 141.14.30.12" Date: Wed, 20 Apr 2016 10:19:26 +0200 Subject: [PATCH 08/55] clusterd: nach /usr/sbin installieren aber in /sbin aufrufen is nix gut --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index e32da2a..3a8d0d0 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -751,7 +751,7 @@ sub udp_rx_restart { if (defined $pid && $pid==0) { my $pid2=fork; if (defined $pid2 && $pid==0) { - exec '/sbin/clusterd','--kill','--daemon'; + exec '/usr/sbin/clusterd','--kill','--daemon'; die "exec failed: $!\n"; } } From 43787286ca9c52b6a61bc43c8938bbaabcaa3944 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 18 May 2016 15:15:17 +0200 Subject: [PATCH 09/55] clusterd: remove mkbee.sh we now create release tags .e.g. bee init https://github.molgen.mpg.de/donald/clusterd/archive/v1.104.tar.gz would work --- clusterd/mkbee.sh | 8 -------- 1 file changed, 8 deletions(-) delete mode 100755 clusterd/mkbee.sh diff --git a/clusterd/mkbee.sh b/clusterd/mkbee.sh deleted file mode 100755 index dab3a2e..0000000 --- a/clusterd/mkbee.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -VERSION=`grep -Po "(?<=Revision:\s)(\S+)" clusterd` -PACKAGE=clusterd-${VERSION} -DOWNLOADS=/src/mariux/beeroot/downloads -cd .. -tar zcvf ${DOWNLOADS}/${PACKAGE}.tgz clusterd -bee init ${DOWNLOADS}/${PACKAGE}.tgz -f -./${PACKAGE}-0.bee -c From 2db26c3c05e27ce525ea1b4bf998d7cf02c8ed16 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 18 May 2016 15:50:51 +0200 Subject: [PATCH 10/55] clusterd: ping mlx once a day --- clusterd/clusterd | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 9cf3dad..259a5d1 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1225,6 +1225,13 @@ sub check_overload() { } } +sub ping_mlx { + Donald::Select::timeout_requeue(86400); + system 'ping','-c','1','mlx'; + if ($?) { + warn "failed to ping mlx\n"; + } +} ##################################################### @@ -1459,7 +1466,7 @@ if (defined $options{'push'}) { $my_hostname eq $STAT_TARGET and My::Cluster::Updown::init(); $my_hostname eq 'lol' and My::NetlogReceiver::init(); Donald::Select::timeout(600,\&check_overload); - + Donald::Select::timeout(30,\&ping_mlx); Donald::Select::run(); } elsif ($options{'lsof'}) { From b364ed900424d0b4414f27d6dd20bd8c2f39751a Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 18 May 2016 16:52:26 +0200 Subject: [PATCH 11/55] clusterd: bump up revision --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 259a5d1..b9e1499 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -5,7 +5,7 @@ use strict; # https://github.molgen.mpg.de/donald/clusterd -our $REVISION='1.103'; +our $REVISION='1.105.1'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); From 30c64ed715c00bfe81bea0430ad85d7541abf4bb Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Tue, 21 Jun 2016 08:39:58 +0200 Subject: [PATCH 12/55] clusterd: update comment pointing to github home --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index b9e1499..f97ce68 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -3,7 +3,7 @@ use warnings; use strict; -# https://github.molgen.mpg.de/donald/clusterd +# https://github.molgen.mpg.de/mariux64/clusterd our $REVISION='1.105.1'; From d7e9bc5357eb98a4279b61e6a7a43b0fb5c83116 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Tue, 21 Jun 2016 09:16:02 +0200 Subject: [PATCH 13/55] clusterd: style: remove some extra whitespace --- clusterd/clusterd | 153 +++++++++++++--------------------------------- 1 file changed, 41 insertions(+), 112 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index f97ce68..386daa9 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -16,7 +16,6 @@ use Donald::Select::INET; use POSIX; use IO::Pipe; - #-------------------------------------- package My::Cluster::Updown; @@ -24,7 +23,6 @@ package My::Cluster::Updown; # monitor nodes # - our %H; # ( name=> [state , last_seen , expect_seq , @data ] , ... ) our $MONITOR_STARTING=1; @@ -37,7 +35,6 @@ our %DUPLICATES; # 3: processor capacity # 4: string unix version - use Storable; sub save_state { @@ -45,25 +42,23 @@ sub save_state { } sub restore_state { - -e '/root/clusterd.monitor.state' and %H=%{retrieve '/root/clusterd.monitor.state'}; for (values %H) { $_->[1]=0; } } - sub status { my ($f)=@_; my (@up,@down); for (sort keys %H) { push @up, $_ if $H{$_}->[0] eq 'UP' } for (sort keys %H) { push @down,$_ if $H{$_}->[0] eq 'DOWN' } - + $f->printf("UP (%d): %s\n\n",scalar(@up),join(' ',@up)); $f->printf("DOWN (%d): %s\n\n",scalar(@down),join(' ',@down)); my (@k,$max); - + @k=sort({$H{$b}->[3] <=> $H{$a}->[3]} grep({$H{$_}->[0] eq 'UP'} (keys %H))); $max=0; $f->print("TOP 10: "); @@ -127,7 +122,6 @@ sub timeout_hosts { save_state(); } - sub rx_hostannounce { # ($hostname,$seq,$load_avg,$opt_version,$opt_pload,$opt_pcapacity,$opt_unixrev) @@ -138,7 +132,7 @@ sub rx_hostannounce { unless (exists $H{$host}) { if ($seq==0) { msg_state('NEW','UP',$host,"discovered new node $version - $unixrev"); - } else { + } else { $MONITOR_STARTING or msg_state('NEW','UP',$host,"discovered running node $version - $unixrev"); } $H{$host}=['UP',Donald::Select::time,$seq+1,@more]; @@ -179,7 +173,6 @@ sub delete_host { msg_text("host $host removed from monitor"); } - #----------------------------------------------------------------------- package My::NetlogReceiver; @@ -194,7 +187,6 @@ sub day { return sprintf "%04d%02d%02d",$f[5]+1900,$f[4]+1,$f[3]; } - sub bigben { my $day=day(); $day le $DAY_LAST_MSG and return; @@ -207,14 +199,12 @@ sub bigben_timer { Donald::Select::timeout_requeue(30); } - sub bigben_init { $DAY_LAST_MSG=day(); Donald::Select::timeout(30,\&bigben_timer); } - sub receive { my ($socket,$peernode,$bufref)=@_; my $data; @@ -226,7 +216,7 @@ sub receive { length $data or return; $$bufref.=$data; - + while (1) { last if length($$bufref)<2; my $l=unpack('n',$$bufref); @@ -240,7 +230,6 @@ sub receive { Donald::Select::reader_requeue(); } - sub connect_request { Donald::Select::reader_requeue(); @@ -253,15 +242,12 @@ sub connect_request { # warn "$peernode: connect\n"; } - sub init { $listen_socket=new IO::Socket::INET(Proto=>'tcp',LocalPort=>1028,Listen=>1,ReuseAddr=>1) or die "$!\n"; Donald::Select::reader($listen_socket,\&connect_request); bigben_init(); } - - #------------------------------------------------------------------------ package main; use strict; @@ -284,12 +270,11 @@ $my_hostname=lc `/bin/hostname`; chomp($my_hostname); $my_hostname =~ s/\.molgen\.mpg\.de$//; - while (1) { my $addr=inet_aton($my_hostname); if(defined $addr) { $my_ip=inet_ntoa(inet_aton($my_hostname)); - } + } last if defined $my_ip; my $once; unless ($once) { @@ -299,7 +284,6 @@ while (1) { sleep 30; } - our $my_unixrev; $my_unixrev=`uname -r`; chomp($my_unixrev); @@ -309,11 +293,9 @@ our $CLUSTER_PW_FILE='/etc/clusterd.password'; our $OLD_CLUSTER_PW_FILE='/root/clusterd.password'; our $CLUSTER_PW_TIMESTAMP=0; - $ENV{'PATH'} = '/usr/local/bin:/sbin:/usr/sbin:/bin'.($ENV{PATH}?':'.$ENV{PATH}:''); # for amq , ps , tar (gnu!) - -#---------------------------------------------------------- UDP +#---------------------------------------------------------- UDP our ($udp_peer_addr,$udp_peer_port); # ('141.14.12.12',1234) @@ -328,33 +310,29 @@ our %UDP_HANDLER= 'exec' => \&udp_rx_exec, ); - sub udp_message { my ($data,$x_udp_peer_addr,$x_udp_peer_port,$donald_s)=@_; - + ($udp_peer_addr,$udp_peer_port)=($x_udp_peer_addr,$x_udp_peer_port); - + defined $CLUSTER_PW or return; my ($handler_name,@args)=decode(check_sign($CLUSTER_PW,$data)) or return; $UDP_HANDLER{$handler_name}->(@args) if exists $UDP_HANDLER{$handler_name}; } - sub udp_send_message { # udp_send_message( dst, @args) # dst='141.14.31.255' 'zork' '141.14.16.1' etc. data is anything my ($ip,@args)=@_; defined $CLUSTER_PW or return; $donald_s->send_data($ip,$UDP_PORT,sign($CLUSTER_PW,encode(@args))); } - - #---------------------------------------------------------- sub push_amd_tar { my ($donald_s)=@_; my $filename='/tmp/amd.tar'; - + my $pid=fork; defined $pid or return warn "$!\n"; unless($pid) { @@ -365,8 +343,7 @@ sub push_amd_tar { wait; $? and return; - - my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; + my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; my $digest=Digest::MD5->new->addfile($fh)->digest; warn "tar digest is ",Digest::MD5::md5_hex($digest),"\n"; @@ -380,7 +357,7 @@ sub push_amd_tar { $? and return; $filename='/tmp/amd.tar.gz'; - + my $st=Donald::FileInfo->lstat($filename); defined $st or return warn "$filename: $!\n"; $st->type eq 'F' or return warn "$filename: not a plain file\n"; @@ -397,35 +374,31 @@ sub push_amd_tar { } } - - sub push_file { my ($donald_s,$filename)=@_; - + $filename =~ m"^/" or return warn "$filename: please use absolute path\n"; - + my $st=Donald::FileInfo->lstat($filename); defined $st or return warn "$filename: $!\n"; $st->type eq 'F' or return warn "$filename: not a plain file\n"; $st->size<=40960 or return warn "$filename: to big for broadcast (max 40960 bytes)\n"; - my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; + my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; my $i=0; for (my $pos=0;$pos<$st->size;$pos+=1024) { my $data; defined $fh->sysread($data,1024) or return warn "$filename: $!\n"; # warn "send bytes $pos to ",$pos+length($data),"\n"; udp_broadcast_message($donald_s,'filedata',$st,$pos,$data); - ++$i % $BC_RATE or sleep 1; + ++$i % $BC_RATE or sleep 1; } } - our %CMD=( 'mkmotd'=>'/usr/sbin/mkmotd.pl', ); - sub send_exec { my ($donald_s,$cmd)=@_; unless (exists $CMD{$cmd}) { @@ -434,7 +407,6 @@ sub send_exec { udp_broadcast_message($donald_s,'exec',$cmd); } - sub udp_rx_exec { my ($cmd)=@_; @@ -451,7 +423,7 @@ sub udp_rx_exec { $pid=fork; defined $pid or exit 1; $pid and exit; - + open STDIN,'<','/dev/null'; open STDOUT,'>','/dev/null'; open STDERR,'>','/dev/null'; @@ -463,15 +435,11 @@ sub udp_rx_exec { wait; } - - #------------------------------------------------------------- - - sub normalize_seg { # [pos,len],[pos,len],... my @s=sort {$a->[0] <=> $b->[0]} @_; - + my $i=0; while ($i<$#s) { # is element $i joinable with next element @@ -488,13 +456,10 @@ sub normalize_seg { # [pos,len],[pos,len],... return @s; } - -my %RECEIVER; # ( filename => $receiver, .... ) - +my %RECEIVER; # ( filename => $receiver, .... ) # $receiver : [ st_want , last_rx , io_handle , [ [pos,len] , [pos,len] , ... ] ] - sub purge_old_receiver { while (my ($n,$v)=each %RECEIVER) { if ($v->[1]+10lstat($st_want->name); if ($st_is && $st_is->type eq 'F' && $st_is->size==$st_want->size && $st_is->mtime==$st_want->mtime) { #### $pos==0 and warn " $filename seems to be current\n"; return; - } + } if (length($data) == $st_want->size) { # complete file in one broadcast @@ -575,10 +540,9 @@ sub udp_rx_filedata { $rx_filedata_done=1; return; } - - length($data) or return; # shouldn't happen. - - + + length($data) or return; # shouldn't happen. + my $receiver=$RECEIVER{$st_want->name}; if (defined $receiver) { @@ -599,7 +563,7 @@ sub udp_rx_filedata { { # warn "$filename: receive $pos length ",length($data),"\n"; - + # write data ( size cant be 0 here ) $receiver->[2]->seek($pos,0) or return warn "$tmp_filename: $!\n"; $receiver->[2]->syswrite($data) or return warn "$tmp_filename: $!\n";; @@ -609,7 +573,7 @@ sub udp_rx_filedata { #warn "$filename: receive $pos length ",length($data)," segments now: ",join(",",map( {'['.$_->[0].','.$_->[1].']'} @{$receiver->[3]})),"\n"; # all there ? - + if (@$s == 1 && $s->[0]->[0]==0 && $s->[0]->[1]==$st_want->size) { $receiver->[2]->close; chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; @@ -624,8 +588,6 @@ sub udp_rx_filedata { } } - - #----------------------------------------------------------- sample running proc every 10 seconds our @proc_running=(0)x12; # 12 samples every 5 seconds -> minute average @@ -662,7 +624,6 @@ sub running_proc_10 { return $ret/@proc_running_10; } - sub sample_rproc { # every 5 seconds @proc_running=(@proc_running[1..@proc_running-1],get_cur_running_proc()-1); if ($SAMPLE_TICK<12) { @@ -674,14 +635,8 @@ sub sample_rproc { # every 5 seconds Donald::Select::timeout_requeue(5); } - - - #----------------------------------------------------------- stat - -#----------------------------------------------------------- - our ($CPUS,$BOGOMIPS); sub init_cpuinfo { @@ -729,9 +684,6 @@ sub udp_rx_loadavg2 { $my_hostname eq $STAT_TARGET and My::Cluster::Updown::rx_hostannounce($hostname,$seq,$load_avg,$opt_version,$opt_pload,$opt_pcapacity,$opt_unixrev) } - - - sub udp_rx_log { my ($msg)=@_; My::Cluster::Updown::msg_text($msg); @@ -742,7 +694,6 @@ sub log_to_stat_target { udp_send_message($STAT_TARGET,'log',"$my_hostname: $msg"); } - # ---------------------------------------------------------- sub udp_rx_restart { @@ -787,7 +738,7 @@ sub mgmt_connect_request { my $socket=$mgmt_listen_socket->accept(); $socket->blocking(0); my $peernode=$socket->peerhost; - + ### warn "accepted mgmt connection from $peernode\n"; Donald::Select::reader($socket,\&mgmt_receive,$socket); @@ -814,7 +765,7 @@ sub mgmt_receive { $my_hostname eq $STAT_TARGET and My::Cluster::Updown::status($s); $s->print("AREA: ",area_config_as_string(),"\n"); $s->print("STAT TARGET: ",$STAT_TARGET,"\n"); - $s->print(' (',scalar(localtime),')',"\n"); + $s->print(' (',scalar(localtime),')',"\n"); } elsif ($data eq 'r') { for my $host (sort keys %H) { @@ -855,7 +806,7 @@ r : dump unix revisions delete HOST : forget about HOST dup show : show duplicates stat -dup clear : clear duplicates stat +dup clear : clear duplicates stat to exit use ^D @@ -863,12 +814,11 @@ _EOF_ } } - sub mgmt_print_all { my ($msg)=@_; $options{'foreground'} and not $options{'syslog'} and print $msg; - + for my $s (values(%mgmt_sockets)) { $s->print($msg); } @@ -876,9 +826,6 @@ sub mgmt_print_all { #----------------------------------------------------------- - - - sub clp_rx_LSOF { my ($socket,$pattern)=@_; @@ -886,8 +833,8 @@ sub clp_rx_LSOF { unless (defined $pid) { warn"$!\n"; return; - } - unless ($pid) { + } + unless ($pid) { my $pid=fork; defined $pid or die "$!\n"; unless ($pid) { @@ -914,7 +861,7 @@ sub run_cmd { unless (defined $pid) { warn"$!\n"; return; - } + } unless ($pid) { my $opipe=new IO::Pipe; my $epipe=new IO::Pipe; @@ -934,7 +881,7 @@ sub run_cmd { # my $pid=wait; # my $buffer="X$?"; # $socket->send(pack('n',length($buffer)).$buffer,0); -# exit; +# exit; # }; $opipe->reader(); $epipe->reader(); @@ -979,12 +926,9 @@ sub run_cmd { exit; } } - - } + } } - - #----------- CLP cluster protocol ----------------------------- our $CLP_PORT=235; @@ -1009,7 +953,7 @@ sub clp_connect_request { my $socket=$clp_listen_socket->accept(); $socket->blocking(0); my $peernode=$socket->peerhost; - + my $buffer=''; Donald::Select::reader($socket,\&clp_receive,$socket,\$buffer); @@ -1046,7 +990,6 @@ sub clp_message { $CLP_HANDLER{$handler_name}->($socket,@args) if exists $CLP_HANDLER{$handler_name}; } - sub clp_send_message { # clp_send_message($socket, @args) my ($s,@args)=@_; defined $CLUSTER_PW or return; @@ -1057,7 +1000,6 @@ sub clp_send_message { # clp_send_message($socket, @args) $s->send(pack('n',length($data)).$data); } - sub clp_rx_CMD { my ($socket,@args)=@_; run_cmd($socket,@args); @@ -1065,7 +1007,6 @@ sub clp_rx_CMD { return 1; } - #---------------------------------------------------------- #our $CLUSTER_PW; @@ -1073,7 +1014,6 @@ sub clp_rx_CMD { #our $OLD_CLUSTER_PW_FILE='/root/clusterd.password'; #our $CLUSTER_PW_TIMESTAMP=0; - sub sync_cluster_pw { my $st=Donald::FileInfo->lstat($CLUSTER_PW_FILE); @@ -1115,22 +1055,18 @@ sub sync_cluster_pw { return defined $CLUSTER_PW; } - #------------------------------------------------------------ # area routing - our %AREA_ROUTER= ( lol => '141.14.31.255', # orkrist=> '10.14.0.255', ); - our $area_socket; - sub init_area { exists $AREA_ROUTER{$my_hostname} or return; warn "I am area router for $AREA_ROUTER{$my_hostname}\n"; @@ -1150,7 +1086,6 @@ sub area_message { Donald::Select::reader_requeue(); } - sub udp_broadcast_message { my ($donald_s,@args)=@_; @@ -1268,7 +1203,7 @@ sub lsof { unless (defined $s) { warn "$host: $!\n"; next; - } + } clp_send_message($s,'LSOF',$pattern); my $pbuffer=''; my $olbuffer=''; @@ -1278,7 +1213,6 @@ sub lsof { Donald::Select::run() if $slave;; } - sub lsof_rx { my ($host,$s,$pbufref,$olbufref)=@_; my $data; @@ -1314,7 +1248,6 @@ sub lsof_msg { ($$olbufref)=$msg=~/([^\n]*)\z/; } - sub cmd_rx { my ($host,$s,$pbufref,$olbufref,$elbufref)=@_; my $data; @@ -1365,10 +1298,8 @@ sub cmd_msg { } } - #------------------------------------------------------------ - use constant USAGE => <<'__EOF__'; usage: $0 [options] @@ -1378,11 +1309,11 @@ usage: $0 [options] --send-restart # broadcast a restart request to all nodes --exec mkmotd # execute /usr/sbin/mkmotd.pl on all nodes --flush-gidcache # flush rpc auth.unix.gid cache on all nodes - + --lsof=pattern --kill # try to kill a running server - + --daemon # start a daemon --kill # try to kill previous server first --foreground # stay in foreground, log to stderr @@ -1390,7 +1321,6 @@ usage: $0 [options] __EOF__ - use Getopt::Long; GetOptions ( @@ -1407,7 +1337,6 @@ GetOptions ) or die USAGE; - if (defined $options{'push'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; @@ -1459,7 +1388,7 @@ if (defined $options{'push'}) { clp_init(); sync_cluster_pw() or warn "$CLUSTER_PW_FILE: $!\n"; - + Donald::Select::timeout(60,\&purge_old_receiver); Donald::Select::timeout(rand(60),\&send_stat); Donald::Select::timeout(0,\&sample_rproc) unless Donald::Tools::is_alpha; @@ -1468,7 +1397,7 @@ if (defined $options{'push'}) { Donald::Select::timeout(600,\&check_overload); Donald::Select::timeout(30,\&ping_mlx); - Donald::Select::run(); + Donald::Select::run(); } elsif ($options{'lsof'}) { lsof($options{'lsof'}); } elsif ($options{'kill'}) { From 5e0a491477460af0d755e5e7f2a12e7ad69d1cca Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Tue, 21 Jun 2016 08:41:51 +0200 Subject: [PATCH 14/55] clusterd: change wait to happen before broadcast if we need to throttle --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 386daa9..6454a40 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -369,8 +369,8 @@ sub push_amd_tar { my $data; defined $fh->sysread($data,1024) or return warn "$filename: $!\n"; # warn "send bytes $pos to ",$pos+length($data),"\n"; + ++$i % $BC_RATE or sleep 1; udp_broadcast_message($donald_s,'amdtardata',$st,$pos,$data,$digest); - ++$i % $BC_RATE or sleep 1; } } From 7fdd29c7f95ad12e0d90757f8c50a45a00a76cd6 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Tue, 21 Jun 2016 09:00:33 +0200 Subject: [PATCH 15/55] clusterd: make push_file errors like "to big" fatal --- clusterd/clusterd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 6454a40..e1deb59 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -381,8 +381,8 @@ sub push_file { my $st=Donald::FileInfo->lstat($filename); defined $st or return warn "$filename: $!\n"; - $st->type eq 'F' or return warn "$filename: not a plain file\n"; - $st->size<=40960 or return warn "$filename: to big for broadcast (max 40960 bytes)\n"; + $st->type eq 'F' or die "$filename: not a plain file\n"; + $st->size<=40960 or die "$filename: to big for broadcast (max 40960 bytes)\n"; my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; my $i=0; From 7cd2f868408e5ed61e89380953612693c7d317ce Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Tue, 21 Jun 2016 09:02:13 +0200 Subject: [PATCH 16/55] clusterd: increase push file limit from 5*8*1024 to 8*8*1024 --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index e1deb59..4c7a88a 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -382,7 +382,7 @@ sub push_file { my $st=Donald::FileInfo->lstat($filename); defined $st or return warn "$filename: $!\n"; $st->type eq 'F' or die "$filename: not a plain file\n"; - $st->size<=40960 or die "$filename: to big for broadcast (max 40960 bytes)\n"; + $st->size<=65536 or die "$filename: to big for broadcast (max 65536 bytes)\n"; my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; my $i=0; From 986ab43c3486f87516b8e71286e8be0ad999ac58 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 1 Jul 2016 17:08:01 +0200 Subject: [PATCH 17/55] clusterd: enable push to send empty files --- clusterd/clusterd | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clusterd/clusterd b/clusterd/clusterd index 4c7a88a..5a0d3a7 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -384,6 +384,11 @@ sub push_file { $st->type eq 'F' or die "$filename: not a plain file\n"; $st->size<=65536 or die "$filename: to big for broadcast (max 65536 bytes)\n"; + if ($st->size==0) { + udp_broadcast_message($donald_s,'filedata',$st,0,''); + return; + } + my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; my $i=0; for (my $pos=0;$pos<$st->size;$pos+=1024) { From 62794194c9d65354e9ea25dc7fba3c04b8631e4a Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 1 Jul 2016 17:10:02 +0200 Subject: [PATCH 18/55] clusterd: add lchown() , lmitime() --- clusterd/clusterd | 48 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/clusterd/clusterd b/clusterd/clusterd index 5a0d3a7..5335d89 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -511,6 +511,54 @@ sub udp_rx_amdtardata { system '/sbin/make-automaps'; } +our ($machine,$SYS_lchown,$SYS_mknod,$lmtime_sub); +chomp($machine=`uname -m`); +if ($machine eq 'i686') { + $SYS_lchown=198; # __NR_lchown32 in /usr/include/asm/unistd.h + $SYS_mknod=14; # __NR_mknod +} elsif ($machine eq 'x86_64') { + $SYS_lchown=94; # __NR_lchown in /usr/include/asm-x86_64/unistd.h + $SYS_mknod=133; # __NR_mknod +} elsif ($machine eq 'alpha') { + $SYS_lchown=208; # SYS_lchown in /usr/include/syscall.h + $SYS_mknod=14; # SYS_mknod +} elsif ($machine eq 'amd64') { + $SYS_lchown=254; # SYS_lchown in /usr/include/syscall.h + $SYS_mknod=14; # SYS_mknod +} else { + warn "unknown machine type $machine: symlink ownership can't be set.\n"; + warn "unknown machine type $machine: named pipes,character and block devices can't be created\n"; +} +if ($machine eq 'x86_64') { + our $SYS_utimensat=280; # /usr/include/asm/unistd_64.h + our $AT_FDCWD=-100; # /usr/include/fcntl.h + our $UTIME_OMIT=(1<<30)-2; # /usr/include/bits/stat.h + our $AT_SYMLINK_NOFOLLOW=0x100; # /usr/include/fcntl.h + + $lmtime_sub=sub { + my ($path,$mtime)=@_; + my $tsa=pack 'qqqq',0,$UTIME_OMIT,$mtime,0; + syscall($SYS_utimensat,$AT_FDCWD,$path,$tsa,$AT_SYMLINK_NOFOLLOW)==0 or return warn "$path: failed to lmtime: $!\n"; + } +} else { + $lmtime_sub=sub { + my ($path,$mtime)=@_; + warn "$path: don't known how to change symlink mtime on target architecture\n"; + } +} + +sub lchown { + my ($uid,$gid,$path)=@_; + $SYS_lchown or return; + syscall($SYS_lchown,$path,$uid+0,$gid+0)==0 or return warn "$path: failed to lchown: $!\n"; +} + +sub lmtime { + my ($mtime,$path)=@_; + $lmtime_sub or return; + $lmtime_sub->($path,$mtime); +} + sub udp_rx_filedata { # set rx_filedata_done as a side effect From 1f591278be5636925a91b633024887d6daa495a9 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 1 Jul 2016 17:12:19 +0200 Subject: [PATCH 19/55] clusterd: enable push to send symlink for the rolling upgrade we need to request another rpc version, because the current receiver trusts the sender to send only plain files --- clusterd/clusterd | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 5335d89..c4bd8f5 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -381,23 +381,32 @@ sub push_file { my $st=Donald::FileInfo->lstat($filename); defined $st or return warn "$filename: $!\n"; - $st->type eq 'F' or die "$filename: not a plain file\n"; - $st->size<=65536 or die "$filename: to big for broadcast (max 65536 bytes)\n"; + my $rpc; + if ($st->type eq 'F') { + $rpc='filedata'; + $st->size<=65536 or die "$filename: to big for broadcast (max 65536 bytes)\n"; + if ($st->size==0) { + udp_broadcast_message($donald_s,$rpc,$st,0,''); + return; + } - if ($st->size==0) { - udp_broadcast_message($donald_s,'filedata',$st,0,''); + my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; + my $i=0; + for (my $pos=0;$pos<$st->size;$pos+=1024) { + my $data; + defined $fh->sysread($data,1024) or return warn "$filename: $!\n"; + # warn "send bytes $pos to ",$pos+length($data),"\n"; + udp_broadcast_message($donald_s,$rpc,$st,$pos,$data); + ++$i % $BC_RATE or sleep 1; + } + } elsif ($st->type eq 'L') { + $rpc='filedata.2'; + udp_broadcast_message($donald_s,$rpc,$st,0,''); return; + } else { + die "file type not supported\n"; } - my $fh=new IO::File $filename,'<' or return warn "$filename: $!\n"; - my $i=0; - for (my $pos=0;$pos<$st->size;$pos+=1024) { - my $data; - defined $fh->sysread($data,1024) or return warn "$filename: $!\n"; - # warn "send bytes $pos to ",$pos+length($data),"\n"; - udp_broadcast_message($donald_s,'filedata',$st,$pos,$data); - ++$i % $BC_RATE or sleep 1; - } } our %CMD=( From 2e4c2e22a9efbaa45d74016093c0b96c424b2180 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 1 Jul 2016 17:48:22 +0200 Subject: [PATCH 20/55] clusterd: update receiver to handle symlinks symlinks need to be send to a new rpc "filedata.2" because the old rpc can't handle it. the updated receiver is installed under the old name "filedata" too in case older daemons send push --- clusterd/clusterd | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/clusterd/clusterd b/clusterd/clusterd index c4bd8f5..e657052 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -302,6 +302,7 @@ our ($udp_peer_addr,$udp_peer_port); # ('141.14.12.12',1234) our %UDP_HANDLER= ( 'filedata' => \&udp_rx_filedata, + 'filedata.2' => \&udp_rx_filedata, 'amdtardata' => \&udp_rx_amdtardata, 'loadavg.2' => \&udp_rx_loadavg2, 'restart' => \&udp_rx_restart, @@ -587,6 +588,24 @@ sub udp_rx_filedata { return; } + if ($st_want->type eq 'L') { + if (!$st_is || $st_is->type ne 'L' || $st_is->target ne $st_want->target) { + $st_is and (unlink($filename) or return warn "$filename: failed to unlink: $!\n"); + symlink($st_want->target,$filename) or return warn "$filename: failed to create symlink: $!\n"; + lchown($st_want->uid,$st_want->gid,$filename); + lmtime($st_want->mtime,$filename); + warn "installed $filename -> ".$st_want->target."\n"; + } else { + if ($st_is->uid != $st_want->uid || $st_is->gid != $st_want->gid) { + lchown($st_want->uid,$st_want->gid,$filename); + } + if ($st_is->mtime != $st_want->mtime) { + lmtime($st_want->mtime,$filename); + } + } + return; + } + if (length($data) == $st_want->size) { # complete file in one broadcast my $fh=IO::File->new($tmp_filename,O_WRONLY|O_CREAT,0); From a27bacd3adc846450731d51d558ab09bc1e2da7d Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 2 Jul 2016 16:44:20 +0200 Subject: [PATCH 21/55] clusterd: bump up revision --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index e657052..a577cb4 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -5,7 +5,7 @@ use strict; # https://github.molgen.mpg.de/mariux64/clusterd -our $REVISION='1.105.1'; +our $REVISION='1.106'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); From 630d90a5d54dd5a8b5d3ef9931b40a3ea2b6044f Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Tue, 27 Sep 2016 08:52:33 +0200 Subject: [PATCH 22/55] clusterd: fix shebang to use /usr/bin/perl --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index a577cb4..5ec48f5 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1,4 +1,4 @@ -#! /usr/local/bin/perl +#! /usr/bin/perl use warnings; use strict; From 0ff9634cb2ae3170ef1936eb557ac0647d1019e1 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 29 Dec 2016 14:10:36 +0100 Subject: [PATCH 23/55] clusterd: Implement --exec @node cmd [args...] This command might be usefull, e.g. if sshd is not responding. Examples: clusterd --exec @theinternet restart sshd.service clusterd --exec @theinternet dmesg clusterd --exec @theinternet 'grep sshd /var/log/messages|tail -100' The remote command is executed by bash, so the pipe in the last example is on the remote node stdout and stderr are delivered seperatly and might be redirected to different channels on the local side. If the remote command exits with a non-zero exit status, the local command fails with exit status 1. The slave part already existed, because we once had a remote execution command. We removed it, because it was considered to be to dangerous. The former remote execution command allowed parallel execution on all nodes, which enabled an admin to kill all systems by mistake in an instance. Now we only enable remote execution on a single node. --- clusterd/clusterd | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 5ec48f5..9b01eb3 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1274,6 +1274,22 @@ sub expand_netgroup_hosts { return @out; } +sub exec_at { + my ($host,@cmd)=@_; + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + my $s=new IO::Socket::INET(PeerAddr=>$host,PeerPort=>$CLP_PORT); + unless (defined $s) { + die "$host: $!\n"; + } + clp_send_message($s,'CMD',@cmd); + my $pbuffer=''; + my $olbuffer=''; + my $elbuffer=''; + Donald::Select::reader($s,\&cmd_rx,$host,$s,\$pbuffer,\$olbuffer,\$elbuffer); + $slave=1; + Donald::Select::run() if $slave;; +} + sub lsof { my ($pattern)=@_; sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; @@ -1367,13 +1383,13 @@ sub cmd_msg { } elsif ($channel eq 'O') { my $msg=$$olbufref.substr($data,1); for ($msg=~/([^\n]*\n)/gs) { - print "$host $_"; + print "$host: $_"; } ($$olbufref)=$msg=~/([^\n]*)\z/; } elsif ($channel eq 'E') { my $msg=$$elbufref.substr($data,1); for ($msg=~/([^\n]*\n)/gs) { - print STDERR "$host $_"; + print STDERR "$host: $_"; } ($$elbufref)=$msg=~/([^\n]*)\z/; } @@ -1389,6 +1405,7 @@ usage: $0 [options] --push-amd-tar # broadcast /etc/amd --send-restart # broadcast a restart request to all nodes --exec mkmotd # execute /usr/sbin/mkmotd.pl on all nodes + --exec @node cmd [args...] # execute cmd on node --flush-gidcache # flush rpc auth.unix.gid cache on all nodes --lsof=pattern @@ -1423,9 +1440,15 @@ if (defined $options{'push'}) { $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; push_file($donald_s,$options{'push'}); } elsif (defined $options{'exec'}) { - sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; - $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; - send_exec($donald_s,$options{'exec'}); + if (substr($options{'exec'},0,1) eq '@') { + length(length($options{'exec'})>1) or die USAGE; + @ARGV>=1 or die USAGE; + exec_at(substr($options{'exec'},1),@ARGV); + } else { + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + send_exec($donald_s,$options{'exec'}); + } } elsif (defined $options{'push_amd_tar'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; From fec0a5bd223a7490828473e13916a69b7729a5a7 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 29 Dec 2016 14:51:55 +0100 Subject: [PATCH 24/55] clusterd: remove some debug relicts --- clusterd/clusterd | 9 --------- 1 file changed, 9 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 9b01eb3..151cc01 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1314,7 +1314,6 @@ sub lsof_rx { my ($host,$s,$pbufref,$olbufref)=@_; my $data; defined $s->recv($data,$TCP_MAX*2) or return; - #print "$host : received ",length($data),"\n"; if (!length($data)) { close $s; --$slave; @@ -1325,12 +1324,9 @@ sub lsof_rx { while (1) { last if length($$pbufref)<2; my $l=unpack('n',$$pbufref); - #print "$host : unpacked length $l\n"; last if length($$pbufref)<2+$l; my $msg=substr($$pbufref,2,$l); $$pbufref=substr($$pbufref,2+$l); - #print "$host : remaining pbuf length ",length($$pbufref),"\n"; - #print "$host : message length ",length($msg),"\n"; lsof_msg($host,$s,$msg,$olbufref); } Donald::Select::reader_requeue(); @@ -1355,18 +1351,13 @@ sub cmd_rx { exit $exit_value unless $slave; return; } - #print "$host received ".length($data),"\n"; $$pbufref.=$data; while (1) { last if length($$pbufref)<2; my $l=unpack('n',$$pbufref); - #print "$host : unpacked length $l\n"; - #die "$host: to long" if $l>1024; last if length($$pbufref)<2+$l; my $msg=substr($$pbufref,2,$l); $$pbufref=substr($$pbufref,2+$l); - #print "$host : remaining pbuf length ",length($$pbufref),"\n"; - #print "$host : message length ",length($msg),"\n"; cmd_msg($host,$s,$msg,$olbufref,$elbufref); } Donald::Select::reader_requeue(); From 724d5be505bfc6d6b4c337217b861b3599e65c69 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 29 Dec 2016 14:52:33 +0100 Subject: [PATCH 25/55] clusterd: bump up version --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 151cc01..2d160ee 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -5,7 +5,7 @@ use strict; # https://github.molgen.mpg.de/mariux64/clusterd -our $REVISION='1.106'; +our $REVISION='1.107'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); From c15e01879e80d9ffc8fa7b85d5a3bba85a7eeefe Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 15 Feb 2017 15:20:51 +0100 Subject: [PATCH 26/55] clusterd: implement --make-automaps --- clusterd/clusterd | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 2d160ee..624ec61 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -5,7 +5,7 @@ use strict; # https://github.molgen.mpg.de/mariux64/clusterd -our $REVISION='1.107'; +our $REVISION='1.108'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); @@ -307,6 +307,7 @@ our %UDP_HANDLER= 'loadavg.2' => \&udp_rx_loadavg2, 'restart' => \&udp_rx_restart, 'flush-gidcache' => \&udp_rx_flush_gidcache, + 'make-automaps' => \&udp_rx_make_automaps, 'log' => \&udp_rx_log, 'exec' => \&udp_rx_exec, ); @@ -797,6 +798,15 @@ sub udp_rx_flush_gidcache { } } +sub udp_rx_make_automaps { + if (open my $out,'>','/proc/net/rpc/auth.unix.gid/flush') { + print $out time(); + } else { + warn "proc/net/rpc/auth.unix.gid/flush: $!\n"; + } + system '/sbin/make-automaps'; +} + #----------- tcp mgmt console ----------------------------- our $MGMT_PORT=234; @@ -1398,6 +1408,7 @@ usage: $0 [options] --exec mkmotd # execute /usr/sbin/mkmotd.pl on all nodes --exec @node cmd [args...] # execute cmd on node --flush-gidcache # flush rpc auth.unix.gid cache on all nodes + --make-automaps # execute /usr/sbin/make-automaps on all nodes --lsof=pattern @@ -1422,6 +1433,7 @@ GetOptions 'push-amd-tar' => \$options{'push_amd_tar'}, 'send-restart' => \$options{'send-restart'}, 'flush-gidcache' => \$options{'flush-gidcache'}, + 'make-automaps' => \$options{'make-automaps'}, 'lsof=s' => \$options{'lsof'}, ) or die USAGE; @@ -1452,6 +1464,10 @@ if (defined $options{'push'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; udp_broadcast_message($donald_s,'flush-gidcache'); +} elsif (defined $options{'make-automaps'}) { + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + udp_broadcast_message($donald_s,'make-automaps'); } elsif (defined $options{'daemon'}) { $options{'kill'} and Donald::Tools::kill_previous_server('clusterd') and sleep 2; From 4cc68dc9986d6c302aa9c2bc118241be0bc1fbf5 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 18 Oct 2017 07:48:47 +0200 Subject: [PATCH 27/55] clusterd: Clean up indentation mix --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 624ec61..3e439b2 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -591,7 +591,7 @@ sub udp_rx_filedata { if ($st_want->type eq 'L') { if (!$st_is || $st_is->type ne 'L' || $st_is->target ne $st_want->target) { - $st_is and (unlink($filename) or return warn "$filename: failed to unlink: $!\n"); + $st_is and (unlink($filename) or return warn "$filename: failed to unlink: $!\n"); symlink($st_want->target,$filename) or return warn "$filename: failed to create symlink: $!\n"; lchown($st_want->uid,$st_want->gid,$filename); lmtime($st_want->mtime,$filename); From fcbb6094ce62fb7546648c4353b4c73a90d08fdc Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 18 Oct 2017 14:00:32 +0200 Subject: [PATCH 28/55] clusterd: Dereference CLP tcp socket after hangup We have a small leak here, that clp sockets keep referenced after the peer closed. Fix. --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 3e439b2..5a9c30e 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1056,7 +1056,7 @@ sub clp_receive { my $data; defined $s->recv($data,$TCP_MAX) or return; if (!length($data) ) { - delete $mgmt_sockets{$s}; + delete $clp_sockets{$s}; $s->close; return; } From 2f547a781d914c642ad8ec22b4f94eda715a41aa Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 18 Oct 2017 15:04:08 +0200 Subject: [PATCH 29/55] clusterd: Remove mlx ping --- clusterd/clusterd | 9 --------- 1 file changed, 9 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 5a9c30e..e813978 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1251,14 +1251,6 @@ sub check_overload() { } } -sub ping_mlx { - Donald::Select::timeout_requeue(86400); - system 'ping','-c','1','mlx'; - if ($?) { - warn "failed to ping mlx\n"; - } -} - ##################################################### my $slave=0; @@ -1506,7 +1498,6 @@ if (defined $options{'push'}) { $my_hostname eq $STAT_TARGET and My::Cluster::Updown::init(); $my_hostname eq 'lol' and My::NetlogReceiver::init(); Donald::Select::timeout(600,\&check_overload); - Donald::Select::timeout(30,\&ping_mlx); Donald::Select::run(); } elsif ($options{'lsof'}) { From a651c04f3b0aa37daa6978f7563a02a4cbadadb4 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 2 Dec 2017 11:12:43 +0100 Subject: [PATCH 30/55] clusterd: Import Donald::Select into script The design of Donald::Select makes use of Donald::Callback objects, which are objects conainting a sub and call arguments. However, because of the pattern sub do_something_later() { my ($cb_or_sub,@args)=@_; $store_callback_somehere=new Donald::Callback($cb_or_sub,@args); } do_something_later(\&callback,$arg1,$arg2) the caller doesn't have a reference to the Donald::Callback object, which makes its diffucult to identify it, e.g. to cancel the callback. We want to change the design to accept only references to subs as callbacks. Instead of passing arguments, we exepect the caller to make use of closures to pass data to the callback if needed. sub do_something_later() { my ($cb)=@_; $store_callback_somewhere=$cb; } do_something_later(sub{callback($arg1,$args)}); Instead of changing the API of Donald::Select, we import the code directly into clusterd to make the modifications here. --- clusterd/clusterd | 212 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 210 insertions(+), 2 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index e813978..8d865b4 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -10,12 +10,220 @@ our $REVISION='1.108'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); use Donald::Tools qw(encode sign check_sign decode); -use Donald::Select; use Donald::FileInfo; -use Donald::Select::INET; use POSIX; use IO::Pipe; +#-------------------------------------- +package Donald::Select; + +use warnings; +use strict; + +our $VERSION = '1.00'; + +use Donald::Callback 1.01 ; +use Donald::Tools; + +our $time=Donald::Tools::uptime(); + +sub Donald::Select::time +{ + return $time; +} + +#----------------------------------------- + +our @TIMER=(); # ( [duetime,cb] , ) sorted by time +our $active_timer_cb; + +sub timeout # cb=Select::timeout(seconds,cb) or cb=Select::Timeout(seconds,subref [,args,...]) +{ + my ($delta,$cb_or_sub,@args)=@_; + my $cb=new Donald::Callback($cb_or_sub,@args); + + my $duetime=$time+$delta; + @TIMER=sort {$a->[0] <=> $b->[0]} ( [$duetime,$cb] , @TIMER ); + return $cb; +} + +sub timeout_cancel # cb=Select::timeout_cancel(cb) +{ + my ($cb)=@_; + @TIMER=grep {$_->[1] != $cb && $_->[1]->[0] != $cb} @TIMER; + return $cb; +} + +sub timeout_requeue {timeout(shift,$active_timer_cb); } # Select::timeout_requeue(seconds) + +#------------------------------------ + +our @READER; # ( [Handle,cb] , ... ) +our @WRITER; +our @EXCEPT; +our $active_io; # [Handle,cb] + +sub reader # cb = Select::reader(Handle,cb) +{ + my ($handle,$cb_or_sub,@args)=@_; + my $cb=new Donald::Callback($cb_or_sub,@args); + push @READER,[$handle,$cb]; + return $cb; +} + +sub writer # cb = Select::writer(Handle,cb) +{ + my ($handle,$cb_or_sub,@args)=@_; + my $cb=new Donald::Callback($cb_or_sub,@args); + push @WRITER,[$handle,$cb]; + return $cb; +} + +sub except # cb = Select::except(Handle,cb) +{ + my ($handle,$cb_or_sub,@args)=@_; + my $cb=new Donald::Callback($cb_or_sub,@args); + push @EXCEPT,[$handle,$cb]; + return $cb; +} + +sub reader_requeue {push @READER,$active_io} +sub writer_requeue {push @WRITER,$active_io} +sub except_requeue {push @EXCEPT,$active_io} + +sub cancel # $cb = Select::cancel([cb]) +{ + my ($cb)=@_; + defined $cb or $cb=$active_io->[1]; + @READER=grep {$_->[1] != $cb} @READER; + @WRITER=grep {$_->[1] != $cb} @WRITER; + @EXCEPT=grep {$_->[1] != $cb} @EXCEPT; +} + +sub cancel_handle +{ + my ($handle)=@_; + @READER=grep {$_->[0] != $handle} @READER; + @WRITER=grep {$_->[0] != $handle} @WRITER; + @EXCEPT=grep {$_->[0] != $handle} @EXCEPT; +} + +#-------------------------------------------- + +sub heartbeat +{ + $time++; + while (@TIMER && $TIMER[0]->[0]<=$time) { + $active_timer_cb=(shift @TIMER)->[1]; + $active_timer_cb->call(); + } + $active_timer_cb=undef; +} + +sub run +{ + while (1) { + $time=Donald::Tools::uptime(); + while (@TIMER && $TIMER[0]->[0]<=$time) { + $active_timer_cb=(shift @TIMER)->[1]; + $active_timer_cb->call(); + } + $active_timer_cb=undef; + + my ($rvec,$wvec,$evec)=('','',''); + + for (@READER) { vec($rvec,$_->[0]->fileno,1)=1 } ; + for (@WRITER) { vec($wvec,$_->[0]->fileno,1)=1 } ; + for (@EXCEPT) { vec($evec,$_->[0]->fileno,1)=1 } ; + + my $ready=select($rvec,$wvec,$evec,1); + if ($ready>0) { + for (my $i=0;$i<@READER;$i++) { + if (vec($rvec,$READER[$i]->[0]->fileno,1)) { + $active_io=splice @READER,$i,1; + $active_io->[1]->call(); + $active_io=undef; + last; + } + } + for (my $i=0;$i<@WRITER;$i++) { + if (vec($wvec,$WRITER[$i]->[0]->fileno,1)) { + $active_io=splice @WRITER,$i,1; + $active_io->[1]->call(); + $active_io=undef; + last; + } + } + for (my $i=0;$i<@EXCEPT;$i++) { + if (vec($evec,$EXCEPT[$i]->[0]->fileno,1)) { + $active_io=splice @EXCEPT,$i,1; + $active_io->[1]->call(); + $active_io=undef; + last; + } + } + } + } +} + +1; +#-------------------------------------- +package Donald::Select::INET ; + +use warnings; +use strict; + +use Carp; +use IO::Socket::INET; +use Digest::MD5; +use Storable; + +our $VERSION = '1.00'; + +our $UDP_MAX=1472; # for broadcast on alphas + + +sub new # ( Proto=>'udp',Broadcast=>1,LocalPort=>$UDP_PORT ) +{ + my ($class,@args)=@_; + our $socket=new IO::Socket::INET (@args) or return undef; + return bless \$socket,$class; +} + + +sub send_data +{ + my ($self,$ip,$port,$data)=@_; + my $ip_address=inet_aton($ip); + unless (defined $ip_address) {carp("can't resolve $ip\n");return undef} + unless (length($data)<=$UDP_MAX) {carp("message to long\n");return undef} + $$self->send($data,0,pack_sockaddr_in($port,$ip_address)) or carp "$!\n"; +} + + +sub reader +{ + my ($self,$sub,@args)=@_; + Donald::Select::reader($$self,$sub,@args); +} + +sub receive_data +{ + my ($self,$sub,@args)=@_; + Donald::Select::reader($$self,\&receive_data_cb,$self,$sub,@args); +} + +sub receive_data_cb +{ + my ($self,$sub,@args)=@_; + my $data; + my $peer = $$self->recv($data,$UDP_MAX); + my ($udp_peer_port,$peer_iaddr)=unpack_sockaddr_in($peer); + my $udp_peer_addr=inet_ntoa($peer_iaddr); + Donald::Select::reader_requeue(); + $sub->($data,$udp_peer_addr,$udp_peer_port,@args); +} + #-------------------------------------- package My::Cluster::Updown; From 6d2fa4ef55397cc606a0af3635eef142494b6129 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 2 Dec 2017 11:28:18 +0100 Subject: [PATCH 31/55] clusterd: Rename Donald::Select to My::Select Use the namespace My:: for packages declared inside the main script. --- clusterd/clusterd | 112 +++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 8d865b4..150f802 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -15,7 +15,7 @@ use POSIX; use IO::Pipe; #-------------------------------------- -package Donald::Select; +package My::Select; use warnings; use strict; @@ -27,7 +27,7 @@ use Donald::Tools; our $time=Donald::Tools::uptime(); -sub Donald::Select::time +sub My::Select::time { return $time; } @@ -168,7 +168,7 @@ sub run 1; #-------------------------------------- -package Donald::Select::INET ; +package My::Select::INET ; use warnings; use strict; @@ -204,13 +204,13 @@ sub send_data sub reader { my ($self,$sub,@args)=@_; - Donald::Select::reader($$self,$sub,@args); + My::Select::reader($$self,$sub,@args); } sub receive_data { my ($self,$sub,@args)=@_; - Donald::Select::reader($$self,\&receive_data_cb,$self,$sub,@args); + My::Select::reader($$self,\&receive_data_cb,$self,$sub,@args); } sub receive_data_cb @@ -220,7 +220,7 @@ sub receive_data_cb my $peer = $$self->recv($data,$UDP_MAX); my ($udp_peer_port,$peer_iaddr)=unpack_sockaddr_in($peer); my $udp_peer_addr=inet_ntoa($peer_iaddr); - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); $sub->($data,$udp_peer_addr,$udp_peer_port,@args); } @@ -312,13 +312,13 @@ sub msg_state { sub init { restore_state; msg_text('node monitor: started. (recovery mode)'); - Donald::Select::timeout(630,sub{$MONITOR_STARTING=0;msg_text('node monitor: recovery finished');}); - Donald::Select::timeout(630,\&timeout_hosts); + My::Select::timeout(630,sub{$MONITOR_STARTING=0;msg_text('node monitor: recovery finished');}); + My::Select::timeout(630,\&timeout_hosts); } sub timeout_hosts { - Donald::Select::timeout_requeue(60); - my $timeout=Donald::Select::time()-1230; # 2x10 minutes + 30 seconds + My::Select::timeout_requeue(60); + my $timeout=My::Select::time()-1230; # 2x10 minutes + 30 seconds for (keys %H) { my $h=$H{$_}; @@ -343,7 +343,7 @@ sub rx_hostannounce { } else { $MONITOR_STARTING or msg_state('NEW','UP',$host,"discovered running node $version - $unixrev"); } - $H{$host}=['UP',Donald::Select::time,$seq+1,@more]; + $H{$host}=['UP',My::Select::time,$seq+1,@more]; } else { my $h=$H{$host}; if ($h->[0] eq 'UP') { @@ -371,7 +371,7 @@ sub rx_hostannounce { msg_state('DOWN','UP',$host,$seq-$h->[2]." packet(s) lost"); } } - @$h=('UP',Donald::Select::time,$seq+1,@more); + @$h=('UP',My::Select::time,$seq+1,@more); } } @@ -404,12 +404,12 @@ sub bigben { sub bigben_timer { bigben(); - Donald::Select::timeout_requeue(30); + My::Select::timeout_requeue(30); } sub bigben_init { $DAY_LAST_MSG=day(); - Donald::Select::timeout(30,\&bigben_timer); + My::Select::timeout(30,\&bigben_timer); } @@ -435,24 +435,24 @@ sub receive { $|=1; warn "NETLOG $msg\n" unless $msg=~/NETLOG/; } - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); } sub connect_request { - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); my $socket=$listen_socket->accept(); $socket->blocking(0); my $peernode=$socket->peerhost; my $buffer=''; - Donald::Select::reader($socket,\&receive,$socket,$peernode,\$buffer); + My::Select::reader($socket,\&receive,$socket,$peernode,\$buffer); # warn "$peernode: connect\n"; } sub init { $listen_socket=new IO::Socket::INET(Proto=>'tcp',LocalPort=>1028,Listen=>1,ReuseAddr=>1) or die "$!\n"; - Donald::Select::reader($listen_socket,\&connect_request); + My::Select::reader($listen_socket,\&connect_request); bigben_init(); } @@ -470,7 +470,7 @@ our $BC_RATE=8; # packets per second broadcast our (%options); # RUN OPTIONS -our $donald_s; # Donald::Select::INET udp socket +our $donald_s; # My::Select::INET udp socket our $my_hostname; our $my_ip; # '141.14.12.12' @@ -686,13 +686,13 @@ my %RECEIVER; # ( filename => $receiver, .... ) sub purge_old_receiver { while (my ($n,$v)=each %RECEIVER) { - if ($v->[1]+10[1]+10[0]->name,"\n"; log_to_stat_target('timeout receiving ',$v->[0]->name); delete $RECEIVER{$n}; } } - Donald::Select::timeout_requeue(60); + My::Select::timeout_requeue(60); } #------------------------------------------------------------- @@ -847,7 +847,7 @@ sub udp_rx_filedata { -e $tmp_filename and unlink($tmp_filename); my $fh=IO::File->new($tmp_filename,O_WRONLY|O_CREAT,0); defined $fh or return warn "$tmp_filename: $!\n"; - $receiver = [$st_want,Donald::Select::time,$fh,[]]; + $receiver = [$st_want,My::Select::time,$fh,[]]; $RECEIVER{$filename}=$receiver; } @@ -857,7 +857,7 @@ sub udp_rx_filedata { # write data ( size cant be 0 here ) $receiver->[2]->seek($pos,0) or return warn "$tmp_filename: $!\n"; $receiver->[2]->syswrite($data) or return warn "$tmp_filename: $!\n";; - $receiver->[1]=Donald::Select::time; + $receiver->[1]=My::Select::time; my $s=$receiver->[3]=[normalize_seg(@{$receiver->[3]},[$pos,length($data)])]; #warn "$filename: receive $pos length ",length($data)," segments now: ",join(",",map( {'['.$_->[0].','.$_->[1].']'} @{$receiver->[3]})),"\n"; @@ -922,7 +922,7 @@ sub sample_rproc { # every 5 seconds @proc_running_10=(@proc_running_10[1..@proc_running_10-1],running_proc()); $SAMPLE_TICK=0; } - Donald::Select::timeout_requeue(5); + My::Select::timeout_requeue(5); } #----------------------------------------------------------- stat @@ -963,7 +963,7 @@ sub loadavg { # AXP : (system load average) , LINUX: (system load average, pl our $STAT_TARGET='lol'; our $STAT_SEQ=0; sub send_stat { - Donald::Select::timeout_requeue(600); + My::Select::timeout_requeue(600); my ($load_avg,$pload,$pcapacity)=loadavg(); defined $load_avg or return; udp_send_message($STAT_TARGET,'loadavg.2',$my_hostname,$STAT_SEQ++,$load_avg,version_info(),$pload,$pcapacity,$my_unixrev); @@ -1025,12 +1025,12 @@ our %mgmt_sockets; sub mgmt_init { $mgmt_listen_socket=new IO::Socket::INET(LocalPort=>$MGMT_PORT,Proto=>'tcp',Listen=>1,ReuseAddr=>1); defined $mgmt_listen_socket or die "$!\n"; - Donald::Select::reader($mgmt_listen_socket,\&mgmt_connect_request); + My::Select::reader($mgmt_listen_socket,\&mgmt_connect_request); } sub mgmt_connect_request { - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); # listen socket ready @@ -1040,7 +1040,7 @@ sub mgmt_connect_request { ### warn "accepted mgmt connection from $peernode\n"; - Donald::Select::reader($socket,\&mgmt_receive,$socket); + My::Select::reader($socket,\&mgmt_receive,$socket); $mgmt_sockets{$socket}=$socket; $socket->print("clusterd ".version_info()." stupid console\n"); $socket->print("For historical messages, grep \"clusterd\" from /var/log/messages on $STAT_TARGET (or \"tail -f /var/log/messages |grep cluster\")\n"); @@ -1057,7 +1057,7 @@ sub mgmt_receive { return; } - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); $data=~s/\r?\n$//; length $data or return; if ($data eq 'l') { @@ -1240,12 +1240,12 @@ our %CLP_HANDLER=('CMD'=>\&clp_rx_CMD,'LSOF'=>\&clp_rx_LSOF); sub clp_init { $clp_listen_socket=new IO::Socket::INET(LocalPort=>$CLP_PORT,Proto=>'tcp',Listen=>1,ReuseAddr=>1); defined $clp_listen_socket or die "$!\n"; - Donald::Select::reader($clp_listen_socket,\&clp_connect_request); + My::Select::reader($clp_listen_socket,\&clp_connect_request); } sub clp_connect_request { - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); # listen socket ready @@ -1255,7 +1255,7 @@ sub clp_connect_request { my $buffer=''; - Donald::Select::reader($socket,\&clp_receive,$socket,\$buffer); + My::Select::reader($socket,\&clp_receive,$socket,\$buffer); $clp_sockets{$socket}=$socket; } @@ -1277,7 +1277,7 @@ sub clp_receive { $$bufref=substr($$bufref,2+$l); clp_message($s,$msg) and return; } - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); } sub clp_message { @@ -1350,7 +1350,7 @@ sub sync_cluster_pw { defined $CLUSTER_PW and warn "$CLUSTER_PW_FILE: $!\n"; $CLUSTER_PW=undef; } - Donald::Select::timeout(60,\&sync_cluster_pw); + My::Select::timeout(60,\&sync_cluster_pw); return defined $CLUSTER_PW; } @@ -1371,7 +1371,7 @@ sub init_area { warn "I am area router for $AREA_ROUTER{$my_hostname}\n"; $area_socket=new IO::Socket::INET (Proto=>'udp',LocalPort=>$UDP_PORT+1) or die "$!\n"; - Donald::Select::reader($area_socket,\&area_message,$area_socket); + My::Select::reader($area_socket,\&area_message,$area_socket); } sub area_message { @@ -1382,7 +1382,7 @@ sub area_message { my $udp_peer_addr=inet_ntoa($peer_iaddr); $donald_s->send_data($AREA_ROUTER{$my_hostname},$UDP_PORT,$data); # broadcast to our network - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); } sub udp_broadcast_message { @@ -1416,7 +1416,7 @@ sub check_progfile_status { else { $PROG_MTIME=$f[9]; } - Donald::Select::timeout(60,\&check_progfile_status); + My::Select::timeout(60,\&check_progfile_status); } sub version_info { # 'V1.31 - 20090617-155314' @@ -1451,7 +1451,7 @@ sub pload_debug { } sub check_overload() { - Donald::Select::timeout_requeue(600); + My::Select::timeout_requeue(600); if ($CPUS) { my $running_proc_10=running_proc_10(); my $pload_10=running_proc_10()/$CPUS; @@ -1495,9 +1495,9 @@ sub exec_at { my $pbuffer=''; my $olbuffer=''; my $elbuffer=''; - Donald::Select::reader($s,\&cmd_rx,$host,$s,\$pbuffer,\$olbuffer,\$elbuffer); + My::Select::reader($s,\&cmd_rx,$host,$s,\$pbuffer,\$olbuffer,\$elbuffer); $slave=1; - Donald::Select::run() if $slave;; + My::Select::run() if $slave;; } sub lsof { @@ -1514,10 +1514,10 @@ sub lsof { clp_send_message($s,'LSOF',$pattern); my $pbuffer=''; my $olbuffer=''; - Donald::Select::reader($s,\&lsof_rx,$host,$s,\$pbuffer,\$olbuffer); + My::Select::reader($s,\&lsof_rx,$host,$s,\$pbuffer,\$olbuffer); $slave++; } - Donald::Select::run() if $slave;; + My::Select::run() if $slave;; } sub lsof_rx { @@ -1539,7 +1539,7 @@ sub lsof_rx { $$pbufref=substr($$pbufref,2+$l); lsof_msg($host,$s,$msg,$olbufref); } - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); } sub lsof_msg { @@ -1570,7 +1570,7 @@ sub cmd_rx { $$pbufref=substr($$pbufref,2+$l); cmd_msg($host,$s,$msg,$olbufref,$elbufref); } - Donald::Select::reader_requeue(); + My::Select::reader_requeue(); } sub cmd_msg { @@ -1640,7 +1640,7 @@ GetOptions if (defined $options{'push'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; - $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + $donald_s=new My::Select::INET(Proto=>'udp') or die "$!\n"; push_file($donald_s,$options{'push'}); } elsif (defined $options{'exec'}) { if (substr($options{'exec'},0,1) eq '@') { @@ -1649,31 +1649,31 @@ if (defined $options{'push'}) { exec_at(substr($options{'exec'},1),@ARGV); } else { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; - $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + $donald_s=new My::Select::INET(Proto=>'udp') or die "$!\n"; send_exec($donald_s,$options{'exec'}); } } elsif (defined $options{'push_amd_tar'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; - $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + $donald_s=new My::Select::INET(Proto=>'udp') or die "$!\n"; push_amd_tar($donald_s); } elsif (defined $options{'send-restart'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; - $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + $donald_s=new My::Select::INET(Proto=>'udp') or die "$!\n"; udp_broadcast_message($donald_s,'restart'); } elsif (defined $options{'flush-gidcache'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; - $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + $donald_s=new My::Select::INET(Proto=>'udp') or die "$!\n"; udp_broadcast_message($donald_s,'flush-gidcache'); } elsif (defined $options{'make-automaps'}) { sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; - $donald_s=new Donald::Select::INET(Proto=>'udp') or die "$!\n"; + $donald_s=new My::Select::INET(Proto=>'udp') or die "$!\n"; udp_broadcast_message($donald_s,'make-automaps'); } elsif (defined $options{'daemon'}) { $options{'kill'} and Donald::Tools::kill_previous_server('clusterd') and sleep 2; $SIG{PIPE}='IGNORE'; - $donald_s=new Donald::Select::INET(Proto=>'udp',Broadcast=>1,LocalPort=>$UDP_PORT) or die "$!\n"; + $donald_s=new My::Select::INET(Proto=>'udp',Broadcast=>1,LocalPort=>$UDP_PORT) or die "$!\n"; $donald_s->receive_data(\&udp_message,$donald_s); unless ($options{'foreground'}) { @@ -1700,14 +1700,14 @@ if (defined $options{'push'}) { sync_cluster_pw() or warn "$CLUSTER_PW_FILE: $!\n"; - Donald::Select::timeout(60,\&purge_old_receiver); - Donald::Select::timeout(rand(60),\&send_stat); - Donald::Select::timeout(0,\&sample_rproc) unless Donald::Tools::is_alpha; + My::Select::timeout(60,\&purge_old_receiver); + My::Select::timeout(rand(60),\&send_stat); + My::Select::timeout(0,\&sample_rproc) unless Donald::Tools::is_alpha; $my_hostname eq $STAT_TARGET and My::Cluster::Updown::init(); $my_hostname eq 'lol' and My::NetlogReceiver::init(); - Donald::Select::timeout(600,\&check_overload); + My::Select::timeout(600,\&check_overload); - Donald::Select::run(); + My::Select::run(); } elsif ($options{'lsof'}) { lsof($options{'lsof'}); } elsif ($options{'kill'}) { From 01c0d8f3ed4206cb3c33fb57e1e3cb8ffcdce308 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 2 Dec 2017 11:36:21 +0100 Subject: [PATCH 32/55] clusterd: Remove declarations no longer needeid after import We imported some lines which do no longer have a function now. Remove them. --- clusterd/clusterd | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 150f802..01a8acc 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -17,11 +17,6 @@ use IO::Pipe; #-------------------------------------- package My::Select; -use warnings; -use strict; - -our $VERSION = '1.00'; - use Donald::Callback 1.01 ; use Donald::Tools; @@ -166,20 +161,14 @@ sub run } } -1; #-------------------------------------- package My::Select::INET ; -use warnings; -use strict; - use Carp; use IO::Socket::INET; use Digest::MD5; use Storable; -our $VERSION = '1.00'; - our $UDP_MAX=1472; # for broadcast on alphas From 8b373b2147f27e357ce7003e1f56f87e517f6b9f Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 2 Dec 2017 11:38:05 +0100 Subject: [PATCH 33/55] clusterd: Remove unused sub heartbeat. --- clusterd/clusterd | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 01a8acc..3f5fb5e 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -105,16 +105,6 @@ sub cancel_handle #-------------------------------------------- -sub heartbeat -{ - $time++; - while (@TIMER && $TIMER[0]->[0]<=$time) { - $active_timer_cb=(shift @TIMER)->[1]; - $active_timer_cb->call(); - } - $active_timer_cb=undef; -} - sub run { while (1) { From 1c4fb1c52ca03cd51cd10374130c17fed289cb63 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 2 Dec 2017 11:44:20 +0100 Subject: [PATCH 34/55] clusterd: Remove/fix some comments Non-functional change to bring source into sync with another working branch. --- clusterd/clusterd | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 3f5fb5e..fd857e1 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -27,9 +27,7 @@ sub My::Select::time return $time; } -#----------------------------------------- - -our @TIMER=(); # ( [duetime,cb] , ) sorted by time +our @TIMER=(); # ( [duetime,cb] , ... ) sorted by time our $active_timer_cb; sub timeout # cb=Select::timeout(seconds,cb) or cb=Select::Timeout(seconds,subref [,args,...]) @@ -86,7 +84,7 @@ sub reader_requeue {push @READER,$active_io} sub writer_requeue {push @WRITER,$active_io} sub except_requeue {push @EXCEPT,$active_io} -sub cancel # $cb = Select::cancel([cb]) +sub cancel # $cb = Select::cancel(cb) { my ($cb)=@_; defined $cb or $cb=$active_io->[1]; @@ -103,8 +101,6 @@ sub cancel_handle @EXCEPT=grep {$_->[0] != $handle} @EXCEPT; } -#-------------------------------------------- - sub run { while (1) { From 27c6521c0e0115673bce1362f3e7da8a7ef05bb6 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 2 Dec 2017 12:44:19 +0100 Subject: [PATCH 35/55] clusterd: Remove My::Callback Use refenerces to subs as callback arguments. If the caller wants to pass additional arguments, he can use closures. --- clusterd/clusterd | 67 ++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index fd857e1..5d15339 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -14,10 +14,9 @@ use Donald::FileInfo; use POSIX; use IO::Pipe; -#-------------------------------------- +#------------------------------------- package My::Select; -use Donald::Callback 1.01 ; use Donald::Tools; our $time=Donald::Tools::uptime(); @@ -30,11 +29,9 @@ sub My::Select::time our @TIMER=(); # ( [duetime,cb] , ... ) sorted by time our $active_timer_cb; -sub timeout # cb=Select::timeout(seconds,cb) or cb=Select::Timeout(seconds,subref [,args,...]) +sub timeout # cb=Select::timeout(seconds,cb) { - my ($delta,$cb_or_sub,@args)=@_; - my $cb=new Donald::Callback($cb_or_sub,@args); - + my ($delta,$cb)=@_; my $duetime=$time+$delta; @TIMER=sort {$a->[0] <=> $b->[0]} ( [$duetime,$cb] , @TIMER ); return $cb; @@ -43,7 +40,7 @@ sub timeout # cb=Select::timeout(seconds,cb) or cb=Select::Timeout(seconds,sub sub timeout_cancel # cb=Select::timeout_cancel(cb) { my ($cb)=@_; - @TIMER=grep {$_->[1] != $cb && $_->[1]->[0] != $cb} @TIMER; + @TIMER=grep {$_->[1] != $cb} @TIMER; return $cb; } @@ -58,24 +55,21 @@ our $active_io; # [Handle,cb] sub reader # cb = Select::reader(Handle,cb) { - my ($handle,$cb_or_sub,@args)=@_; - my $cb=new Donald::Callback($cb_or_sub,@args); + my ($handle,$cb)=@_; push @READER,[$handle,$cb]; return $cb; } sub writer # cb = Select::writer(Handle,cb) { - my ($handle,$cb_or_sub,@args)=@_; - my $cb=new Donald::Callback($cb_or_sub,@args); + my ($handle,$cb)=@_; push @WRITER,[$handle,$cb]; return $cb; } sub except # cb = Select::except(Handle,cb) { - my ($handle,$cb_or_sub,@args)=@_; - my $cb=new Donald::Callback($cb_or_sub,@args); + my ($handle,$cb)=@_; push @EXCEPT,[$handle,$cb]; return $cb; } @@ -107,7 +101,7 @@ sub run $time=Donald::Tools::uptime(); while (@TIMER && $TIMER[0]->[0]<=$time) { $active_timer_cb=(shift @TIMER)->[1]; - $active_timer_cb->call(); + $active_timer_cb->(); } $active_timer_cb=undef; @@ -122,7 +116,7 @@ sub run for (my $i=0;$i<@READER;$i++) { if (vec($rvec,$READER[$i]->[0]->fileno,1)) { $active_io=splice @READER,$i,1; - $active_io->[1]->call(); + $active_io->[1]->(); $active_io=undef; last; } @@ -130,7 +124,7 @@ sub run for (my $i=0;$i<@WRITER;$i++) { if (vec($wvec,$WRITER[$i]->[0]->fileno,1)) { $active_io=splice @WRITER,$i,1; - $active_io->[1]->call(); + $active_io->[1]->(); $active_io=undef; last; } @@ -138,7 +132,7 @@ sub run for (my $i=0;$i<@EXCEPT;$i++) { if (vec($evec,$EXCEPT[$i]->[0]->fileno,1)) { $active_io=splice @EXCEPT,$i,1; - $active_io->[1]->call(); + $active_io->[1]->(); $active_io=undef; last; } @@ -157,7 +151,6 @@ use Storable; our $UDP_MAX=1472; # for broadcast on alphas - sub new # ( Proto=>'udp',Broadcast=>1,LocalPort=>$UDP_PORT ) { my ($class,@args)=@_; @@ -178,25 +171,23 @@ sub send_data sub reader { - my ($self,$sub,@args)=@_; - My::Select::reader($$self,$sub,@args); + my ($self,$sub)=@_; + My::Select::reader($$self,$sub); } sub receive_data { - my ($self,$sub,@args)=@_; - My::Select::reader($$self,\&receive_data_cb,$self,$sub,@args); -} + my ($self,$cb,@args)=@_; -sub receive_data_cb -{ - my ($self,$sub,@args)=@_; - my $data; - my $peer = $$self->recv($data,$UDP_MAX); - my ($udp_peer_port,$peer_iaddr)=unpack_sockaddr_in($peer); - my $udp_peer_addr=inet_ntoa($peer_iaddr); - My::Select::reader_requeue(); - $sub->($data,$udp_peer_addr,$udp_peer_port,@args); + my $receive_data_cb=sub { + my $data; + my $peer = $$self->recv($data,$UDP_MAX); + my ($udp_peer_port,$peer_iaddr)=unpack_sockaddr_in($peer); + my $udp_peer_addr=inet_ntoa($peer_iaddr); + My::Select::reader_requeue(); + $cb->($data,$udp_peer_addr,$udp_peer_port); + }; + My::Select::reader($$self,$receive_data_cb); } #-------------------------------------- @@ -421,7 +412,7 @@ sub connect_request { my $peernode=$socket->peerhost; my $buffer=''; - My::Select::reader($socket,\&receive,$socket,$peernode,\$buffer); + My::Select::reader($socket,sub{receive($socket,$peernode,\$buffer)}); # warn "$peernode: connect\n"; } @@ -1015,7 +1006,7 @@ sub mgmt_connect_request { ### warn "accepted mgmt connection from $peernode\n"; - My::Select::reader($socket,\&mgmt_receive,$socket); + My::Select::reader($socket,sub{mgmt_receive($socket)}); $mgmt_sockets{$socket}=$socket; $socket->print("clusterd ".version_info()." stupid console\n"); $socket->print("For historical messages, grep \"clusterd\" from /var/log/messages on $STAT_TARGET (or \"tail -f /var/log/messages |grep cluster\")\n"); @@ -1230,7 +1221,7 @@ sub clp_connect_request { my $buffer=''; - My::Select::reader($socket,\&clp_receive,$socket,\$buffer); + My::Select::reader($socket,sub{clp_receive($socket,\$buffer)}); $clp_sockets{$socket}=$socket; } @@ -1346,7 +1337,7 @@ sub init_area { warn "I am area router for $AREA_ROUTER{$my_hostname}\n"; $area_socket=new IO::Socket::INET (Proto=>'udp',LocalPort=>$UDP_PORT+1) or die "$!\n"; - My::Select::reader($area_socket,\&area_message,$area_socket); + My::Select::reader($area_socket,sub{area_message($area_socket)}); } sub area_message { @@ -1470,7 +1461,7 @@ sub exec_at { my $pbuffer=''; my $olbuffer=''; my $elbuffer=''; - My::Select::reader($s,\&cmd_rx,$host,$s,\$pbuffer,\$olbuffer,\$elbuffer); + My::Select::reader($s,sub{cmd_rx($host,$s,\$pbuffer,\$olbuffer,\$elbuffer)}); $slave=1; My::Select::run() if $slave;; } @@ -1489,7 +1480,7 @@ sub lsof { clp_send_message($s,'LSOF',$pattern); my $pbuffer=''; my $olbuffer=''; - My::Select::reader($s,\&lsof_rx,$host,$s,\$pbuffer,\$olbuffer); + My::Select::reader($s,sub{lsof_rx($host,$s,\$pbuffer,\$olbuffer)}); $slave++; } My::Select::run() if $slave;; From 7f8859967c3185e55436662f607f3162b1bf2722 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 2 Dec 2017 12:55:50 +0100 Subject: [PATCH 36/55] clusterd: Remove/Move imports --- clusterd/clusterd | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 5d15339..0689eef 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -13,12 +13,11 @@ use Donald::Tools qw(encode sign check_sign decode); use Donald::FileInfo; use POSIX; use IO::Pipe; +use Digest::MD5; #------------------------------------- package My::Select; -use Donald::Tools; - our $time=Donald::Tools::uptime(); sub My::Select::time @@ -146,8 +145,6 @@ package My::Select::INET ; use Carp; use IO::Socket::INET; -use Digest::MD5; -use Storable; our $UDP_MAX=1472; # for broadcast on alphas From 871a2b2ece3d90cd6dfd596c0bb2b8cca58b22dd Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 6 Dec 2017 16:57:27 +0100 Subject: [PATCH 37/55] clusterd: Remove clp_sockets hash This hash is not used but it prevents these sockets to be able to be closed by running out of scope. --- clusterd/clusterd | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 0689eef..06989b6 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1196,8 +1196,6 @@ sub run_cmd { our $CLP_PORT=235; our $clp_listen_socket; -our %clp_sockets; - our %CLP_HANDLER=('CMD'=>\&clp_rx_CMD,'LSOF'=>\&clp_rx_LSOF); sub clp_init { @@ -1219,7 +1217,6 @@ sub clp_connect_request { my $buffer=''; My::Select::reader($socket,sub{clp_receive($socket,\$buffer)}); - $clp_sockets{$socket}=$socket; } sub clp_receive { @@ -1227,7 +1224,6 @@ sub clp_receive { my $data; defined $s->recv($data,$TCP_MAX) or return; if (!length($data) ) { - delete $clp_sockets{$s}; $s->close; return; } From d7b683448561aac0f0883f8bc0f0e89e8c25c6ef Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 6 Dec 2017 17:11:15 +0100 Subject: [PATCH 38/55] clusterd: Add functions for nonblocking tcp to My::Select::INET --- clusterd/clusterd | 101 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/clusterd/clusterd b/clusterd/clusterd index 06989b6..8ee7aea 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -148,6 +148,15 @@ use IO::Socket::INET; our $UDP_MAX=1472; # for broadcast on alphas +our $SOL_SOCKET=1; +our $SO_ERROR=4; + +sub get_socket_error +{ + my ($s)=@_; + return unpack('i',getsockopt($s,$SOL_SOCKET,$SO_ERROR)); +} + sub new # ( Proto=>'udp',Broadcast=>1,LocalPort=>$UDP_PORT ) { my ($class,@args)=@_; @@ -187,6 +196,98 @@ sub receive_data My::Select::reader($$self,$receive_data_cb); } +# send_tcp($socket,$data,$timeout,$cb); +# +# send data asynchronously over noblocking tcp socket +# call callback when done ($!=0) or on error ($! set) +# +# all arguments required +# +sub send_tcp { + my ($s,$data,$timeout,$cb)=@_; + my $len=$s->send($data,0); + defined $len or return $cb->(); + if ($len==length($data)) { + $!=0; + $cb->(); + return; + } + my $pos=$len; + my $cb_tmo=sub { + My::Select::cancel_handle($s); + $!=110; + $cb->(); + }; + my $cb_write=sub { + My::Select::timeout_cancel($cb_tmo); + my $len=send($s,substr($data,$pos),0); + defined $len or return $cb->(); + if ($len==length($data)-$pos) { + $!=0; + $cb->(); + return; + } + $pos+=$len; + My::Select::timeout($timeout,$cb_tmo); + My::Select::writer_requeue(); + }; + My::Select::timeout($timeout,$cb_tmo); + My::Select::writer($s,$cb_write); +} + +# $socket = connect_tcp ($ip,$port,$timeout,$cb) +# +# asynchronously connect to tcp socket. +# call callback when done or on error (with $! set) +# +# all arguments required +# +sub connect_tcp { + my ($ip,$port,$timeout,$cb)=@_; + + my $s=new IO::Socket::INET (PeerAddr=>$ip,PeerPort=>$port,Blocking=>0); + defined $s or return $cb->(); + my $cb_tmo=sub { + My::Select::cancel_handle($s); + $!=110; + $cb->(); + }; + my $cb_write=sub { + My::Select::timeout_cancel($cb_tmo); + $!=get_socket_error($s); + $cb->(); + }; + My::Select::timeout($timeout,$cb_tmo); + My::Select::writer($s,$cb_write); + return $s; +} + +# read_with_timeout($socket,$callback,$timeout) +# +# asynchronously read from tcp socket. +# +sub read_with_timeout { + my ($s,$cb,$timeout)=@_; + my $cb_tmo=sub { + My::Select::cancel_handle($s); + $!=110; + $cb->(undef); + }; + my $cb_read=sub { + My::Select::timeout_cancel($cb_tmo); + my $buf=''; + my $l=sysread($s,$buf,102400,0); + if (!defined $l) { + $cb->(undef); + } else { + $!=0; + $cb->($buf); + } + }; + My::Select::timeout($timeout,$cb_tmo); + My::Select::reader($s,$cb_read) +} + #-------------------------------------- package My::Cluster::Updown; From 082e78ad710da10754d667c8e1d411c923b87a85 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 6 Dec 2017 17:13:59 +0100 Subject: [PATCH 39/55] clusterd: Add send_tcp_cp() to send a cluster protocol message over tcp --- clusterd/clusterd | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/clusterd/clusterd b/clusterd/clusterd index 8ee7aea..fcefac3 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1366,6 +1366,17 @@ sub clp_rx_CMD { return 1; } +# send_tcp_cp($socket,$cb,$timeout,@args) +# +# send a cluster protocoll message over an async tcp socket. +# +# assume $CLUSTER_PW is valid +# +sub send_tcp_cp { + my ($s,$cb,$timeout,@args)=@_; + my $data=sign($CLUSTER_PW,encode(@args)); + My::Select::INET::send_tcp($s,pack('n',length($data)).$data,$timeout,$cb); +} #---------------------------------------------------------- #our $CLUSTER_PW; From 96806c6b180b525ba108b1c29e68a17de8b96110 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 6 Dec 2017 17:16:32 +0100 Subject: [PATCH 40/55] clusterd: Add command "clusterd push files..." The command "clusterd push file..." has a new syntax with a command verb. (as opposed to "clusterd --push file") The push command is distributed via the area routers to the cluster daemons on all nodes. It is intended that the cluster daemons call back over tcp to the originator to pull the file if needed. --- clusterd/clusterd | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index fcefac3..1cf5f3f 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1667,6 +1667,22 @@ sub cmd_msg { } } +sub cmd_push { + my @files=@_; + for my $filename (@files) { + $filename =~ m"^/" or return warn "$filename: please use absolute path\n"; + -e $filename or die "$filename: no such file\n"; + } + sync_cluster_pw() or die "$CLUSTER_PW_FILE: $!\n"; + $donald_s=new My::Select::INET(Proto=>'udp') or die "$!\n"; + for my $filename (@files) { + my $st=Donald::FileInfo->lstat($filename); + defined $st or die "$filename: $!\n"; + $st->type eq 'F' or die "$filename: only plain files currently supported\n"; + udp_broadcast_message($donald_s,'push',$my_ip,$st); + } +} + #------------------------------------------------------------ use constant USAGE => <<'__EOF__'; @@ -1690,6 +1706,8 @@ usage: $0 [options] --foreground # stay in foreground, log to stderr --syslog # log to syslog instead of stderr + push files.... # push files over tcp + __EOF__ use Getopt::Long; @@ -1784,5 +1802,12 @@ if (defined $options{'push'}) { } elsif ($options{'kill'}) { Donald::Tools::kill_previous_server('clusterd'); } else { - die USAGE; + @ARGV or die USAGE; + my ($cmd,@args)=@ARGV; + if ($cmd eq 'push') { + @args>0 or die USAGE; + cmd_push(@args); + } else { + die USAGE; + } } From 8f5d59af9ffe464269cce0017d8d0cf0d5726ca1 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 6 Dec 2017 17:19:15 +0100 Subject: [PATCH 41/55] clusterd: Implement PUSH in daemon When a daemon receices a push command, it checks whether it already has the offered file or not. If not, it calls back to the daemon where the push originated and pulls the file over tcp. --- clusterd/clusterd | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/clusterd/clusterd b/clusterd/clusterd index 1cf5f3f..b9e834e 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -582,6 +582,7 @@ our %UDP_HANDLER= 'make-automaps' => \&udp_rx_make_automaps, 'log' => \&udp_rx_log, 'exec' => \&udp_rx_exec, + 'push' => \&udp_rx_push, ); sub udp_message { @@ -1667,6 +1668,79 @@ sub cmd_msg { } } +sub udp_rx_push { + my ($ip,$st_want)=@_; + + my $filename=$st_want->name; + my $tmp_filename="$filename.tmp"; + + $ip eq $my_ip and return; + + my $st_is=Donald::FileInfo->lstat($st_want->name); + + unless ($st_want->type eq 'F') { + warn "$filename: type ".$st_want->type." not yet implemented\n"; + return; + } + + if ($st_is + && $st_is->type eq 'F' + && $st_is->size == $st_want->size + && $st_is->mtime == $st_want->mtime + && $st_is->uid == $st_want->uid + && $st_is->gid == $st_want->gid + && $st_is->perm == $st_want->perm + ) { + warn "$filename: already okay\n"; + return; + } + + if ($st_want->size==0) { + my $fh=IO::File->new($tmp_filename,O_WRONLY|O_CREAT,0); + defined $fh or return warn "$tmp_filename: $!\n"; + $fh->close; + chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; + chmod $st_want->perm,$tmp_filename or return warn "$tmp_filename: $!\n"; + rename($tmp_filename,$filename); + utime($st_want->mtime,$st_want->mtime,$filename); + warn "installed (empty) $filename\n"; + return; + } + + my $s; + $s=My::Select::INET::connect_tcp($ip,$CLP_PORT,5,sub { + $! and return warn "$ip: $!\n"; + send_tcp_cp($s,sub { + $! and return warn "$ip: $!\n"; + my $fh = IO::File->new($tmp_filename,O_WRONLY|O_CREAT,0); + defined $fh or return warn "$tmp_filename: $!\n"; + + my $cb; + my $bytes=$st_want->size; + $cb=sub { + # note, we need to break the circular references $cb of our caller, if no longer needed + my ($buf)=@_; + if ($!) { warn "$ip: $!\n";$cb=undef;return; } + if (length($buf)==0) { warn "$ip: EOF\n";$cb=undef;return;} + print $fh $buf; + $bytes-=length($buf); + if ($bytes>0) { + My::Select::INET::read_with_timeout($s,$cb,5); + return; + } + $cb=undef; + close $fh; + chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; + chmod $st_want->perm,$tmp_filename or return warn "$tmp_filename: $!\n"; + rename($tmp_filename,$filename) or return warn "$filename: $!\n"; + utime($st_want->mtime,$st_want->mtime,$filename); + warn "installed $filename\n"; + }; + My::Select::INET::read_with_timeout($s,$cb,5); + },5,'PULL',$st_want); + }); +} + sub cmd_push { my @files=@_; for my $filename (@files) { @@ -1679,6 +1753,7 @@ sub cmd_push { my $st=Donald::FileInfo->lstat($filename); defined $st or die "$filename: $!\n"; $st->type eq 'F' or die "$filename: only plain files currently supported\n"; + open my $test,'<',$filename or die "$filename: $!\n"; udp_broadcast_message($donald_s,'push',$my_ip,$st); } } From 7312dd47f90bbe0ef0e4e2e673e563c704a9f7b0 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 6 Dec 2017 17:20:51 +0100 Subject: [PATCH 42/55] clusterd: Implement PULL in daemon Implement the server side to pull a file over tcp. --- clusterd/clusterd | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index b9e834e..3b44b15 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1298,7 +1298,7 @@ sub run_cmd { our $CLP_PORT=235; our $clp_listen_socket; -our %CLP_HANDLER=('CMD'=>\&clp_rx_CMD,'LSOF'=>\&clp_rx_LSOF); +our %CLP_HANDLER=('CMD'=>\&clp_rx_CMD,'LSOF'=>\&clp_rx_LSOF,'PULL'=>\&clp_rx_PULL); sub clp_init { $clp_listen_socket=new IO::Socket::INET(LocalPort=>$CLP_PORT,Proto=>'tcp',Listen=>1,ReuseAddr=>1); @@ -1668,6 +1668,46 @@ sub cmd_msg { } } +our $SYS_SENDFILE=40; # /usr/include/asm/unistd_64.h + +sub sendfile { + my ($out_fd,$in_fd,$offset,$count)=@_; + my $ret=syscall($SYS_SENDFILE,fileno($out_fd),fileno($in_fd),$offset,$count); + return $ret<0 ? undef : $ret; +} + +sub clp_rx_PULL { + my ($s,$st_want)=@_; + + my $st_is=Donald::FileInfo->lstat($st_want->name); + if (!defined $st_is or $st_is->type ne 'F' or $st_is->size != $st_want->size or $st_is->mtime != $st_want->mtime) { + warn $st_want->name." requested by ".$s->peerhost.": no longer available\n"; + return; + } + my $fh; + unless (open $fh,'<',$st_want->name) { + warn $st_want->name.": $!\n"; + return; + } + my $bytes=$st_is->size; + my $cb_tmo=sub { My::Select::cancel_handle($s); }; + my $cb_write=sub { + my $l=sendfile($s,$fh,0,$bytes); + unless (defined $l) { + warn "$!"; + return; + } + $bytes-=$l; + if ($bytes) { + My::Select::writer_requeue if $bytes; + } else { + close($s); + } + }; + My::Select::timeout(5,$cb_tmo); + My::Select::writer($s,$cb_write); +} + sub udp_rx_push { my ($ip,$st_want)=@_; From fcedb3a4a62022d582ee3cd3c44f5dba073aa649 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 7 Dec 2017 12:58:34 +0100 Subject: [PATCH 43/55] clusterd: Increase TCP timeout from 5 to 30 While 5 seconds seems to be enough for normal behaviour, we might need more time if the daemon is slowed down .e.g. by strace. --- clusterd/clusterd | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 3b44b15..60fc54f 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -531,6 +531,7 @@ use Data::Dumper; our $UDP_MAX=1472; # for broadcast on alphas our $UDP_PORT=234; our $BC_RATE=8; # packets per second broadcast +our $TCP_TIMEOUT=30; # default timeout for tcp processing our (%options); # RUN OPTIONS @@ -1748,7 +1749,7 @@ sub udp_rx_push { } my $s; - $s=My::Select::INET::connect_tcp($ip,$CLP_PORT,5,sub { + $s=My::Select::INET::connect_tcp($ip,$CLP_PORT,$TCP_TIMEOUT,sub { $! and return warn "$ip: $!\n"; send_tcp_cp($s,sub { $! and return warn "$ip: $!\n"; @@ -1765,7 +1766,7 @@ sub udp_rx_push { print $fh $buf; $bytes-=length($buf); if ($bytes>0) { - My::Select::INET::read_with_timeout($s,$cb,5); + My::Select::INET::read_with_timeout($s,$cb,$TCP_TIMEOUT); return; } $cb=undef; @@ -1776,8 +1777,8 @@ sub udp_rx_push { utime($st_want->mtime,$st_want->mtime,$filename); warn "installed $filename\n"; }; - My::Select::INET::read_with_timeout($s,$cb,5); - },5,'PULL',$st_want); + My::Select::INET::read_with_timeout($s,$cb,$TCP_TIMEOUT); + },$TCP_TIMEOUT,'PULL',$st_want); }); } From cb8964c7abcf7e8a878768e8b8806f23f23642dd Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 7 Dec 2017 13:22:34 +0100 Subject: [PATCH 44/55] clusterd: Increase listen queue size for clp port With 234 hosts pulling a file we failed to service a single on with the old setting listen=1. --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 60fc54f..97867d9 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1302,7 +1302,7 @@ our $clp_listen_socket; our %CLP_HANDLER=('CMD'=>\&clp_rx_CMD,'LSOF'=>\&clp_rx_LSOF,'PULL'=>\&clp_rx_PULL); sub clp_init { - $clp_listen_socket=new IO::Socket::INET(LocalPort=>$CLP_PORT,Proto=>'tcp',Listen=>1,ReuseAddr=>1); + $clp_listen_socket=new IO::Socket::INET(LocalPort=>$CLP_PORT,Proto=>'tcp',Listen=>128,ReuseAddr=>1); defined $clp_listen_socket or die "$!\n"; My::Select::reader($clp_listen_socket,\&clp_connect_request); } From 22887ea695e14799f33e4d6064526fed1ffa3e90 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 7 Dec 2017 14:32:43 +0100 Subject: [PATCH 45/55] clusterd: Bump up version --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 97867d9..92bc6ac 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -5,7 +5,7 @@ use strict; # https://github.molgen.mpg.de/mariux64/clusterd -our $REVISION='1.108'; +our $REVISION='1.109'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); From 2d1294962fdfdbcc4b53bdb637e901832d173ee6 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 10 Mar 2018 14:05:42 +0100 Subject: [PATCH 46/55] clusterd: Add timeout for lsof If we have dead nfs mounts, lsof might hangover. Limit a timeout and return an error message if lsof fails. --- clusterd/clusterd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 92bc6ac..f14fb82 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1204,12 +1204,16 @@ sub clp_rx_LSOF { defined $pid or die "$!\n"; unless ($pid) { $socket->blocking(1); - open P,'lsof|' or die "$!\n"; + open P,'timeout -k 12s 10s lsof|' or die "$!\n"; while (

) { next if defined $pattern && index($_,$pattern)<0; $socket->send(pack('n',length($_)).$_,0); } close P; + if ($?) { + $_=sprintf("** lsof timout/error on %s\n",$my_hostname); + $socket->send(pack('n',length($_)).$_,0); + } close $socket; exit; } From 3724deb9cbd3c896255408aee1f377cb8333dc92 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 10 Mar 2018 14:07:54 +0100 Subject: [PATCH 47/55] clusterd: Bump up version --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index f14fb82..d7dc9e5 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -5,7 +5,7 @@ use strict; # https://github.molgen.mpg.de/mariux64/clusterd -our $REVISION='1.109'; +our $REVISION='1.110'; #use lib ('/home/buczek/cluster/Donald/blib/lib'); From 768d7dc5effc7bf27e267c52500656339abfd93d Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 10 Mar 2018 15:48:09 +0100 Subject: [PATCH 48/55] clusterd: Do not resolve hostname when doing lsof --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index d7dc9e5..765cdac 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1204,7 +1204,7 @@ sub clp_rx_LSOF { defined $pid or die "$!\n"; unless ($pid) { $socket->blocking(1); - open P,'timeout -k 12s 10s lsof|' or die "$!\n"; + open P,'timeout -k 12s 10s lsof -n|' or die "$!\n"; while (

) { next if defined $pattern && index($_,$pattern)<0; $socket->send(pack('n',length($_)).$_,0); From 98bb305be0ebcd51da21305c1fd94cae4ccfc3f5 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 10 Mar 2018 16:37:27 +0100 Subject: [PATCH 49/55] clusterd: Increase lsof timeout --- clusterd/clusterd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 765cdac..32009bc 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -1204,7 +1204,7 @@ sub clp_rx_LSOF { defined $pid or die "$!\n"; unless ($pid) { $socket->blocking(1); - open P,'timeout -k 12s 10s lsof -n|' or die "$!\n"; + open P,'timeout -k 32s 30s lsof -n|' or die "$!\n"; while (

) { next if defined $pattern && index($_,$pattern)<0; $socket->send(pack('n',length($_)).$_,0); From 5a917d54e1bbae94b6d13da33930e1e31fa287df Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 13 Jun 2018 14:32:20 +0200 Subject: [PATCH 50/55] clusterd: Refactor lmtime setting Refactor the code, so that the mltime specific settings are done from inside the machine type switch. This makes it easier to add another machine type which can set the time of a symlink. --- clusterd/clusterd | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 32009bc..586e1e8 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -797,6 +797,19 @@ sub udp_rx_amdtardata { } our ($machine,$SYS_lchown,$SYS_mknod,$lmtime_sub); +our ($SYS_utimensat,$AT_FDCWD,$UTIME_OMIT,$AT_SYMLINK_NOFOLLOW); + +sub lmtime_unsupported { + my ($path,$mtime)=@_; + warn "$path: don't known how to change symlink mtime on target architecture\n"; +} +sub lmtime_utimensat { + my ($path,$mtime)=@_; + my $tsa=pack 'qqqq',0,$UTIME_OMIT,$mtime,0; + syscall($SYS_utimensat,$AT_FDCWD,$path,$tsa,$AT_SYMLINK_NOFOLLOW)==0 or return warn "$path: failed to lmtime: $!\n"; +} +$lmtime_sub=\&lmtime_unsupported; + chomp($machine=`uname -m`); if ($machine eq 'i686') { $SYS_lchown=198; # __NR_lchown32 in /usr/include/asm/unistd.h @@ -804,6 +817,12 @@ if ($machine eq 'i686') { } elsif ($machine eq 'x86_64') { $SYS_lchown=94; # __NR_lchown in /usr/include/asm-x86_64/unistd.h $SYS_mknod=133; # __NR_mknod + + $SYS_utimensat=280; # /usr/include/asm/unistd_64.h + $AT_FDCWD=-100; # /usr/include/fcntl.h + $UTIME_OMIT=(1<<30)-2; # /usr/include/bits/stat.h + $AT_SYMLINK_NOFOLLOW=0x100; # /usr/include/fcntl.h + $lmtime_sub=\&lmtime_utimensat; } elsif ($machine eq 'alpha') { $SYS_lchown=208; # SYS_lchown in /usr/include/syscall.h $SYS_mknod=14; # SYS_mknod @@ -814,23 +833,6 @@ if ($machine eq 'i686') { warn "unknown machine type $machine: symlink ownership can't be set.\n"; warn "unknown machine type $machine: named pipes,character and block devices can't be created\n"; } -if ($machine eq 'x86_64') { - our $SYS_utimensat=280; # /usr/include/asm/unistd_64.h - our $AT_FDCWD=-100; # /usr/include/fcntl.h - our $UTIME_OMIT=(1<<30)-2; # /usr/include/bits/stat.h - our $AT_SYMLINK_NOFOLLOW=0x100; # /usr/include/fcntl.h - - $lmtime_sub=sub { - my ($path,$mtime)=@_; - my $tsa=pack 'qqqq',0,$UTIME_OMIT,$mtime,0; - syscall($SYS_utimensat,$AT_FDCWD,$path,$tsa,$AT_SYMLINK_NOFOLLOW)==0 or return warn "$path: failed to lmtime: $!\n"; - } -} else { - $lmtime_sub=sub { - my ($path,$mtime)=@_; - warn "$path: don't known how to change symlink mtime on target architecture\n"; - } -} sub lchown { my ($uid,$gid,$path)=@_; From 5aa3a289b35dbb33b850a77227294404156448c6 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 13 Jun 2018 14:34:48 +0200 Subject: [PATCH 51/55] clusterd: Add ppc64le architecture --- clusterd/clusterd | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clusterd/clusterd b/clusterd/clusterd index 586e1e8..619ef9f 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -829,6 +829,15 @@ if ($machine eq 'i686') { } elsif ($machine eq 'amd64') { $SYS_lchown=254; # SYS_lchown in /usr/include/syscall.h $SYS_mknod=14; # SYS_mknod +} elsif ($machine eq 'ppc64le') { + $SYS_lchown=16; # __NR_lchown in /usr/include/powerpc64le-linux-gnu/asm/unistd.h + $SYS_mknod=14; # __NR_mknod + + $SYS_utimensat=304; # __NR_utimensat in /usr/include/powerpc64le-linux-gnu/asm/unistd.h + $AT_FDCWD=-100; # /usr/include/linux/fcntl.h + $UTIME_OMIT=(1<<30)-2; # /usr/include/powerpc64le-linux-gnu/bits/stat.h + $AT_SYMLINK_NOFOLLOW=0x100; # /usr/include/linux/fcntl.h + $lmtime_sub=\&lmtime_utimensat; } else { warn "unknown machine type $machine: symlink ownership can't be set.\n"; warn "unknown machine type $machine: named pipes,character and block devices can't be created\n"; From 3adb42f47275cea1da44f29ad36648589cc3690a Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 13 Jun 2018 14:47:56 +0200 Subject: [PATCH 52/55] clusterd: Remove call to amq amd automounted is gone on all systems, so don't try to call amq. --- clusterd/clusterd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 619ef9f..769e374 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -566,7 +566,7 @@ our $CLUSTER_PW_FILE='/etc/clusterd.password'; our $OLD_CLUSTER_PW_FILE='/root/clusterd.password'; our $CLUSTER_PW_TIMESTAMP=0; -$ENV{'PATH'} = '/usr/local/bin:/sbin:/usr/sbin:/bin'.($ENV{PATH}?':'.$ENV{PATH}:''); # for amq , ps , tar (gnu!) +$ENV{'PATH'} = '/usr/local/bin:/sbin:/usr/sbin:/bin'.($ENV{PATH}?':'.$ENV{PATH}:''); # for ps , tar (gnu!) #---------------------------------------------------------- UDP @@ -792,7 +792,6 @@ sub udp_rx_amdtardata { warn "installed /etc/amd - ",Digest::MD5::md5_hex($digest),"\n"; $INSTALLED_DIGEST=$digest; - system 'amq','-f'; system '/sbin/make-automaps'; } From c93face831a99c60f6ad1fdcd0df8b254b409d37 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 5 Oct 2018 10:17:31 +0200 Subject: [PATCH 53/55] clusterd: Add checks for write errors Currently we may move received files in place, even whe the write or close were unsuccessfull (eg. disk full). This is a serious bug and we may end up with an empty passwd. Add checks for all write() and close() calls. Also log rename errors. --- clusterd/clusterd | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 769e374..6473661 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -895,12 +895,12 @@ sub udp_rx_filedata { # complete file in one broadcast my $fh=IO::File->new($tmp_filename,O_WRONLY|O_CREAT,0); defined $fh or return warn "$tmp_filename: $!\n"; - $fh->syswrite($data); - $fh->close; + $fh->syswrite($data) or return warn "$tmp_filename: $!\n"; + $fh->close or return warn "$tmp_filename: $!\n"; chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; chmod $st_want->perm,$tmp_filename or return warn "$tmp_filename: $!\n"; $st_is and unlink($filename); - rename($tmp_filename,$filename); + rename($tmp_filename,$filename) or return warn "rename $tmp_filename $filename: $!\n"; utime($st_want->mtime,$st_want->mtime,$filename); warn "installed $filename\n"; $rx_filedata_done=1; @@ -945,7 +945,7 @@ sub udp_rx_filedata { chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; chmod $st_want->perm,$tmp_filename or return warn "$tmp_filename: $!\n"; $st_is and unlink($filename); - rename($tmp_filename,$filename); + rename($tmp_filename,$filename) or return warn "rename $tmp_filename $filename: $!\n"; utime($st_want->mtime,$st_want->mtime,$filename); warn "installed $filename\n"; delete $RECEIVER{$filename}; @@ -1753,10 +1753,10 @@ sub udp_rx_push { if ($st_want->size==0) { my $fh=IO::File->new($tmp_filename,O_WRONLY|O_CREAT,0); defined $fh or return warn "$tmp_filename: $!\n"; - $fh->close; + $fh->close or return warn "$tmp_filename: $!\n"; chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; chmod $st_want->perm,$tmp_filename or return warn "$tmp_filename: $!\n"; - rename($tmp_filename,$filename); + rename($tmp_filename,$filename) or return warn "rename $tmp_filename $filename: $!\n";; utime($st_want->mtime,$st_want->mtime,$filename); warn "installed (empty) $filename\n"; return; @@ -1777,17 +1777,17 @@ sub udp_rx_push { my ($buf)=@_; if ($!) { warn "$ip: $!\n";$cb=undef;return; } if (length($buf)==0) { warn "$ip: EOF\n";$cb=undef;return;} - print $fh $buf; + print $fh $buf or return warn "$tmp_filename: $!\n"; $bytes-=length($buf); if ($bytes>0) { My::Select::INET::read_with_timeout($s,$cb,$TCP_TIMEOUT); return; } $cb=undef; - close $fh; + close $fh or return warn "$tmp_filename: $!\n"; chown $st_want->uid,$st_want->gid,$tmp_filename or return warn "$tmp_filename: $!\n"; chmod $st_want->perm,$tmp_filename or return warn "$tmp_filename: $!\n"; - rename($tmp_filename,$filename) or return warn "$filename: $!\n"; + rename($tmp_filename,$filename) or return warn "rename $tmp_filename $filename: $!\n"; utime($st_want->mtime,$st_want->mtime,$filename); warn "installed $filename\n"; }; From 5ce65e5d95baa1d1b5388e5ad520e6cffa8323ac Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 5 Oct 2018 10:22:10 +0200 Subject: [PATCH 54/55] clusterd: Remove redundant blank lines. --- clusterd/clusterd | 2 -- 1 file changed, 2 deletions(-) diff --git a/clusterd/clusterd b/clusterd/clusterd index 6473661..510cfb7 100755 --- a/clusterd/clusterd +++ b/clusterd/clusterd @@ -164,7 +164,6 @@ sub new # ( Proto=>'udp',Broadcast=>1,LocalPort=>$UDP_PORT ) return bless \$socket,$class; } - sub send_data { my ($self,$ip,$port,$data)=@_; @@ -174,7 +173,6 @@ sub send_data $$self->send($data,0,pack_sockaddr_in($port,$ip_address)) or carp "$!\n"; } - sub reader { my ($self,$sub)=@_; From 8eed159acef390651f0e7804f9fabad5f383b1ca Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 5 Oct 2018 12:06:26 +0200 Subject: [PATCH 55/55] clusterd: Add to installation script Add clusterd files to installation script and remove its obsolete stand-alone Makefile. --- clusterd/Makefile | 15 --------------- install.sh | 2 ++ 2 files changed, 2 insertions(+), 15 deletions(-) delete mode 100644 clusterd/Makefile diff --git a/clusterd/Makefile b/clusterd/Makefile deleted file mode 100644 index 33a1fee..0000000 --- a/clusterd/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -PREFIX=/usr -BINDIR=${PREFIX}/bin -SBINDIR=${PREFIX}/sbin -DESTDIR= - -all: - -install: clusterd clusterd.service - install -d ${DESTDIR}${SBINDIR} - install -m 755 clusterd ${DESTDIR}${SBINDIR}/clusterd - install -d ${DESTDIR}/etc/systemd/system/ - install -m 644 clusterd.service ${DESTDIR}/etc/systemd/system/clusterd.service - -restart: - systemctl restart clusterd.service diff --git a/install.sh b/install.sh index 4954b52..798c1f6 100755 --- a/install.sh +++ b/install.sh @@ -123,4 +123,6 @@ install_data misc_systemd_units/getcams.service "$DESTDIR$systemdunitdi install_exec blink/blinkd.py "$DESTDUR$udev_helperdir/blinkd.py" install_data blink/blinkd.service "$DESTDIR$systemdunitdir/blinkd.service" install_data blink/51-blink.rules "$DESTDIR$udev_rulesdir/51-blink.rules" +install_data clusterd/clusterd.service "$DESTDIR$systemdunitdir/clusterd.service" +install_exec clusterd/clusterd "$DESTDIR$usr_sbindir/clusterd" exit