📄 mtr_process.pl
字号:
my $retries= 10; # 10 seconds do { kill(9, @pids); mtr_debug("Sleep 1 second waiting for processes to die"); sleep(1) # Wait one second } while ( $retries-- and kill(0, @pids) ); if ( kill(0, @pids) ) # Check if some left { # FIXME maybe just mtr_warning() ? mtr_error("can't kill process(es) " . join(" ", @pids)); } } } } # We may have failed everything, bug we now check again if we have # the listen ports free to use, and if they are free, just go for it. foreach my $srv ( @args ) { if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) ) { mtr_error("can't kill old mysqld holding port $srv->{'port'}"); } }}################################################################################ Shut down mysqld servers we have started from this run of this script################################################################################ To speed things we kill servers in parallel. The argument is a list# of 'ports', 'pids', 'pidfiles' and 'socketfiles'.# FIXME On Cygwin, and maybe some other platforms, $srv->{'pid'} and# $srv->{'pidfile'} will not be the same PID. We need to try to kill# both I think.sub mtr_stop_mysqld_servers ($) { my $spec= shift; # ---------------------------------------------------------------------- # First try nice normal shutdown using 'mysqladmin' # ---------------------------------------------------------------------- # Shutdown time must be high as slave may be in reconnect mtr_mysqladmin_shutdown($spec, 70); # ---------------------------------------------------------------------- # We loop with waitpid() nonblocking to see how many of the ones we # are to kill, actually got killed by mtr_mysqladmin_shutdown(). # Note that we don't rely on this, the mysqld server might have stop # listening to the port, but still be alive. But it is a start. # ---------------------------------------------------------------------- foreach my $srv ( @$spec ) { if ( $srv->{'pid'} and (waitpid($srv->{'pid'},&WNOHANG) == $srv->{'pid'}) ) { $srv->{'pid'}= 0; } } # ---------------------------------------------------------------------- # We know the process was started from this file, so there is a PID # saved, or else we have nothing to do. # Might be that is is recorded to be missing, but we failed to # take away the PID file earlier, then we do it now. # ---------------------------------------------------------------------- my %mysqld_pids; foreach my $srv ( @$spec ) { if ( $srv->{'pid'} ) { $mysqld_pids{$srv->{'pid'}}= 1; } else { # Server is dead, we remove the pidfile if any # Race, could have been removed between I tested with -f # and the unlink() below, so I better check again with -f if ( -f $srv->{'pidfile'} and ! unlink($srv->{'pidfile'}) and -f $srv->{'pidfile'} ) { mtr_error("can't remove $srv->{'pidfile'}"); } } } # ---------------------------------------------------------------------- # If the processes where started from this script, and we had no PIDS # then we don't have to do anything. # ---------------------------------------------------------------------- if ( ! keys %mysqld_pids ) { # cluck "This is how we got here!"; return; } # ---------------------------------------------------------------------- # In mtr_mysqladmin_shutdown() we only waited for the mysqld servers # not to listen to the port. But we are not sure we got them all # killed. If we suspect it lives, try nice kill with SIG_TERM. Note # that for true Win32 processes, kill(0,$pid) will not return 1. # ---------------------------------------------------------------------- start_reap_all(); # Avoid zombies my @mysqld_pids= keys %mysqld_pids; mtr_kill_processes(\@mysqld_pids); stop_reap_all(); # Get into control again # ---------------------------------------------------------------------- # Now, we check if all we can find using kill(0,$pid) are dead, # and just assume the rest are. We cleanup socket and PID files. # ---------------------------------------------------------------------- { my $errors= 0; foreach my $srv ( @$spec ) { if ( $srv->{'pid'} ) { if ( kill(0,$srv->{'pid'}) ) { # FIXME In Cygwin there seem to be some fast reuse # of PIDs, so dying may not be the right thing to do. $errors++; mtr_warning("can't kill process $srv->{'pid'}"); } else { # We managed to kill it at last # FIXME In Cygwin, we will get here even if the process lives. # Not needed as we know the process is dead, but to be safe # we unlink and check success in two steps. We first unlink # without checking the error code, and then check if the # file still exists. foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'}) { # Know it is dead so should be no race, careful anyway if ( -f $file and ! unlink($file) and -f $file ) { $errors++; mtr_warning("couldn't delete $file"); } } } } } if ( $errors ) { # We are in trouble, just die.... mtr_error("we could not kill or clean up all processes"); } } # FIXME We just assume they are all dead, for Cygwin we are not # really sure }################################################################################ Shut down mysqld servers using "mysqladmin ... shutdown".# To speed this up, we start them in parallel and use waitpid() to# catch their termination. Note that this doesn't say the servers# are terminated, just that 'mysqladmin' is terminated.## Note that mysqladmin will ask the server about what PID file it uses,# and mysqladmin will wait for it to be removed before it terminates# (unless passes timeout).## This function will take at most about 20 seconds, and we still are not# sure we killed them all. If none is responding to ping, we return 1,# else we return 0.###############################################################################sub mtr_mysqladmin_shutdown { my $spec= shift; my $adm_shutdown_tmo= shift; my %mysql_admin_pids; my @to_kill_specs; foreach my $srv ( @$spec ) { if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) ) { push(@to_kill_specs, $srv); } } foreach my $srv ( @to_kill_specs ) { # FIXME wrong log..... # FIXME, stderr..... # Shutdown time must be high as slave may be in reconnect my $args; mtr_init_args(\$args); mtr_add_arg($args, "--no-defaults"); mtr_add_arg($args, "--user=%s", $::opt_user); mtr_add_arg($args, "--password="); if ( -e $srv->{'sockfile'} ) { mtr_add_arg($args, "--socket=%s", $srv->{'sockfile'}); } if ( $srv->{'port'} ) { mtr_add_arg($args, "--port=%s", $srv->{'port'}); } if ( $srv->{'port'} and ! -e $srv->{'sockfile'} ) { mtr_add_arg($args, "--protocol=tcp"); # Needed if no --socket } mtr_add_arg($args, "--connect_timeout=5"); mtr_add_arg($args, "--shutdown_timeout=$adm_shutdown_tmo"); mtr_add_arg($args, "shutdown"); # We don't wait for termination of mysqladmin my $pid= mtr_spawn($::exe_mysqladmin, $args, "", $::path_manager_log, $::path_manager_log, "", { append_log_file => 1 }); $mysql_admin_pids{$pid}= 1; } # As mysqladmin is such a simple program, we trust it to terminate. # I.e. we wait blocking, and wait wait for them all before we go on. while (keys %mysql_admin_pids) { foreach my $pid (keys %mysql_admin_pids) { if ( waitpid($pid,0) > 0 ) { delete $mysql_admin_pids{$pid}; } } } # If we trusted "mysqladmin --shutdown_timeout= ..." we could just # terminate now, but we don't (FIXME should be debugged). # So we try again to ping and at least wait the same amount of time # mysqladmin would for all to die. my $timeout= 20; # 20 seconds max my $res= 1; # If we just fall through, we are done # in the sense that the servers don't # listen to their ports any longer TIME: while ( $timeout-- ) { foreach my $srv ( @to_kill_specs ) { $res= 1; # We are optimistic if ( mtr_ping_mysqld_server($srv->{'port'}, $srv->{'sockfile'}) ) { mtr_debug("Sleep 1 second waiting for processes to stop using port"); sleep(1); # One second $res= 0; next TIME; } } last; # If we got here, we are done } $timeout or mtr_debug("At least one server is still listening to its port"); sleep(5) if $::glob_win32; # FIXME next startup fails if no sleep return $res;}################################################################################ The operating system will keep information about dead children, # we read this information here, and if we have records the process# is alive, we mark it as dead.###############################################################################sub mtr_record_dead_children () { my $ret_pid; # FIXME the man page says to wait for -1 to terminate, # but on OS X we get '0' all the time... while ( ($ret_pid= waitpid(-1,&WNOHANG)) > 0 ) { mtr_debug("waitpid() catched exit of child $ret_pid"); foreach my $idx (0..1) { if ( $::master->[$idx]->{'pid'} eq $ret_pid ) { mtr_debug("child $ret_pid was master[$idx]"); $::master->[$idx]->{'pid'}= 0; } } foreach my $idx (0..2) { if ( $::slave->[$idx]->{'pid'} eq $ret_pid ) { mtr_debug("child $ret_pid was slave[$idx]"); $::slave->[$idx]->{'pid'}= 0; last; } } }}sub start_reap_all { # This causes terminating processes to not become zombies, avoiding # the need for (or possibility of) explicit waitpid(). $SIG{CHLD}= 'IGNORE'; # On some platforms (Linux, QNX, OSX, ...) there is potential race # here. If a process terminated before setting $SIG{CHLD} (but after # any attempt to waitpid() it), it will still be a zombie. So we # have to handle any such process here. while(waitpid(-1, &WNOHANG) > 0) { };}sub stop_reap_all { $SIG{CHLD}= 'DEFAULT';}sub mtr_ping_mysqld_server () { my $port= shift; my $remote= "localhost"; my $iaddr= inet_aton($remote); if ( ! $iaddr ) { mtr_error("can't find IP number for $remote"); } my $paddr= sockaddr_in($port, $iaddr); my $proto= getprotobyname('tcp'); if ( ! socket(SOCK, PF_INET, SOCK_STREAM, $proto) ) { mtr_error("can't create socket: $!"); } if ( connect(SOCK, $paddr) ) { close(SOCK); # FIXME check error? return 1; } else { return 0; }}################################################################################ Wait for a file to be created################################################################################ FIXME check that the pidfile contains the expected pid!sub sleep_until_file_created ($$$) { my $pidfile= shift; my $timeout= shift; my $pid= shift; for ( my $loop= 1; $loop <= $timeout; $loop++ ) { if ( -r $pidfile ) { return $pid; } # Check if it died after the fork() was successful if ( $pid > 0 && waitpid($pid,&WNOHANG) == $pid ) { return 0; } mtr_debug("Sleep 1 second waiting for creation of $pidfile"); if ( $loop % 60 == 0 ) { my $left= $timeout - $loop; mtr_warning("Waited $loop seconds for $pidfile to be created, " . "still waiting for $left seconds..."); } sleep(1); } return 0;}sub mtr_kill_processes ($) { my $pids = shift; foreach my $sig (15, 9) { my $retries= 10; while (1) { kill($sig, @{$pids}); last unless kill (0, @{$pids}) and $retries--; mtr_debug("Sleep 2 second waiting for processes to die"); sleep(2); } }}################################################################################ When we exit, we kill off all children################################################################################ FIXME something is wrong, we sometimes terminate with "Hangup" written# to tty, and no STDERR output telling us why.# FIXME for some readon, setting HUP to 'IGNORE' will cause exit() to# write out "Hangup", and maybe loose some output. We insert a sleep...sub mtr_exit ($) { my $code= shift;# cluck("Called mtr_exit()"); mtr_timer_stop_all($::glob_timers); local $SIG{HUP} = 'IGNORE'; # ToDo: Signalling -$$ will only work if we are the process group # leader (in fact on QNX it will signal our session group leader, # which might be Do-compile or Pushbuild, causing tests to be # aborted). So we only do it if we are the group leader. We might # set ourselves as the group leader at startup (with # POSIX::setpgrp(0,0)), but then care must be needed to always do # proper child process cleanup. kill('HUP', -$$) if $$ == getpgrp(); sleep 2; exit($code);}1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -