--- loncom/Attic/lonc 2002/04/04 22:04:54 1.38 +++ loncom/Attic/lonc 2002/04/10 04:35:31 1.39 @@ -5,7 +5,7 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # -# $Id: lonc,v 1.38 2002/04/04 22:04:54 foxr Exp $ +# $Id: lonc,v 1.39 2002/04/10 04:35:31 foxr Exp $ # # Copyright Michigan State University Board of Trustees # @@ -175,7 +175,14 @@ $SIG{USR1} = \&USRMAN; # And maintain the population. while (1) { my $deadpid = wait; # Wait for the next child to die. - # See who died and start new one + # See who died and start new one + # or a signal (e.g. USR1 for restart). + # if a signal, the wait will fail + # This is ordinarily detected by + # checking for the existence of the + # pid index inthe children hash since + # the return value from a failed wait is -1 + # which is an impossible PID. &status("Woke up"); my $skipping=''; @@ -870,8 +877,25 @@ sub checkchildren { sub USRMAN { &logthis("USR1: Trying to establish connections again"); - %childatt=(); - &checkchildren(); + # + # It is really important not to just clear the childatt hash or we will + # lose all memory of the children. What we really want to do is this: + # For each index where childatt is >= $childmaxattempts + # Zero the associated counter and do a make_child for the host. + # Regardles, the childatt entry is zeroed: + my $host; + foreach $host (keys %childatt) { + if ($childatt{$host} >= $childmaxattempts) { + $childatt{$host} = 0; + &logthis("INFO: Restarting child for server: " + .$host."\n"); + make_new_child($host); + } + else { + $childatt{$host} = 0; + } + } + &checkchildren(); # See if any children are still dead... } # -------------------------------------------------- Non-critical communication