--- loncom/loncnew 2004/10/04 10:30:50 1.62 +++ loncom/loncnew 2004/10/05 10:10:31 1.64 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.62 2004/10/04 10:30:50 foxr Exp $ +# $Id: loncnew,v 1.64 2004/10/05 10:10:31 foxr Exp $ # # Copyright Michigan State University Board of Trustees # @@ -302,6 +302,44 @@ sub SocketTimeout { } } +# +# This function should be called by the child in all cases where it must +# exit. If the child process is running with the DieWhenIdle turned on +# it must create a lock file for the AF_UNIX socket in order to prevent +# connection requests from lonnet in the time between process exit +# and the parent picking up the listen again. +# Parameters: +# exit_code - Exit status value, however see the next parameter. +# message - If this optional parameter is supplied, the exit +# is via a die with this message. +# +sub child_exit { + my ($exit_code, $message) = @_; + + # Regardless of how we exit, we may need to do the lock thing: + + if($DieWhenIdle) { + # + # Create a lock file since there will be a time window + # between our exit and the parent's picking up the listen + # during which no listens will be done on the + # lonnet client socket. + # + my $lock_file = GetLoncSocketPath().".lock"; + open(LOCK,">$lock_file"); + print LOCK "Contents not important"; + close(LOCK); + + exit(0); + } + # Now figure out how we exit: + + if($message) { + die $message; + } else { + exit($exit_code); + } +} #----------------------------- Timer management ------------------------ =pod @@ -332,18 +370,8 @@ sub Tick { $IdleSeconds = 0; # Otherwise all connections get trimmed to fast. UpdateStatus(); if(($ConnectionCount == 0) && $DieWhenIdle) { - # - # Create a lock file since there will be a time window - # between our exit and the parent's picking up the listen - # during which no listens will be done on the - # lonnet client socket. - # - my $lock_file = GetLoncSocketPath().".lock"; - open(LOCK,">$lock_file"); - print LOCK "Contents not important"; - close(LOCK); - - exit(0); + &child_exit(0); + } } } else { @@ -1177,7 +1205,7 @@ sub MakeLondConnection { # my $Socket = $Connection->GetSocket(); if($Socket eq undef) { - die "did not get a socket from the connection"; + &child_exit(-1, "did not get a socket from the connection"); } else { &Debug(9,"MakeLondConnection got socket: ".$Socket); } @@ -1491,7 +1519,11 @@ sub SetupLoncListener { unless ($socket =IO::Socket::UNIX->new(Local => $SocketName, Listen => 250, Type => SOCK_STREAM)) { - die "Failed to create a lonc listner socket"; + if($I_am_child) { + &child_exit(-1, "Failed to create a lonc listener socket"); + } else { + die "Failed to create a lonc listner socket"; + } } return $socket; } @@ -1621,8 +1653,8 @@ sub ChildProcess { undef $parent_dispatchers{$listener}; } - $I_am_child = 1; # Seems like in spite of it all I'm still getting - # parent event dispatches. + $I_am_child = 1; # Seems like in spite of it all I may still getting + # parent event dispatches.. flag I'm a child. # @@ -1679,7 +1711,7 @@ sub ChildProcess { my $ret = Event::loop(); # Start the main event loop. - die "Main event loop exited!!!"; + &child_exit (-1,"Main event loop exited!!!"); } # Create a new child for host passed in: @@ -1716,7 +1748,6 @@ sub CreateChild { # a connection request arrives. We must: # Start a child process to accept the connection request. # Kill our listen on the socket. -# Setup an event to handle the child process exit. (SIGCHLD). # Parameter: # event - The event object that was created to monitor this socket. # event->w->fd is the socket. @@ -1821,6 +1852,39 @@ sub listen_on_all_unix_sockets { } } +# server_died is called whenever a child process exits. +# Since this is dispatched via a signal, we must process all +# dead children until there are no more left. The action +# is to: +# - Remove the child from the bookeeping hashes +# - Re-establish a listen on the unix domain socket associated +# with that host. +# Parameters: +# The event, but we don't actually care about it. +sub server_died { + &Debug(9, "server_died called..."); + + while(1) { # Loop until waitpid nowait fails. + my $pid = waitpid(-1, WNOHANG); + if($pid <= 0) { + return; # Nothing left to wait for. + } + # need the host to restart: + + my $host = $ChildHash{$pid}; + if($host) { # It's for real... + &Debug(9, "Caught sigchild for $host"); + delete($ChildHash{$pid}); + delete($HostToPid{$host}); + &parent_listen($host); + + } else { + &Debug(5, "Caught sigchild for pid not in hosts hash: $pid"); + } + } + +} + # # Parent process logic pass 1: # For each entry in the hosts table, we will @@ -1893,6 +1957,14 @@ ShowStatus("Parent keeping the flock"); if ($DieWhenIdle) { + # We need to setup a SIGChild event to handle the exit (natural or otherwise) + # of the children. + + Event->signal(cb => \&server_died, + desc => "Child exit handler", + signal => "CHLD"); + + $Event::DebugLevel = $DebugLevel; Debug(9, "Parent entering event loop"); my $ret = Event::loop();