--- loncom/loncnew 2003/09/15 09:24:49 1.23 +++ loncom/loncnew 2003/10/14 15:36:21 1.28 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.23 2003/09/15 09:24:49 foxr Exp $ +# $Id: loncnew,v 1.28 2003/10/14 15:36:21 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -45,6 +45,26 @@ # Change log: # $Log: loncnew,v $ +# Revision 1.28 2003/10/14 15:36:21 albertel +# - making it easier to run loncnew, +# /etc/init.d/loncontrol startnew +# /etc/init.d/loncontrol restartnew +# will now start loncnew in place of lonc +# +# Revision 1.27 2003/10/07 11:23:03 foxr +# Installed and tested code to process reinit in parent server. +# +# Revision 1.26 2003/09/30 11:11:17 foxr +# Add book-keeping hashes to support the re-init procedure. +# +# Revision 1.25 2003/09/23 11:22:14 foxr +# Tested ability to receive sigusr2 This is now logged and must be +# properly implemented as a re-read of hosts and re-init of appropriate +# children. +# +# Revision 1.24 2003/09/16 09:46:42 foxr +# Added skeletal infrastructure to support SIGUSR2 update hosts request. +# # Revision 1.23 2003/09/15 09:24:49 foxr # Add use strict and fix all the fallout from that. # @@ -141,12 +161,6 @@ use LONCAPA::HashIterator; # # Disable all signals we might receive from outside for now. # -#$SIG{QUIT} = IGNORE; -#$SIG{HUP} = IGNORE; -#$SIG{USR1} = IGNORE; -#$SIG{INT} = IGNORE; -#$SIG{CHLD} = IGNORE; -#$SIG{__DIE__} = IGNORE; # Read the httpd configuration file to get perl variables @@ -159,6 +173,8 @@ my %perlvar = %{$perlvarref}; # parent and shared variables. my %ChildHash; # by pid -> host. +my %HostToPid; # By host -> pid. +my %HostHash; # by loncapaname -> IP. my $MaxConnectionCount = 10; # Will get from config later. 
@@ -1517,7 +1533,8 @@ sub CreateChild {
     my $pid = fork;
     if($pid) { # Parent
         $RemoteHost = "Parent";
-        $ChildHash{$pid} = $RemoteHost;
+        $ChildHash{$pid} = $host;
+        $HostToPid{$host}= $pid;
         sigprocmask(SIG_UNBLOCK, $sigset);
 
     } else { # child.
@@ -1580,6 +1597,7 @@ while (! $HostIterator->end()) {
 
     my $hostentryref = $HostIterator->get();
     CreateChild($hostentryref->[0]);
+    $HostHash{$hostentryref->[0]} = $hostentryref->[4];
     $HostIterator->next();
 }
 $RemoteHost = "Parent Server";
@@ -1596,11 +1614,13 @@ $SIG{INT} = \&Terminate;
 $SIG{TERM} = \&Terminate;
 $SIG{HUP} = \&Restart;
 $SIG{USR1} = \&CheckKids;
+$SIG{USR2} = \&UpdateKids; # LonManage update request.
 
 while(1) {
     my $deadchild = wait();
     if(exists $ChildHash{$deadchild}) { # need to restart.
         my $deadhost = $ChildHash{$deadchild};
+        delete($HostToPid{$deadhost});
         delete($ChildHash{$deadchild});
         Log("WARNING","Lost child pid= ".$deadchild.
             "Connected to host ".$deadhost);
@@ -1639,6 +1659,131 @@ sub CheckKids {
 
 =pod
 
+=head1 UpdateKids
+
+parent's SIGUSR2 handler.  This handler:
+
+=over 4
+
+=item *
+
+Rereads the hosts file.
+
+=item *
+
+Kills off (via SIGQUIT) children for hosts that have disappeared, and
+forgets the host so that it is treated as new if it is ever re-added.
+
+=item *
+
+Kills (via SIGQUIT) and restarts children for hosts whose IP has changed.
+
+=item *
+
+Sends SIGHUP to children for hosts that already exist (this just forces a
+status display and resets the connection retry count for that host).
+
+=item *
+
+Starts new children for hosts that have been added to the hosts.tab file since
+the start of the master program and maintains them.
+
+=back
+
+=cut
+
+sub UpdateKids {
+
+    Log("INFO", "Updating connections via SIGUSR2");
+
+    #  Just in case we need to kill our own lonc, we wait a few seconds to
+    #  give it a chance to receive and relay lond's response to the
+    #  re-init command.
+    #
+
+    sleep(2);                   # Wait a couple of seconds.
+
+    my %hosts;                  # Indexed by loncapa hostname, value=ip.
+
+    # Need to re-read the host table:
+
+    LondConnection::ReadConfig();
+    my $iterator = LondConnection::GetHostIterator;
+    while (! $iterator->end()) {
+        my $item = $iterator->get();
+        $hosts{$item->[0]} = $item->[4];
+        $iterator->next();
+    }
+
+    #  The logic below is written for clarity not for efficiency.
+    #  Since I anticipate that this function is only rarely called, that's
+    #  appropriate.  There are certainly ways to combine the loops below,
+    #  and anyone wishing to obscure the logic is welcome to go for it.
+    #  Note that we don't re-direct sigchild.  Instead we do what's needed
+    #  to the data structures that keep track of children to ensure that
+    #  when sigchild is honored, no new child is born.
+    #
+
+    #  For each existing child; if its host doesn't exist, kill the child.
+    #  The host is also dropped from %HostHash; otherwise a host that is
+    #  later re-added would look like an existing host, would never get a
+    #  new child, and would be signalled through an undefined pid.
+
+    foreach my $child (keys %ChildHash) {
+        my $oldhost = $ChildHash{$child};
+        if (!(exists $hosts{$oldhost})) {
+            Log("CRITICAL", "Killing child for $oldhost host no longer exists");
+            delete $ChildHash{$child};
+            delete $HostToPid{$oldhost};
+            delete $HostHash{$oldhost};
+            kill 'QUIT' => $child;
+        }
+    }
+    # For each remaining existing child; if its host's ip has changed,
+    # Restart the child on the new IP.
+
+    foreach my $child (keys %ChildHash) {
+        my $oldhost = $ChildHash{$child};
+        my $oldip = $HostHash{$oldhost};
+        if ($hosts{$oldhost} ne $oldip) {
+
+            # kill the old child.
+
+            Log("CRITICAL", "Killing child for $oldhost host ip has changed...");
+            delete $ChildHash{$child};
+            delete $HostToPid{$oldhost};
+            kill 'QUIT' => $child;
+
+            # Do the book-keeping needed to start a new child on the
+            # new ip.
+
+            $HostHash{$oldhost} = $hosts{$oldhost};
+            CreateChild($oldhost);
+        }
+    }
+    # Finally, for each new host, not in the host hash,
+    # enter the host and create a new child.
+    # Force a status display of any existing process.
+
+    foreach my $host (keys %hosts) {
+        if(!(exists $HostHash{$host})) {
+            Log("INFO", "New host $host discovered in hosts.tab...");
+            $HostHash{$host} = $hosts{$host};
+            CreateChild($host);
+        } elsif (defined $HostToPid{$host}) {
+            kill 'HUP' => $HostToPid{$host};    # status display.
+        } else {
+            # No pid on record (e.g. the child died and has not been
+            # restarted yet).  An undefined pid numifies to 0, and
+            # kill with pid 0 signals our whole process group.
+            Log("WARNING", "No child recorded for $host; status request skipped");
+        }
+    }
+}
+
+
+=pod
+
 =head1 Restart
 
 Signal handler for HUP...
all children are killed and @@ -1652,7 +1780,7 @@ sub Restart { Log("CRITICAL", "Restarting"); my $execdir = $perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); - exec("$execdir/lonc"); + exec("$execdir/loncnew"); } =pod