--- loncom/loncnew 2003/09/23 11:22:14 1.25 +++ loncom/loncnew 2003/10/14 15:36:21 1.28 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.25 2003/09/23 11:22:14 foxr Exp $ +# $Id: loncnew,v 1.28 2003/10/14 15:36:21 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -45,6 +45,18 @@ # Change log: # $Log: loncnew,v $ +# Revision 1.28 2003/10/14 15:36:21 albertel +# - making it easier to run loncnew, +# /etc/init.d/loncontrol startnew +# /etc/init.d/loncontrol restartnew +# will now start loncnew in place of lonc +# +# Revision 1.27 2003/10/07 11:23:03 foxr +# Installed and tested code to process reinit in parent server. +# +# Revision 1.26 2003/09/30 11:11:17 foxr +# Add book-keeping hashes to support the re-init procedure. +# # Revision 1.25 2003/09/23 11:22:14 foxr # Tested ability to receive sigusr2 This is now logged and must be # properly implemented as a re-read of hosts and re-init of appropriate @@ -161,6 +173,8 @@ my %perlvar = %{$perlvarref}; # parent and shared variables. my %ChildHash; # by pid -> host. +my %HostToPid; # By host -> pid. +my %HostHash; # by loncapaname -> IP. my $MaxConnectionCount = 10; # Will get from config later. @@ -1519,7 +1533,8 @@ sub CreateChild { my $pid = fork; if($pid) { # Parent $RemoteHost = "Parent"; - $ChildHash{$pid} = $RemoteHost; + $ChildHash{$pid} = $host; + $HostToPid{$host}= $pid; sigprocmask(SIG_UNBLOCK, $sigset); } else { # child. @@ -1582,6 +1597,7 @@ while (! $HostIterator->end()) { my $hostentryref = $HostIterator->get(); CreateChild($hostentryref->[0]); + $HostHash{$hostentryref->[0]} = $hostentryref->[4]; $HostIterator->next(); } $RemoteHost = "Parent Server"; @@ -1604,6 +1620,7 @@ while(1) { my $deadchild = wait(); if(exists $ChildHash{$deadchild}) { # need to restart. 
my $deadhost = $ChildHash{$deadchild}; + delete($HostToPid{$deadhost}); delete($ChildHash{$deadchild}); Log("WARNING","Lost child pid= ".$deadchild. "Connected to host ".$deadhost); @@ -1656,7 +1673,7 @@ Kills off (via sigint) children for host =item -HUP's children for hosts that already exist (this just forces a status display +QUITs children for hosts that already exist (this just forces a status display and resets the connection retry count for that host. =item @@ -1667,7 +1684,84 @@ the start of the master program and main =cut sub UpdateKids { + Log("INFO", "Updating connections via SIGUSR2"); + + # Just in case we need to kill our own lonc, we wait a few seconds to + # give it a chance to receive and relay lond's response to the + # re-init command. + # + + sleep(2); # Wait a couple of seconds. + + my %hosts; # Indexed by loncapa hostname, value=ip. + + # Need to re-read the host table: + + + LondConnection::ReadConfig(); + my $I = LondConnection::GetHostIterator; + while (! $I->end()) { + my $item = $I->get(); + $hosts{$item->[0]} = $item->[4]; + $I->next(); + } + + # The logic below is written for clarity not for efficiency. + # Since I anticipate that this function is only rarely called, that's + # appropriate. There are certainly ways to combine the loops below, + # and anyone wishing to obscure the logic is welcome to go for it. + # Note that we don't re-direct sigchild. Instead we do what's needed + # to the data structures that keep track of children to ensure that + # when sigchild is honored, no new child is born. + # + + # For each existing child; if its host doesn't exist, kill the child. 
+ + foreach my $child (keys %ChildHash) { + my $oldhost = $ChildHash{$child}; + if (!(exists $hosts{$oldhost})) { + Log("CRITICAL", "Killing child for $oldhost host no longer exists"); + delete $ChildHash{$child}; + delete $HostToPid{$oldhost}; + kill 'QUIT' => $child; + } + } + # For each remaining existing child; if its host's ip has changed, + # Restart the child on the new IP. + + foreach my $child (keys %ChildHash) { + my $oldhost = $ChildHash{$child}; + my $oldip = $HostHash{$oldhost}; + if ($hosts{$oldhost} ne $oldip) { + + # kill the old child. + + Log("CRITICAL", "Killing child for $oldhost host ip has changed..."); + delete $ChildHash{$child}; + delete $HostToPid{$oldhost}; + kill 'QUIT' => $child; + + # Do the book-keeping needed to start a new child on the + # new ip. + + $HostHash{$oldhost} = $hosts{$oldhost}; + CreateChild($oldhost); + } + } + # Finally, for each new host not in the host hash, + # enter the host and create a new child. + # Force a status display of any existing process. + + foreach my $host (keys %hosts) { + if(!(exists $HostHash{$host})) { + Log("INFO", "New host $host discovered in hosts.tab..."); + $HostHash{$host} = $hosts{$host}; + CreateChild($host); + } else { + kill 'HUP' => $HostToPid{$host}; # status display. + } + } } @@ -1686,7 +1780,7 @@ sub Restart { Log("CRITICAL", "Restarting"); my $execdir = $perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); - exec("$execdir/lonc"); + exec("$execdir/loncnew"); } =pod