--- loncom/loncnew 2004/08/26 12:35:10 1.51 +++ loncom/loncnew 2004/09/20 10:27:35 1.54 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.51 2004/08/26 12:35:10 albertel Exp $ +# $Id: loncnew,v 1.54 2004/09/20 10:27:35 foxr Exp $ # # Copyright Michigan State University Board of Trustees # @@ -104,11 +104,8 @@ my $ConnectionRetries=2; # Number of con my $ConnectionRetriesLeft=2; # Number of connection retries remaining. my $LondVersion = "unknown"; # Version of lond we talk with. my $KeyMode = ""; # e.g. ssl, local, insecure from last connect. +my $LondConnecting = 0; # True when a connection is being built. -my $LongTickLength = 10000000; #Tick Frequency when Idle -my $ShortTickLength = 1; #Tick Frequency when Active (many places in - # the code assume this is one) -my $TickLength = $ShortTickLength;#number of seconds to wait until ticking # # The hash below gives the HTML format for log messages # given a severity. @@ -269,7 +266,7 @@ sub ShowStatus { sub SocketTimeout { my $Socket = shift; Log("WARNING", "A socket timeout was detected"); - Debug(0, " SocketTimeout called: "); + Debug(5, " SocketTimeout called: "); $Socket->Dump(0); if(exists($ActiveTransactions{$Socket})) { FailTransaction($ActiveTransactions{$Socket}); @@ -278,7 +275,7 @@ sub SocketTimeout { # a connection failure: $ConnectionRetriesLeft--; if($ConnectionRetriesLeft <= 0) { - Log("CRITICAL", "Host marked dead: ".GetServerHost()); + Log("CRITICAL", "Host marked DEAD: ".GetServerHost()); } } @@ -294,6 +291,9 @@ Invoked each timer tick. sub Tick { + my ($Event) = @_; + my $clock_watcher = $Event->w; + my $client; if($ConnectionRetriesLeft > 0) { ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount @@ -307,13 +307,11 @@ sub Tick { if($IdleConnections->Count() && ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do? - $IdleSeconds+=$TickLength; + $IdleSeconds++; if($IdleSeconds > $IdleTimeout) { # Prune a connection... my $Socket = $IdleConnections->pop(); KillSocket($Socket); - if ($IdleConnections->Count() == 0) { - &SetupTimer($LongTickLength); - } + $IdleSeconds = 0; # Otherwise all connections get trimmed to fast. } } else { $IdleSeconds = 0; # Reset idle count if not idle. @@ -358,6 +356,7 @@ sub Tick { } if ($ConnectionCount == 0) { $KeyMode = ""; + $clock_watcher->cancel(); } } @@ -377,13 +376,9 @@ Trigger disconnections of idle sockets. =cut -my $timer; sub SetupTimer { - my ($newLength)=@_; - Debug(6, "SetupTimer $TickLength->$newLength"); - $TickLength=$newLength; - if ($timer) { $timer->cancel; } - $timer=Event->timer(interval => $TickLength, cb => \&Tick ); + Debug(6, "SetupTimer"); + Event->timer(interval => 1, cb => \&Tick ); } =pod @@ -483,7 +478,7 @@ sub ClientWritable { } else { # Partial string sent. $Watcher->data(substr($Data, $result)); if($result == 0) { # client hung up on us!! - Log("INFO", "lonc pipe client hung up on us!"); + # Log("INFO", "lonc pipe client hung up on us!"); $Watcher->cancel; $Socket->shutdown(2); $Socket->close(); @@ -611,7 +606,13 @@ Parameters: sub FailTransaction { my $transaction = shift; - Log("WARNING", "Failing transaction ".$transaction->getRequest()); + + # If the socket is dead, that's already logged. + + if ($ConnectionRetriesLeft > 0) { + Log("WARNING", "Failing transaction " + .$transaction->getRequest()); + } Debug(1, "Failing transaction: ".$transaction->getRequest()); if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it. my $client = $transaction->getClient(); @@ -872,6 +873,10 @@ sub LondReadable { .$RemoteHost." now ready for action"); } ServerToIdle($Socket); # Next work unit or idle. + + # + $LondConnecting = 0; # Best spot I can think of for this. + # } elsif ($State eq "SendingRequest") { # We need to be writable for this and probably don't belong @@ -1145,7 +1150,9 @@ sub MakeLondConnection { data => $Connection, desc => 'Connection to lond server'); $ActiveConnections{$Connection} = $event; - + if ($ConnectionCount == 0) { + &SetupTimer; # Need to handle timeouts with connections... + } $ConnectionCount++; Debug(4, "Connection count = ".$ConnectionCount); if($ConnectionCount == 1) { # First Connection: @@ -1153,6 +1160,7 @@ sub MakeLondConnection { } Log("SUCESS", "Created connection ".$ConnectionCount ." to host ".GetServerHost()); + $LondConnecting = 1; # Connection in progress. return 1; # Return success. } @@ -1236,14 +1244,13 @@ sub QueueTransaction { if(!defined $LondSocket) { # Need to queue request. Debug(5,"Must queue..."); $WorkQueue->enqueue($requestData); - if($ConnectionCount < $MaxConnectionCount) { + if(($ConnectionCount < $MaxConnectionCount)) { # && !$LondConnecting) { if($ConnectionRetriesLeft > 0) { Debug(5,"Starting additional lond connection"); if(MakeLondConnection() == 0) { EmptyQueue(); # Fail transactions, can't make connection. CloseAllLondConnections; # Should all be closed but... } - &SetupTimer($ShortTickLength); } else { ShowStatus(GetServerHost()." >>> DEAD !!!! <<<"); EmptyQueue(); # It's worse than that ... he's dead Jim. @@ -1533,7 +1540,6 @@ sub ChildProcess { cb => \&ToggleDebug, data => "INT"); - SetupTimer($LongTickLength); SetupLoncListener(); @@ -1556,9 +1562,10 @@ sub ChildProcess { # Create a new child for host passed in: sub CreateChild { + my $host = shift; + my $sigset = POSIX::SigSet->new(SIGINT); sigprocmask(SIG_BLOCK, $sigset); - my $host = shift; $RemoteHost = $host; Log("CRITICAL", "Forking server for ".$host); my $pid = fork; @@ -1829,17 +1836,33 @@ sub KillThemAll { local($SIG{CHLD}) = 'IGNORE'; # Our children >will< die. foreach my $pid (keys %ChildHash) { my $serving = $ChildHash{$pid}; - Debug(2, "Killing lonc for $serving pid = $pid"); - ShowStatus("Killing lonc for $serving pid = $pid"); - Log("CRITICAL", "Killing lonc for $serving pid = $pid"); + ShowStatus("Nicely Killing lonc for $serving pid = $pid"); + Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid"); kill 'QUIT' => $pid; - delete($ChildHash{$pid}); } - my $execdir = $perlvar{'lonDaemons'}; - unlink("$execdir/logs/lonc.pid"); + } + +# +# Kill all children via KILL. Just in case the +# first shot didn't get them. + +sub really_kill_them_all_dammit +{ + Debug(2, "Kill them all Dammit"); + local($SIG{CHLD} = 'IGNORE'); # In case some purist reenabled them. + foreach my $pid (keys %ChildHash) { + my $serving = $ChildHash{$pid}; + &ShowStatus("Nastily killing lonc for $serving pid = $pid"); + Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid"); + kill 'KILL' => $pid; + delete($ChildHash{$pid}); + my $execdir = $perlvar{'lonDaemons'}; + unlink("$execdir/logs/lonc.pid"); + } +} =pod =head1 Terminate @@ -1849,7 +1872,15 @@ Terminate the system. =cut sub Terminate { - KillThemAll; + &Log("CRITICAL", "Asked to kill children.. first be nice..."); + &KillThemAll; + # + # By now they really should all be dead.. but just in case + # send them all SIGKILL's after a bit of waiting: + + sleep(4); + &Log("CRITICAL", "Now kill children nasty"); + &really_kill_them_all_dammit; Log("CRITICAL","Master process exiting"); exit 0;