--- loncom/loncnew 2003/10/07 11:23:03 1.27 +++ loncom/loncnew 2003/12/11 23:18:37 1.34 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.27 2003/10/07 11:23:03 foxr Exp $ +# $Id: loncnew,v 1.34 2003/12/11 23:18:37 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -43,97 +43,8 @@ # - Detect transmission timeouts. # -# Change log: -# $Log: loncnew,v $ -# Revision 1.27 2003/10/07 11:23:03 foxr -# Installed and tested code to process reinit in parent server. -# -# Revision 1.26 2003/09/30 11:11:17 foxr -# Add book-keeping hashes to support the re-init procedure. -# -# Revision 1.25 2003/09/23 11:22:14 foxr -# Tested ability to receive sigusr2 This is now logged and must be -# properly implemented as a re-read of hosts and re-init of appropriate -# children. -# -# Revision 1.24 2003/09/16 09:46:42 foxr -# Added skeletal infrastructure to support SIGUSR2 update hosts request. -# -# Revision 1.23 2003/09/15 09:24:49 foxr -# Add use strict and fix all the fallout from that. -# -# Revision 1.22 2003/09/02 10:34:47 foxr -# - Fix errors in host dead detection logic (too many cases where the -# retries left were not getting incremented or just not checked). -# - Added some additional status to the ps axuww display: -# o Remaining retries on a host. -# o >>> DEAD <<< indicator if I've given up on a host. -# - Tested the SIGHUP will reset the retries remaining count (thanks to -# the above status stuff, and get allow the loncnew to re-try again -# on the host (thanks to the log). -# -# Revision 1.21 2003/08/26 09:19:51 foxr -# How embarrassing... put in the SocketTimeout function in loncnew and forgot -# to actually hook it into the LondTransaction. Added this to MakeLondConnection -# where it belongs... hopefully transactions (not just connection attempts) will -# timeout more speedily than the socket errors will catch it. -# -# Revision 1.20 2003/08/25 18:48:11 albertel -# - fixing a forgotten ; -# -# Revision 1.19 2003/08/19 09:31:46 foxr -# Get socket directory from configuration rather than the old hard coded test -# way that I forgot to un-hard code. -# -# Revision 1.18 2003/08/06 09:52:29 foxr -# Also needed to remember to fail in-flight transactions if their sends fail. -# -# Revision 1.17 2003/08/03 00:44:31 foxr -# 1. Correct handling of connection failure: Assume it means the host is -# unreachable and fail all of the queued transactions. Note that the -# inflight transactions should fail on their own time due either to timeout -# or send/receive failures. -# 2. Correct handling of logs for forced death signals. Pull the signal -# from the event watcher. -# -# Revision 1.16 2003/07/29 02:33:05 foxr -# Add SIGINT processing to child processes to toggle annoying trace mode -# on/off.. will try to use this to isolate the compute boud process issue. -# -# Revision 1.15 2003/07/15 02:07:05 foxr -# Added code for lonc/lond transaction timeouts. Who knows if it works right. -# The intent is for a timeout to fail any transaction in progress and kill -# off the sockt that timed out. -# -# Revision 1.14 2003/07/03 02:10:18 foxr -# Get all of the signals to work correctly. -# -# Revision 1.13 2003/07/02 01:31:55 foxr -# Added kill -HUP logic (restart). -# -# Revision 1.11 2003/06/25 01:54:44 foxr -# Fix more problems with transaction failure. -# -# Revision 1.10 2003/06/24 02:46:04 foxr -# Put a limit on the number of times we'll retry a connection. -# Start getting the signal stuff put in as well...note that need to get signals -# going or else the client will permanently give up on dead servers. -# -# Revision 1.9 2003/06/13 02:38:43 foxr -# Add logging in 'expected format' -# -# Revision 1.8 2003/06/11 02:04:35 foxr -# Support delayed transactions... this is done uniformly by encapsulating -# transactions in an object ... a LondTransaction that is implemented by -# LondTransaction.pm -# -# Revision 1.7 2003/06/03 01:59:39 foxr -# complete coding to support deferred transactions. -# -# use strict; use lib "/home/httpd/lib/perl/"; -use lib "/home/foxr/newloncapa/types"; use Event qw(:DEFAULT ); use POSIX qw(:signal_h); use POSIX; @@ -175,7 +86,7 @@ my $MaxConnectionCount = 10; # Will get my $ClientConnection = 0; # Uniquifier for client events. my $DebugLevel = 0; -my $NextDebugLevel= 10; # So Sigint can toggle this. +my $NextDebugLevel= 2; # So Sigint can toggle this. my $IdleTimeout= 3600; # Wait an hour before pruning connections. # @@ -192,8 +103,8 @@ my $ConnectionCount = 0; my $IdleSeconds = 0; # Number of seconds idle. my $Status = ""; # Current status string. my $RecentLogEntry = ""; -my $ConnectionRetries=5; # Number of connection retries allowed. -my $ConnectionRetriesLeft=5; # Number of connection retries remaining. +my $ConnectionRetries=2; # Number of connection retries allowed. +my $ConnectionRetriesLeft=2; # Number of connection retries remaining. # # The hash below gives the HTML format for log messages @@ -394,6 +305,14 @@ sub Tick { my $Socket = $ActiveTransactions{$item}->getServer(); $Socket->Tick(); } + foreach my $item (keys %ActiveConnections) { + my $State = $ActiveConnections{$item}->data->GetState(); + if ($State ne 'Idle' && $State ne 'SendingRequest' && + $State ne 'ReceivingReply') { + Debug(5,"Ticking Socket $State $item"); + $ActiveConnections{$item}->data->Tick(); + } + } # Do we have work in the queue, but no connections to service them? # If so, try to make some new connections to get things going again. # @@ -403,18 +322,18 @@ sub Tick { if ($ConnectionRetriesLeft > 0) { my $Connections = ($Requests <= $MaxConnectionCount) ? $Requests : $MaxConnectionCount; - Debug(1,"Work but no connections, start ".$Connections." of them"); + Debug(5,"Work but no connections, start ".$Connections." of them"); my $successCount = 0; for (my $i =0; $i < $Connections; $i++) { $successCount += MakeLondConnection(); } if($successCount == 0) { # All connections failed: - Debug(1,"Work in queue failed to make any connectiouns\n"); + Debug(5,"Work in queue failed to make any connectiouns\n"); EmptyQueue(); # Fail pending transactions with con_lost. } } else { ShowStatus(GetServerHost()." >>> DEAD!!! <<<"); - Debug(1,"Work in queue, but gave up on connections..flushing\n"); + Debug(5,"Work in queue, but gave up on connections..flushing\n"); EmptyQueue(); # Connections can't be established. } @@ -439,7 +358,7 @@ Trigger disconnections of idle sockets. sub SetupTimer { Debug(6, "SetupTimer"); - Event->timer(interval => 1, debug => 1, cb => \&Tick ); + Event->timer(interval => 1, cb => \&Tick ); } =pod @@ -461,19 +380,19 @@ sub ServerToIdle { my $Socket = shift; # Get the socket. delete($ActiveTransactions{$Socket}); # Server has no transaction - &Debug(6, "Server to idle"); + &Debug(5, "Server to idle"); # If there's work to do, start the transaction: my $reqdata = $WorkQueue->dequeue(); # This is a LondTransaction - unless($reqdata eq undef) { - Debug(9, "Queue gave request data: ".$reqdata->getRequest()); + if ($reqdata ne undef) { + Debug(5, "Queue gave request data: ".$reqdata->getRequest()); &StartRequest($Socket, $reqdata); } else { # There's no work waiting, so push the server to idle list. - &Debug(8, "No new work requests, server connection going idle"); + &Debug(5, "No new work requests, server connection going idle"); $IdleConnections->push($Socket); } } @@ -519,7 +438,7 @@ sub ClientWritable { # request. &Debug(9,"Send result is ".$result." Defined: ".defined($result)); - if(defined($result)) { + if($result ne undef) { &Debug(9, "send result was defined"); if($result == length($Data)) { # Entire string sent. &Debug(9, "ClientWritable data all written"); @@ -590,7 +509,7 @@ The transaction that is being completed. =cut sub CompleteTransaction { - &Debug(6,"Complete transaction"); + &Debug(5,"Complete transaction"); my $Socket = shift; my $Transaction = shift; @@ -629,7 +548,6 @@ sub StartClientReply { &Debug(8," Reply was: ".$data); my $Serial = $ActiveClients{$Client}; my $desc = sprintf("Connection to lonc client %d", - $Serial); Event->io(fd => $Client, poll => "w", @@ -878,7 +796,7 @@ sub LondReadable { $ConnectionRetriesLeft = $ConnectionRetries; # success resets the count $Watcher->cancel(); if(exists($ActiveTransactions{$Socket})) { - Debug(8,"Completing transaction!!"); + Debug(5,"Completing transaction!!"); CompleteTransaction($Socket, $ActiveTransactions{$Socket}); } else { @@ -1125,7 +1043,7 @@ sub MakeLondConnection { my $Connection = LondConnection->new(&GetServerHost(), &GetServerPort()); - if($Connection == undef) { # Needs to be more robust later. + if($Connection eq undef) { # Needs to be more robust later. Log("CRITICAL","Failed to make a connection with lond."); $ConnectionRetriesLeft--; return 0; # Failure. @@ -1137,7 +1055,7 @@ sub MakeLondConnection { # exchange underway. # my $Socket = $Connection->GetSocket(); - if($Socket == undef) { + if($Socket eq undef) { die "did not get a socket from the connection"; } else { &Debug(9,"MakeLondConnection got socket: ".$Socket); @@ -1240,11 +1158,11 @@ sub QueueTransaction { my $LondSocket = $IdleConnections->pop(); if(!defined $LondSocket) { # Need to queue request. - Debug(8,"Must queue..."); + Debug(5,"Must queue..."); $WorkQueue->enqueue($requestData); if($ConnectionCount < $MaxConnectionCount) { if($ConnectionRetriesLeft > 0) { - Debug(4,"Starting additional lond connection"); + Debug(5,"Starting additional lond connection"); if(MakeLondConnection() == 0) { EmptyQueue(); # Fail transactions, can't make connection. } @@ -1282,7 +1200,7 @@ sub ClientRequest { my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0); Debug(8, "rcv: data length = ".length($thisread) ." read =".$thisread); - unless (defined $rv && length($thisread)) { + unless (defined $rv && length($thisread)) { # Likely eof on socket. Debug(5,"Client Socket closed on lonc for ".$RemoteHost); close($socket); @@ -1586,6 +1504,7 @@ ShowStatus("Forking node servers"); Log("CRITICAL", "--------------- Starting children ---------------"); +LondConnection::ReadConfig; # Read standard config files. my $HostIterator = LondConnection::GetHostIterator; while (! $HostIterator->end()) { @@ -1774,7 +1693,7 @@ sub Restart { Log("CRITICAL", "Restarting"); my $execdir = $perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); - exec("$execdir/lonc"); + exec("$execdir/loncnew"); } =pod