--- loncom/loncnew 2003/06/11 02:04:35 1.8 +++ loncom/loncnew 2003/06/24 02:46:04 1.10 @@ -2,7 +2,7 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.8 2003/06/11 02:04:35 foxr Exp $ +# $Id: loncnew,v 1.10 2003/06/24 02:46:04 foxr Exp $ # # Copyright Michigan State University Board of Trustees # @@ -41,10 +41,19 @@ # - Add Configuration file I/O (done). # - Add management/status request interface. # - Add deferred request capability. (done) +# - Detect transmission timeouts. # # Change log: # $Log: loncnew,v $ +# Revision 1.10 2003/06/24 02:46:04 foxr +# Put a limit on the number of times we'll retry a connection. +# Start getting the signal stuff put in as well...note that need to get signals +# going or else 6the client will permanently give up on dead servers. +# +# Revision 1.9 2003/06/13 02:38:43 foxr +# Add logging in 'expected format' +# # Revision 1.8 2003/06/11 02:04:35 foxr # Support delayed transactions... this is done uniformly by encapsulating # transactions in an object ... a LondTransaction that is implemented by @@ -62,6 +71,7 @@ use POSIX qw(:signal_h); use IO::Socket; use IO::Socket::INET; use IO::Socket::UNIX; +use IO::File; use IO::Handle; use Socket; use Crypt::IDEA; @@ -72,7 +82,6 @@ use LONCAPA::LondTransaction; use LONCAPA::Configuration; use LONCAPA::HashIterator; -print "Loncnew starting\n"; # # Disable all signals we might receive from outside for now. @@ -97,10 +106,10 @@ my %perlvar = %{$perlvarref}; my %ChildHash; # by pid -> host. -my $MaxConnectionCount = 5; # Will get from config later. +my $MaxConnectionCount = 10; # Will get from config later. my $ClientConnection = 0; # Uniquifier for client events. -my $DebugLevel = 2; +my $DebugLevel = 0; my $IdleTimeout= 3600; # Wait an hour before pruning connections. # @@ -113,17 +122,90 @@ my %ActiveConnections; # Connections to my %ActiveTransactions; # LondTransactions in flight. my %ActiveClients; # Serial numbers of active clients by socket. my $WorkQueue = Queue->new(); # Queue of pending transactions. -# my $ClientQueue = Queue->new(); # Queue of clients causing xactinos. my $ConnectionCount = 0; my $IdleSeconds = 0; # Number of seconds idle. +my $Status = ""; # Current status string. +my $ConnectionRetries=5; # Number of connection retries allowed. +my $ConnectionRetriesLeft=5; # Number of connection retries remaining. # -# This disconnected socket makes posible a bit more regular -# code when processing delayed requests: -# -my $NullSocket = IO::Socket->new(); +# The hash below gives the HTML format for log messages +# given a severity. +# +my %LogFormats; + +$LogFormats{"CRITICAL"} = "CRITICAL: %s"; +$LogFormats{"SUCCESS"} = "SUCCESS: %s"; +$LogFormats{"INFO"} = "INFO: %s"; +$LogFormats{"WARNING"} = "WARNING: %s"; +$LogFormats{"DEFAULT"} = " %s "; + + + +=pod + +=head2 LogPerm + +Makes an entry into the permanent log file. + +=cut +sub LogPerm { + my $message=shift; + my $execdir=$perlvar{'lonDaemons'}; + my $now=time; + my $local=localtime($now); + my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); + print $fh "$now:$message:$local\n"; +} + +=pod + +=head2 Log + +Logs a message to the log file. +Parameters: + +=item severity + +One of CRITICAL, WARNING, INFO, SUCCESS used to select the +format string used to format the message. if the severity is +not a defined severity the Default format string is used. + +=item message + +The base message. In addtion to the format string, the message +will be appended to a string containing the name of our remote +host and the time will be formatted into the message. + +=cut + +sub Log { + my $severity = shift; + my $message = shift; + + if(!$LogFormats{$severity}) { + $severity = "DEFAULT"; + } + + my $format = $LogFormats{$severity}; + + # Put the window dressing in in front of the message format: + + my $now = time; + my $local = localtime($now); + my $finalformat = "$local ($$) [$RemoteHost] [$Status] "; + my $finalformat = $finalformat.$format."\n"; + + # open the file and put the result. + + my $execdir = $perlvar{'lonDaemons'}; + my $fh = IO::File->new(">>$execdir/logs/lonc.log"); + my $msg = sprintf($finalformat, $message); + print $fh $msg; + + +} -# =pod @@ -179,11 +261,15 @@ sub SocketDump { =head2 ShowStatus Place some text as our pid status. + and as what we return in a SIGUSR1 =cut sub ShowStatus { - my $status = shift; - $0 = "lonc: ".$status; + my $state = shift; + my $now = time; + my $local = localtime($now); + $Status = $local.": ".$state; + $0='lonc: '.$state.' '.$local; } =pod @@ -222,12 +308,17 @@ sub Tick { # my $Requests = $WorkQueue->Count(); - if (($ConnectionCount == 0) && ($Requests > 0)) { - my $Connections = ($Requests <= $MaxConnectionCount) ? - $Requests : $MaxConnectionCount; - Debug(1,"Work but no connections, starting ".$Connections." of them"); - for ($i =0; $i < $Connections; $i++) { - MakeLondConnection(); + if (($ConnectionCount == 0) && ($Requests > 0)) { + if ($ConnectionRetriesLeft > 0) { + my $Connections = ($Requests <= $MaxConnectionCount) ? + $Requests : $MaxConnectionCount; + Debug(1,"Work but no connections, start ".$Connections." of them"); + for ($i =0; $i < $Connections; $i++) { + MakeLondConnection(); + } + } else { + Debug(1,"Work in queue, but gave up on connections..flushing\n"); + EmptyQueue(); # Connections can't be established. } } @@ -404,9 +495,8 @@ sub CompleteTransaction { my $data = $Socket->GetReply(); # Data to send. StartClientReply($Transaction, $data); } else { # Delete deferred transaction file. - &Debug(4, "Deferred transaction complete: ".$Transaction->getFile(). - " request: ".$Transaction->getRequest(). - " answer: ".$Socket->GetReply()); + Log("SUCCESS", "A delayed transaction was completed"); + LogPerm("S:$Client:".$Transaction->getRequest()); unlink $Transaction->getFile(); } } @@ -466,13 +556,12 @@ Parameters: sub FailTransaction { my $transaction = shift; - my $Lond = $transaction->getServer(); - if (!$client->isDeferred()) { # If the transaction is deferred we'll get to it. + Debug(1, "Failing transaction: ".$transaction->getRequest()); + if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it. my $client = $transcation->getClient(); - StartClientReply($client, "con_lost"); + Debug(1," Replying con_lost to ".$transaction->getRequest()); + StartClientReply($client, "con_lost\n"); } -# not needed, done elsewhere if active. -# delete $ActiveTransactions{$Lond}; } @@ -485,13 +574,27 @@ sub FailTransaction { =cut sub EmptyQueue { while($WorkQueue->Count()) { - my $request = $Workqueue->dequeue(); # This is a transaction + my $request = $WorkQueue->dequeue(); # This is a transaction FailTransaction($request); } } =pod +=head2 CloseAllLondConnections + +Close all connections open on lond prior to exit e.g. + +=cut +sub CloseAllLondConnections { + foreach $Socket (keys %ActiveConnections) { + KillSocket($Socket); + } +} +=cut + +=pod + =head2 KillSocket Destroys a socket. This function can be called either when a socket @@ -515,6 +618,8 @@ nonzero if we are allowed to create a ne sub KillSocket { my $Socket = shift; + $Socket->Shutdown(); + # If the socket came from the active connection set, # delete its transaction... note that FailTransaction should # already have been called!!! @@ -657,6 +762,9 @@ sub LondReadable { Debug(8,"Completing transaction!!"); CompleteTransaction($Socket, $ActiveTransactions{$Socket}); + } else { + Log("SUCCESS", "Connection ".$ConnectionCount." to " + .$RemoteHost." now ready for action"); } ServerToIdle($Socket); # Next work unit or idle. @@ -759,7 +867,8 @@ sub LondWritable { if ($Socket->Writable() != 0) { # The write resulted in an error. # We'll treat this as if the socket got disconnected: - + Log("WARNING", "Connection to ".$RemoteHost. + " has been disconnected"); $Watcher->cancel(); KillSocket($Socket); return; @@ -898,8 +1007,11 @@ sub MakeLondConnection { &GetServerPort()); if($Connection == undef) { # Needs to be more robust later. - Debug(0,"Failed to make a connection with lond."); + Log("CRITICAL","Failed to make a connection with lond."); + $ConnectionRetriesLeft--; + return 0; # Failure. } else { + $ConnectionRetriesLeft = $ConnectionRetries; # success resets the count # The connection needs to have writability # monitored in order to send the init sequence # that starts the whole authentication/key @@ -925,6 +1037,9 @@ sub MakeLondConnection { if($ConnectionCount == 1) { # First Connection: QueueDelayed; } + Log("SUCESS", "Created connection ".$ConnectionCount + ." to host ".GetServerHost()); + return 1; # Return success. } } @@ -1022,7 +1137,6 @@ sub QueueTransaction { =pod =head2 ClientRequest - Callback that is called when data can be read from the UNIX domain socket connecting us with an apache server process. @@ -1047,11 +1161,18 @@ sub ClientRequest { close($socket); $watcher->cancel(); delete($ActiveClients{$socket}); + return; } Debug(8,"Data: ".$data." this read: ".$thisread); $data = $data.$thisread; # Append new data. $watcher->data($data); if($data =~ /(.*\n)/) { # Request entirely read. + if($data eq "close_connection_exit\n") { + Log("CRITICAL", + "Request Close Connection ... exiting"); + CloseAllLondConnections(); + exit; + } Debug(8, "Complete transaction received: ".$data); my $Transaction = LondTransaction->new($data); $Transaction->SetClient($socket); @@ -1163,6 +1284,24 @@ sub SetupLoncListener { =pod +=head2 SignalledToDeath + +Called in response to a signal that causes a chid process to die. + +=cut + +=pod + +sub SignalledToDeath { + my ($signal) = @_; + chomp($signal); + Log("CRITICAL", "Abnormal exit. Child $$ for $RemoteHost " + ."died through "."\"$signal\""); + LogPerm("F:lonc: $$ on $RemoteHost signalled to death: " + ."\"$signal\""); + die("Signal abnormal end"); + +} =head2 ChildProcess This sub implements a child process for a single lonc daemon. @@ -1171,16 +1310,15 @@ This sub implements a child process for sub ChildProcess { - print "Loncnew\n"; # For now turn off signals. - $SIG{QUIT} = IGNORE; + $SIG{QUIT} = \&SignalledToDeath; $SIG{HUP} = IGNORE; $SIG{USR1} = IGNORE; $SIG{INT} = IGNORE; $SIG{CHLD} = IGNORE; - $SIG{__DIE__} = IGNORE; + $SIG{__DIE__} = \&SignalledToDeath; SetupTimer(); @@ -1192,12 +1330,9 @@ sub ChildProcess { # Setup the initial server connection: - &MakeLondConnection(); + # &MakeLondConnection(); // let first work requirest do it. + - if($ConnectionCount == 0) { - Debug(1,"Could not make initial connection..\n"); - Debug(1,"Will retry when there's work to do\n"); - } Debug(9,"Entering event loop"); my $ret = Event::loop(); # Start the main event loop. @@ -1210,8 +1345,7 @@ sub ChildProcess { sub CreateChild { my $host = shift; $RemoteHost = $host; - Debug(3, "Forking off child for ".$RemoteHost); - sleep(5); + Log("CRITICAL", "Forking server for ".$host); $pid = fork; if($pid) { # Parent $ChildHash{$pid} = $RemoteHost; @@ -1264,6 +1398,8 @@ if (POSIX::setsid() < 0) { ShowStatus("Forking node servers"); +Log("CRITICAL", "--------------- Starting children ---------------"); + my $HostIterator = LondConnection::GetHostIterator; while (! $HostIterator->end()) { @@ -1276,16 +1412,25 @@ while (! $HostIterator->end()) { ShowStatus("Parent keeping the flock"); +# +# Set up parent signals: +# +$SIG{INT} = &KillThemAll; +$SIG{TERM} = &KillThemAll; + while(1) { $deadchild = wait(); if(exists $ChildHash{$deadchild}) { # need to restart. $deadhost = $ChildHash{$deadchild}; delete($ChildHash{$deadchild}); - Debug(4,"Lost child pid= ".$deadchild. + Log("WARNING","Lost child pid= ".$deadchild. "Connected to host ".$deadhost); + Log("INFO", "Restarting child procesing ".$deadhost); CreateChild($deadhost); } } +sub KillThemAll { +} =head1 Theory