--- loncom/loncnew 2003/07/15 02:07:05 1.15 +++ loncom/loncnew 2003/08/26 09:19:51 1.21 @@ -2,13 +2,12 @@ # The LearningOnline Network with CAPA # lonc maintains the connections to remote computers # -# $Id: loncnew,v 1.15 2003/07/15 02:07:05 foxr Exp $ +# $Id: loncnew,v 1.21 2003/08/26 09:19:51 foxr Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). -# -# LON-CAPA is free software; you can redistribute it and/or modify +## LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. @@ -46,6 +45,34 @@ # Change log: # $Log: loncnew,v $ +# Revision 1.21 2003/08/26 09:19:51 foxr +# How embarrassing... put in the SocketTimeout function in loncnew and forgot +# to actually hook it into the LondTransaction. Added this to MakeLondConnection +# where it belongs... hopefully transactions (not just connection attempts) will +# timeout more speedily than the socket errors will catch it. +# +# Revision 1.20 2003/08/25 18:48:11 albertel +# - fixing a forgotten ; +# +# Revision 1.19 2003/08/19 09:31:46 foxr +# Get socket directory from configuration rather than the old hard coded test +# way that I forgot to un-hard code. +# +# Revision 1.18 2003/08/06 09:52:29 foxr +# Also needed to remember to fail in-flight transactions if their sends fail. +# +# Revision 1.17 2003/08/03 00:44:31 foxr +# 1. Correct handling of connection failure: Assume it means the host is +# unreachable and fail all of the queued transactions. Note that the +# inflight transactions should fail on their own time due either to timeout +# or send/receive failures. +# 2. Correct handling of logs for forced death signals. Pull the signal +# from the event watcher. 
+# +# Revision 1.16 2003/07/29 02:33:05 foxr +# Add SIGINT processing to child processes to toggle annoying trace mode +# on/off.. will try to use this to isolate the compute bound process issue. +# # Revision 1.15 2003/07/15 02:07:05 foxr # Added code for lonc/lond transaction timeouts. Who knows if it works right. # The intent is for a timeout to fail any transaction in progress and kill @@ -125,13 +152,14 @@ my $MaxConnectionCount = 10; # Will get my $ClientConnection = 0; # Uniquifier for client events. my $DebugLevel = 0; +my $NextDebugLevel= 10; # So Sigint can toggle this. my $IdleTimeout= 3600; # Wait an hour before pruning connections. # # The variables below are only used by the child processes. # my $RemoteHost; # Name of host child is talking to. -my $UnixSocketDir= "/home/httpd/sockets"; +my $UnixSocketDir= $perlvar{'lonSockDir'}; my $IdleConnections = Stack->new(); # Set of idle connections my %ActiveConnections; # Connections to the remote lond. my %ActiveTransactions; # LondTransactions in flight. @@ -261,7 +289,7 @@ sub Debug { my $level = shift; my $message = shift; if ($level <= $DebugLevel) { - print $message." host = ".$RemoteHost."\n"; + Log("INFO", "-Debug- $message host = $RemoteHost"); } } @@ -597,6 +625,7 @@ Parameters: sub FailTransaction { my $transaction = shift; + Log("WARNING", "Failing transaction ".$transaction->getRequest()); Debug(1, "Failing transaction: ".$transaction->getRequest()); if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it. my $client = $transaction->getClient(); @@ -659,6 +688,7 @@ nonzero if we are allowed to create a ne sub KillSocket { my $Socket = shift; + Log("WARNING", "Shutting down a socket"); $Socket->Shutdown(); # If the socket came from the active connection set, @@ -755,14 +785,16 @@ sub LondReadable { SocketDump(6, $Socket); my $status = $Socket->Readable(); + &Debug(2, "Socket->Readable returned: $status"); if($status != 0) { # bad return from socket read. 
Currently this means that # The socket has become disconnected. We fail the transaction. + Log("WARNING", + "Lond connection lost."); if(exists($ActiveTransactions{$Socket})) { - Debug(3,"Lond connection lost failing transaction"); FailTransaction($ActiveTransactions{$Socket}); } $Watcher->cancel(); @@ -912,6 +944,7 @@ sub LondWritable { # We'll treat this as if the socket got disconnected: Log("WARNING", "Connection to ".$RemoteHost. " has been disconnected"); + FailTransaction($ActiveTransactions{$Socket}) if exists($ActiveTransactions{$Socket}); $Watcher->cancel(); KillSocket($Socket); return; @@ -1067,7 +1100,8 @@ sub MakeLondConnection { &Debug(9,"MakeLondConnection got socket: ".$Socket); } - + $Connection->SetTimeoutCallback(\&SocketTimeout); + $event = Event->io(fd => $Socket, poll => 'w', cb => \&LondWritable, @@ -1167,7 +1201,9 @@ sub QueueTransaction { $WorkQueue->enqueue($requestData); if($ConnectionCount < $MaxConnectionCount) { Debug(4,"Starting additional lond connection"); - MakeLondConnection(); + if(MakeLondConnection() == 0) { + EmptyQueue(); # Fail transactions, can't make connection. + } } } else { # Can start the request: Debug(8,"Can start..."); @@ -1358,16 +1394,31 @@ sub SignalledToDeath { my $watcher= $event->w; Debug(2,"Signalled to death! via ".$watcher->data); - my ($signal) = @_; + my ($signal) = $watcher->data; chomp($signal); Log("CRITICAL", "Abnormal exit. Child $$ for $RemoteHost " ."died through "."\"$signal\""); LogPerm("F:lonc: $$ on $RemoteHost signalled to death: " ."\"$signal\""); - die("Signal abnormal end"); exit 0; } + +=head2 ToggleDebug + +This sub toggles trace debugging on and off. + +=cut + +sub ToggleDebug { + my $Current = $DebugLevel; + $DebugLevel = $NextDebugLevel; + $NextDebugLevel = $Current; + + Log("SUCCESS", "New debugging level for $RemoteHost now $DebugLevel"); + +} + =head2 ChildProcess This sub implements a child process for a single lonc daemon. @@ -1380,12 +1431,6 @@ sub ChildProcess { # # Signals must be handled by the Event framework... 
# -# $SIG{QUIT} = \&SignalledToDeath; -# $SIG{HUP} = \&ChildStatus; -# $SIG{USR1} = IGNORE; -# $SIG{INT} = DEFAULT; -# $SIG{CHLD} = IGNORE; -# $SIG{__DIE__} = \&SignalledToDeath; Event->signal(signal => "QUIT", cb => \&SignalledToDeath, @@ -1396,6 +1441,9 @@ sub ChildProcess { Event->signal(signal => "USR1", cb => \&ChildStatus, data => "USR1"); + Event->signal(signal => "INT", + cb => \&ToggleDebug, + data => "INT"); SetupTimer(); @@ -1427,6 +1475,7 @@ sub CreateChild { Log("CRITICAL", "Forking server for ".$host); $pid = fork; if($pid) { # Parent - $ChildHash{$pid} = $RemoteHost; + $RemoteHost = "Parent"; + $ChildHash{$pid} = $host; sigprocmask(SIG_UNBLOCK, $sigset); @@ -1474,6 +1523,8 @@ open (PIDSAVE, ">$execdir/logs/lonc.pid" print PIDSAVE "$$\n"; close(PIDSAVE); + + if (POSIX::setsid() < 0) { print "Could not create new session\n"; exit -1; @@ -1568,7 +1619,7 @@ sub Restart { =head1 KillThemAll Signal handler that kills all children by sending them a -SIGINT. Responds to sigint and sigterm. +SIGQUIT. Responds to sigint and sigterm. =cut @@ -1580,13 +1631,12 @@ sub KillThemAll { Debug(2, "Killing lonc for $serving pid = $pid"); ShowStatus("Killing lonc for $serving pid = $pid"); Log("CRITICAL", "Killing lonc for $serving pid = $pid"); - kill('INT', $pid); - delete($ChildeHash{$pid}); + kill 'QUIT' => $pid; + delete($ChildHash{$pid}); } my $execdir = $perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); - ShowStatus("Killing the master process"); - Log("CRITICAL", "Killing the master process."); + } =pod @@ -1599,7 +1649,8 @@ Terminate the system. sub Terminate { KillThemAll; - exit; + Log("CRITICAL","Master process exiting"); + exit 0; } =pod