Diff for /loncom/loncnew between versions 1.30 and 1.38

version 1.30, 2003/10/27 10:09:21 version 1.38, 2004/01/05 09:29:36
Line 35 Line 35
 #    - Add ability to create/negotiate lond connections (done).  #    - Add ability to create/negotiate lond connections (done).
 #    - Add general logic for dispatching requests and timeouts. (done).  #    - Add general logic for dispatching requests and timeouts. (done).
 #    - Add support for the lonc/lond requests.          (done).  #    - Add support for the lonc/lond requests.          (done).
 #    - Add logging/status monitoring.  #    - Add logging/status monitoring.                    (done)
 #    - Add Signal handling - HUP restarts. USR1 status report.  #    - Add Signal handling - HUP restarts. USR1 status report. (done)
 #    - Add Configuration file I/O                       (done).  #    - Add Configuration file I/O                       (done).
 #    - Add management/status request interface.  #    - Add management/status request interface.         (done)
 #    - Add deferred request capability.                  (done)  #    - Add deferred request capability.                  (done)
 #    - Detect transmission timeouts.  #    - Detect transmission timeouts.                     (done)
 #  #
   
 # Change log:  
 #    $Log$  
 #    Revision 1.30  2003/10/27 10:09:21  foxr  
 #    Tighten up a few compares to eq and flip a few debug levels around... nothing  
 #    critical  
 #  
 #    Revision 1.29  2003/10/21 14:24:42  foxr  
 #    Fix little typo that may explain growth of connections  
 #  
 #    Revision 1.28  2003/10/14 15:36:21  albertel  
 #    - making it easier to run loncnew,  
 #       /etc/init.d/loncontrol startnew  
 #       /etc/init.d/loncontrol restartnew  
 #      will now start loncnew in place of lonc  
 #  
 #    Revision 1.27  2003/10/07 11:23:03  foxr  
 #    Installed and tested code to process reinit in parent server.  
 #  
 #    Revision 1.26  2003/09/30 11:11:17  foxr  
 #    Add book-keeping hashes to support the re-init procedure.  
 #  
 #    Revision 1.25  2003/09/23 11:22:14  foxr  
 #    Tested ability to receive sigusr2  This is now logged and must be  
 #    properly implemented as a re-read of hosts and re-init of appropriate  
 #    children.  
 #  
 #    Revision 1.24  2003/09/16 09:46:42  foxr  
 #    Added skeletal infrastructure to support SIGUSR2 update hosts request.  
 #  
 #    Revision 1.23  2003/09/15 09:24:49  foxr  
 #    Add use strict and fix all the fallout from that.  
 #  
 #    Revision 1.22  2003/09/02 10:34:47  foxr  
 #    - Fix errors in host dead detection logic (too many cases where the  
 #      retries left were not getting incremented or just not checked).  
 #    - Added some additional status to the ps axuww display:  
 #      o Remaining retries on a host.  
 #      o >>> DEAD <<< indicator if I've given up on a host.  
 #    - Tested that SIGHUP resets the retries-remaining count (thanks to  
 #      the above status stuff) and allows loncnew to retry connecting  
 #      to the host (thanks to the log).  
 #  
 #    Revision 1.21  2003/08/26 09:19:51  foxr  
 #    How embarrassing... put in the SocketTimeout function in loncnew and forgot  
 #    to actually hook it into the LondTransaction.  Added this to MakeLondConnection  
 #    where it belongs... hopefully transactions (not just connection attempts) will  
 #    timeout more speedily than the socket errors will catch it.  
 #  
 #    Revision 1.20  2003/08/25 18:48:11  albertel  
 #    - fixing a forgotten ;  
 #  
 #    Revision 1.19  2003/08/19 09:31:46  foxr  
 #    Get socket directory from configuration rather than the old hard coded test  
 #    way that I forgot to un-hard code.  
 #  
 #    Revision 1.18  2003/08/06 09:52:29  foxr  
 #    Also needed to remember to fail in-flight transactions if their sends fail.  
 #  
 #    Revision 1.17  2003/08/03 00:44:31  foxr  
 #    1. Correct handling of connection failure: Assume it means the host is  
 #       unreachable and fail all of the queued transactions.  Note that the  
 #       inflight transactions should fail on their own time due either to timeout  
 #       or send/receive failures.  
 #    2. Correct handling of logs for forced death signals.  Pull the signal  
 #       from the event watcher.  
 #  
 #    Revision 1.16  2003/07/29 02:33:05  foxr  
 #    Add SIGINT processing to child processes to toggle annoying trace mode  
 #    on/off.. will try to use this to isolate the compute bound process issue.  
 #  
 #    Revision 1.15  2003/07/15 02:07:05  foxr  
 #    Added code for lonc/lond transaction timeouts.  Who knows if it works right.  
 #    The intent is for a timeout to fail any transaction in progress and kill  
 #    off the socket that timed out.  
 #  
 #    Revision 1.14  2003/07/03 02:10:18  foxr  
 #    Get all of the signals to work correctly.  
 #  
 #    Revision 1.13  2003/07/02 01:31:55  foxr  
 #    Added kill -HUP logic (restart).  
 #  
 #    Revision 1.11  2003/06/25 01:54:44  foxr  
 #    Fix more problems with transaction failure.  
 #  
 #    Revision 1.10  2003/06/24 02:46:04  foxr  
 #    Put a limit on  the number of times we'll retry a connection.  
 #    Start getting the signal stuff put in as well...note that need to get signals  
 #    going or else the client will permanently give up on dead servers.  
 #  
 #    Revision 1.9  2003/06/13 02:38:43  foxr  
 #    Add logging in 'expected format'  
 #  
 #    Revision 1.8  2003/06/11 02:04:35  foxr  
 #    Support delayed transactions... this is done uniformly by encapsulating  
 #    transactions in an object ... a LondTransaction that is implemented by  
 #    LondTransaction.pm  
 #  
 #    Revision 1.7  2003/06/03 01:59:39  foxr  
 #    complete coding to support deferred transactions.  
 #  
 #  
 use strict;  use strict;
 use lib "/home/httpd/lib/perl/";  use lib "/home/httpd/lib/perl/";
 use lib "/home/foxr/newloncapa/types";  
 use Event qw(:DEFAULT );  use Event qw(:DEFAULT );
 use POSIX qw(:signal_h);  use POSIX qw(:signal_h);
 use POSIX;  use POSIX;
Line 312  sub GetPeername { Line 210  sub GetPeername {
  return $peerfile;   return $peerfile;
     }      }
 }  }
 #----------------------------- Timer management ------------------------  
 =pod  =pod
   
 =head2 Debug  =head2 Debug
Line 364  sub ShowStatus { Line 261  sub ShowStatus {
 =cut  =cut
 sub SocketTimeout {  sub SocketTimeout {
     my $Socket = shift;      my $Socket = shift;
           Log("WARNING", "A socket timeout was detected");
       Debug(0, " SocketTimeout called: ");
       $Socket->Dump();
     KillSocket($Socket); # A transaction timeout also counts as      KillSocket($Socket); # A transaction timeout also counts as
                                 # a connection failure:                                  # a connection failure:
     $ConnectionRetriesLeft--;      $ConnectionRetriesLeft--;
 }  }
   #----------------------------- Timer management ------------------------
   
 =pod  =pod
   
Line 403  sub Tick { Line 303  sub Tick {
     #      #
     #  For each inflight transaction, tick down its timeout counter.      #  For each inflight transaction, tick down its timeout counter.
     #      #
     foreach my $item (keys %ActiveTransactions) {  
  my $Socket = $ActiveTransactions{$item}->getServer();      foreach my $item (keys %ActiveConnections) {
  $Socket->Tick();   my $State = $ActiveConnections{$item}->data->GetState();
    if ($State ne 'Idle') {
       Debug(5,"Ticking Socket $State $item");
       $ActiveConnections{$item}->data->Tick();
    }
     }      }
     # Do we have work in the queue, but no connections to service them?      # Do we have work in the queue, but no connections to service them?
     # If so, try to make some new connections to get things going again.      # If so, try to make some new connections to get things going again.
Line 452  Trigger disconnections of idle sockets. Line 356  Trigger disconnections of idle sockets.
   
 sub SetupTimer {  sub SetupTimer {
     Debug(6, "SetupTimer");      Debug(6, "SetupTimer");
     Event->timer(interval => 1, debug => 1, cb => \&Tick );      Event->timer(interval => 1, cb => \&Tick );
 }  }
   
 =pod  =pod
Line 753  sub KillSocket { Line 657  sub KillSocket {
     }      }
     if(exists($ActiveConnections{$Socket})) {      if(exists($ActiveConnections{$Socket})) {
  delete($ActiveConnections{$Socket});   delete($ActiveConnections{$Socket});
    $ConnectionCount--;
    if ($ConnectionCount < 0) { $ConnectionCount = 0; }
     }      }
     $ConnectionCount--;  
   
     #  If the connection count has gone to zero and there is work in the      #  If the connection count has gone to zero and there is work in the
     #  work queue, the work all gets failed with con_lost.      #  work queue, the work all gets failed with con_lost.
     #      #
Line 1440  sub ChildStatus { Line 1344  sub ChildStatus {
     my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");      my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");
     print $fh $$."\t".$RemoteHost."\t".$Status."\t".      print $fh $$."\t".$RemoteHost."\t".$Status."\t".
  $RecentLogEntry."\n";   $RecentLogEntry."\n";
       #
       #  Write out information about each of the connections:
       #
       print $fh "Active connection statuses: \n";
       my $i = 1;
       print STDERR  "================================= Socket Status Dump:\n";
       foreach my $item (keys %ActiveConnections) {
    my $Socket = $ActiveConnections{$item}->data;
    my $state  = $Socket->GetState();
    print $fh "Connection $i State: $state\n";
    print STDERR "---------------------- Connection $i \n";
    $Socket->Dump();
    $i++;
       }
     $ConnectionRetriesLeft = $ConnectionRetries;      $ConnectionRetriesLeft = $ConnectionRetries;
 }  }
   
Line 1598  ShowStatus("Forking node servers"); Line 1516  ShowStatus("Forking node servers");
   
 Log("CRITICAL", "--------------- Starting children ---------------");  Log("CRITICAL", "--------------- Starting children ---------------");
   
   LondConnection::ReadConfig;               # Read standard config files.
 my $HostIterator = LondConnection::GetHostIterator;  my $HostIterator = LondConnection::GetHostIterator;
 while (! $HostIterator->end()) {  while (! $HostIterator->end()) {
   

Legend:
  Removed from v.1.30
  Changed lines
  Added in v.1.38


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>