Diff for /loncom/loncnew between versions 1.11 and 1.15

version 1.11, 2003/06/25 01:54:44 version 1.15, 2003/07/15 02:07:05
Line 27 Line 27
 # http://www.lon-capa.org/  # http://www.lon-capa.org/
 #  #
 #  #
 # new lonc handles n requestors spread out over m connections to londs.  # new lonc handles n requestors spread out over m connections to londs.
 # This module is based on the Event class.  # This module is based on the Event class.
 #   Development iterations:  #   Development iterations:
 #    - Setup basic event loop.   (done)  #    - Setup basic event loop.   (done)
Line 46 Line 46
   
 # Change log:  # Change log:
 #    $Log$  #    $Log$
   #    Revision 1.15  2003/07/15 02:07:05  foxr
   #    Added code for lonc/lond transaction timeouts.  Who knows if it works right.
   #    The intent is for a timeout to fail any transaction in progress and kill
   #    off the socket that timed out.
   #
   #    Revision 1.14  2003/07/03 02:10:18  foxr
   #    Get all of the signals to work correctly.
   #
   #    Revision 1.13  2003/07/02 01:31:55  foxr
   #    Added kill -HUP logic (restart).
   #
 #    Revision 1.11  2003/06/25 01:54:44  foxr  #    Revision 1.11  2003/06/25 01:54:44  foxr
 #    Fix more problems with transaction failure.  #    Fix more problems with transaction failure.
 #  #
Line 71  use lib "/home/httpd/lib/perl/"; Line 82  use lib "/home/httpd/lib/perl/";
 use lib "/home/foxr/newloncapa/types";  use lib "/home/foxr/newloncapa/types";
 use Event qw(:DEFAULT );  use Event qw(:DEFAULT );
 use POSIX qw(:signal_h);  use POSIX qw(:signal_h);
   use POSIX;
 use IO::Socket;  use IO::Socket;
 use IO::Socket::INET;  use IO::Socket::INET;
 use IO::Socket::UNIX;  use IO::Socket::UNIX;
Line 89  use LONCAPA::HashIterator; Line 101  use LONCAPA::HashIterator;
 #  #
 #   Disable all signals we might receive from outside for now.  #   Disable all signals we might receive from outside for now.
 #  #
 $SIG{QUIT}  = IGNORE;  #$SIG{QUIT}  = IGNORE;
 $SIG{HUP}   = IGNORE;  #$SIG{HUP}   = IGNORE;
 $SIG{USR1}  = IGNORE;  #$SIG{USR1}  = IGNORE;
 $SIG{INT}   = IGNORE;  #$SIG{INT}   = IGNORE;
 $SIG{CHLD}  = IGNORE;  #$SIG{CHLD}  = IGNORE;
 $SIG{__DIE__}  = IGNORE;  #$SIG{__DIE__}  = IGNORE;
   
   
 # Read the httpd configuration file to get perl variables  # Read the httpd configuration file to get perl variables
Line 128  my $WorkQueue       = Queue->new(); # Qu Line 140  my $WorkQueue       = Queue->new(); # Qu
 my $ConnectionCount = 0;  my $ConnectionCount = 0;
 my $IdleSeconds     = 0; # Number of seconds idle.  my $IdleSeconds     = 0; # Number of seconds idle.
 my $Status          = ""; # Current status string.  my $Status          = ""; # Current status string.
   my $RecentLogEntry  = "";
 my $ConnectionRetries=5; # Number of connection retries allowed.  my $ConnectionRetries=5; # Number of connection retries allowed.
 my $ConnectionRetriesLeft=5; # Number of connection retries remaining.  my $ConnectionRetriesLeft=5; # Number of connection retries remaining.
   
Line 204  sub Log { Line 217  sub Log {
     my $execdir = $perlvar{'lonDaemons'};      my $execdir = $perlvar{'lonDaemons'};
     my $fh      = IO::File->new(">>$execdir/logs/lonc.log");      my $fh      = IO::File->new(">>$execdir/logs/lonc.log");
     my $msg = sprintf($finalformat, $message);      my $msg = sprintf($finalformat, $message);
       $RecentLogEntry = $msg;
     print $fh $msg;      print $fh $msg;
           
           
Line 277  sub ShowStatus { Line 291  sub ShowStatus {
   
 =pod  =pod
   
   =head2 SocketTimeout
   
       Called when an action on the socket times out.  The socket is 
      destroyed and any active transaction is failed.
   
   
   =cut
   sub SocketTimeout {
       my $Socket = shift;
       
       KillSocket($Socket);
   }
   
   =pod
   
 =head2 Tick  =head2 Tick
   
 Invoked  each timer tick.  Invoked  each timer tick.
Line 287  Invoked  each timer tick. Line 316  Invoked  each timer tick.
 sub Tick {  sub Tick {
     my $client;      my $client;
     ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount);      ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount);
     Debug(10,"Tick");  
     Debug(10,"    Current connection count: ".$ConnectionCount);  
     foreach $client (keys %ActiveClients) {  
  Debug(10,"    Have client:  with id: ".$ActiveClients{$client});  
     }  
     # Is it time to prune connection count:      # Is it time to prune connection count:
   
   
Line 305  sub Tick { Line 330  sub Tick {
     } else {      } else {
  $IdleSeconds = 0; # Reset idle count if not idle.   $IdleSeconds = 0; # Reset idle count if not idle.
     }      }
       #
       #  For each inflight transaction, tick down its timeout counter.
       #
       foreach $item (keys %ActiveTransactions) {
    my $Socket = $ActiveTransactions{$item}->getServer();
    $Socket->Tick();
       }
     # Do we have work in the queue, but no connections to service them?      # Do we have work in the queue, but no connections to service them?
     # If so, try to make some new connections to get things going again.      # If so, try to make some new connections to get things going again.
     #      #
Line 443  sub ClientWritable { Line 474  sub ClientWritable {
   
     } else { # Partial string sent.      } else { # Partial string sent.
  $Watcher->data(substr($Data, $result));   $Watcher->data(substr($Data, $result));
    if($result == 0) {    # client hung up on us!!
       Log("INFO", "lonc pipe client hung up on us!");
       $Watcher->cancel;
       $Socket->shutdown(2);
       $Socket->close();
    }
     }      }
           
  } else { # Error of some sort...   } else { # Error of some sort...
Line 523  sub StartClientReply { Line 560  sub StartClientReply {
     my $Transaction   = shift;      my $Transaction   = shift;
     my $data     = shift;      my $data     = shift;
   
   
     my $Client   = $Transaction->getClient();      my $Client   = $Transaction->getClient();
   
     &Debug(8," Reply was: ".$data);      &Debug(8," Reply was: ".$data);
Line 716  sub LondReadable { Line 754  sub LondReadable {
     my $State = $Socket->GetState(); # All action depends on the state.      my $State = $Socket->GetState(); # All action depends on the state.
   
     SocketDump(6, $Socket);      SocketDump(6, $Socket);
       my $status = $Socket->Readable();
       &Debug(2, "Socket->Readable returned: $status");
   
     if($Socket->Readable() != 0) {      if($status != 0) {
  # bad return from socket read. Currently this means that   # bad return from socket read. Currently this means that
  # The socket has become disconnected. We fail the transaction.   # The socket has become disconnected. We fail the transaction.
   
Line 982  sub QueueDelayed { Line 1022  sub QueueDelayed {
  my $Handle = IO::File->new($reqfile);   my $Handle = IO::File->new($reqfile);
  my $cmd    = <$Handle>;   my $cmd    = <$Handle>;
  chomp $cmd; # There may or may not be a newline...   chomp $cmd; # There may or may not be a newline...
  $cmd = $cmd."\ny"; # now for sure there's exactly one newline.   $cmd = $cmd."\n"; # now for sure there's exactly one newline.
  my $Transaction = LondTransaction->new($cmd);   my $Transaction = LondTransaction->new($cmd);
  $Transaction->SetDeferred($reqfile);   $Transaction->SetDeferred($reqfile);
  QueueTransaction($Transaction);   QueueTransaction($Transaction);
Line 1285  sub SetupLoncListener { Line 1325  sub SetupLoncListener {
       fd     => $socket);        fd     => $socket);
 }  }
   
   =pod 
   
   =head2 ChildStatus
    
   Child USR1 signal handler to report the most recent status
   into the status file.
   
   =cut
   sub ChildStatus {
       my $event = shift;
       my $watcher = $event->w;
   
       Debug(2, "Reporting child status because : ".$watcher->data);
       my $docdir = $perlvar{'lonDocRoot'};
       my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");
       print $fh $$."\t".$RemoteHost."\t".$Status."\t".
    $RecentLogEntry."\n";
   }
   
 =pod  =pod
   
 =head2 SignalledToDeath  =head2 SignalledToDeath
Line 1293  Called in response to a signal that caus Line 1352  Called in response to a signal that caus
   
 =cut  =cut
   
 =pod  
   
 sub SignalledToDeath {  sub SignalledToDeath {
       my $event  = shift;
       my $watcher= $event->w;
   
       Debug(2,"Signalled to death! via ".$watcher->data);
     my ($signal) = @_;      my ($signal) = @_;
     chomp($signal);      chomp($signal);
     Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "      Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
Line 1303  sub SignalledToDeath { Line 1365  sub SignalledToDeath {
     LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "      LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
     ."\"$signal\"");      ."\"$signal\"");
     die("Signal abnormal end");      die("Signal abnormal end");
       exit 0;
   
 }  }
 =head2 ChildProcess  =head2 ChildProcess
Line 1314  This sub implements a child process for Line 1377  This sub implements a child process for
 sub ChildProcess {  sub ChildProcess {
   
   
     # For now turn off signals.      #
           #  Signals must be handled by the Event framework...
     $SIG{QUIT}  = \&SignalledToDeath;  #
     $SIG{HUP}   = IGNORE;  #    $SIG{QUIT}  = \&SignalledToDeath;
     $SIG{USR1}  = IGNORE;  #    $SIG{HUP}   = \&ChildStatus;
     $SIG{INT}   = IGNORE;  #    $SIG{USR1}  = IGNORE;
     $SIG{CHLD}  = IGNORE;  #    $SIG{INT}   = DEFAULT;
     $SIG{__DIE__}  = \&SignalledToDeath;  #    $SIG{CHLD}  = IGNORE;
   #    $SIG{__DIE__}  = \&SignalledToDeath;
   
       Event->signal(signal   => "QUIT",
     cb       => \&SignalledToDeath,
     data     => "QUIT");
       Event->signal(signal   => "HUP",
     cb       => \&ChildStatus,
     data     => "HUP");
       Event->signal(signal   => "USR1",
     cb       => \&ChildStatus,
     data     => "USR1");
   
     SetupTimer();      SetupTimer();
           
Line 1333  sub ChildProcess { Line 1407  sub ChildProcess {
   
 # Setup the initial server connection:  # Setup the initial server connection:
           
      # &MakeLondConnection(); // let first work requirest do it.       # &MakeLondConnection(); // let first work requirest do it.
   
   
     Debug(9,"Entering event loop");      Debug(9,"Entering event loop");
Line 1346  sub ChildProcess { Line 1420  sub ChildProcess {
 #  Create a new child for host passed in:  #  Create a new child for host passed in:
   
 sub CreateChild {  sub CreateChild {
       my $sigset = POSIX::SigSet->new(SIGINT);
       sigprocmask(SIG_BLOCK, $sigset);
     my $host = shift;      my $host = shift;
     $RemoteHost = $host;      $RemoteHost = $host;
     Log("CRITICAL", "Forking server for ".$host);      Log("CRITICAL", "Forking server for ".$host);
     $pid          = fork;      $pid          = fork;
     if($pid) { # Parent      if($pid) { # Parent
  $ChildHash{$pid} = $RemoteHost;   $ChildHash{$pid} = $RemoteHost;
    sigprocmask(SIG_UNBLOCK, $sigset);
   
     } else { # child.      } else { # child.
  ShowStatus("Connected to ".$RemoteHost);   ShowStatus("Connected to ".$RemoteHost);
  ChildProcess;   $SIG{INT} = DEFAULT;
    sigprocmask(SIG_UNBLOCK, $sigset);
    ChildProcess; # Does not return.
     }      }
   
 }  }
Line 1410  while (! $HostIterator->end()) { Line 1490  while (! $HostIterator->end()) {
     CreateChild($hostentryref->[0]);      CreateChild($hostentryref->[0]);
     $HostIterator->next();      $HostIterator->next();
 }  }
   $RemoteHost = "Parent Server";
   
 # Maintain the population:  # Maintain the population:
   
Line 1418  ShowStatus("Parent keeping the flock"); Line 1499  ShowStatus("Parent keeping the flock");
 #  #
 #   Set up parent signals:  #   Set up parent signals:
 #  #
 $SIG{INT}  = &KillThemAll;  
 $SIG{TERM} = &KillThemAll;   $SIG{INT}  = \&Terminate;
   $SIG{TERM} = \&Terminate; 
   $SIG{HUP}  = \&Restart;
   $SIG{USR1} = \&CheckKids; 
   
 while(1) {  while(1) {
     $deadchild = wait();      $deadchild = wait();
Line 1432  while(1) { Line 1516  while(1) {
  CreateChild($deadhost);   CreateChild($deadhost);
     }      }
 }  }
   
   
   
   =pod
   
   =head1 CheckKids
   
     Since kids do not die as easily in this implementation
   as the previous one, there  is no need to restart the
   dead ones (all dead kids get restarted when they die!!)
   The only thing this function does is to pass USR1 to the
   kids so that they report their status.
   
   =cut
   
   sub CheckKids {
       Debug(2, "Checking status of children");
       my $docdir = $perlvar{'lonDocRoot'};
       my $fh = IO::File->new(">$docdir/lon-status/loncstatus.txt");
       my $now=time;
       my $local=localtime($now);
       print $fh "LONC status $local - parent $$ \n\n";
       foreach $pid (keys %ChildHash) {
    Debug(2, "Sending USR1 -> $pid");
    kill 'USR1' => $pid; # Tell Child to report status.
    sleep 1; # Wait so file doesn't intermix.
       }
   }
   
   =pod
   
   =head1 Restart
   
   Signal handler for HUP... all children are killed and
   we self restart.  This is an el-cheapo way to re read
   the config file.
   
   =cut
   
   sub Restart {
       KillThemAll; # First kill all the children.
       Log("CRITICAL", "Restarting");
       my $execdir = $perlvar{'lonDaemons'};
       unlink("$execdir/logs/lonc.pid");
       exec("$execdir/lonc");
   }
   
   =pod
   
   =head1 KillThemAll
   
   Signal handler that kills all children by sending them a 
   SIGINT.  Responds to sigint and sigterm.
   
   =cut
   
 sub KillThemAll {  sub KillThemAll {
       Debug(2, "Kill them all!!");
       local($SIG{CHLD}) = 'IGNORE';      # Our children >will< die.
       foreach $pid (keys %ChildHash) {
    my $serving = $ChildHash{$pid};
    Debug(2, "Killing lonc for $serving pid = $pid");
    ShowStatus("Killing lonc for $serving pid = $pid");
    Log("CRITICAL", "Killing lonc for $serving pid = $pid");
    kill('INT', $pid);
    delete($ChildeHash{$pid});
       }
       my $execdir = $perlvar{'lonDaemons'};
       unlink("$execdir/logs/lonc.pid");
       ShowStatus("Killing the master process");
       Log("CRITICAL", "Killing the master process.");
 }  }
   
   =pod
   
   =head1 Terminate
    
   Terminate the system.
   
   =cut
   
   sub Terminate {
       KillThemAll;
       exit;
   
   }
   =pod
   
 =head1 Theory  =head1 Theory
   
 The event class is used to build this as a single process with an  The event class is used to build this as a single process with an

Removed from v.1.11  
changed lines
  Added in v.1.15


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>