Diff for /loncom/loncnew between versions 1.64 and 1.76

version 1.64, 2004/10/05 10:10:31 version 1.76, 2006/08/25 21:12:19
Line 61  use LONCAPA::LondConnection; Line 61  use LONCAPA::LondConnection;
 use LONCAPA::LondTransaction;  use LONCAPA::LondTransaction;
 use LONCAPA::Configuration;  use LONCAPA::Configuration;
 use LONCAPA::HashIterator;  use LONCAPA::HashIterator;
   use Fcntl qw(:flock);
   
   
 # Read the httpd configuration file to get perl variables  # Read the httpd configuration file to get perl variables
Line 79  my %listening_to;  # Socket->host table Line 80  my %listening_to;  # Socket->host table
                                 # is listening to.                                  # is listening to.
 my %parent_dispatchers;         # host-> listener watcher events.   my %parent_dispatchers;         # host-> listener watcher events. 
   
   my %parent_handlers; # Parent signal handlers...
   
 my $MaxConnectionCount = 10; # Will get from config later.  my $MaxConnectionCount = 10; # Will get from config later.
 my $ClientConnection = 0; # Uniquifier for client events.  my $ClientConnection = 0; # Uniquifier for client events.
   
Line 87  my $NextDebugLevel= 2;  # So Sigint can Line 90  my $NextDebugLevel= 2;  # So Sigint can
 my $IdleTimeout= 600; # Wait 10 minutes before pruning connections.  my $IdleTimeout= 600; # Wait 10 minutes before pruning connections.
   
 my $LogTransactions = 0; # When True, all transactions/replies get logged.  my $LogTransactions = 0; # When True, all transactions/replies get logged.
   my $executable      = $0; # Get the full path to me.
   
 #  #
 #  The variables below are only used by the child processes.  #  The variables below are only used by the child processes.
Line 102  my $ConnectionCount = 0; Line 106  my $ConnectionCount = 0;
 my $IdleSeconds     = 0; # Number of seconds idle.  my $IdleSeconds     = 0; # Number of seconds idle.
 my $Status          = ""; # Current status string.  my $Status          = ""; # Current status string.
 my $RecentLogEntry  = "";  my $RecentLogEntry  = "";
 my $ConnectionRetries=2; # Number of connection retries allowed.  my $ConnectionRetries=5; # Number of connection retries allowed.
 my $ConnectionRetriesLeft=2; # Number of connection retries remaining.  my $ConnectionRetriesLeft=5; # Number of connection retries remaining.
 my $LondVersion     = "unknown"; # Version of lond we talk with.  my $LondVersion     = "unknown"; # Version of lond we talk with.
 my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.  my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.
 my $LondConnecting  = 0;       # True when a connection is being built.  my $LondConnecting  = 0;       # True when a connection is being built.
   
   
 # DO NOT SET THE NEXT VARIABLE TO NON ZERO!!!!!!!!!!!!!!!  
   
 my $DieWhenIdle     = 0; # When true children die when trimmed -> 0.  my $DieWhenIdle     = 1; # When true children die when trimmed -> 0.
 my $I_am_child      = 0; # True if this is the child process.  my $I_am_child      = 0; # True if this is the child process.
   
 #  #
Line 149  sub UpdateStatus { Line 152  sub UpdateStatus {
 Makes an entry into the permanent log file.  Makes an entry into the permanent log file.
   
 =cut  =cut
   
 sub LogPerm {  sub LogPerm {
     my $message=shift;      my $message=shift;
     my $execdir=$perlvar{'lonDaemons'};      my $execdir=$perlvar{'lonDaemons'};
Line 194  sub Log { Line 198  sub Log {
     my $now   = time;      my $now   = time;
     my $local = localtime($now);      my $local = localtime($now);
     my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";      my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";
     my $finalformat = $finalformat.$format."\n";      $finalformat = $finalformat.$format."\n";
   
     # open the file and put the result.      # open the file and put the result.
   
Line 268  sub SocketDump { Line 272  sub SocketDump {
  and as what we return in a SIGUSR1   and as what we return in a SIGUSR1
   
 =cut  =cut
   
 sub ShowStatus {  sub ShowStatus {
     my $state = shift;      my $state = shift;
     my $now = time;      my $now = time;
Line 278  sub ShowStatus { Line 283  sub ShowStatus {
   
 =pod  =pod
   
 =head 2 SocketTimeout  =head2 SocketTimeout
   
     Called when an action on the socket times out.  The socket is       Called when an action on the socket times out.  The socket is 
    destroyed and any active transaction is failed.     destroyed and any active transaction is failed.
   
   
 =cut  =cut
   
 sub SocketTimeout {  sub SocketTimeout {
     my $Socket = shift;      my $Socket = shift;
     Log("WARNING", "A socket timeout was detected");      Log("WARNING", "A socket timeout was detected");
Line 417  sub Tick { Line 423  sub Tick {
  $KeyMode = "";    $KeyMode = ""; 
  $clock_watcher->cancel();   $clock_watcher->cancel();
     }      }
       &UpdateStatus();
 }  }
   
 =pod  =pod
Line 670  sub FailTransaction { Line 677  sub FailTransaction {
   
     if ($ConnectionRetriesLeft > 0) {      if ($ConnectionRetriesLeft > 0) {
  Log("WARNING", "Failing transaction "   Log("WARNING", "Failing transaction "
     .$transaction->getRequest());      .$transaction->getLoggableRequest());
     }      }
     Debug(1, "Failing transaction: ".$transaction->getRequest());      Debug(1, "Failing transaction: ".$transaction->getLoggableRequest());
     if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.      if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
  my $client  = $transaction->getClient();   my $client  = $transaction->getClient();
  Debug(1," Replying con_lost to ".$transaction->getRequest());   Debug(1," Replying con_lost to ".$transaction->getRequest());
Line 682  sub FailTransaction { Line 689  sub FailTransaction {
 }  }
   
 =pod  =pod
   
 =head1  EmptyQueue  =head1  EmptyQueue
   
   Fails all items in the work queue with con_lost.    Fails all items in the work queue with con_lost.
   Note that each item in the work queue is a transaction.    Note that each item in the work queue is a transaction.
   
 =cut  =cut
   
 sub EmptyQueue {  sub EmptyQueue {
     $ConnectionRetriesLeft--; # Counts as connection failure too.      $ConnectionRetriesLeft--; # Counts as connection failure too.
     while($WorkQueue->Count()) {      while($WorkQueue->Count()) {
Line 703  sub EmptyQueue { Line 712  sub EmptyQueue {
 Close all connections open on lond prior to exit e.g.  Close all connections open on lond prior to exit e.g.
   
 =cut  =cut
   
 sub CloseAllLondConnections {  sub CloseAllLondConnections {
     foreach my $Socket (keys %ActiveConnections) {      foreach my $Socket (keys %ActiveConnections) {
       if(exists($ActiveTransactions{$Socket})) {        if(exists($ActiveTransactions{$Socket})) {
Line 711  sub CloseAllLondConnections { Line 721  sub CloseAllLondConnections {
       KillSocket($Socket);        KillSocket($Socket);
     }      }
 }  }
 =cut  
   
 =pod  =pod
   
Line 733  Parameters: Line 742  Parameters:
   
 nonzero if we are allowed to create a new connection.  nonzero if we are allowed to create a new connection.
   
   
 =cut  =cut
   
 sub KillSocket {  sub KillSocket {
     my $Socket = shift;      my $Socket = shift;
   
Line 946  sub LondReadable { Line 955  sub LondReadable {
  #  We need to be writable for this and probably don't belong   #  We need to be writable for this and probably don't belong
  #  here inthe first place.   #  here inthe first place.
   
  Deubg(6, "SendingRequest state encountered in readable");   Debug(6, "SendingRequest state encountered in readable");
  $Watcher->poll("w");   $Watcher->poll("w");
  $Watcher->cb(\&LondWritable);   $Watcher->cb(\&LondWritable);
   
Line 1147  sub LondWritable { Line 1156  sub LondWritable {
 =pod  =pod
           
 =cut  =cut
   
 sub QueueDelayed {  sub QueueDelayed {
     Debug(3,"QueueDelayed called");      Debug(3,"QueueDelayed called");
   
Line 1154  sub QueueDelayed { Line 1164  sub QueueDelayed {
   
     Debug(4, "Delayed path: ".$path);      Debug(4, "Delayed path: ".$path);
     opendir(DIRHANDLE, $path);      opendir(DIRHANDLE, $path);
       
     my @alldelayed = grep /\.$RemoteHost$/, readdir DIRHANDLE;      my @all_host_ids;
       my $host_iterator =   &LondConnection::GetHostIterator();
       while (!$host_iterator->end()) {
    my ($host_id,$host_name) = @{$host_iterator->get()}[0,3];
    if ($host_name eq $RemoteHost) {
       push(@all_host_ids, $host_id);
    }
    $host_iterator->next();
       }
       my $host_id_re = '(?:'.join('|',@all_host_ids).')';
       my @alldelayed = grep(/\.$host_id_re$/, readdir(DIRHANDLE));
     closedir(DIRHANDLE);      closedir(DIRHANDLE);
     my $dfname;      foreach my $dfname (sort(@alldelayed)) {
     my $reqfile;   my $reqfile = "$path/$dfname";
     foreach $dfname (sort  @alldelayed) {   my ($host_id) = ($dfname =~ /\.([^.]*)$/);
  $reqfile = "$path/$dfname";   Debug(4, "queueing ".$reqfile." for $host_id");
  Debug(4, "queueing ".$reqfile);  
  my $Handle = IO::File->new($reqfile);   my $Handle = IO::File->new($reqfile);
  my $cmd    = <$Handle>;   my $cmd    = <$Handle>;
  chomp $cmd; # There may or may not be a newline...   chomp $cmd; # There may or may not be a newline...
  $cmd = $cmd."\n"; # now for sure there's exactly one newline.   $cmd = $cmd."\n"; # now for sure there's exactly one newline.
  my $Transaction = LondTransaction->new($cmd);   my $Transaction = LondTransaction->new("sethost:$host_id:$cmd");
  $Transaction->SetDeferred($reqfile);   $Transaction->SetDeferred($reqfile);
  QueueTransaction($Transaction);   QueueTransaction($Transaction);
     }      }
Line 1555  into the status file. Line 1574  into the status file.
   
 We also use this to reset the retries count in order to allow the  We also use this to reset the retries count in order to allow the
 client to retry connections with a previously dead server.  client to retry connections with a previously dead server.
   
 =cut  =cut
   
 sub ChildStatus {  sub ChildStatus {
Line 1563  sub ChildStatus { Line 1583  sub ChildStatus {
   
     Debug(2, "Reporting child status because : ".$watcher->data);      Debug(2, "Reporting child status because : ".$watcher->data);
     my $docdir = $perlvar{'lonDocRoot'};      my $docdir = $perlvar{'lonDocRoot'};
     my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");      
     print $fh $$."\t".$RemoteHost."\t".$Status."\t".      open(LOG,">>$docdir/lon-status/loncstatus.txt");
       flock(LOG,LOCK_EX);
       print LOG $$."\t".$RemoteHost."\t".$Status."\t".
  $RecentLogEntry."\n";   $RecentLogEntry."\n";
     #      #
     #  Write out information about each of the connections:      #  Write out information about each of the connections:
     #      #
     if ($DebugLevel > 2) {      if ($DebugLevel > 2) {
  print $fh "Active connection statuses: \n";   print LOG "Active connection statuses: \n";
  my $i = 1;   my $i = 1;
  print STDERR  "================================= Socket Status Dump:\n";   print STDERR  "================================= Socket Status Dump:\n";
  foreach my $item (keys %ActiveConnections) {   foreach my $item (keys %ActiveConnections) {
     my $Socket = $ActiveConnections{$item}->data;      my $Socket = $ActiveConnections{$item}->data;
     my $state  = $Socket->GetState();      my $state  = $Socket->GetState();
     print $fh "Connection $i State: $state\n";      print LOG "Connection $i State: $state\n";
     print STDERR "---------------------- Connection $i \n";      print STDERR "---------------------- Connection $i \n";
     $Socket->Dump(-1); # Ensure it gets dumped..      $Socket->Dump(-1); # Ensure it gets dumped..
     $i++;      $i++;
  }   }
     }      }
       flock(LOG,LOCK_UN);
       close(LOG);
     $ConnectionRetriesLeft = $ConnectionRetries;      $ConnectionRetriesLeft = $ConnectionRetries;
       UpdateStatus();
 }  }
   
 =pod  =pod
Line 1603  sub SignalledToDeath { Line 1628  sub SignalledToDeath {
     chomp($signal);      chomp($signal);
     Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "      Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
  ."died through "."\"$signal\"");   ."died through "."\"$signal\"");
     LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "      #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
     ."\"$signal\"");  #    ."\"$signal\"");
     exit 0;      exit 0;
   
 }  }
   
   =pod
   
 =head2 ToggleDebug  =head2 ToggleDebug
   
 This sub toggles trace debugging on and off.  This sub toggles trace debugging on and off.
Line 1624  sub ToggleDebug { Line 1651  sub ToggleDebug {
   
 }  }
   
   =pod
   
 =head2 ChildProcess  =head2 ChildProcess
   
 This sub implements a child process for a single lonc daemon.  This sub implements a child process for a single lonc daemon.
Line 1650  sub ChildProcess { Line 1679  sub ChildProcess {
  Debug(5, "Killing watcher for $listener");   Debug(5, "Killing watcher for $listener");
   
  $watcher->cancel();   $watcher->cancel();
  undef         $parent_dispatchers{$listener};   delete($parent_dispatchers{$listener});
   
       }
   
       #  kill off the parent's signal handlers too!  
       #
   
       for my $handler (keys %parent_handlers) {
    my $watcher = $parent_handlers{$handler};
    $watcher->cancel();
    delete($parent_handlers{$handler});
     }      }
   
     $I_am_child    = 1; # Seems like in spite of it all I may still getting      $I_am_child    = 1; # Seems like in spite of it all I may still getting
                                 # parent event dispatches.. flag I'm a child.                                  # parent event dispatches.. flag I'm a child.
   
Line 1692  sub ChildProcess { Line 1731  sub ChildProcess {
       desc => 'Lonc Listener Unix Socket',        desc => 'Lonc Listener Unix Socket',
       fd   => $socket);        fd   => $socket);
           
     $Event::Debuglevel = $DebugLevel;      $Event::DebugLevel = $DebugLevel;
           
     Debug(9, "Making initial lond connection for ".$RemoteHost);      Debug(9, "Making initial lond connection for ".$RemoteHost);
   
Line 1845  sub listen_on_all_unix_sockets { Line 1884  sub listen_on_all_unix_sockets {
     my $host_iterator      =   &LondConnection::GetHostIterator();      my $host_iterator      =   &LondConnection::GetHostIterator();
     while (!$host_iterator->end()) {      while (!$host_iterator->end()) {
  my $host_entry_ref =   $host_iterator->get();   my $host_entry_ref =   $host_iterator->get();
  my $host_name      = $host_entry_ref->[0];   my $host_name      = $host_entry_ref->[3];
  Debug(9, "Listen for $host_name");   Debug(9, "Listen for $host_name");
  &parent_listen($host_name);   &parent_listen($host_name);
  $host_iterator->next();   $host_iterator->next();
Line 1965  if ($DieWhenIdle) { Line 2004  if ($DieWhenIdle) {
    signal   => "CHLD");     signal   => "CHLD");
   
   
       # Set up all the other signals we set up.  We'll vector them off to the
       # same subs as we would for DieWhenIdle false and, if necessary, conditionalize
       # the code there.
   
       $parent_handlers{INT} = Event->signal(cb       => \&Terminate,
     desc     => "Parent INT handler",
     signal   => "INT");
       $parent_handlers{TERM} = Event->signal(cb       => \&Terminate,
      desc     => "Parent TERM handler",
      signal   => "TERM");
       $parent_handlers{HUP}  = Event->signal(cb       => \&Restart,
      desc     => "Parent HUP handler.",
      signal   => "HUP");
       $parent_handlers{USR1} = Event->signal(cb       => \&CheckKids,
      desc     => "Parent USR1 handler",
      signal   => "USR1");
       $parent_handlers{USR2} = Event->signal(cb       => \&UpdateKids,
      desc     => "Parent USR2 handler.",
      signal   => "USR2");
       
       #  Start procdesing events.
   
     $Event::DebugLevel = $DebugLevel;      $Event::DebugLevel = $DebugLevel;
     Debug(9, "Parent entering event loop");      Debug(9, "Parent entering event loop");
     my $ret = Event::loop();      my $ret = Event::loop();
Line 2016  sub CheckKids { Line 2077  sub CheckKids {
     my $now=time;      my $now=time;
     my $local=localtime($now);      my $local=localtime($now);
     print $fh "LONC status $local - parent $$ \n\n";      print $fh "LONC status $local - parent $$ \n\n";
       foreach my $host (keys %parent_dispatchers) {
    print $fh "LONC Parent process listening for $host\n";
       }
     foreach my $pid (keys %ChildHash) {      foreach my $pid (keys %ChildHash) {
  Debug(2, "Sending USR1 -> $pid");   Debug(2, "Sending USR1 -> $pid");
  kill 'USR1' => $pid; # Tell Child to report status.   kill 'USR1' => $pid; # Tell Child to report status.
  sleep 1; # Wait so file doesn't intermix.  
     }      }
   
 }  }
   
 =pod  =pod
Line 2053  sub UpdateKids { Line 2117  sub UpdateKids {
   
     Log("INFO", "Updating connections via SIGUSR2");      Log("INFO", "Updating connections via SIGUSR2");
   
     #  Just in case we need to kill our own lonc, we wait a few seconds to      #  I'm not sure what I was thinking in the first implementation.
     #  give it a chance to receive and relay lond's response to the       # someone will have to work hard to convince me the effect is any
     #  re-init command.      # different than Restart, especially now that we don't start up 
     #      # per host servers automatically, may as well just restart.
       # The down side is transactions that are in flight will get timed out
     sleep(2); # Wait a couple of seconds.      # (lost unless they are critical).
   
     my %hosts;                   # Indexed by loncapa hostname, value=ip.  
       
     # Need to re-read  the host table:  
       
       
     LondConnection::ReadConfig();  
     my $I = LondConnection::GetHostIterator;  
     while (! $I->end()) {  
  my $item = $I->get();  
  $hosts{$item->[0]} = $item->[4];  
  $I->next();  
     }  
   
     #  The logic below is written for clarity not for efficiency.  
     #  Since I anticipate that this function is only rarely called, that's  
     #  appropriate.  There are certainly ways to combine the loops below,  
     #  and anyone wishing to obscure the logic is welcome to go for it.  
     #  Note that we don't re-direct sigchild.  Instead we do what's needed  
     #  to the data structures that keep track of children to ensure that  
     #  when sigchild is honored, no new child is born.  
     #  
   
     #  For each existing child; if it's host doesn't exist, kill the child.      &Restart();
   
     foreach my $child (keys %ChildHash) {  
  my $oldhost = $ChildHash{$child};  
  if (!(exists $hosts{$oldhost})) {  
     Log("CRITICAL", "Killing child for $oldhost  host no longer exists");  
     delete $ChildHash{$child};  
     delete $HostToPid{$oldhost};  
     kill 'QUIT' => $child;  
  }  
     }  
     # For each remaining existing child; if it's host's ip has changed,  
     # Restart the child on the new IP.  
   
     foreach my $child (keys %ChildHash) {  
  my $oldhost = $ChildHash{$child};  
  my $oldip   = $HostHash{$oldhost};  
  if ($hosts{$oldhost} ne $oldip) {  
   
     # kill the old child.  
   
     Log("CRITICAL", "Killing child for $oldhost host ip has changed...");  
     delete $ChildHash{$child};  
     delete $HostToPid{$oldhost};  
     kill 'QUIT' => $child;  
   
     # Do the book-keeping needed to start a new child on the  
     # new ip.  
   
     $HostHash{$oldhost} = $hosts{$oldhost};  
     CreateChild($oldhost);  
  }  
     }  
     # Finally, for each new host, not in the host hash, create a  
     # enter the host and create a new child.  
     # Force a status display of any existing process.  
   
     foreach my $host (keys %hosts) {  
  if(!(exists $HostHash{$host})) {  
     Log("INFO", "New host $host discovered in hosts.tab...");  
     $HostHash{$host} = $hosts{$host};  
     CreateChild($host);  
  } else {  
     kill 'HUP' => $HostToPid{$host};    # status display.  
  }  
     }  
 }  }
   
   
Line 2146  sub Restart { Line 2144  sub Restart {
     Log("CRITICAL", "Restarting");      Log("CRITICAL", "Restarting");
     my $execdir = $perlvar{'lonDaemons'};      my $execdir = $perlvar{'lonDaemons'};
     unlink("$execdir/logs/lonc.pid");      unlink("$execdir/logs/lonc.pid");
     exec("$execdir/loncnew");      exec("$executable");
 }  }
   
 =pod  =pod
Line 2190  sub really_kill_them_all_dammit Line 2188  sub really_kill_them_all_dammit
  unlink("$execdir/logs/lonc.pid");   unlink("$execdir/logs/lonc.pid");
     }      }
 }  }
   
 =pod  =pod
   
 =head1 Terminate  =head1 Terminate

Removed from v.1.64  
changed lines
  Added in v.1.76


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>