Diff for /loncom/loncnew between versions 1.62 and 1.87

version 1.62, 2004/10/04 10:30:50 version 1.87, 2007/06/18 22:49:52
Line 60  use LONCAPA::Stack; Line 60  use LONCAPA::Stack;
 use LONCAPA::LondConnection;  use LONCAPA::LondConnection;
 use LONCAPA::LondTransaction;  use LONCAPA::LondTransaction;
 use LONCAPA::Configuration;  use LONCAPA::Configuration;
 use LONCAPA::HashIterator;  use Fcntl qw(:flock);
   
   
 # Read the httpd configuration file to get perl variables  # Read the httpd configuration file to get perl variables
Line 72  my %perlvar    = %{$perlvarref}; Line 72  my %perlvar    = %{$perlvarref};
 #  #
 #  parent and shared variables.  #  parent and shared variables.
   
 my %ChildHash; # by pid -> host.  my %ChildPid; # by pid -> host.
 my %HostToPid; # By host -> pid.  my %ChildHost; # by host.
 my %HostHash; # by loncapaname -> IP.  
 my %listening_to; # Socket->host table for who the parent  my %listening_to; # Socket->host table for who the parent
                                 # is listening to.                                  # is listening to.
 my %parent_dispatchers;         # host-> listener watcher events.   my %parent_dispatchers;         # host-> listener watcher events. 
   
   my %parent_handlers; # Parent signal handlers...
   
 my $MaxConnectionCount = 10; # Will get from config later.  my $MaxConnectionCount = 10; # Will get from config later.
 my $ClientConnection = 0; # Uniquifier for client events.  my $ClientConnection = 0; # Uniquifier for client events.
   
Line 87  my $NextDebugLevel= 2;  # So Sigint can Line 88  my $NextDebugLevel= 2;  # So Sigint can
 my $IdleTimeout= 600; # Wait 10 minutes before pruning connections.  my $IdleTimeout= 600; # Wait 10 minutes before pruning connections.
   
 my $LogTransactions = 0; # When True, all transactions/replies get logged.  my $LogTransactions = 0; # When True, all transactions/replies get logged.
   my $executable      = $0; # Get the full path to me.
   
 #  #
 #  The variables below are only used by the child processes.  #  The variables below are only used by the child processes.
 #  #
 my $RemoteHost; # Name of host child is talking to.  my $RemoteHost; # Name of host child is talking to.
   my $RemoteHostId; # default lonid of host child is talking to.
   my @all_host_ids;
 my $UnixSocketDir= $perlvar{'lonSockDir'};  my $UnixSocketDir= $perlvar{'lonSockDir'};
 my $IdleConnections = Stack->new(); # Set of idle connections  my $IdleConnections = Stack->new(); # Set of idle connections
 my %ActiveConnections; # Connections to the remote lond.  my %ActiveConnections; # Connections to the remote lond.
Line 102  my $ConnectionCount = 0; Line 106  my $ConnectionCount = 0;
 my $IdleSeconds     = 0; # Number of seconds idle.  my $IdleSeconds     = 0; # Number of seconds idle.
 my $Status          = ""; # Current status string.  my $Status          = ""; # Current status string.
 my $RecentLogEntry  = "";  my $RecentLogEntry  = "";
 my $ConnectionRetries=2; # Number of connection retries allowed.  my $ConnectionRetries=5; # Number of connection retries allowed.
 my $ConnectionRetriesLeft=2; # Number of connection retries remaining.  my $ConnectionRetriesLeft=5; # Number of connection retries remaining.
 my $LondVersion     = "unknown"; # Version of lond we talk with.  my $LondVersion     = "unknown"; # Version of lond we talk with.
 my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.  my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.
 my $LondConnecting  = 0;       # True when a connection is being built.  my $LondConnecting  = 0;       # True when a connection is being built.
   
   
 # DO NOT SET THE NEXT VARIABLE TO NON ZERO!!!!!!!!!!!!!!!  
   
 my $DieWhenIdle     = 0; # When true children die when trimmed -> 0.  
 my $I_am_child      = 0; # True if this is the child process.  my $I_am_child      = 0; # True if this is the child process.
   
 #  #
Line 149  sub UpdateStatus { Line 151  sub UpdateStatus {
 Makes an entry into the permanent log file.  Makes an entry into the permanent log file.
   
 =cut  =cut
   
 sub LogPerm {  sub LogPerm {
     my $message=shift;      my $message=shift;
     my $execdir=$perlvar{'lonDaemons'};      my $execdir=$perlvar{'lonDaemons'};
     my $now=time;      my $now=time;
     my $local=localtime($now);      my $local=localtime($now);
     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");      my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
       chomp($message);
     print $fh "$now:$message:$local\n";      print $fh "$now:$message:$local\n";
 }  }
   
Line 194  sub Log { Line 198  sub Log {
     my $now   = time;      my $now   = time;
     my $local = localtime($now);      my $local = localtime($now);
     my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";      my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";
     my $finalformat = $finalformat.$format."\n";      $finalformat = $finalformat.$format."\n";
   
     # open the file and put the result.      # open the file and put the result.
   
Line 268  sub SocketDump { Line 272  sub SocketDump {
  and as what we return in a SIGUSR1   and as what we return in a SIGUSR1
   
 =cut  =cut
   
 sub ShowStatus {  sub ShowStatus {
     my $state = shift;      my $state = shift;
     my $now = time;      my $now = time;
Line 278  sub ShowStatus { Line 283  sub ShowStatus {
   
 =pod  =pod
   
 =head 2 SocketTimeout  =head2 SocketTimeout
   
     Called when an action on the socket times out.  The socket is       Called when an action on the socket times out.  The socket is 
    destroyed and any active transaction is failed.     destroyed and any active transaction is failed.
   
   
 =cut  =cut
   
 sub SocketTimeout {  sub SocketTimeout {
     my $Socket = shift;      my $Socket = shift;
     Log("WARNING", "A socket timeout was detected");      Log("WARNING", "A socket timeout was detected");
Line 302  sub SocketTimeout { Line 308  sub SocketTimeout {
     }      }
   
 }  }
   
   #
   #   This function should be called by the child in all cases where it must
   #   exit.  The child process must create a lock file for the AF_UNIX socket
   #   in order to prevent connection requests from lonnet in the time between
   #   process exit and the parent picking up the listen again.
   #
   # Parameters:
   #     exit_code           - Exit status value, however see the next parameter.
   #     message             - If this optional parameter is supplied, the exit
   #                           is via a die with this message.
   #
   sub child_exit {
       my ($exit_code, $message) = @_;
   
       # Regardless of how we exit, we may need to do the lock thing:
   
       #
       #  Create a lock file since there will be a time window
       #  between our exit and the parent's picking up the listen
       #  during which no listens will be done on the
       #  lonnet client socket.
       #
       my $lock_file = &GetLoncSocketPath().".lock";
       open(LOCK,">$lock_file");
       print LOCK "Contents not important";
       close(LOCK);
       unlink(&GetLoncSocketPath());
   
       if ($message) {
    die($message);
       } else {
    exit($exit_code);
       }
   }
 #----------------------------- Timer management ------------------------  #----------------------------- Timer management ------------------------
   
 =pod  =pod
Line 331  sub Tick { Line 372  sub Tick {
     KillSocket($Socket);      KillSocket($Socket);
     $IdleSeconds = 0; # Otherwise all connections get trimmed to fast.      $IdleSeconds = 0; # Otherwise all connections get trimmed to fast.
     UpdateStatus();      UpdateStatus();
     if(($ConnectionCount == 0) && $DieWhenIdle) {      if(($ConnectionCount == 0)) {
  #   &child_exit(0);
  #  Create a lock file since there will be a time window  
  #  between our exit and the parent's picking up the listen  
  #  during which no listens will be done on the  
  #  lonnet client socket.  
  #  
  my $lock_file = GetLoncSocketPath().".lock";  
  open(LOCK,">$lock_file");  
  print LOCK "Contents not important";  
  close(LOCK);  
   
  exit(0);  
     }      }
  }   }
     } else {      } else {
Line 389  sub Tick { Line 420  sub Tick {
  $KeyMode = "";    $KeyMode = ""; 
  $clock_watcher->cancel();   $clock_watcher->cancel();
     }      }
       &UpdateStatus();
 }  }
   
 =pod  =pod
Line 471  the data and Event->w->fd is the socket Line 503  the data and Event->w->fd is the socket
 sub ClientWritable {  sub ClientWritable {
     my $Event    = shift;      my $Event    = shift;
     my $Watcher  = $Event->w;      my $Watcher  = $Event->w;
       if (!defined($Watcher)) {
    &child_exit(-1,'No watcher for event in ClientWritable');
       }
     my $Data     = $Watcher->data;      my $Data     = $Watcher->data;
     my $Socket   = $Watcher->fd;      my $Socket   = $Watcher->fd;
   
Line 534  sub ClientWritable { Line 569  sub ClientWritable {
  }   }
     } else {      } else {
  $Watcher->cancel(); # A delayed request...just cancel.   $Watcher->cancel(); # A delayed request...just cancel.
    return;
     }      }
 }  }
   
Line 573  sub CompleteTransaction { Line 609  sub CompleteTransaction {
  StartClientReply($Transaction, $data);   StartClientReply($Transaction, $data);
     } else { # Delete deferred transaction file.      } else { # Delete deferred transaction file.
  Log("SUCCESS", "A delayed transaction was completed");   Log("SUCCESS", "A delayed transaction was completed");
  LogPerm("S:$Transaction->getClient() :".$Transaction->getRequest());   LogPerm("S:".$Transaction->getClient().":".$Transaction->getRequest());
  unlink $Transaction->getFile();   unlink($Transaction->getFile());
     }      }
 }  }
   
Line 642  sub FailTransaction { Line 678  sub FailTransaction {
   
     if ($ConnectionRetriesLeft > 0) {      if ($ConnectionRetriesLeft > 0) {
  Log("WARNING", "Failing transaction "   Log("WARNING", "Failing transaction "
     .$transaction->getRequest());      .$transaction->getLoggableRequest());
     }      }
     Debug(1, "Failing transaction: ".$transaction->getRequest());      Debug(1, "Failing transaction: ".$transaction->getLoggableRequest());
     if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.      if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
  my $client  = $transaction->getClient();   my $client  = $transaction->getClient();
  Debug(1," Replying con_lost to ".$transaction->getRequest());   Debug(1," Replying con_lost to ".$transaction->getRequest());
Line 654  sub FailTransaction { Line 690  sub FailTransaction {
 }  }
   
 =pod  =pod
   
 =head1  EmptyQueue  =head1  EmptyQueue
   
   Fails all items in the work queue with con_lost.    Fails all items in the work queue with con_lost.
   Note that each item in the work queue is a transaction.    Note that each item in the work queue is a transaction.
   
 =cut  =cut
   
 sub EmptyQueue {  sub EmptyQueue {
     $ConnectionRetriesLeft--; # Counts as connection failure too.      $ConnectionRetriesLeft--; # Counts as connection failure too.
     while($WorkQueue->Count()) {      while($WorkQueue->Count()) {
Line 675  sub EmptyQueue { Line 713  sub EmptyQueue {
 Close all connections open on lond prior to exit e.g.  Close all connections open on lond prior to exit e.g.
   
 =cut  =cut
   
 sub CloseAllLondConnections {  sub CloseAllLondConnections {
     foreach my $Socket (keys %ActiveConnections) {      foreach my $Socket (keys %ActiveConnections) {
       if(exists($ActiveTransactions{$Socket})) {        if(exists($ActiveTransactions{$Socket})) {
Line 683  sub CloseAllLondConnections { Line 722  sub CloseAllLondConnections {
       KillSocket($Socket);        KillSocket($Socket);
     }      }
 }  }
 =cut  
   
 =pod  =pod
   
Line 705  Parameters: Line 743  Parameters:
   
 nonzero if we are allowed to create a new connection.  nonzero if we are allowed to create a new connection.
   
   
 =cut  =cut
   
 sub KillSocket {  sub KillSocket {
     my $Socket = shift;      my $Socket = shift;
   
Line 918  sub LondReadable { Line 956  sub LondReadable {
  #  We need to be writable for this and probably don't belong   #  We need to be writable for this and probably don't belong
  #  here inthe first place.   #  here inthe first place.
   
  Deubg(6, "SendingRequest state encountered in readable");   Debug(6, "SendingRequest state encountered in readable");
  $Watcher->poll("w");   $Watcher->poll("w");
  $Watcher->cb(\&LondWritable);   $Watcher->cb(\&LondWritable);
   
Line 1116  sub LondWritable { Line 1154  sub LondWritable {
     }      }
           
 }  }
   
 =pod  =pod
           
 =cut  =cut
   
   
 sub QueueDelayed {  sub QueueDelayed {
     Debug(3,"QueueDelayed called");      Debug(3,"QueueDelayed called");
   
Line 1126  sub QueueDelayed { Line 1167  sub QueueDelayed {
   
     Debug(4, "Delayed path: ".$path);      Debug(4, "Delayed path: ".$path);
     opendir(DIRHANDLE, $path);      opendir(DIRHANDLE, $path);
       
     my @alldelayed = grep /\.$RemoteHost$/, readdir DIRHANDLE;      my $host_id_re = '(?:'.join('|',map {quotemeta($_)} (@all_host_ids)).')';
       my @alldelayed = grep(/\.$host_id_re$/, readdir(DIRHANDLE));
     closedir(DIRHANDLE);      closedir(DIRHANDLE);
     my $dfname;      foreach my $dfname (sort(@alldelayed)) {
     my $reqfile;   my $reqfile = "$path/$dfname";
     foreach $dfname (sort  @alldelayed) {   my ($host_id) = ($dfname =~ /\.([^.]*)$/);
  $reqfile = "$path/$dfname";   Debug(4, "queueing ".$reqfile." for $host_id");
  Debug(4, "queueing ".$reqfile);  
  my $Handle = IO::File->new($reqfile);   my $Handle = IO::File->new($reqfile);
  my $cmd    = <$Handle>;   my $cmd    = <$Handle>;
  chomp $cmd; # There may or may not be a newline...   chomp $cmd; # There may or may not be a newline...
  $cmd = $cmd."\n"; # now for sure there's exactly one newline.   $cmd = $cmd."\n"; # now for sure there's exactly one newline.
  my $Transaction = LondTransaction->new($cmd);   my $Transaction = LondTransaction->new("sethost:$host_id:$cmd");
  $Transaction->SetDeferred($reqfile);   $Transaction->SetDeferred($reqfile);
  QueueTransaction($Transaction);   QueueTransaction($Transaction);
     }      }
Line 1162  sub MakeLondConnection { Line 1203  sub MakeLondConnection {
   .GetServerPort());    .GetServerPort());
   
     my $Connection = LondConnection->new(&GetServerHost(),      my $Connection = LondConnection->new(&GetServerHost(),
  &GetServerPort());   &GetServerPort(),
    &GetHostId());
   
     if($Connection eq undef) { # Needs to be more robust later.      if($Connection eq undef) { # Needs to be more robust later.
  Log("CRITICAL","Failed to make a connection with lond.");   Log("CRITICAL","Failed to make a connection with lond.");
Line 1170  sub MakeLondConnection { Line 1212  sub MakeLondConnection {
  return 0; # Failure.   return 0; # Failure.
     }  else {      }  else {
   
    $LondConnecting = 1; # Connection in progress.
  # The connection needs to have writability    # The connection needs to have writability 
  # monitored in order to send the init sequence   # monitored in order to send the init sequence
  # that starts the whole authentication/key   # that starts the whole authentication/key
Line 1177  sub MakeLondConnection { Line 1220  sub MakeLondConnection {
  #   #
  my $Socket = $Connection->GetSocket();   my $Socket = $Connection->GetSocket();
  if($Socket eq undef) {   if($Socket eq undef) {
     die "did not get a socket from the connection";      &child_exit(-1, "did not get a socket from the connection");
  } else {   } else {
     &Debug(9,"MakeLondConnection got socket: ".$Socket);      &Debug(9,"MakeLondConnection got socket: ".$Socket);
  }   }
Line 1200  sub MakeLondConnection { Line 1243  sub MakeLondConnection {
  }   }
  Log("SUCESS", "Created connection ".$ConnectionCount   Log("SUCESS", "Created connection ".$ConnectionCount
     ." to host ".GetServerHost());      ." to host ".GetServerHost());
  $LondConnecting = 1; # Connection in progress.  
  return 1; # Return success.   return 1; # Return success.
     }      }
           
Line 1341  sub ClientRequest { Line 1383  sub ClientRequest {
     $data = $data.$thisread; # Append new data.      $data = $data.$thisread; # Append new data.
     $watcher->data($data);      $watcher->data($data);
     if($data =~ /\n$/) { # Request entirely read.      if($data =~ /\n$/) { # Request entirely read.
  if($data eq "close_connection_exit\n") {   if ($data eq "close_connection_exit\n") {
     Log("CRITICAL",      Log("CRITICAL",
  "Request Close Connection ... exiting");   "Request Close Connection ... exiting");
     CloseAllLondConnections();      CloseAllLondConnections();
     exit;      exit;
    } elsif ($data eq "reset_retries\n") {
       Log("INFO", "Resetting Connection Retries.");
       $ConnectionRetriesLeft = $ConnectionRetries;
       &UpdateStatus();
       my $Transaction = LondTransaction->new($data);
       $Transaction->SetClient($socket);
       StartClientReply($Transaction, "ok\n");
       $watcher->cancel();
       return;
  }   }
  Debug(8, "Complete transaction received: ".$data);   Debug(8, "Complete transaction received: ".$data);
  if($LogTransactions) {   if ($LogTransactions) {
     Log("SUCCESS", "Transaction: '$data'"); # Transaction has \n.      Log("SUCCESS", "Transaction: '$data'"); # Transaction has \n.
  }   }
  my $Transaction = LondTransaction->new($data);   my $Transaction = LondTransaction->new($data);
Line 1363  sub ClientRequest { Line 1414  sub ClientRequest {
 #     Accept a connection request for a client (lonc child) and  #     Accept a connection request for a client (lonc child) and
 #    start up an event watcher to keep an eye on input from that   #    start up an event watcher to keep an eye on input from that 
 #    Event.  This can be called both from NewClient and from  #    Event.  This can be called both from NewClient and from
 #    ChildProcess if we are started in DieWhenIdle mode.  #    ChildProcess.
 # Parameters:  # Parameters:
 #    $socket       - The listener socket.  #    $socket       - The listener socket.
 # Returns:  # Returns:
Line 1452  sub GetServerHost { Line 1503  sub GetServerHost {
   
 =pod  =pod
   
   =head2 GetServerId
   
   Returns the hostid whose lond we talk with.
   
   =cut
   
   sub GetHostId {
       return $RemoteHostId; # Setup by the fork.
   }
   
   =pod
   
 =head2 GetServerPort  =head2 GetServerPort
   
 Returns the lond port number.  Returns the lond port number.
Line 1479  another event handler to subess requests Line 1542  another event handler to subess requests
 =cut  =cut
   
 sub SetupLoncListener {  sub SetupLoncListener {
       my ($host,$SocketName) = @_;
       if (!$host) { $host = &GetServerHost(); }
       if (!$SocketName) { $SocketName = &GetLoncSocketPath($host); }
   
     my $host       = GetServerHost(); # Default host.  
     if (@_) {  
  ($host)    = @_ # Override host with parameter.  
     }  
   
     my $socket;  
     my $SocketName = GetLoncSocketPath($host);  
     unlink($SocketName);      unlink($SocketName);
   
       my $socket;
     unless ($socket =IO::Socket::UNIX->new(Local  => $SocketName,      unless ($socket =IO::Socket::UNIX->new(Local  => $SocketName,
     Listen => 250,       Listen => 250, 
     Type   => SOCK_STREAM)) {      Type   => SOCK_STREAM)) {
  die "Failed to create a lonc listner socket";   if($I_am_child) {
       &child_exit(-1, "Failed to create a lonc listener socket");
    } else {
       die "Failed to create a lonc listner socket";
    }
     }      }
     return $socket;      return $socket;
 }  }
Line 1523  into the status file. Line 1589  into the status file.
   
 We also use this to reset the retries count in order to allow the  We also use this to reset the retries count in order to allow the
 client to retry connections with a previously dead server.  client to retry connections with a previously dead server.
   
 =cut  =cut
   
 sub ChildStatus {  sub ChildStatus {
Line 1531  sub ChildStatus { Line 1598  sub ChildStatus {
   
     Debug(2, "Reporting child status because : ".$watcher->data);      Debug(2, "Reporting child status because : ".$watcher->data);
     my $docdir = $perlvar{'lonDocRoot'};      my $docdir = $perlvar{'lonDocRoot'};
     my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");      
     print $fh $$."\t".$RemoteHost."\t".$Status."\t".      open(LOG,">>$docdir/lon-status/loncstatus.txt");
       flock(LOG,LOCK_EX);
       print LOG $$."\t".$RemoteHost."\t".$Status."\t".
  $RecentLogEntry."\n";   $RecentLogEntry."\n";
     #      #
     #  Write out information about each of the connections:      #  Write out information about each of the connections:
     #      #
     if ($DebugLevel > 2) {      if ($DebugLevel > 2) {
  print $fh "Active connection statuses: \n";   print LOG "Active connection statuses: \n";
  my $i = 1;   my $i = 1;
  print STDERR  "================================= Socket Status Dump:\n";   print STDERR  "================================= Socket Status Dump:\n";
  foreach my $item (keys %ActiveConnections) {   foreach my $item (keys %ActiveConnections) {
     my $Socket = $ActiveConnections{$item}->data;      my $Socket = $ActiveConnections{$item}->data;
     my $state  = $Socket->GetState();      my $state  = $Socket->GetState();
     print $fh "Connection $i State: $state\n";      print LOG "Connection $i State: $state\n";
     print STDERR "---------------------- Connection $i \n";      print STDERR "---------------------- Connection $i \n";
     $Socket->Dump(-1); # Ensure it gets dumped..      $Socket->Dump(-1); # Ensure it gets dumped..
     $i++;      $i++;
  }   }
     }      }
       flock(LOG,LOCK_UN);
       close(LOG);
     $ConnectionRetriesLeft = $ConnectionRetries;      $ConnectionRetriesLeft = $ConnectionRetries;
       UpdateStatus();
 }  }
   
 =pod  =pod
Line 1571  sub SignalledToDeath { Line 1643  sub SignalledToDeath {
     chomp($signal);      chomp($signal);
     Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "      Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
  ."died through "."\"$signal\"");   ."died through "."\"$signal\"");
     LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "      #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
     ."\"$signal\"");  #    ."\"$signal\"");
     exit 0;      exit 0;
   
 }  }
   
   =pod
   
 =head2 ToggleDebug  =head2 ToggleDebug
   
 This sub toggles trace debugging on and off.  This sub toggles trace debugging on and off.
Line 1592  sub ToggleDebug { Line 1666  sub ToggleDebug {
   
 }  }
   
   =pod
   
 =head2 ChildProcess  =head2 ChildProcess
   
 This sub implements a child process for a single lonc daemon.  This sub implements a child process for a single lonc daemon.
Line 1602  Optional parameter: Line 1678  Optional parameter:
 =cut  =cut
   
 sub ChildProcess {  sub ChildProcess {
     #  If we are in DieWhenIdle mode, we've inherited all the      #  We've inherited all the
     #  events of our parent and those have to be cancelled or else      #  events of our parent and those have to be cancelled or else
     #  all holy bloody chaos will result.. trust me, I already made      #  all holy bloody chaos will result.. trust me, I already made
     #  >that< mistake.      #  >that< mistake.
Line 1618  sub ChildProcess { Line 1694  sub ChildProcess {
  Debug(5, "Killing watcher for $listener");   Debug(5, "Killing watcher for $listener");
   
  $watcher->cancel();   $watcher->cancel();
  undef         $parent_dispatchers{$listener};   delete($parent_dispatchers{$listener});
   
     }      }
     $I_am_child    = 1; # Seems like in spite of it all I'm still getting  
                                 # parent event dispatches.       #  kill off the parent's signal handlers too!  
       #
   
       for my $handler (keys %parent_handlers) {
    my $watcher = $parent_handlers{$handler};
    $watcher->cancel();
    delete($parent_handlers{$handler});
       }
   
       $I_am_child    = 1; # Seems like in spite of it all I may still getting
                                   # parent event dispatches.. flag I'm a child.
   
   
     #      #
Line 1660  sub ChildProcess { Line 1746  sub ChildProcess {
       desc => 'Lonc Listener Unix Socket',        desc => 'Lonc Listener Unix Socket',
       fd   => $socket);        fd   => $socket);
           
     $Event::Debuglevel = $DebugLevel;      $Event::DebugLevel = $DebugLevel;
           
     Debug(9, "Making initial lond connection for ".$RemoteHost);      Debug(9, "Making initial lond connection for ".$RemoteHost);
   
Line 1668  sub ChildProcess { Line 1754  sub ChildProcess {
           
      # &MakeLondConnection(); // let first work request do it.       # &MakeLondConnection(); // let first work request do it.
   
     #  If We are in diwhenidle, need to accept the connection since the      #  need to accept the connection since the event may  not fire.
     #  event may  not fire.  
   
     if ($DieWhenIdle) {      &accept_client($socket);
  &accept_client($socket);  
     }  
   
     Debug(9,"Entering event loop");      Debug(9,"Entering event loop");
     my $ret = Event::loop(); #  Start the main event loop.      my $ret = Event::loop(); #  Start the main event loop.
           
           
     die "Main event loop exited!!!";      &child_exit (-1,"Main event loop exited!!!");
 }  }
   
 #  Create a new child for host passed in:  #  Create a new child for host passed in:
   
 sub CreateChild {  sub CreateChild {
     my ($host, $socket) = @_;      my ($host, $hostid) = @_;
   
     my $sigset = POSIX::SigSet->new(SIGINT);      my $sigset = POSIX::SigSet->new(SIGINT);
     sigprocmask(SIG_BLOCK, $sigset);      sigprocmask(SIG_BLOCK, $sigset);
Line 1694  sub CreateChild { Line 1777  sub CreateChild {
     my $pid          = fork;      my $pid          = fork;
     if($pid) { # Parent      if($pid) { # Parent
  $RemoteHost = "Parent";   $RemoteHost = "Parent";
  $ChildHash{$pid} = $host;   $ChildPid{$pid} = $host;
  $HostToPid{$host}= $pid;  
  sigprocmask(SIG_UNBLOCK, $sigset);   sigprocmask(SIG_UNBLOCK, $sigset);
    undef(@all_host_ids);
     } else { # child.      } else { # child.
    $RemoteHostId = $hostid;
  ShowStatus("Connected to ".$RemoteHost);   ShowStatus("Connected to ".$RemoteHost);
  $SIG{INT} = 'DEFAULT';   $SIG{INT} = 'DEFAULT';
  sigprocmask(SIG_UNBLOCK, $sigset);   sigprocmask(SIG_UNBLOCK, $sigset);
  if(defined $socket) {   &ChildProcess(); # Does not return.
     &ChildProcess($socket);  
  } else {  
     ChildProcess; # Does not return.  
  }  
     }      }
 }  }
   
Line 1716  sub CreateChild { Line 1795  sub CreateChild {
 #    a connection request arrives.  We must:  #    a connection request arrives.  We must:
 #     Start a child process to accept the connection request.  #     Start a child process to accept the connection request.
 #     Kill our listen on the socket.  #     Kill our listen on the socket.
 #     Setup an event to handle the child process exit. (SIGCHLD).  
 # Parameter:  # Parameter:
 #    event       - The event object that was created to monitor this socket.  #    event       - The event object that was created to monitor this socket.
 #                  event->w->fd is the socket.  #                  event->w->fd is the socket.
Line 1735  sub parent_client_connection { Line 1813  sub parent_client_connection {
  my ($event)   = @_;   my ($event)   = @_;
  my $watcher   = $event->w;   my $watcher   = $event->w;
  my $socket    = $watcher->fd;   my $socket    = $watcher->fd;
    my $connection = $socket->accept(); # Accept the client connection.
  # Lookup the host associated with this socket:   Event->io(cb      => \&get_remote_hostname,
     poll    => 'r',
  my $host = $listening_to{$socket};    data    => "",
     fd      => $connection);
  # Start the child:      }
   }
   
   sub get_remote_hostname {
  &Debug(9,"Creating child for $host (parent_client_connection)");      my ($event)   = @_;
  &CreateChild($host, $socket);      my $watcher   = $event->w;
       my $socket    = $watcher->fd;
  # Clean up the listen since now the child takes over until it exits.  
       my $thisread;
  $watcher->cancel(); # Nolonger listening to this event      my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0);
  delete($listening_to{$socket});      Debug(8, "rcv:  data length = ".length($thisread)." read =".$thisread);
  delete($parent_dispatchers{$host});      if (!defined($rv) || length($thisread) == 0) {
  $socket->close();   # Likely eof on socket.
    Debug(5,"Client Socket closed on lonc for p_c_c");
    close($socket);
    $watcher->cancel();
    return;
     }      }
   
       my $data    = $watcher->data().$thisread;
       $watcher->data($data);
       if($data =~ /\n$/) { # Request entirely read.
    chomp($data);
       } else {
    return;
       }
   
       &Debug(5,"Creating child for $data (parent_client_connection)");
       (my $hostname,my $lonid,@all_host_ids) = split(':',$data);
       $ChildHost{$hostname}++;
       if ($ChildHost{$hostname} == 1) {
    &CreateChild($hostname,$lonid);
       } else {
    &Log('WARNING',"Request for a second child on $hostname");
       }
       # Clean up the listen since now the child takes over until it exits.
       $watcher->cancel(); # Nolonger listening to this event
       $socket->send("done\n");
       $socket->close();
 }  }
   
 # parent_listen:  # parent_listen:
Line 1775  sub parent_listen { Line 1878  sub parent_listen {
     my ($loncapa_host) = @_;      my ($loncapa_host) = @_;
     Debug(5, "parent_listen: $loncapa_host");      Debug(5, "parent_listen: $loncapa_host");
   
     my $socket    = &SetupLoncListener($loncapa_host);      my ($socket,$file);
       if (!$loncapa_host) {
    $loncapa_host = 'common_parent';
    $file         = $perlvar{'lonSockCreate'};
       } else {
    $file         = &GetLoncSocketPath($loncapa_host);
       }
       $socket = &SetupLoncListener($loncapa_host,$file);
   
     $listening_to{$socket} = $loncapa_host;      $listening_to{$socket} = $loncapa_host;
     if (!$socket) {      if (!$socket) {
  die "Unable to create a listen socket for $loncapa_host";   die "Unable to create a listen socket for $loncapa_host";
     }      }
           
     my $lock_file = &GetLoncSocketPath($loncapa_host).".lock";      my $lock_file = $file.".lock";
     unlink($lock_file); # No problem if it doesn't exist yet [startup e.g.]      unlink($lock_file); # No problem if it doesn't exist yet [startup e.g.]
   
     my $watcher = Event->io(cb    => \&parent_client_connection,      my $watcher = 
       poll  => 'r',   Event->io(cb    => \&parent_client_connection,
       desc  => "Parent listener unix socket ($loncapa_host)",    poll  => 'r',
       fd    => $socket);    desc  => "Parent listener unix socket ($loncapa_host)",
     data => "",
     fd    => $socket);
     $parent_dispatchers{$loncapa_host} = $watcher;      $parent_dispatchers{$loncapa_host} = $watcher;
   
 }  }
   
   sub parent_clean_up {
       my ($loncapa_host) = @_;
       Debug(1, "parent_clean_up: $loncapa_host");
   
       my $socket_file = &GetLoncSocketPath($loncapa_host);
       unlink($socket_file); # No problem if it doesn't exist yet [startup e.g.]
       my $lock_file   = $socket_file.".lock";
       unlink($lock_file); # No problem if it doesn't exist yet [startup e.g.]
   }
   
   
   
 # listen_on_all_unix_sockets:  #    This sub initiates a listen on the common unix domain lonc client socket.
 #    This sub initiates a listen on all unix domain lonc client sockets.  #    loncnew starts up with no children, and only spawns off children when a
 #    This will be called in the case where we are trimming idle processes.  #    connection request occurs on the common client unix socket.  The spawned
 #    When idle processes are trimmed, loncnew starts up with no children,  #    child continues to run until it has been idle a while at which point it
 #    and only spawns off children when a connection request occurs on the  #    eventually exits and once more the parent picks up the listen.
 #    client unix socket.  The spawned child continues to run until it has  
 #    been idle a while at which point it eventually exits and once more  
 #    the parent picks up the listen.  
 #  #
 #  Parameters:  #  Parameters:
 #      NONE  #      NONE
Line 1809  sub parent_listen { Line 1930  sub parent_listen {
 #  Returns:  #  Returns:
 #     NONE  #     NONE
 #  #
 sub listen_on_all_unix_sockets {  sub listen_on_common_socket {
     Debug(5, "listen_on_all_unix_sockets");      Debug(5, "listen_on_common_socket");
     my $host_iterator      =   &LondConnection::GetHostIterator();      &parent_listen();
     while (!$host_iterator->end()) {  }
  my $host_entry_ref =   $host_iterator->get();  
  my $host_name      = $host_entry_ref->[0];  #   server_died is called whenever a child process exits.
  Debug(9, "Listen for $host_name");  #   Since this is dispatched via a signal, we must process all
  &parent_listen($host_name);  #   dead children until there are no more left.  The action
  $host_iterator->next();  #   is to:
   #      - Remove the child from the bookkeeping hashes
   #      - Re-establish a listen on the unix domain socket associated
   #        with that host.
   # Parameters:
   #    The event, but we don't actually care about it.
   sub server_died {
       &Debug(9, "server_died called...");
       
       while(1) { # Loop until waitpid nowait fails.
    my $pid = waitpid(-1, WNOHANG);
    if($pid <= 0) {
       return; # Nothing left to wait for.
    }
    # need the host to restart:
   
    my $host = $ChildPid{$pid};
    if($host) { # It's for real...
       &Debug(9, "Caught sigchild for $host");
       delete($ChildPid{$pid});
       delete($ChildHost{$host});
       &parent_clean_up($host);
   
    } else {
       &Debug(5, "Caught sigchild for pid not in hosts hash: $pid");
    }
     }      }
   
 }  }
   
 #  #
Line 1869  ShowStatus("Forking node servers"); Line 2016  ShowStatus("Forking node servers");
 Log("CRITICAL", "--------------- Starting children ---------------");  Log("CRITICAL", "--------------- Starting children ---------------");
   
 LondConnection::ReadConfig;               # Read standard config files.  LondConnection::ReadConfig;               # Read standard config files.
 my $HostIterator = LondConnection::GetHostIterator;  
   
 if ($DieWhenIdle) {  $RemoteHost = "[parent]";
     $RemoteHost = "[parent]";  &listen_on_common_socket();
     &listen_on_all_unix_sockets();  
 } else {  
       
     while (! $HostIterator->end()) {  
   
  my $hostentryref = $HostIterator->get();  
  CreateChild($hostentryref->[0]);  
  $HostHash{$hostentryref->[0]} = $hostentryref->[4];  
  $HostIterator->next();  
     }  
 }  
   
 $RemoteHost = "Parent Server";  $RemoteHost = "Parent Server";
   
Line 1892  $RemoteHost = "Parent Server"; Line 2027  $RemoteHost = "Parent Server";
 ShowStatus("Parent keeping the flock");  ShowStatus("Parent keeping the flock");
   
   
 if ($DieWhenIdle) {  # We need to setup a SIGChild event to handle the exit (natural or otherwise)
     $Event::DebugLevel = $DebugLevel;  # of the children.
     Debug(9, "Parent entering event loop");  
     my $ret = Event::loop();  
     die "Main Event loop exited: $ret";  
   
   
 } else {  
     #  
     #   Set up parent signals:  
     #  
       
     $SIG{INT}  = \&Terminate;  
     $SIG{TERM} = \&Terminate;   
     $SIG{HUP}  = \&Restart;  
     $SIG{USR1} = \&CheckKids;   
     $SIG{USR2} = \&UpdateKids; # LonManage update request.  
       
     while(1) {  
  my $deadchild = wait();  
  if(exists $ChildHash{$deadchild}) { # need to restart.  
     my $deadhost = $ChildHash{$deadchild};  
     delete($HostToPid{$deadhost});  
     delete($ChildHash{$deadchild});  
     Log("WARNING","Lost child pid= ".$deadchild.  
  "Connected to host ".$deadhost);  
     Log("INFO", "Restarting child procesing ".$deadhost);  
     CreateChild($deadhost);  
  }  
     }  
 }  
   
   Event->signal(cb       => \&server_died,
         desc     => "Child exit handler",
         signal   => "CHLD");
   
   
   # Set up all the other signals we set up.
   
   $parent_handlers{INT} = Event->signal(cb       => \&Terminate,
         desc     => "Parent INT handler",
         signal   => "INT");
   $parent_handlers{TERM} = Event->signal(cb       => \&Terminate,
          desc     => "Parent TERM handler",
          signal   => "TERM");
   $parent_handlers{HUP}  = Event->signal(cb       => \&KillThemAll,
          desc     => "Parent HUP handler.",
          signal   => "HUP");
   $parent_handlers{USR1} = Event->signal(cb       => \&CheckKids,
          desc     => "Parent USR1 handler",
          signal   => "USR1");
   $parent_handlers{USR2} = Event->signal(cb       => \&UpdateKids,
          desc     => "Parent USR2 handler.",
          signal   => "USR2");
   
   #  Start procdesing events.
   
   $Event::DebugLevel = $DebugLevel;
   Debug(9, "Parent entering event loop");
   my $ret = Event::loop();
   die "Main Event loop exited: $ret";
   
 =pod  =pod
   
Line 1944  sub CheckKids { Line 2079  sub CheckKids {
     my $now=time;      my $now=time;
     my $local=localtime($now);      my $local=localtime($now);
     print $fh "LONC status $local - parent $$ \n\n";      print $fh "LONC status $local - parent $$ \n\n";
     foreach my $pid (keys %ChildHash) {      foreach my $host (keys %parent_dispatchers) {
    print $fh "LONC Parent process listening for $host\n";
       }
       foreach my $pid (keys %ChildPid) {
  Debug(2, "Sending USR1 -> $pid");   Debug(2, "Sending USR1 -> $pid");
  kill 'USR1' => $pid; # Tell Child to report status.   kill 'USR1' => $pid; # Tell Child to report status.
  sleep 1; # Wait so file doesn't intermix.  
     }      }
   
 }  }
   
 =pod  =pod
Line 1981  sub UpdateKids { Line 2119  sub UpdateKids {
   
     Log("INFO", "Updating connections via SIGUSR2");      Log("INFO", "Updating connections via SIGUSR2");
   
     #  Just in case we need to kill our own lonc, we wait a few seconds to      #  I'm not sure what I was thinking in the first implementation.
     #  give it a chance to receive and relay lond's response to the       # someone will have to work hard to convince me the effect is any
     #  re-init command.      # different than Restart, especially now that we don't start up 
     #      # per host servers automatically, may as well just restart.
       # The down side is transactions that are in flight will get timed out
     sleep(2); # Wait a couple of seconds.      # (lost unless they are critical).
   
     my %hosts;                   # Indexed by loncapa hostname, value=ip.  
       
     # Need to re-read  the host table:  
       
       
     LondConnection::ReadConfig();  
     my $I = LondConnection::GetHostIterator;  
     while (! $I->end()) {  
  my $item = $I->get();  
  $hosts{$item->[0]} = $item->[4];  
  $I->next();  
     }  
   
     #  The logic below is written for clarity not for efficiency.  
     #  Since I anticipate that this function is only rarely called, that's  
     #  appropriate.  There are certainly ways to combine the loops below,  
     #  and anyone wishing to obscure the logic is welcome to go for it.  
     #  Note that we don't re-direct sigchild.  Instead we do what's needed  
     #  to the data structures that keep track of children to ensure that  
     #  when sigchild is honored, no new child is born.  
     #  
   
     #  For each existing child; if it's host doesn't exist, kill the child.  
   
     foreach my $child (keys %ChildHash) {  
  my $oldhost = $ChildHash{$child};  
  if (!(exists $hosts{$oldhost})) {  
     Log("CRITICAL", "Killing child for $oldhost  host no longer exists");  
     delete $ChildHash{$child};  
     delete $HostToPid{$oldhost};  
     kill 'QUIT' => $child;  
  }  
     }  
     # For each remaining existing child; if it's host's ip has changed,  
     # Restart the child on the new IP.  
   
     foreach my $child (keys %ChildHash) {      &KillThemAll();
  my $oldhost = $ChildHash{$child};  
  my $oldip   = $HostHash{$oldhost};  
  if ($hosts{$oldhost} ne $oldip) {  
   
     # kill the old child.  
   
     Log("CRITICAL", "Killing child for $oldhost host ip has changed...");  
     delete $ChildHash{$child};  
     delete $HostToPid{$oldhost};  
     kill 'QUIT' => $child;  
   
     # Do the book-keeping needed to start a new child on the  
     # new ip.  
   
     $HostHash{$oldhost} = $hosts{$oldhost};  
     CreateChild($oldhost);  
  }  
     }  
     # Finally, for each new host, not in the host hash, create a  
     # enter the host and create a new child.  
     # Force a status display of any existing process.  
   
     foreach my $host (keys %hosts) {  
  if(!(exists $HostHash{$host})) {  
     Log("INFO", "New host $host discovered in hosts.tab...");  
     $HostHash{$host} = $hosts{$host};  
     CreateChild($host);  
  } else {  
     kill 'HUP' => $HostToPid{$host};    # status display.  
  }  
     }  
 }  }
   
   
Line 2074  sub Restart { Line 2145  sub Restart {
     Log("CRITICAL", "Restarting");      Log("CRITICAL", "Restarting");
     my $execdir = $perlvar{'lonDaemons'};      my $execdir = $perlvar{'lonDaemons'};
     unlink("$execdir/logs/lonc.pid");      unlink("$execdir/logs/lonc.pid");
     exec("$execdir/loncnew");      exec("$executable");
 }  }
   
 =pod  =pod
Line 2088  SIGHUP.  Responds to sigint and sigterm. Line 2159  SIGHUP.  Responds to sigint and sigterm.
   
 sub KillThemAll {  sub KillThemAll {
     Debug(2, "Kill them all!!");      Debug(2, "Kill them all!!");
     local($SIG{CHLD}) = 'IGNORE';      # Our children >will< die.      
     foreach my $pid (keys %ChildHash) {      #local($SIG{CHLD}) = 'IGNORE';
  my $serving = $ChildHash{$pid};      # Our children >will< die.
       # but we need to catch their death and cleanup after them in case this is 
       # a restart set of kills
       my @allpids = keys(%ChildPid);
       foreach my $pid (@allpids) {
    my $serving = $ChildPid{$pid};
  ShowStatus("Nicely Killing lonc for $serving pid = $pid");   ShowStatus("Nicely Killing lonc for $serving pid = $pid");
  Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");   Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");
  kill 'QUIT' => $pid;   kill 'QUIT' => $pid;
     }      }
       ShowStatus("Finished killing child processes off.");
   
 }  }
   
   
Line 2108  sub really_kill_them_all_dammit Line 2183  sub really_kill_them_all_dammit
 {  {
     Debug(2, "Kill them all Dammit");      Debug(2, "Kill them all Dammit");
     local($SIG{CHLD} = 'IGNORE'); # In case some purist reenabled them.      local($SIG{CHLD} = 'IGNORE'); # In case some purist reenabled them.
     foreach my $pid (keys %ChildHash) {      foreach my $pid (keys %ChildPid) {
  my $serving = $ChildHash{$pid};   my $serving = $ChildPid{$pid};
  &ShowStatus("Nastily killing lonc for $serving pid = $pid");   &ShowStatus("Nastily killing lonc for $serving pid = $pid");
  Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid");   Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid");
  kill 'KILL' => $pid;   kill 'KILL' => $pid;
  delete($ChildHash{$pid});   delete($ChildPid{$pid});
  my $execdir = $perlvar{'lonDaemons'};   my $execdir = $perlvar{'lonDaemons'};
  unlink("$execdir/logs/lonc.pid");   unlink("$execdir/logs/lonc.pid");
     }      }
 }  }
   
 =pod  =pod
   
 =head1 Terminate  =head1 Terminate
Line 2140  sub Terminate { Line 2216  sub Terminate {
     exit 0;      exit 0;
   
 }  }
   
   sub my_hostname {
       use Sys::Hostname;
       my $name = &hostname();
       &Debug(9,"Name is $name");
       return $name;
   }
   
 =pod  =pod
   
 =head1 Theory  =head1 Theory

Removed from v.1.62  
changed lines
  Added in v.1.87


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>