Diff for /loncom/loncnew between versions 1.62 and 1.81

version 1.62, 2004/10/04 10:30:50 version 1.81, 2007/03/28 20:28:29
Line 60  use LONCAPA::Stack; Line 60  use LONCAPA::Stack;
 use LONCAPA::LondConnection;  use LONCAPA::LondConnection;
 use LONCAPA::LondTransaction;  use LONCAPA::LondTransaction;
 use LONCAPA::Configuration;  use LONCAPA::Configuration;
 use LONCAPA::HashIterator;  use Fcntl qw(:flock);
   
   
 # Read the httpd configuration file to get perl variables  # Read the httpd configuration file to get perl variables
Line 73  my %perlvar    = %{$perlvarref}; Line 73  my %perlvar    = %{$perlvarref};
 #  parent and shared variables.  #  parent and shared variables.
   
 my %ChildHash; # by pid -> host.  my %ChildHash; # by pid -> host.
 my %HostToPid; # By host -> pid.  
 my %HostHash; # by loncapaname -> IP.  
 my %listening_to; # Socket->host table for who the parent  my %listening_to; # Socket->host table for who the parent
                                 # is listening to.                                  # is listening to.
 my %parent_dispatchers;         # host-> listener watcher events.   my %parent_dispatchers;         # host-> listener watcher events. 
   
   my %parent_handlers; # Parent signal handlers...
   
 my $MaxConnectionCount = 10; # Will get from config later.  my $MaxConnectionCount = 10; # Will get from config later.
 my $ClientConnection = 0; # Uniquifier for client events.  my $ClientConnection = 0; # Uniquifier for client events.
   
Line 87  my $NextDebugLevel= 2;  # So Sigint can Line 87  my $NextDebugLevel= 2;  # So Sigint can
 my $IdleTimeout= 600; # Wait 10 minutes before pruning connections.  my $IdleTimeout= 600; # Wait 10 minutes before pruning connections.
   
 my $LogTransactions = 0; # When True, all transactions/replies get logged.  my $LogTransactions = 0; # When True, all transactions/replies get logged.
   my $executable      = $0; # Get the full path to me.
   
 #  #
 #  The variables below are only used by the child processes.  #  The variables below are only used by the child processes.
 #  #
 my $RemoteHost; # Name of host child is talking to.  my $RemoteHost; # Name of host child is talking to.
   my $RemoteHostId; # default lonid of host child is talking to.
 my $UnixSocketDir= $perlvar{'lonSockDir'};  my $UnixSocketDir= $perlvar{'lonSockDir'};
 my $IdleConnections = Stack->new(); # Set of idle connections  my $IdleConnections = Stack->new(); # Set of idle connections
 my %ActiveConnections; # Connections to the remote lond.  my %ActiveConnections; # Connections to the remote lond.
Line 102  my $ConnectionCount = 0; Line 104  my $ConnectionCount = 0;
 my $IdleSeconds     = 0; # Number of seconds idle.  my $IdleSeconds     = 0; # Number of seconds idle.
 my $Status          = ""; # Current status string.  my $Status          = ""; # Current status string.
 my $RecentLogEntry  = "";  my $RecentLogEntry  = "";
 my $ConnectionRetries=2; # Number of connection retries allowed.  my $ConnectionRetries=5; # Number of connection retries allowed.
 my $ConnectionRetriesLeft=2; # Number of connection retries remaining.  my $ConnectionRetriesLeft=5; # Number of connection retries remaining.
 my $LondVersion     = "unknown"; # Version of lond we talk with.  my $LondVersion     = "unknown"; # Version of lond we talk with.
 my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.  my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.
 my $LondConnecting  = 0;       # True when a connection is being built.  my $LondConnecting  = 0;       # True when a connection is being built.
   
   
 # DO NOT SET THE NEXT VARIABLE TO NON ZERO!!!!!!!!!!!!!!!  
   
 my $DieWhenIdle     = 0; # When true children die when trimmed -> 0.  
 my $I_am_child      = 0; # True if this is the child process.  my $I_am_child      = 0; # True if this is the child process.
   
 #  #
Line 149  sub UpdateStatus { Line 149  sub UpdateStatus {
 Makes an entry into the permanent log file.  Makes an entry into the permanent log file.
   
 =cut  =cut
   
 sub LogPerm {  sub LogPerm {
     my $message=shift;      my $message=shift;
     my $execdir=$perlvar{'lonDaemons'};      my $execdir=$perlvar{'lonDaemons'};
Line 194  sub Log { Line 195  sub Log {
     my $now   = time;      my $now   = time;
     my $local = localtime($now);      my $local = localtime($now);
     my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";      my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";
     my $finalformat = $finalformat.$format."\n";      $finalformat = $finalformat.$format."\n";
   
     # open the file and put the result.      # open the file and put the result.
   
Line 268  sub SocketDump { Line 269  sub SocketDump {
  and as what we return in a SIGUSR1   and as what we return in a SIGUSR1
   
 =cut  =cut
   
 sub ShowStatus {  sub ShowStatus {
     my $state = shift;      my $state = shift;
     my $now = time;      my $now = time;
Line 278  sub ShowStatus { Line 280  sub ShowStatus {
   
 =pod  =pod
   
 =head 2 SocketTimeout  =head2 SocketTimeout
   
     Called when an action on the socket times out.  The socket is       Called when an action on the socket times out.  The socket is 
    destroyed and any active transaction is failed.     destroyed and any active transaction is failed.
   
   
 =cut  =cut
   
 sub SocketTimeout {  sub SocketTimeout {
     my $Socket = shift;      my $Socket = shift;
     Log("WARNING", "A socket timeout was detected");      Log("WARNING", "A socket timeout was detected");
Line 302  sub SocketTimeout { Line 305  sub SocketTimeout {
     }      }
   
 }  }
   
   #
   #   This function should be called by the child in all cases where it must
   #   exit.  The child process must create a lock file for the AF_UNIX socket
   #   in order to prevent connection requests from lonnet in the time between
   #   process exit and the parent picking up the listen again.
   #
   # Parameters:
   #     exit_code           - Exit status value, however see the next parameter.
   #     message             - If this optional parameter is supplied, the exit
   #                           is via a die with this message.
   #
   sub child_exit {
       my ($exit_code, $message) = @_;
   
       # Regardless of how we exit, we may need to do the lock thing:
   
       #
       #  Create a lock file since there will be a time window
       #  between our exit and the parent's picking up the listen
       #  during which no listens will be done on the
       #  lonnet client socket.
       #
       my $lock_file = &GetLoncSocketPath().".lock";
       open(LOCK,">$lock_file");
       print LOCK "Contents not important";
       close(LOCK);
       unlink(&GetLoncSocketPath());
   
       if ($message) {
    die($message);
       } else {
    exit($exit_code);
       }
   }
 #----------------------------- Timer management ------------------------  #----------------------------- Timer management ------------------------
   
 =pod  =pod
Line 331  sub Tick { Line 369  sub Tick {
     KillSocket($Socket);      KillSocket($Socket);
     $IdleSeconds = 0; # Otherwise all connections get trimmed to fast.      $IdleSeconds = 0; # Otherwise all connections get trimmed to fast.
     UpdateStatus();      UpdateStatus();
     if(($ConnectionCount == 0) && $DieWhenIdle) {      if(($ConnectionCount == 0)) {
  #   &child_exit(0);
  #  Create a lock file since there will be a time window  
  #  between our exit and the parent's picking up the listen  
  #  during which no listens will be done on the  
  #  lonnet client socket.  
  #  
  my $lock_file = GetLoncSocketPath().".lock";  
  open(LOCK,">$lock_file");  
  print LOCK "Contents not important";  
  close(LOCK);  
   
  exit(0);  
     }      }
  }   }
     } else {      } else {
Line 389  sub Tick { Line 417  sub Tick {
  $KeyMode = "";    $KeyMode = ""; 
  $clock_watcher->cancel();   $clock_watcher->cancel();
     }      }
       &UpdateStatus();
 }  }
   
 =pod  =pod
Line 642  sub FailTransaction { Line 671  sub FailTransaction {
   
     if ($ConnectionRetriesLeft > 0) {      if ($ConnectionRetriesLeft > 0) {
  Log("WARNING", "Failing transaction "   Log("WARNING", "Failing transaction "
     .$transaction->getRequest());      .$transaction->getLoggableRequest());
     }      }
     Debug(1, "Failing transaction: ".$transaction->getRequest());      Debug(1, "Failing transaction: ".$transaction->getLoggableRequest());
     if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.      if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
  my $client  = $transaction->getClient();   my $client  = $transaction->getClient();
  Debug(1," Replying con_lost to ".$transaction->getRequest());   Debug(1," Replying con_lost to ".$transaction->getRequest());
Line 654  sub FailTransaction { Line 683  sub FailTransaction {
 }  }
   
 =pod  =pod
   
 =head1  EmptyQueue  =head1  EmptyQueue
   
   Fails all items in the work queue with con_lost.    Fails all items in the work queue with con_lost.
   Note that each item in the work queue is a transaction.    Note that each item in the work queue is a transaction.
   
 =cut  =cut
   
 sub EmptyQueue {  sub EmptyQueue {
     $ConnectionRetriesLeft--; # Counts as connection failure too.      $ConnectionRetriesLeft--; # Counts as connection failure too.
     while($WorkQueue->Count()) {      while($WorkQueue->Count()) {
Line 675  sub EmptyQueue { Line 706  sub EmptyQueue {
 Close all connections open on lond prior to exit e.g.  Close all connections open on lond prior to exit e.g.
   
 =cut  =cut
   
 sub CloseAllLondConnections {  sub CloseAllLondConnections {
     foreach my $Socket (keys %ActiveConnections) {      foreach my $Socket (keys %ActiveConnections) {
       if(exists($ActiveTransactions{$Socket})) {        if(exists($ActiveTransactions{$Socket})) {
Line 683  sub CloseAllLondConnections { Line 715  sub CloseAllLondConnections {
       KillSocket($Socket);        KillSocket($Socket);
     }      }
 }  }
 =cut  
   
 =pod  =pod
   
Line 705  Parameters: Line 736  Parameters:
   
 nonzero if we are allowed to create a new connection.  nonzero if we are allowed to create a new connection.
   
   
 =cut  =cut
   
 sub KillSocket {  sub KillSocket {
     my $Socket = shift;      my $Socket = shift;
   
Line 918  sub LondReadable { Line 949  sub LondReadable {
  #  We need to be writable for this and probably don't belong   #  We need to be writable for this and probably don't belong
  #  here inthe first place.   #  here inthe first place.
   
  Deubg(6, "SendingRequest state encountered in readable");   Debug(6, "SendingRequest state encountered in readable");
  $Watcher->poll("w");   $Watcher->poll("w");
  $Watcher->cb(\&LondWritable);   $Watcher->cb(\&LondWritable);
   
Line 1116  sub LondWritable { Line 1147  sub LondWritable {
     }      }
           
 }  }
   
 =pod  =pod
           
 =cut  =cut
   
   
 sub QueueDelayed {  sub QueueDelayed {
     Debug(3,"QueueDelayed called");      Debug(3,"QueueDelayed called");
   
Line 1126  sub QueueDelayed { Line 1160  sub QueueDelayed {
   
     Debug(4, "Delayed path: ".$path);      Debug(4, "Delayed path: ".$path);
     opendir(DIRHANDLE, $path);      opendir(DIRHANDLE, $path);
       
     my @alldelayed = grep /\.$RemoteHost$/, readdir DIRHANDLE;      use Apache::lonnet;
       my @all_host_ids = &Apache::lonnet::machine_ids($RemoteHost);
   
       my $host_id_re = '(?:'.join('|',@all_host_ids).')';
       my @alldelayed = grep(/\.$host_id_re$/, readdir(DIRHANDLE));
     closedir(DIRHANDLE);      closedir(DIRHANDLE);
     my $dfname;      foreach my $dfname (sort(@alldelayed)) {
     my $reqfile;   my $reqfile = "$path/$dfname";
     foreach $dfname (sort  @alldelayed) {   my ($host_id) = ($dfname =~ /\.([^.]*)$/);
  $reqfile = "$path/$dfname";   Debug(4, "queueing ".$reqfile." for $host_id");
  Debug(4, "queueing ".$reqfile);  
  my $Handle = IO::File->new($reqfile);   my $Handle = IO::File->new($reqfile);
  my $cmd    = <$Handle>;   my $cmd    = <$Handle>;
  chomp $cmd; # There may or may not be a newline...   chomp $cmd; # There may or may not be a newline...
  $cmd = $cmd."\n"; # now for sure there's exactly one newline.   $cmd = $cmd."\n"; # now for sure there's exactly one newline.
  my $Transaction = LondTransaction->new($cmd);   my $Transaction = LondTransaction->new("sethost:$host_id:$cmd");
  $Transaction->SetDeferred($reqfile);   $Transaction->SetDeferred($reqfile);
  QueueTransaction($Transaction);   QueueTransaction($Transaction);
     }      }
Line 1162  sub MakeLondConnection { Line 1199  sub MakeLondConnection {
   .GetServerPort());    .GetServerPort());
   
     my $Connection = LondConnection->new(&GetServerHost(),      my $Connection = LondConnection->new(&GetServerHost(),
  &GetServerPort());   &GetServerPort(),
    &GetHostId());
   
     if($Connection eq undef) { # Needs to be more robust later.      if($Connection eq undef) { # Needs to be more robust later.
  Log("CRITICAL","Failed to make a connection with lond.");   Log("CRITICAL","Failed to make a connection with lond.");
Line 1177  sub MakeLondConnection { Line 1215  sub MakeLondConnection {
  #   #
  my $Socket = $Connection->GetSocket();   my $Socket = $Connection->GetSocket();
  if($Socket eq undef) {   if($Socket eq undef) {
     die "did not get a socket from the connection";      &child_exit(-1, "did not get a socket from the connection");
  } else {   } else {
     &Debug(9,"MakeLondConnection got socket: ".$Socket);      &Debug(9,"MakeLondConnection got socket: ".$Socket);
  }   }
Line 1363  sub ClientRequest { Line 1401  sub ClientRequest {
 #     Accept a connection request for a client (lonc child) and  #     Accept a connection request for a client (lonc child) and
 #    start up an event watcher to keep an eye on input from that   #    start up an event watcher to keep an eye on input from that 
 #    Event.  This can be called both from NewClient and from  #    Event.  This can be called both from NewClient and from
 #    ChildProcess if we are started in DieWhenIdle mode.  #    ChildProcess.
 # Parameters:  # Parameters:
 #    $socket       - The listener socket.  #    $socket       - The listener socket.
 # Returns:  # Returns:
Line 1452  sub GetServerHost { Line 1490  sub GetServerHost {
   
 =pod  =pod
   
   =head2 GetServerId
   
   Returns the hostid whose lond we talk with.
   
   =cut
   
   sub GetHostId {
       return $RemoteHostId; # Setup by the fork.
   }
   
   =pod
   
 =head2 GetServerPort  =head2 GetServerPort
   
 Returns the lond port number.  Returns the lond port number.
Line 1479  another event handler to subess requests Line 1529  another event handler to subess requests
 =cut  =cut
   
 sub SetupLoncListener {  sub SetupLoncListener {
       my ($host,$SocketName) = @_;
       if (!$host) { $host = &GetServerHost(); }
       if (!$SocketName) { $SocketName = &GetLoncSocketPath($host); }
   
     my $host       = GetServerHost(); # Default host.  
     if (@_) {  
  ($host)    = @_ # Override host with parameter.  
     }  
   
     my $socket;  
     my $SocketName = GetLoncSocketPath($host);  
     unlink($SocketName);      unlink($SocketName);
   
       my $socket;
     unless ($socket =IO::Socket::UNIX->new(Local  => $SocketName,      unless ($socket =IO::Socket::UNIX->new(Local  => $SocketName,
     Listen => 250,       Listen => 250, 
     Type   => SOCK_STREAM)) {      Type   => SOCK_STREAM)) {
  die "Failed to create a lonc listner socket";   if($I_am_child) {
       &child_exit(-1, "Failed to create a lonc listener socket");
    } else {
       die "Failed to create a lonc listner socket";
    }
     }      }
     return $socket;      return $socket;
 }  }
Line 1523  into the status file. Line 1576  into the status file.
   
 We also use this to reset the retries count in order to allow the  We also use this to reset the retries count in order to allow the
 client to retry connections with a previously dead server.  client to retry connections with a previously dead server.
   
 =cut  =cut
   
 sub ChildStatus {  sub ChildStatus {
Line 1531  sub ChildStatus { Line 1585  sub ChildStatus {
   
     Debug(2, "Reporting child status because : ".$watcher->data);      Debug(2, "Reporting child status because : ".$watcher->data);
     my $docdir = $perlvar{'lonDocRoot'};      my $docdir = $perlvar{'lonDocRoot'};
     my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");      
     print $fh $$."\t".$RemoteHost."\t".$Status."\t".      open(LOG,">>$docdir/lon-status/loncstatus.txt");
       flock(LOG,LOCK_EX);
       print LOG $$."\t".$RemoteHost."\t".$Status."\t".
  $RecentLogEntry."\n";   $RecentLogEntry."\n";
     #      #
     #  Write out information about each of the connections:      #  Write out information about each of the connections:
     #      #
     if ($DebugLevel > 2) {      if ($DebugLevel > 2) {
  print $fh "Active connection statuses: \n";   print LOG "Active connection statuses: \n";
  my $i = 1;   my $i = 1;
  print STDERR  "================================= Socket Status Dump:\n";   print STDERR  "================================= Socket Status Dump:\n";
  foreach my $item (keys %ActiveConnections) {   foreach my $item (keys %ActiveConnections) {
     my $Socket = $ActiveConnections{$item}->data;      my $Socket = $ActiveConnections{$item}->data;
     my $state  = $Socket->GetState();      my $state  = $Socket->GetState();
     print $fh "Connection $i State: $state\n";      print LOG "Connection $i State: $state\n";
     print STDERR "---------------------- Connection $i \n";      print STDERR "---------------------- Connection $i \n";
     $Socket->Dump(-1); # Ensure it gets dumped..      $Socket->Dump(-1); # Ensure it gets dumped..
     $i++;      $i++;
  }   }
     }      }
       flock(LOG,LOCK_UN);
       close(LOG);
     $ConnectionRetriesLeft = $ConnectionRetries;      $ConnectionRetriesLeft = $ConnectionRetries;
       UpdateStatus();
 }  }
   
 =pod  =pod
Line 1571  sub SignalledToDeath { Line 1630  sub SignalledToDeath {
     chomp($signal);      chomp($signal);
     Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "      Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
  ."died through "."\"$signal\"");   ."died through "."\"$signal\"");
     LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "      #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
     ."\"$signal\"");  #    ."\"$signal\"");
     exit 0;      exit 0;
   
 }  }
   
   =pod
   
 =head2 ToggleDebug  =head2 ToggleDebug
   
 This sub toggles trace debugging on and off.  This sub toggles trace debugging on and off.
Line 1592  sub ToggleDebug { Line 1653  sub ToggleDebug {
   
 }  }
   
   =pod
   
 =head2 ChildProcess  =head2 ChildProcess
   
 This sub implements a child process for a single lonc daemon.  This sub implements a child process for a single lonc daemon.
Line 1602  Optional parameter: Line 1665  Optional parameter:
 =cut  =cut
   
 sub ChildProcess {  sub ChildProcess {
     #  If we are in DieWhenIdle mode, we've inherited all the      #  We've inherited all the
     #  events of our parent and those have to be cancelled or else      #  events of our parent and those have to be cancelled or else
     #  all holy bloody chaos will result.. trust me, I already made      #  all holy bloody chaos will result.. trust me, I already made
     #  >that< mistake.      #  >that< mistake.
Line 1618  sub ChildProcess { Line 1681  sub ChildProcess {
  Debug(5, "Killing watcher for $listener");   Debug(5, "Killing watcher for $listener");
   
  $watcher->cancel();   $watcher->cancel();
  undef         $parent_dispatchers{$listener};   delete($parent_dispatchers{$listener});
   
     }      }
     $I_am_child    = 1; # Seems like in spite of it all I'm still getting  
                                 # parent event dispatches.       #  kill off the parent's signal handlers too!  
       #
   
       for my $handler (keys %parent_handlers) {
    my $watcher = $parent_handlers{$handler};
    $watcher->cancel();
    delete($parent_handlers{$handler});
       }
   
       $I_am_child    = 1; # Seems like in spite of it all I may still getting
                                   # parent event dispatches.. flag I'm a child.
   
   
     #      #
Line 1660  sub ChildProcess { Line 1733  sub ChildProcess {
       desc => 'Lonc Listener Unix Socket',        desc => 'Lonc Listener Unix Socket',
       fd   => $socket);        fd   => $socket);
           
     $Event::Debuglevel = $DebugLevel;      $Event::DebugLevel = $DebugLevel;
           
     Debug(9, "Making initial lond connection for ".$RemoteHost);      Debug(9, "Making initial lond connection for ".$RemoteHost);
   
Line 1668  sub ChildProcess { Line 1741  sub ChildProcess {
           
      # &MakeLondConnection(); // let first work request do it.       # &MakeLondConnection(); // let first work request do it.
   
     #  If We are in diwhenidle, need to accept the connection since the      #  need to accept the connection since the event may  not fire.
     #  event may  not fire.  
   
     if ($DieWhenIdle) {      &accept_client($socket);
  &accept_client($socket);  
     }  
   
     Debug(9,"Entering event loop");      Debug(9,"Entering event loop");
     my $ret = Event::loop(); #  Start the main event loop.      my $ret = Event::loop(); #  Start the main event loop.
           
           
     die "Main event loop exited!!!";      &child_exit (-1,"Main event loop exited!!!");
 }  }
   
 #  Create a new child for host passed in:  #  Create a new child for host passed in:
   
 sub CreateChild {  sub CreateChild {
     my ($host, $socket) = @_;      my ($host, $hostid) = @_;
   
     my $sigset = POSIX::SigSet->new(SIGINT);      my $sigset = POSIX::SigSet->new(SIGINT);
     sigprocmask(SIG_BLOCK, $sigset);      sigprocmask(SIG_BLOCK, $sigset);
Line 1695  sub CreateChild { Line 1765  sub CreateChild {
     if($pid) { # Parent      if($pid) { # Parent
  $RemoteHost = "Parent";   $RemoteHost = "Parent";
  $ChildHash{$pid} = $host;   $ChildHash{$pid} = $host;
  $HostToPid{$host}= $pid;  
  sigprocmask(SIG_UNBLOCK, $sigset);   sigprocmask(SIG_UNBLOCK, $sigset);
   
     } else { # child.      } else { # child.
    $RemoteHostId = $hostid;
  ShowStatus("Connected to ".$RemoteHost);   ShowStatus("Connected to ".$RemoteHost);
  $SIG{INT} = 'DEFAULT';   $SIG{INT} = 'DEFAULT';
  sigprocmask(SIG_UNBLOCK, $sigset);   sigprocmask(SIG_UNBLOCK, $sigset);
  if(defined $socket) {   &ChildProcess(); # Does not return.
     &ChildProcess($socket);  
  } else {  
     ChildProcess; # Does not return.  
  }  
     }      }
 }  }
   
Line 1716  sub CreateChild { Line 1782  sub CreateChild {
 #    a connection request arrives.  We must:  #    a connection request arrives.  We must:
 #     Start a child process to accept the connection request.  #     Start a child process to accept the connection request.
 #     Kill our listen on the socket.  #     Kill our listen on the socket.
 #     Setup an event to handle the child process exit. (SIGCHLD).  
 # Parameter:  # Parameter:
 #    event       - The event object that was created to monitor this socket.  #    event       - The event object that was created to monitor this socket.
 #                  event->w->fd is the socket.  #                  event->w->fd is the socket.
Line 1735  sub parent_client_connection { Line 1800  sub parent_client_connection {
  my ($event)   = @_;   my ($event)   = @_;
  my $watcher   = $event->w;   my $watcher   = $event->w;
  my $socket    = $watcher->fd;   my $socket    = $watcher->fd;
    my $connection = $socket->accept(); # Accept the client connection.
    Event->io(cb      => \&get_remote_hostname,
     poll    => 'r',
     data    => "",
     fd      => $connection);
       }
   }
   
   sub get_remote_hostname {
    my ($event)   = @_;
    my $watcher   = $event->w;
    my $socket    = $watcher->fd;
   
  # Lookup the host associated with this socket:   my $thisread;
    my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0);
  my $host = $listening_to{$socket};   Debug(8, "rcv:  data length = ".length($thisread)." read =".$thisread);
    if (!defined($rv) || length($thisread) == 0) {
  # Start the child:      # Likely eof on socket.
       Debug(5,"Client Socket closed on lonc for p_c_c");
       close($socket);
       $watcher->cancel();
       return;
    }
   
    my $data    = $watcher->data().$thisread;
    $watcher->data($data);
    if($data =~ /\n$/) { # Request entirely read.
       chomp($data);
    } else {
       return;
    }
   
  &Debug(9,"Creating child for $host (parent_client_connection)");   &Debug(5,"Creating child for $data (parent_client_connection)");
  &CreateChild($host, $socket);   my ($hostname,$lonid) = split(':',$data,2);
    &CreateChild($hostname,$lonid);
   
  # Clean up the listen since now the child takes over until it exits.   # Clean up the listen since now the child takes over until it exits.
   
  $watcher->cancel(); # Nolonger listening to this event   $watcher->cancel(); # Nolonger listening to this event
  delete($listening_to{$socket});   $socket->send("done\n");
  delete($parent_dispatchers{$host});  
  $socket->close();   $socket->close();
     }  
 }  }
   
 # parent_listen:  # parent_listen:
Line 1775  sub parent_listen { Line 1861  sub parent_listen {
     my ($loncapa_host) = @_;      my ($loncapa_host) = @_;
     Debug(5, "parent_listen: $loncapa_host");      Debug(5, "parent_listen: $loncapa_host");
   
     my $socket    = &SetupLoncListener($loncapa_host);      my ($socket,$file);
       if (!$loncapa_host) {
    $loncapa_host = 'common_parent';
    $file         = $perlvar{'lonSockCreate'};
       } else {
    $file         = &GetLoncSocketPath($loncapa_host);
       }
       $socket = &SetupLoncListener($loncapa_host,$file);
   
     $listening_to{$socket} = $loncapa_host;      $listening_to{$socket} = $loncapa_host;
     if (!$socket) {      if (!$socket) {
  die "Unable to create a listen socket for $loncapa_host";   die "Unable to create a listen socket for $loncapa_host";
     }      }
           
     my $lock_file = &GetLoncSocketPath($loncapa_host).".lock";      my $lock_file = $file.".lock";
     unlink($lock_file); # No problem if it doesn't exist yet [startup e.g.]      unlink($lock_file); # No problem if it doesn't exist yet [startup e.g.]
   
     my $watcher = Event->io(cb    => \&parent_client_connection,      my $watcher = 
       poll  => 'r',   Event->io(cb    => \&parent_client_connection,
       desc  => "Parent listener unix socket ($loncapa_host)",    poll  => 'r',
       fd    => $socket);    desc  => "Parent listener unix socket ($loncapa_host)",
     data => "",
     fd    => $socket);
     $parent_dispatchers{$loncapa_host} = $watcher;      $parent_dispatchers{$loncapa_host} = $watcher;
   
 }  }
   
   sub parent_clean_up {
       my ($loncapa_host) = @_;
       Debug(5, "parent_clean_up: $loncapa_host");
   
       my $socket_file = &GetLoncSocketPath($loncapa_host);
       unlink($socket_file); # No problem if it doesn't exist yet [startup e.g.]
       my $lock_file   = $socket_file.".lock";
       unlink($lock_file); # No problem if it doesn't exist yet [startup e.g.]
   }
   
   
 # listen_on_all_unix_sockets:  # listen_on_all_unix_sockets:
 #    This sub initiates a listen on all unix domain lonc client sockets.  #    This sub initiates a listen on all unix domain lonc client sockets.
Line 1814  sub listen_on_all_unix_sockets { Line 1920  sub listen_on_all_unix_sockets {
     my $host_iterator      =   &LondConnection::GetHostIterator();      my $host_iterator      =   &LondConnection::GetHostIterator();
     while (!$host_iterator->end()) {      while (!$host_iterator->end()) {
  my $host_entry_ref =   $host_iterator->get();   my $host_entry_ref =   $host_iterator->get();
  my $host_name      = $host_entry_ref->[0];   my $host_name      = $host_entry_ref->[3];
  Debug(9, "Listen for $host_name");   Debug(9, "Listen for $host_name");
  &parent_listen($host_name);   &parent_listen($host_name);
  $host_iterator->next();   $host_iterator->next();
     }      }
 }  }
   
   sub listen_on_common_socket {
       Debug(5, "listen_on_common_socket");
       &parent_listen();
   }
   
   #   server_died is called whenever a child process exits.
   #   Since this is dispatched via a signal, we must process all
   #   dead children until there are no more left.  The action
   #   is to:
   #      - Remove the child from the bookeeping hashes
   #      - Re-establish a listen on the unix domain socket associated
   #        with that host.
   # Parameters:
   #    The event, but we don't actually care about it.
   sub server_died {
       &Debug(9, "server_died called...");
       
       while(1) { # Loop until waitpid nowait fails.
    my $pid = waitpid(-1, WNOHANG);
    if($pid <= 0) {
       return; # Nothing left to wait for.
    }
    # need the host to restart:
   
    my $host = $ChildHash{$pid};
    if($host) { # It's for real...
       &Debug(9, "Caught sigchild for $host");
       delete($ChildHash{$pid});
       &parent_clean_up($host);
   
    } else {
       &Debug(5, "Caught sigchild for pid not in hosts hash: $pid");
    }
       }
   
   }
   
 #  #
 #  Parent process logic pass 1:  #  Parent process logic pass 1:
 #   For each entry in the hosts table, we will  #   For each entry in the hosts table, we will
Line 1869  ShowStatus("Forking node servers"); Line 2012  ShowStatus("Forking node servers");
 Log("CRITICAL", "--------------- Starting children ---------------");  Log("CRITICAL", "--------------- Starting children ---------------");
   
 LondConnection::ReadConfig;               # Read standard config files.  LondConnection::ReadConfig;               # Read standard config files.
 my $HostIterator = LondConnection::GetHostIterator;  
   
 if ($DieWhenIdle) {  $RemoteHost = "[parent]";
     $RemoteHost = "[parent]";  &listen_on_common_socket();
     &listen_on_all_unix_sockets();  
 } else {  
       
     while (! $HostIterator->end()) {  
   
  my $hostentryref = $HostIterator->get();  
  CreateChild($hostentryref->[0]);  
  $HostHash{$hostentryref->[0]} = $hostentryref->[4];  
  $HostIterator->next();  
     }  
 }  
   
 $RemoteHost = "Parent Server";  $RemoteHost = "Parent Server";
   
Line 1892  $RemoteHost = "Parent Server"; Line 2023  $RemoteHost = "Parent Server";
 ShowStatus("Parent keeping the flock");  ShowStatus("Parent keeping the flock");
   
   
 if ($DieWhenIdle) {  # We need to setup a SIGChild event to handle the exit (natural or otherwise)
     $Event::DebugLevel = $DebugLevel;  # of the children.
     Debug(9, "Parent entering event loop");  
     my $ret = Event::loop();  
     die "Main Event loop exited: $ret";  
   
   
 } else {  
     #  
     #   Set up parent signals:  
     #  
       
     $SIG{INT}  = \&Terminate;  
     $SIG{TERM} = \&Terminate;   
     $SIG{HUP}  = \&Restart;  
     $SIG{USR1} = \&CheckKids;   
     $SIG{USR2} = \&UpdateKids; # LonManage update request.  
       
     while(1) {  
  my $deadchild = wait();  
  if(exists $ChildHash{$deadchild}) { # need to restart.  
     my $deadhost = $ChildHash{$deadchild};  
     delete($HostToPid{$deadhost});  
     delete($ChildHash{$deadchild});  
     Log("WARNING","Lost child pid= ".$deadchild.  
  "Connected to host ".$deadhost);  
     Log("INFO", "Restarting child procesing ".$deadhost);  
     CreateChild($deadhost);  
  }  
     }  
 }  
   
   Event->signal(cb       => \&server_died,
         desc     => "Child exit handler",
         signal   => "CHLD");
   
   
   # Set up all the other signals we set up.
   
   $parent_handlers{INT} = Event->signal(cb       => \&Terminate,
         desc     => "Parent INT handler",
         signal   => "INT");
   $parent_handlers{TERM} = Event->signal(cb       => \&Terminate,
          desc     => "Parent TERM handler",
          signal   => "TERM");
   $parent_handlers{HUP}  = Event->signal(cb       => \&KillThemAll,
          desc     => "Parent HUP handler.",
          signal   => "HUP");
   $parent_handlers{USR1} = Event->signal(cb       => \&CheckKids,
          desc     => "Parent USR1 handler",
          signal   => "USR1");
   $parent_handlers{USR2} = Event->signal(cb       => \&UpdateKids,
          desc     => "Parent USR2 handler.",
          signal   => "USR2");
   
   #  Start procdesing events.
   
   $Event::DebugLevel = $DebugLevel;
   Debug(9, "Parent entering event loop");
   my $ret = Event::loop();
   die "Main Event loop exited: $ret";
   
 =pod  =pod
   
Line 1944  sub CheckKids { Line 2075  sub CheckKids {
     my $now=time;      my $now=time;
     my $local=localtime($now);      my $local=localtime($now);
     print $fh "LONC status $local - parent $$ \n\n";      print $fh "LONC status $local - parent $$ \n\n";
       foreach my $host (keys %parent_dispatchers) {
    print $fh "LONC Parent process listening for $host\n";
       }
     foreach my $pid (keys %ChildHash) {      foreach my $pid (keys %ChildHash) {
  Debug(2, "Sending USR1 -> $pid");   Debug(2, "Sending USR1 -> $pid");
  kill 'USR1' => $pid; # Tell Child to report status.   kill 'USR1' => $pid; # Tell Child to report status.
  sleep 1; # Wait so file doesn't intermix.  
     }      }
   
 }  }
   
 =pod  =pod
Line 1981  sub UpdateKids { Line 2115  sub UpdateKids {
   
     Log("INFO", "Updating connections via SIGUSR2");      Log("INFO", "Updating connections via SIGUSR2");
   
     #  Just in case we need to kill our own lonc, we wait a few seconds to      #  I'm not sure what I was thinking in the first implementation.
     #  give it a chance to receive and relay lond's response to the       # someone will have to work hard to convince me the effect is any
     #  re-init command.      # different than Restart, especially now that we don't start up 
     #      # per host servers automatically, may as well just restart.
       # The down side is transactions that are in flight will get timed out
     sleep(2); # Wait a couple of seconds.      # (lost unless they are critical).
   
     my %hosts;                   # Indexed by loncapa hostname, value=ip.  
       
     # Need to re-read  the host table:  
       
       
     LondConnection::ReadConfig();  
     my $I = LondConnection::GetHostIterator;  
     while (! $I->end()) {  
  my $item = $I->get();  
  $hosts{$item->[0]} = $item->[4];  
  $I->next();  
     }  
   
     #  The logic below is written for clarity not for efficiency.  
     #  Since I anticipate that this function is only rarely called, that's  
     #  appropriate.  There are certainly ways to combine the loops below,  
     #  and anyone wishing to obscure the logic is welcome to go for it.  
     #  Note that we don't re-direct sigchild.  Instead we do what's needed  
     #  to the data structures that keep track of children to ensure that  
     #  when sigchild is honored, no new child is born.  
     #  
   
     #  For each existing child; if it's host doesn't exist, kill the child.  
   
     foreach my $child (keys %ChildHash) {  
  my $oldhost = $ChildHash{$child};  
  if (!(exists $hosts{$oldhost})) {  
     Log("CRITICAL", "Killing child for $oldhost  host no longer exists");  
     delete $ChildHash{$child};  
     delete $HostToPid{$oldhost};  
     kill 'QUIT' => $child;  
  }  
     }  
     # For each remaining existing child; if it's host's ip has changed,  
     # Restart the child on the new IP.  
   
     foreach my $child (keys %ChildHash) {  
  my $oldhost = $ChildHash{$child};  
  my $oldip   = $HostHash{$oldhost};  
  if ($hosts{$oldhost} ne $oldip) {  
   
     # kill the old child.  
   
     Log("CRITICAL", "Killing child for $oldhost host ip has changed...");  
     delete $ChildHash{$child};  
     delete $HostToPid{$oldhost};  
     kill 'QUIT' => $child;  
   
     # Do the book-keeping needed to start a new child on the  
     # new ip.  
   
     $HostHash{$oldhost} = $hosts{$oldhost};  
     CreateChild($oldhost);  
  }  
     }  
     # Finally, for each new host, not in the host hash, create a  
     # enter the host and create a new child.  
     # Force a status display of any existing process.  
   
     foreach my $host (keys %hosts) {      &KillThemAll();
  if(!(exists $HostHash{$host})) {  
     Log("INFO", "New host $host discovered in hosts.tab...");  
     $HostHash{$host} = $hosts{$host};  
     CreateChild($host);  
  } else {  
     kill 'HUP' => $HostToPid{$host};    # status display.  
  }  
     }  
 }  }
   
   
Line 2074  sub Restart { Line 2141  sub Restart {
     Log("CRITICAL", "Restarting");      Log("CRITICAL", "Restarting");
     my $execdir = $perlvar{'lonDaemons'};      my $execdir = $perlvar{'lonDaemons'};
     unlink("$execdir/logs/lonc.pid");      unlink("$execdir/logs/lonc.pid");
     exec("$execdir/loncnew");      exec("$executable");
 }  }
   
 =pod  =pod
Line 2095  sub KillThemAll { Line 2162  sub KillThemAll {
  Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");   Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");
  kill 'QUIT' => $pid;   kill 'QUIT' => $pid;
     }      }
   
   
 }  }
   
   
Line 2118  sub really_kill_them_all_dammit Line 2183  sub really_kill_them_all_dammit
  unlink("$execdir/logs/lonc.pid");   unlink("$execdir/logs/lonc.pid");
     }      }
 }  }
   
 =pod  =pod
   
 =head1 Terminate  =head1 Terminate
Line 2140  sub Terminate { Line 2206  sub Terminate {
     exit 0;      exit 0;
   
 }  }
   
   sub my_hostname {
       use Sys::Hostname;
       my $name = &hostname();
       &Debug(9,"Name is $name");
       return $name;
   }
   
 =pod  =pod
   
 =head1 Theory  =head1 Theory

Removed from v.1.62  
changed lines
  Added in v.1.81


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>