Diff for /loncom/loncnew between versions 1.17 and 1.23

version 1.17, 2003/08/03 00:44:31 version 1.23, 2003/09/15 09:24:49
Line 45 Line 45
   
 # Change log:  # Change log:
 #    $Log$  #    $Log$
   #    Revision 1.23  2003/09/15 09:24:49  foxr
   #    Add use strict and fix all the fallout from that.
   #
   #    Revision 1.22  2003/09/02 10:34:47  foxr
   #    - Fix errors in host dead detection logic (too many cases where the
   #      retries left were not getting incremented or just not checked).
   #    - Added some additional status to the ps axuww display:
   #      o Remaining retries on a host.
   #      o >>> DEAD <<< indicator if I've given up on a host.
   #    - Tested the SIGHUP will reset the retries remaining count (thanks to
   #      the above status stuff, and get allow the loncnew to re-try again
   #      on the host (thanks to the log).
   #
   #    Revision 1.21  2003/08/26 09:19:51  foxr
   #    How embarrassing... put in the SocketTimeout function in loncnew and forgot
   #    to actually hook it into the LondTransaction.  Added this to MakeLondConnection
   #    where it belongs... hopefully transactions (not just connection attempts) will
   #    timeout more speedily than the socket errors will catch it.
   #
   #    Revision 1.20  2003/08/25 18:48:11  albertel
   #    - fixing a forgotten ;
   #
   #    Revision 1.19  2003/08/19 09:31:46  foxr
   #    Get socket directory from configuration rather than the old hard coded test
   #    way that I forgot to un-hard code.
   #
   #    Revision 1.18  2003/08/06 09:52:29  foxr
   #    Also needed to remember to fail in-flight transactions if their sends fail.
   #
 #    Revision 1.17  2003/08/03 00:44:31  foxr  #    Revision 1.17  2003/08/03 00:44:31  foxr
 #    1. Correct handling of connection failure: Assume it means the host is  #    1. Correct handling of connection failure: Assume it means the host is
 #       unreachable and fail all of the queued transactions.  Note that the  #       unreachable and fail all of the queued transactions.  Note that the
Line 74 Line 103
 #    Revision 1.10  2003/06/24 02:46:04  foxr  #    Revision 1.10  2003/06/24 02:46:04  foxr
 #    Put a limit on  the number of times we'll retry a connection.  #    Put a limit on  the number of times we'll retry a connection.
 #    Start getting the signal stuff put in as well...note that need to get signals  #    Start getting the signal stuff put in as well...note that need to get signals
 #    going or else 6the client will permanently give up on dead servers.  #    going or else the client will permanently give up on dead servers.
 #  #
 #    Revision 1.9  2003/06/13 02:38:43  foxr  #    Revision 1.9  2003/06/13 02:38:43  foxr
 #    Add logging in 'expected format'  #    Add logging in 'expected format'
Line 88 Line 117
 #    complete coding to support deferred transactions.  #    complete coding to support deferred transactions.
 #  #
 #  #
   use strict;
 use lib "/home/httpd/lib/perl/";  use lib "/home/httpd/lib/perl/";
 use lib "/home/foxr/newloncapa/types";  use lib "/home/foxr/newloncapa/types";
 use Event qw(:DEFAULT );  use Event qw(:DEFAULT );
Line 143  my $IdleTimeout= 3600;  # Wait an hour b Line 172  my $IdleTimeout= 3600;  # Wait an hour b
 #  The variables below are only used by the child processes.  #  The variables below are only used by the child processes.
 #  #
 my $RemoteHost; # Name of host child is talking to.  my $RemoteHost; # Name of host child is talking to.
 my $UnixSocketDir= "/home/httpd/sockets";   my $UnixSocketDir= $perlvar{'lonSockDir'};
 my $IdleConnections = Stack->new(); # Set of idle connections  my $IdleConnections = Stack->new(); # Set of idle connections
 my %ActiveConnections; # Connections to the remote lond.  my %ActiveConnections; # Connections to the remote lond.
 my %ActiveTransactions; # LondTransactions in flight.  my %ActiveTransactions; # LondTransactions in flight.
Line 252  sub GetPeername { Line 281  sub GetPeername {
     my $peerip;      my $peerip;
     if($AdrFamily == AF_INET) {      if($AdrFamily == AF_INET) {
  ($peerport, $peerip) = sockaddr_in($peer);   ($peerport, $peerip) = sockaddr_in($peer);
  my $peername    = gethostbyaddr($iaddr, $AdrFamily);   my $peername    = gethostbyaddr($peerip, $AdrFamily);
  return $peername;   return $peername;
     } elsif ($AdrFamily == AF_UNIX) {      } elsif ($AdrFamily == AF_UNIX) {
  my $peerfile;   my $peerfile;
Line 273  sub Debug { Line 302  sub Debug {
     my $level   = shift;      my $level   = shift;
     my $message = shift;      my $message = shift;
     if ($level <= $DebugLevel) {      if ($level <= $DebugLevel) {
  Log("INFO", "-Debug- $message host = $RemotHost");   Log("INFO", "-Debug- $message host = $RemoteHost");
     }      }
 }  }
   
Line 313  sub ShowStatus { Line 342  sub ShowStatus {
 sub SocketTimeout {  sub SocketTimeout {
     my $Socket = shift;      my $Socket = shift;
           
     KillSocket($Socket);      KillSocket($Socket); # A transaction timeout also counts as
                                   # a connection failure:
       $ConnectionRetriesLeft--;
 }  }
   
 =pod  =pod
Line 327  Invoked  each timer tick. Line 358  Invoked  each timer tick.
   
 sub Tick {  sub Tick {
     my $client;      my $client;
     ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount);      if($ConnectionRetriesLeft > 0) {
    ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount
      ." Retries remaining: ".$ConnectionRetriesLeft);
       } else {
    ShowStatus(GetServerHost()." >> DEAD <<");
       }
     # Is it time to prune connection count:      # Is it time to prune connection count:
   
   
Line 336  sub Tick { Line 371  sub Tick {
        ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do?         ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do?
  $IdleSeconds++;   $IdleSeconds++;
  if($IdleSeconds > $IdleTimeout) { # Prune a connection...   if($IdleSeconds > $IdleTimeout) { # Prune a connection...
     $Socket = $IdleConnections->pop();      my $Socket = $IdleConnections->pop();
     KillSocket($Socket);      KillSocket($Socket);
  }   }
     } else {      } else {
Line 345  sub Tick { Line 380  sub Tick {
     #      #
     #  For each inflight transaction, tick down its timeout counter.      #  For each inflight transaction, tick down its timeout counter.
     #      #
     foreach $item (keys %ActiveTransactions) {      foreach my $item (keys %ActiveTransactions) {
  my $Socket = $ActiveTransactions{$item}->getServer();   my $Socket = $ActiveTransactions{$item}->getServer();
  $Socket->Tick();   $Socket->Tick();
     }      }
Line 359  sub Tick { Line 394  sub Tick {
     my $Connections = ($Requests <= $MaxConnectionCount) ?      my $Connections = ($Requests <= $MaxConnectionCount) ?
  $Requests : $MaxConnectionCount;   $Requests : $MaxConnectionCount;
     Debug(1,"Work but no connections, start ".$Connections." of them");      Debug(1,"Work but no connections, start ".$Connections." of them");
     for ($i =0; $i < $Connections; $i++) {      my $successCount = 0;
  MakeLondConnection();      for (my $i =0; $i < $Connections; $i++) {
    $successCount += MakeLondConnection();
       }
       if($successCount == 0) { # All connections failed:
    Debug(1,"Work in queue failed to make any connectiouns\n");
    EmptyQueue(); # Fail pending transactions with con_lost.
     }      }
  } else {   } else {
       ShowStatus(GetServerHost()." >>> DEAD!!! <<<");
     Debug(1,"Work in queue, but gave up on connections..flushing\n");      Debug(1,"Work in queue, but gave up on connections..flushing\n");
     EmptyQueue(); # Connections can't be established.      EmptyQueue(); # Connections can't be established.
  }   }
Line 414  sub ServerToIdle { Line 455  sub ServerToIdle {
   
     #  If there's work to do, start the transaction:      #  If there's work to do, start the transaction:
   
     $reqdata = $WorkQueue->dequeue(); # This is a LondTransaction      my $reqdata = $WorkQueue->dequeue(); # This is a LondTransaction
     unless($reqdata eq undef)  {      unless($reqdata eq undef)  {
  Debug(9, "Queue gave request data: ".$reqdata->getRequest());   Debug(9, "Queue gave request data: ".$reqdata->getRequest());
  &StartRequest($Socket,  $reqdata);   &StartRequest($Socket,  $reqdata);
Line 548  sub CompleteTransaction { Line 589  sub CompleteTransaction {
  StartClientReply($Transaction, $data);   StartClientReply($Transaction, $data);
     } else { # Delete deferred transaction file.      } else { # Delete deferred transaction file.
  Log("SUCCESS", "A delayed transaction was completed");   Log("SUCCESS", "A delayed transaction was completed");
  LogPerm("S:$Client:".$Transaction->getRequest());   LogPerm("S:$Transaction->getClient() :".$Transaction->getRequest());
  unlink $Transaction->getFile();   unlink $Transaction->getFile();
     }      }
 }  }
Line 616  sub FailTransaction { Line 657  sub FailTransaction {
  Debug(1," Replying con_lost to ".$transaction->getRequest());   Debug(1," Replying con_lost to ".$transaction->getRequest());
  StartClientReply($transaction, "con_lost\n");   StartClientReply($transaction, "con_lost\n");
     }      }
       if($ConnectionRetriesLeft <= 0) {
    Log("CRITICAL", "Host marked dead: ".GetServerHost());
       }
   
 }  }
   
Line 627  sub FailTransaction { Line 671  sub FailTransaction {
   
 =cut  =cut
 sub EmptyQueue {  sub EmptyQueue {
       $ConnectionRetriesLeft--; # Counts as connection failure too.
     while($WorkQueue->Count()) {      while($WorkQueue->Count()) {
  my $request = $WorkQueue->dequeue(); # This is a transaction   my $request = $WorkQueue->dequeue(); # This is a transaction
  FailTransaction($request);   FailTransaction($request);
Line 641  Close all connections open on lond prior Line 686  Close all connections open on lond prior
   
 =cut  =cut
 sub CloseAllLondConnections {  sub CloseAllLondConnections {
     foreach $Socket (keys %ActiveConnections) {      foreach my $Socket (keys %ActiveConnections) {
  KillSocket($Socket);   KillSocket($Socket);
     }      }
 }  }
Line 693  sub KillSocket { Line 738  sub KillSocket {
     #  work queue, the work all gets failed with con_lost.      #  work queue, the work all gets failed with con_lost.
     #      #
     if($ConnectionCount == 0) {      if($ConnectionCount == 0) {
  EmptyQueue;   EmptyQueue();
     }      }
 }  }
   
Line 762  sub LondReadable { Line 807  sub LondReadable {
     my $Socket     = $Watcher->data;      my $Socket     = $Watcher->data;
     my $client     = undef;      my $client     = undef;
   
     &Debug(6,"LondReadable called state = ".$State);      &Debug(6,"LondReadable called state = ".$Socket->GetState());
   
   
     my $State = $Socket->GetState(); # All action depends on the state.      my $State = $Socket->GetState(); # All action depends on the state.
Line 783  sub LondReadable { Line 828  sub LondReadable {
  }   }
  $Watcher->cancel();   $Watcher->cancel();
  KillSocket($Socket);   KillSocket($Socket);
    $ConnectionRetriesLeft--;       # Counts as connection failure
  return;   return;
     }      }
     SocketDump(6,$Socket);      SocketDump(6,$Socket);
Line 816  sub LondReadable { Line 862  sub LondReadable {
     } elsif ($State eq "Idle") {      } elsif ($State eq "Idle") {
  # If necessary, complete a transaction and then go into the   # If necessary, complete a transaction and then go into the
  # idle queue.   # idle queue.
    #  Note that a trasition to idle indicates a live lond
    # on the other end so reset the connection retries.
    #
    $ConnectionRetriesLeft = $ConnectionRetries; # success resets the count
  $Watcher->cancel();   $Watcher->cancel();
  if(exists($ActiveTransactions{$Socket})) {   if(exists($ActiveTransactions{$Socket})) {
     Debug(8,"Completing transaction!!");      Debug(8,"Completing transaction!!");
Line 928  sub LondWritable { Line 978  sub LondWritable {
     # We'll treat this as if the socket got disconnected:      # We'll treat this as if the socket got disconnected:
     Log("WARNING", "Connection to ".$RemoteHost.      Log("WARNING", "Connection to ".$RemoteHost.
  " has been disconnected");   " has been disconnected");
       FailTransaction($ActiveTransactions{$Socket});
     $Watcher->cancel();      $Watcher->cancel();
     KillSocket($Socket);      KillSocket($Socket);
     return;      return;
Line 1027  sub QueueDelayed { Line 1078  sub QueueDelayed {
     Debug(4, "Delayed path: ".$path);      Debug(4, "Delayed path: ".$path);
     opendir(DIRHANDLE, $path);      opendir(DIRHANDLE, $path);
           
     @alldelayed = grep /\.$RemoteHost$/, readdir DIRHANDLE;      my @alldelayed = grep /\.$RemoteHost$/, readdir DIRHANDLE;
     Debug(4, "Got ".$alldelayed." delayed files");  
     closedir(DIRHANDLE);      closedir(DIRHANDLE);
     my $dfname;      my $dfname;
     my $reqfile;      my $reqfile;
Line 1070  sub MakeLondConnection { Line 1120  sub MakeLondConnection {
  $ConnectionRetriesLeft--;   $ConnectionRetriesLeft--;
  return 0; # Failure.   return 0; # Failure.
     }  else {      }  else {
  $ConnectionRetriesLeft = $ConnectionRetries; # success resets the count  
  # The connection needs to have writability    # The connection needs to have writability 
  # monitored in order to send the init sequence   # monitored in order to send the init sequence
  # that starts the whole authentication/key   # that starts the whole authentication/key
Line 1083  sub MakeLondConnection { Line 1133  sub MakeLondConnection {
     &Debug(9,"MakeLondConnection got socket: ".$Socket);      &Debug(9,"MakeLondConnection got socket: ".$Socket);
  }   }
   
    $Connection->SetTimeoutCallback(\&SocketTimeout);
  $event = Event->io(fd       => $Socket,  
    my $event = Event->io(fd       => $Socket,
    poll     => 'w',     poll     => 'w',
    cb       => \&LondWritable,     cb       => \&LondWritable,
    data     => $Connection,     data     => $Connection,
Line 1140  sub StartRequest { Line 1191  sub StartRequest {
     $ActiveTransactions{$Lond} = $Request;      $ActiveTransactions{$Lond} = $Request;
   
     $Lond->InitiateTransaction($Request->getRequest());      $Lond->InitiateTransaction($Request->getRequest());
     $event = Event->io(fd      => $Socket,      my $event = Event->io(fd      => $Socket,
        poll    => "w",         poll    => "w",
        cb      => \&LondWritable,         cb      => \&LondWritable,
        data    => $Lond,         data    => $Lond,
Line 1182  sub QueueTransaction { Line 1233  sub QueueTransaction {
  Debug(8,"Must queue...");   Debug(8,"Must queue...");
  $WorkQueue->enqueue($requestData);   $WorkQueue->enqueue($requestData);
  if($ConnectionCount < $MaxConnectionCount) {   if($ConnectionCount < $MaxConnectionCount) {
     Debug(4,"Starting additional lond connection");      if($ConnectionRetriesLeft > 0) {
     if(MakeLondConnection() == 0) {   Debug(4,"Starting additional lond connection");
  EmptyQueue(); # Fail transactions, can't make connection.   if(MakeLondConnection() == 0) {
       EmptyQueue(); # Fail transactions, can't make connection.
    }
       } else {
    ShowStatus(GetServerHost()." >>> DEAD !!!! <<<");
    EmptyQueue(); # It's worse than that ... he's dead Jim.
     }      }
  }   }
     } else { # Can start the request:      } else { # Can start the request:
Line 1350  sub SetupLoncListener { Line 1406  sub SetupLoncListener {
 Child USR1 signal handler to report the most recent status  Child USR1 signal handler to report the most recent status
 into the status file.  into the status file.
   
   We also use this to reset the retries count in order to allow the
   client to retry connections with a previously dead server.
 =cut  =cut
 sub ChildStatus {  sub ChildStatus {
     my $event = shift;      my $event = shift;
Line 1360  sub ChildStatus { Line 1418  sub ChildStatus {
     my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");      my $fh = IO::File->new(">>$docdir/lon-status/loncstatus.txt");
     print $fh $$."\t".$RemoteHost."\t".$Status."\t".      print $fh $$."\t".$RemoteHost."\t".$Status."\t".
  $RecentLogEntry."\n";   $RecentLogEntry."\n";
       $ConnectionRetriesLeft = $ConnectionRetries;
 }  }
   
 =pod  =pod
Line 1455  sub CreateChild { Line 1514  sub CreateChild {
     my $host = shift;      my $host = shift;
     $RemoteHost = $host;      $RemoteHost = $host;
     Log("CRITICAL", "Forking server for ".$host);      Log("CRITICAL", "Forking server for ".$host);
     $pid          = fork;      my $pid          = fork;
     if($pid) { # Parent      if($pid) { # Parent
  $RemoteHost = "Parent";   $RemoteHost = "Parent";
  $ChildHash{$pid} = $RemoteHost;   $ChildHash{$pid} = $RemoteHost;
Line 1463  sub CreateChild { Line 1522  sub CreateChild {
   
     } else { # child.      } else { # child.
  ShowStatus("Connected to ".$RemoteHost);   ShowStatus("Connected to ".$RemoteHost);
  $SIG{INT} = DEFAULT;   $SIG{INT} = 'DEFAULT';
  sigprocmask(SIG_UNBLOCK, $sigset);   sigprocmask(SIG_UNBLOCK, $sigset);
  ChildProcess; # Does not return.   ChildProcess; # Does not return.
     }      }
Line 1500  if ($childpid != 0) { Line 1559  if ($childpid != 0) {
 #  #
   
 ShowStatus("Parent writing pid file:");  ShowStatus("Parent writing pid file:");
 $execdir = $perlvar{'lonDaemons'};  my $execdir = $perlvar{'lonDaemons'};
 open (PIDSAVE, ">$execdir/logs/lonc.pid");  open (PIDSAVE, ">$execdir/logs/lonc.pid");
 print PIDSAVE "$$\n";  print PIDSAVE "$$\n";
 close(PIDSAVE);  close(PIDSAVE);
Line 1519  Log("CRITICAL", "--------------- Startin Line 1578  Log("CRITICAL", "--------------- Startin
 my $HostIterator = LondConnection::GetHostIterator;  my $HostIterator = LondConnection::GetHostIterator;
 while (! $HostIterator->end()) {  while (! $HostIterator->end()) {
   
     $hostentryref = $HostIterator->get();      my $hostentryref = $HostIterator->get();
     CreateChild($hostentryref->[0]);      CreateChild($hostentryref->[0]);
     $HostIterator->next();      $HostIterator->next();
 }  }
Line 1539  $SIG{HUP}  = \&Restart; Line 1598  $SIG{HUP}  = \&Restart;
 $SIG{USR1} = \&CheckKids;   $SIG{USR1} = \&CheckKids; 
   
 while(1) {  while(1) {
     $deadchild = wait();      my $deadchild = wait();
     if(exists $ChildHash{$deadchild}) { # need to restart.      if(exists $ChildHash{$deadchild}) { # need to restart.
  $deadhost = $ChildHash{$deadchild};   my $deadhost = $ChildHash{$deadchild};
  delete($ChildHash{$deadchild});   delete($ChildHash{$deadchild});
  Log("WARNING","Lost child pid= ".$deadchild.   Log("WARNING","Lost child pid= ".$deadchild.
       "Connected to host ".$deadhost);        "Connected to host ".$deadhost);
Line 1571  sub CheckKids { Line 1630  sub CheckKids {
     my $now=time;      my $now=time;
     my $local=localtime($now);      my $local=localtime($now);
     print $fh "LONC status $local - parent $$ \n\n";      print $fh "LONC status $local - parent $$ \n\n";
     foreach $pid (keys %ChildHash) {      foreach my $pid (keys %ChildHash) {
  Debug(2, "Sending USR1 -> $pid");   Debug(2, "Sending USR1 -> $pid");
  kill 'USR1' => $pid; # Tell Child to report status.   kill 'USR1' => $pid; # Tell Child to report status.
  sleep 1; # Wait so file doesn't intermix.   sleep 1; # Wait so file doesn't intermix.
Line 1589  the config file. Line 1648  the config file.
 =cut  =cut
   
 sub Restart {  sub Restart {
     KillThemAll; # First kill all the children.      &KillThemAll; # First kill all the children.
     Log("CRITICAL", "Restarting");      Log("CRITICAL", "Restarting");
     my $execdir = $perlvar{'lonDaemons'};      my $execdir = $perlvar{'lonDaemons'};
     unlink("$execdir/logs/lonc.pid");      unlink("$execdir/logs/lonc.pid");
Line 1608  SIGHUP.  Responds to sigint and sigterm. Line 1667  SIGHUP.  Responds to sigint and sigterm.
 sub KillThemAll {  sub KillThemAll {
     Debug(2, "Kill them all!!");      Debug(2, "Kill them all!!");
     local($SIG{CHLD}) = 'IGNORE';      # Our children >will< die.      local($SIG{CHLD}) = 'IGNORE';      # Our children >will< die.
     foreach $pid (keys %ChildHash) {      foreach my $pid (keys %ChildHash) {
  my $serving = $ChildHash{$pid};   my $serving = $ChildHash{$pid};
  Debug(2, "Killing lonc for $serving pid = $pid");   Debug(2, "Killing lonc for $serving pid = $pid");
  ShowStatus("Killing lonc for $serving pid = $pid");   ShowStatus("Killing lonc for $serving pid = $pid");

Removed from v.1.17  
changed lines
  Added in v.1.23


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>