Diff for /loncom/Attic/lonc between versions 1.18 and 1.54

version 1.18, 2001/11/26 22:20:26 version 1.54, 2003/08/29 18:25:01
Line 5 Line 5
 # provides persistent TCP connections to the other servers in the network  # provides persistent TCP connections to the other servers in the network
 # through multiplexed domain sockets  # through multiplexed domain sockets
 #  #
   # $Id$
   #
   # Copyright Michigan State University Board of Trustees
   #
   # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   #
   # LON-CAPA is free software; you can redistribute it and/or modify
   # it under the terms of the GNU General Public License as published by
   # the Free Software Foundation; either version 2 of the License, or
   # (at your option) any later version.
   #
   # LON-CAPA is distributed in the hope that it will be useful,
   # but WITHOUT ANY WARRANTY; without even the implied warranty of
   # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   # GNU General Public License for more details.
   #
   # You should have received a copy of the GNU General Public License
   # along with LON-CAPA; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
   # /home/httpd/html/adm/gpl.txt
   #
   # http://www.lon-capa.org/
   #
 # PID in subdir logs/lonc.pid  # PID in subdir logs/lonc.pid
 # kill kills  # kill kills
 # HUP restarts  # HUP restarts
Line 13 Line 37
 # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19,  # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19,
 # 10/8,10/9,10/15,11/18,12/22,  # 10/8,10/9,10/15,11/18,12/22,
 # 2/8,7/25 Gerd Kortemeyer  # 2/8,7/25 Gerd Kortemeyer
 # 12/05 Scott Harrison  
 # 12/05 Gerd Kortemeyer  # 12/05 Gerd Kortemeyer
 # 01/10/01 Scott Harrison  # YEAR=2001
 # 03/14/01,03/15,06/12,11/26 Gerd Kortemeyer  # 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer
 #   # YEAR=2002
   # 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer
   # 3/07/02 Ron Fox 
 # based on nonforker from Perl Cookbook  # based on nonforker from Perl Cookbook
 # - server who multiplexes without forking  # - server who multiplexes without forking
   
   use lib '/home/httpd/lib/perl/';
   use LONCAPA::Configuration;
   
 use POSIX;  use POSIX;
 use IO::Socket;  use IO::Socket;
 use IO::Select;  use IO::Select;
Line 29  use Socket; Line 57  use Socket;
 use Fcntl;  use Fcntl;
 use Tie::RefHash;  use Tie::RefHash;
 use Crypt::IDEA;  use Crypt::IDEA;
   #use Net::Ping;
   use LWP::UserAgent();
   
 my $status='';  $status='';
 my $lastlog='';  $lastlog='';
   $conserver='SHELL';
 # grabs exception and records it to log before exiting  $DEBUG = 0; # Set to 1 for annoyingly complete logs.
 sub catchexception {  $VERSION='$Revison$'; #' stupid emacs
     my ($signal)=@_;  $remoteVERSION;
     $SIG{'QUIT'}='DEFAULT';  
     $SIG{__DIE__}='DEFAULT';  
     &logthis("<font color=red>CRITICAL: "  
      ."ABNORMAL EXIT. Child $$ for server $wasserver died through "  
      ."\"$signal\" with this parameter->[$@]</font>");  
     die($@);  
 }  
   
 $childmaxattempts=5;  
   
 # -------------------------------- Set signal handlers to record abnormal exits  # -------------------------------- Set signal handlers to record abnormal exits
   
 $SIG{'QUIT'}=\&catchexception;  &status("Init exception handlers");
   $SIG{QUIT}=\&catchexception;
 $SIG{__DIE__}=\&catchexception;  $SIG{__DIE__}=\&catchexception;
   
 # ------------------------------------ Read httpd access.conf and get variables  # ---------------------------------- Read loncapa_apache.conf and loncapa.conf
   &status("Read loncapa.conf and loncapa_apache.conf");
 open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";  my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
   my %perlvar=%{$perlvarref};
 while ($configline=<CONFIG>) {  undef $perlvarref;
     if ($configline =~ /PerlSetVar/) {  
  my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);  
         chomp($varvalue);  
         $perlvar{$varname}=$varvalue;  
     }  
 }  
 close(CONFIG);  
   
 # ----------------------------- Make sure this process is running from user=www  # ----------------------------- Make sure this process is running from user=www
   &status("Check user ID");
 my $wwwid=getpwnam('www');  my $wwwid=getpwnam('www');
 if ($wwwid!=$<) {  if ($wwwid!=$<) {
    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";     $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
Line 92  open (CONFIG,"$perlvar{'lonTabDir'}/host Line 107  open (CONFIG,"$perlvar{'lonTabDir'}/host
 while ($configline=<CONFIG>) {  while ($configline=<CONFIG>) {
     my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);      my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
     chomp($ip);      chomp($ip);
     $hostip{$id}=$ip;      if ($ip) {
        $hostip{$id}=$ip;
        $hostname{$id}=$name;
       }
 }  }
   
 close(CONFIG);  close(CONFIG);
   
 # -------------------------------------------------------- Routines for forking  # -------------------------------------------------------- Routines for forking
Line 105  close(CONFIG); Line 124  close(CONFIG);
 %childatt               = ();       # number of attempts to start server  %childatt               = ();       # number of attempts to start server
                                     # for ID                                      # for ID
   
 sub REAPER {                        # takes care of dead children  $childmaxattempts=15;
     $SIG{CHLD} = \&REAPER;  
     my $pid = wait;  
     my $wasserver=$children{$pid};  
     &logthis("<font color=red>CRITICAL: "  
      ."Child $pid for server $wasserver died ($childatt{$wasserver})</font>");  
     delete $children{$pid};  
     delete $childpid{$wasserver};  
     my $port = "$perlvar{'lonSockDir'}/$wasserver";  
     unlink($port);  
 }  
   
 sub HUNTSMAN {                      # signal handler for SIGINT  
     local($SIG{CHLD}) = 'IGNORE';   # we're going to kill our children  
     map {  
         $wasserver=$children{$_};  
         &status("Closing $wasserver");  
         &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));  
         &status("Kill PID $_ for $wasserver");  
  kill ('INT',$_);  
     } keys %children;  
     my $execdir=$perlvar{'lonDaemons'};  
     unlink("$execdir/logs/lonc.pid");  
     &logthis("<font color=red>CRITICAL: Shutting down</font>");  
     exit;                           # clean up with dignity  
 }  
   
 sub HUPSMAN {                      # signal handler for SIGHUP  
     local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children  
     map {  
         $wasserver=$children{$_};  
         &status("Closing $wasserver");  
         &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));  
         &status("Kill PID $_ for $wasserver");  
  kill ('INT',$_);  
     } keys %children;  
     &logthis("<font color=red>CRITICAL: Restarting</font>");  
     unlink("$execdir/logs/lonc.pid");  
     my $execdir=$perlvar{'lonDaemons'};  
     exec("$execdir/lonc");         # here we go again  
 }  
   
 sub checkchildren {  
     &initnewstatus();  
     &logstatus();  
     &logthis('Going to check on the children');  
     map {  
  sleep 1;  
         unless (kill 'USR1' => $_) {  
     &logthis ('Child '.$_.' is dead');  
             &logstatus($$.' is dead');  
         }   
     } sort keys %children;  
 }  
   
 sub USRMAN {  
     &logthis("USR1: Trying to establish connections again");  
     foreach $thisserver (keys %hostip) {  
  $answer=subreply("ping",$thisserver);  
         &logthis("USR1: Ping $thisserver "  
         ."(pid >$childpid{$thisserver}<, $childatt{thisserver} attempts): "  
         ." >$answer<");  
     }  
     %childatt=();  
     &checkchildren();  
 }  
   
 # -------------------------------------------------- Non-critical communication  
 sub subreply {   
  my ($cmd,$server)=@_;  
  my $answer='';  
  if ($server ne $perlvar{'lonHostID'}) {   
     my $peerfile="$perlvar{'lonSockDir'}/$server";  
     my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",  
                                       Type    => SOCK_STREAM,  
                                       Timeout => 10)  
        or return "con_lost";  
     print $sclient "$cmd\n";  
     my $answer=<$sclient>;  
     chomp($answer);  
     if (!$answer) { $answer="con_lost"; }  
  } else { $answer='self_reply'; }  
  return $answer;  
 }  
   
 # --------------------------------------------------------------------- Logging  
   
 sub logthis {  
     my $message=shift;  
     my $execdir=$perlvar{'lonDaemons'};  
     my $fh=IO::File->new(">>$execdir/logs/lonc.log");  
     my $now=time;  
     my $local=localtime($now);  
     $lastlog=$local.': '.$message;  
     print $fh "$local ($$): $message\n";  
 }  
   
   
 sub logperm {  
     my $message=shift;  
     my $execdir=$perlvar{'lonDaemons'};  
     my $now=time;  
     my $local=localtime($now);  
     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");  
     print $fh "$now:$message:$local\n";  
 }  
 # ------------------------------------------------------------------ Log status  
   
 sub logstatus {  
     my $docdir=$perlvar{'lonDocRoot'};  
     my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt");  
     print $fh $$."\t".$status."\t".$lastlog."\n";  
 }  
   
 sub initnewstatus {  
     my $docdir=$perlvar{'lonDocRoot'};  
     my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt");  
     my $now=time;  
     my $local=localtime($now);  
     print $fh "LONC status $local - parent $$\n\n";  
 }  
   
 # -------------------------------------------------------------- Status setting  
   
 sub status {  
     my $what=shift;  
     my $now=time;  
     my $local=localtime($now);  
     $status=$local.': '.$what;  
 }  
   
   
 # ---------------------------------------------------- Fork once and dissociate  # ---------------------------------------------------- Fork once and dissociate
   &status("Fork and dissociate");
 $fpid=fork;  $fpid=fork;
 exit if $fpid;  exit if $fpid;
 die "Couldn't fork: $!" unless defined ($fpid);  die "Couldn't fork: $!" unless defined ($fpid);
   
 POSIX::setsid() or die "Can't start new session: $!";  POSIX::setsid() or die "Can't start new session: $!";
   
 # ------------------------------------------------------- Write our PID on disk  $conserver='PARENT';
   
   # ------------------------------------------------------- Write our PID on disk
   &status("Write PID");
 $execdir=$perlvar{'lonDaemons'};  $execdir=$perlvar{'lonDaemons'};
 open (PIDSAVE,">$execdir/logs/lonc.pid");  open (PIDSAVE,">$execdir/logs/lonc.pid");
 print PIDSAVE "$$\n";  print PIDSAVE "$$\n";
Line 262  $SIG{HUP}=$SIG{USR1}='IGNORE'; Line 153  $SIG{HUP}=$SIG{USR1}='IGNORE';
 &status("Forking ...");  &status("Forking ...");
   
 foreach $thisserver (keys %hostip) {  foreach $thisserver (keys %hostip) {
     make_new_child($thisserver);      #if (&online($hostname{$thisserver})) {
          make_new_child($thisserver);
       #}
 }  }
   
 &logthis("Done starting initial servers");  &logthis("Done starting initial servers");
 # ----------------------------------------------------- Install signal handlers  # ----------------------------------------------------- Install signal handlers
   
 $SIG{CHLD} = \&REAPER;  
 $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;  $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
 $SIG{HUP}  = \&HUPSMAN;  $SIG{HUP}  = \&HUPSMAN;
 $SIG{USR1} = \&USRMAN;  $SIG{USR1} = \&USRMAN;
   
 # And maintain the population.  # And maintain the population.
 while (1) {  while (1) {
     &status("Sleeping");      my $deadpid = wait; # Wait for the next child to die.
     sleep;                          # wait for a signal (i.e., child's death)                                  # See who died and start new one
                                     # See who died and start new one                                  # or a signal (e.g. USR1 for restart).
                                   # if a signal, the wait will fail
                                   # This is ordinarily detected by
                                   # checking for the existence of the
                                   # pid index inthe children hash since
                                   # the return value from a failed wait is -1
                                   # which is an impossible PID.
     &status("Woke up");      &status("Woke up");
     foreach $thisserver (keys %hostip) {      my $skipping='';
         if (!$childpid{$thisserver}) {  
     if ($childatt{$thisserver}<$childmaxattempts) {      if(exists($children{$deadpid})) {
        $childatt{$thisserver}++;  
                &logthis(   $thisserver = $children{$deadpid}; # Look name of dead guy's peer.
    "<font color=yellow>INFO: Trying to reconnect for $thisserver "  
   ."($childatt{$thisserver} of $childmaxattempts attempts)</font>");    delete($children{$deadpid}); # Get rid of dead hash entry.
                make_new_child($thisserver);  
     }   if($childatt{$thisserver} < $childmaxattempts) {
         }             $childatt{$thisserver}++;
       &logthis(
          "<font color=yellow>INFO: Trying to reconnect for $thisserver "
               ."($childatt{$thisserver} of $childmaxattempts attempts)</font>"); 
       make_new_child($thisserver);
   
    }
    else {
       $skipping .= $thisserver.' ';
    }
    if($skipping) {
       &logthis("<font color=blue>WARNING: Skipped $skipping</font>");
     
    }
     }      }
   
 }  }
   
   
   
 sub make_new_child {  sub make_new_child {
         
     my $conserver=shift;      $newserver=shift;
     my $pid;      my $pid;
     my $sigset;      my $sigset;
     &logthis("Attempting to start child for server $conserver");      &logthis("Attempting to start child for server $newserver");
     # block signal for fork      # block signal for fork
     $sigset = POSIX::SigSet->new(SIGINT);      $sigset = POSIX::SigSet->new(SIGINT);
     sigprocmask(SIG_BLOCK, $sigset)      sigprocmask(SIG_BLOCK, $sigset)
Line 310  sub make_new_child { Line 224  sub make_new_child {
         # Parent records the child's birth and returns.          # Parent records the child's birth and returns.
         sigprocmask(SIG_UNBLOCK, $sigset)          sigprocmask(SIG_UNBLOCK, $sigset)
             or die "Can't unblock SIGINT for fork: $!\n";              or die "Can't unblock SIGINT for fork: $!\n";
         $children{$pid} = $conserver;          $children{$pid} = $newserver;
         $childpid{$conserver} = $pid;          $childpid{$newserver} = $pid;
         return;          return;
     } else {      } else {
           $conserver=$newserver;
         # Child can *not* return from this subroutine.          # Child can *not* return from this subroutine.
         $SIG{INT} = 'DEFAULT';      # make SIGINT kill us as it did before          $SIG{INT} = 'DEFAULT';      # make SIGINT kill us as it did before
         $SIG{USR1}= \&logstatus;          $SIG{USR1}= \&logstatus;
Line 328  $port = "$perlvar{'lonSockDir'}/$conserv Line 243  $port = "$perlvar{'lonSockDir'}/$conserv
   
 unlink($port);  unlink($port);
   
 # ---------------------------------------------------- Client to network server  # -------------------------------------------------------------- Open other end
   
 &status("Opening TCP: $conserver");  
   
 unless (  
   $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},  
                                       PeerPort => $perlvar{'londPort'},  
                                       Proto    => "tcp",  
                                       Type     => SOCK_STREAM)  
    ) {   
        my $st=120+int(rand(240));  
        &logthis(  
 "<font color=blue>WARNING: Couldn't connect $conserver ($st secs): $@</font>");  
        sleep($st);  
        exit;   
      };  
 # --------------------------------------- Send a ping to make other end do USR1  
   
 &status("Init dialogue: $conserver");  
   
 print $remotesock "init\n";  
 $answer=<$remotesock>;  
 print $remotesock "$answer";  
 $answer=<$remotesock>;  
 chomp($answer);  
 &logthis("Init reply for $conserver: >$answer<");  
 if ($answer ne 'ok') {  
        my $st=120+int(rand(240));  
        &logthis(  
 "<font color=blue>WARNING: Init failed $conserver ($st secs)</font>");  
        sleep($st);  
        exit;   
 }  
 sleep 5;  
 &status("Ponging $conserver");  
 print $remotesock "pong\n";  
 $answer=<$remotesock>;  
 chomp($answer);  
 &logthis("Pong reply for $conserver: >$answer<");  
 # ----------------------------------------------------------- Initialize cipher  
   
 &status("Initialize cipher: $conserver");  
 print $remotesock "ekey\n";  
 my $buildkey=<$remotesock>;  
 my $key=$conserver.$perlvar{'lonHostID'};  
 $key=~tr/a-z/A-Z/;  
 $key=~tr/G-P/0-9/;  
 $key=~tr/Q-Z/0-9/;  
 $key=$key.$buildkey.$key.$buildkey.$key.$buildkey;  
 $key=substr($key,0,32);  
 my $cipherkey=pack("H32",$key);  
 if ($cipher=new IDEA $cipherkey) {  
    &logthis("Secure connection initialized: $conserver");  
 } else {  
    my $st=120+int(rand(240));  
    &logthis(  
      "<font color=blue>WARNING: ".  
      "Could not establish secure connection, $conserver ($st secs)!</font>");  
    sleep($st);  
    exit;  
 }  
   
   &openremote($conserver);
    &logthis("<font color=green> Connection to $conserver open </font>");
 # ----------------------------------------- We're online, send delayed messages  # ----------------------------------------- We're online, send delayed messages
     &status("Checking for delayed messages");      &status("Checking for delayed messages");
   
     my @allbuffered;      my @allbuffered;
     my $path="$perlvar{'lonSockDir'}/delayed";      my $path="$perlvar{'lonSockDir'}/delayed";
     opendir(DIRHANDLE,$path);      opendir(DIRHANDLE,$path);
     @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE;      @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE;
     closedir(DIRHANDLE);      closedir(DIRHANDLE);
     my $dfname;      my $dfname;
     map {      foreach (sort @allbuffered) {
         &status("Sending delayed $conserver $_");          &status("Sending delayed: $_");
         $dfname="$path/$_";          $dfname="$path/$_";
         &logthis($dfname);          if($DEBUG) { &logthis('Sending '.$dfname); }
         my $wcmd;          my $wcmd;
         {          {
          my $dfh=IO::File->new($dfname);           my $dfh=IO::File->new($dfname);
Line 422  if ($cipher=new IDEA $cipherkey) { Line 280  if ($cipher=new IDEA $cipherkey) {
             }              }
             $cmd="enc:$cmdlength:$encrequest\n";              $cmd="enc:$cmdlength:$encrequest\n";
         }          }
    $answer = londtransaction($remotesock, $cmd, 60);
         print $remotesock "$cmd\n";  
         $answer=<$remotesock>;  
  chomp($answer);   chomp($answer);
         if ($answer ne '') {  
           if (($answer ne '') && ($@!~/timeout/)) {
     unlink("$dfname");      unlink("$dfname");
             &logthis("Delayed $cmd to $conserver: >$answer<");              &logthis("Delayed $cmd: >$answer<");
             &logperm("S:$conserver:$bcmd");              &logperm("S:$conserver:$bcmd");
         }                  }        
     } @allbuffered;      }
    if($DEBUG) { &logthis("<font color=green> Delayed transactions sent"); }
   
 # ------------------------------------------------------- Listen to UNIX socket  # ------------------------------------------------------- Listen to UNIX socket
 &status("Opening socket $conserver");  &status("Opening socket");
 unless (  unless (
   $server = IO::Socket::UNIX->new(Local  => $port,    $server = IO::Socket::UNIX->new(Local  => $port,
                                   Type   => SOCK_STREAM,                                    Type   => SOCK_STREAM,
Line 443  unless ( Line 301  unless (
        my $st=120+int(rand(240));         my $st=120+int(rand(240));
        &logthis(         &logthis(
          "<font color=blue>WARNING: ".           "<font color=blue>WARNING: ".
          "Can't make server socket $conserver ($st secs): $@</font>");           "Can't make server socket ($st secs):  .. exiting</font>");
        sleep($st);         sleep($st);
        exit;          exit; 
      };       };
      
 # -----------------------------------------------------------------------------  # -----------------------------------------------------------------------------
   
 &logthis("<font color=green>$conserver online</font>");  &logthis("<font color=green>$conserver online</font>");
Line 457  unless ( Line 315  unless (
 %inbuffer  = ();  %inbuffer  = ();
 %outbuffer = ();  %outbuffer = ();
 %ready     = ();  %ready     = ();
   %servers   = (); # To be compatible with make filevector.  indexed by
    # File ids, values are sockets.
    # note that the accept socket is omitted.
   
 tie %ready, 'Tie::RefHash';  tie %ready, 'Tie::RefHash';
   
 nonblock($server);  # nonblock($server);
 $select = IO::Select->new($server);  # $select = IO::Select->new($server);
   
 # Main loop: check reads/accepts, check writes, check ready to process  # Main loop: check reads/accepts, check writes, check ready to process
   
   status("Main loop $conserver");
 while (1) {  while (1) {
     my $client;      my $client;
     my $rv;      my $rv;
     my $data;      my $data;
   
     # check for new information on the connections we have      my $infdset; # bit vec of fd's to select on input.
   
     # anything to read or accept?      my $outfdset; # Bit vec of fd's to select on output.
     foreach $client ($select->can_read(0.1)) {  
   
         if ($client == $server) {  
             # accept a new connection  
             &status("Accept new connection: $conserver");  
             $client = $server->accept();  
             $select->add($client);  
             nonblock($client);  
         } else {  
             # read data  
             $data = '';  
             $rv   = $client->recv($data, POSIX::BUFSIZ, 0);  
   
             unless (defined($rv) && length $data) {  
                 # This would be the end of file, so close the client  
                 delete $inbuffer{$client};  
                 delete $outbuffer{$client};  
                 delete $ready{$client};  
   
                 &status("Idle $conserver");  
                 $select->remove($client);  
                 close $client;  
                 next;  
             }  
   
             $inbuffer{$client} .= $data;      $infdset = MakeFileVector(\%servers);
       $outfdset= MakeFileVector(\%outbuffer);
       vec($infdset, $server->fileno, 1) = 1;
       if($DEBUG) {
    &logthis("Adding ".$server->fileno.
    " to input select vector (listner)".
    unpack("b*",$infdset)."\n");
       }
       DoSelect(\$infdset, \$outfdset); # Wait for input.
       if($DEBUG) {
    &logthis("Doselect completed!");
    &logthis("ins = ".unpack("b*",$infdset)."\n");
    &logthis("outs= ".unpack("b*",$outfdset)."\n");
     
       }
   
             # test whether the data in the buffer or the data we      # Checkfor new connections:
             # just read means there is a complete request waiting      if (vec($infdset, $server->fileno, 1)) {
             # to be fulfilled.  If there is, set $ready{$client}   if($DEBUG) {
             # to the requests waiting to be fulfilled.      &logthis("New connection established");
             while ($inbuffer{$client} =~ s/(.*\n)//) {   }
                 push( @{$ready{$client}}, $1 );   # accept a new connection
             }   &status("Accept new connection: $conserver");
         }   $client = $server->accept();
    if($DEBUG) {
       &logthis("New client fd = ".$client->fileno."\n");
    }
    $servers{$client->fileno} = $client;
    nonblock($client);
    $client->sockopt(SO_KEEPALIVE, 1);# Enable monitoring of
                                     # connection liveness.
     }      }
       HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready);
       HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer,
    \%ready);
   # -------------------------------------------------------- Wow, connection lost
   
     # Any complete requests to process?  }
     foreach $client (keys %ready) {     
         handle($client);  
     }      }
   }
   
     # Buffers to flush?  # ------------------------------------------------------- End of make_new_child
     foreach $client ($select->can_write(1)) {  
         # Skip this client if we have nothing to say  
         next unless exists $outbuffer{$client};  
   
         $rv = $client->send($outbuffer{$client}, 0);  
         unless (defined $rv) {  
             # Whine, but move on.  
             &logthis("I was told I could write, but I can't.\n");  
             next;  
         }  
         $errno=$!;  
         if (($rv == length $outbuffer{$client}) ||  
             ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) {  
             substr($outbuffer{$client}, 0, $rv) = '';  
             delete $outbuffer{$client} unless length $outbuffer{$client};  
         } else {  
             # Couldn't write all the data, and it wasn't because  
             # it would have blocked.  Shutdown and move on.  
   
     &logthis("Dropping data with ".$errno.": ".  
                      length($outbuffer{$client}).", $rv");  
   
             delete $inbuffer{$client};  #
             delete $outbuffer{$client};  #  Make a vector of file descriptors to wait for in a select.
             delete $ready{$client};  #  parameters:
   #     \%fdhash  -reference to a hash which has IO::Socket's as indices.  
             $select->remove($client);  #                We only care about the indices, not the values.
             close($client);  #  A select vector is created from all indices of the hash.
             next;  
         }  sub MakeFileVector
   {
       my $fdhash = shift;
       my $selvar = "";
   
       foreach $socket (keys %$fdhash) {
    if($DEBUG) {
       &logthis("Adding  ".$socket.
        "to select vector. (client)\n");
    }
    vec($selvar, $socket, 1) = 1;
     }      }
       return $selvar;
 }  }
   
   
   #
   #  HandleOutput:
   #    Processes output on a buffered set of file descriptors which are
   #    ready to be read.
   #  Parameters:
   #    $selvector - Vector of file descriptors which are writable.
   #    \%sockets  - Vector of socket references indexed by socket.
   #    \%buffers  - Reference to a hash containing output buffers.
   #                 Hashes are indexed by sockets.  The file descriptors of some
   #                 of those sockets will be present in $selvector.
   #                 For each one of those, we will attempt to write the output
   #                 buffer to the socket.  Note that we will assume that
   #                 the sockets are being run in non blocking mode.
   #   \%inbufs    - Reference to hash containing input buffers.
   #   \%readys    - Reference to hash containing flags for items with complete
   #                 requests.
   #
   sub HandleOutput
   {
       my $selvector = shift;
       my $sockets   = shift;
       my $buffers   = shift;
       my $inbufs    = shift;
       my $readys    = shift;
       my $sock;
   
       if($DEBUG) {
    &logthis("HandleOutput entered\n");
       }
   
       foreach $sock (keys %$sockets) {
    my $socket = $sockets->{$sock};
    if(vec($selvector, $sock, 1)) { # $socket is writable.
       if($DEBUG) {
    &logthis("Sending $buffers->{$sock} \n");
       }
       my $rv = $socket->send($buffers->{$sock}, 0);
       $errno = $!;
       unless ($buffers->{$sock} eq "con_lost\n") {
    unless (defined $rv) { # Write failed... could be EINTR
       unless ($errno == POSIX::EINTR) {
    &logthis("Write failed on writable socket");
       } # EINTR is not an error .. just retry.
       next;
    }
    if( ($rv == length $buffers->{$sock})    ||
       ($errno == POSIX::EWOULDBLOCK)       ||
       ($errno == POSIX::EAGAIN)            || # same as above.
       ($errno == POSIX::EINTR)             || # signal during IO
       ($errno == 0)) {
       substr($buffers->{$sock}, 0, $rv)=""; # delete written part
       delete $buffers->{$sock} unless length $buffers->{$sock};
    } else {
       # For some reason the write failed with an error code
       # we didn't look for.  Shutdown the socket.
       &logthis("Unable to write data with ".$errno.": ".
        "Dropping data: ".length($buffers->{$sock}).
        ", $rv");
       #
       # kill off the buffers in the hash:
   
       delete $buffers->{$sock};
       delete $inbufs->{$sock};
       delete $readys->{$sock};
   
       close($socket); # Close the client socket.
       next;
    }
       } else { # Kludgy way to mark lond connection lost.
    &logthis(
    "<font color=red>CRITICAL lond connection lost</font>");
    status("Connection lost");
    $remotesock->shutdown(2);
    &logthis("Attempting to open a new connection");
    &openremote($conserver);
       }
      
    }
       }
   
 }  }
   #
   #   HandleInput - Deals with input on client sockets.
   #                 Each socket has an associated input buffer.
   #                 For each readable socket, the currently available
   #                 data is appended to this buffer.
   #                 If necessary, the buffer is created.
   #                 On various failures, we may shutdown the client.
   #  Parameters:
   #     $selvec   - Vector of readable sockets.
   #     \%sockets - Refers to the  Hash of sockets indexed by sockets.  
   #                 Each of these may or may not have it's fd bit set 
   #                 in the $selvec.
   #     \%ibufs   - Refers to the hash of input buffers indexed by socket.
   #     \%obufs   - Hash of output buffers indexed by socket. 
   #     \%ready   - Hash of ready flags indicating the existence of a completed
   #                 Request.
   sub HandleInput 
   {
   
       # Marshall the parameters.   Note that the hashes are actually
       # references not values.
   
       my $selvec  = shift;
       my $sockets = shift;
       my $ibufs   = shift;
       my $obufs   = shift;
       my $ready   = shift;
       my $sock;
   
 # ------------------------------------------------------- End of make_new_child      if($DEBUG) {
    &logthis("Entered HandleInput\n");
       }
       foreach $sock (keys %$sockets) {
    my $socket = $sockets->{$sock};
    if(vec($selvec, $sock, 1)) { # Socket which is readable.
   
       #  Attempt to read the data and do error management.
       my $data = '';
       my $rv = $socket->recv($data, POSIX::BUFSIZ, 0);
       if($DEBUG) {
    &logthis("Received $data from socket");
       }
       unless (defined($rv) && length $data) {
   
    # Read an end of file.. this is a disconnect from the peer.
   
    delete $sockets->{$sock};
    delete $ibufs->{$sock};
    delete $obufs->{$sock};
    delete $ready->{$sock};
   
    status("Idle");
    close $socket;
    next;
       }
       #  Append the read data to the input buffer. If the buffer
       # now contains a \n the request is complete and we can 
       # mark this in the $ready hash (one request for each \n.)
   
       $ibufs->{$sock} .= $data;
       while($ibufs->{$sock} =~ s/(.*\n)//) {
    push(@{$ready->{$sock}}, $1);
       }
       
    }
       }
       #  Now handle any requests which are ready:
   
       foreach $client (keys %ready) {
    handle($client);
       }
   }
   
   # DoSelect:  does a select with no timeout.  On signal (errno == EINTR), 
   #            the select is retried until there are items in the returned
   #            vectors.  
   #
   # Parameters:
   #   \$readvec   - Reference to a vector of file descriptors to 
   #                 check for readability.
   #   \$writevec  - Reference to a vector of file descriptors to check for
   #                 writability.
   #  On exit, the referents are modified with vectors indicating which 
   #  file handles are readable/writable.
   #
   sub DoSelect {
       my $readvec = shift;
       my $writevec= shift;
       my $outs;
       my $ins;
   
       while (1) {
    my $nfds = select( $ins = $$readvec, $outs = $$writevec, undef, undef);
    if($nfds) {
       if($DEBUG) {
    &logthis("select exited with ".$nfds." fds\n");
    &logthis("ins = ".unpack("b*",$ins).
    " readvec = ".unpack("b*",$$readvec)."\n");
    &logthis("outs = ".unpack("b*",$outs).
    " writevec = ".unpack("b*",$$writevec)."\n");
       }
       $$readvec  = $ins;
       $$writevec = $outs;
       return;
    } else {
       if($DEBUG) {
    &logthis("Select exited with no bits set in mask\n");
       }
       die "Select failed" unless $! == EINTR;
    }
       }
   }
   
 # handle($socket) deals with all pending requests for $client  # handle($socket) deals with all pending requests for $client
   #
 sub handle {  sub handle {
     # requests are in $ready{$client}      # requests are in $ready{$client}
     # send output to $outbuffer{$client}      # send output to $outbuffer{$client}
     my $client = shift;      my $client = shift;
     my $request;      my $request;
   
     foreach $request (@{$ready{$client}}) {      foreach $request (@{$ready{$client}}) {
 # ============================================================= Process request  # ============================================================= Process request
         # $request is the text of the request          # $request is the text of the request
         # put text of reply into $outbuffer{$client}          # put text of reply into $outbuffer{$client}
   # ------------------------------------------------------------ Is this the end?
    chomp($request);
    if($DEBUG) {
        &logthis("<font color=green> Request $request processing starts</font>");
           }
           if ($request eq "close_connection_exit\n") {
       &status("Request close connection");
              &logthis(
        "<font color=red>CRITICAL: Request Close Connection ... exiting</font>");
              $remotesock->shutdown(2);
              $server->close();
              exit;
           }
 # -----------------------------------------------------------------------------  # -----------------------------------------------------------------------------
         if ($request =~ /^encrypt\:/) {          if ($request =~ /^encrypt\:/) {
     my $cmd=$request;      my $cmd=$request;
Line 574  sub handle { Line 635  sub handle {
                 $encrequest.=                  $encrequest.=
                     unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8)));                      unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8)));
             }              }
             $request="enc:$cmdlength:$encrequest\n";              $request="enc:$cmdlength:$encrequest";
         }          }
         &status("Sending $conserver: $request");  # --------------------------------------------------------------- Main exchange
         print $remotesock "$request";   $answer = londtransaction($remotesock, $request, 60);
         &status("Waiting for reply from $conserver: $request");  
         $answer=<$remotesock>;   if($DEBUG) { 
         &status("Received reply: $request");      &logthis("<font color=green> Request data exchange complete");
    }
    if ($@=~/timeout/) { 
       $answer='';
       &logthis(
        "<font color=red>CRITICAL: Timeout: $request</font>");
    }  
   
   
         if ($answer) {          if ($answer) {
    if ($answer =~ /^enc/) {     if ($answer =~ /^enc/) {
                my ($cmd,$cmdlength,$encinput)=split(/:/,$answer);                 my ($cmd,$cmdlength,$encinput)=split(/:/,$answer);
Line 594  sub handle { Line 663  sub handle {
       $answer=substr($answer,0,$cmdlength);        $answer=substr($answer,0,$cmdlength);
       $answer.="\n";        $answer.="\n";
    }     }
      if($DEBUG) {
          &logthis("sending $answer to client\n");
      }
            $outbuffer{$client} .= $answer;             $outbuffer{$client} .= $answer;
         } else {          } else {
            $outbuffer{$client} .= "con_lost\n";             $outbuffer{$client} .= "con_lost\n";
         }          }
   
        &status("Completed: $request");
    if($DEBUG) {
       &logthis("<font color=green> Request processing complete</font>");
    }
 # ===================================================== Done processing request  # ===================================================== Done processing request
     }      }
     delete $ready{$client};      delete $ready{$client};
     &status("Completed $conserver: $request");  
 # -------------------------------------------------------------- End non-forker  # -------------------------------------------------------------- End non-forker
       if($DEBUG) {
    &logthis("<font color=green> requests for child handled</font>");
       }
 }  }
 # ---------------------------------------------------------- End make_new_child  # ---------------------------------------------------------- End make_new_child
 }  
   
 # nonblock($socket) puts socket into nonblocking mode  # nonblock($socket) puts socket into nonblocking mode
 sub nonblock {  sub nonblock {
Line 620  sub nonblock { Line 697  sub nonblock {
             or die "Can't make socket nonblocking: $!\n";              or die "Can't make socket nonblocking: $!\n";
 }  }
   
   
   sub openremote {
   # ---------------------------------------------------- Client to network server
   
       my $conserver=shift;
   
       &status("Opening TCP $conserver");
       my $st=120+int(rand(240)); # Sleep before opening:
   
       unless (
       $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
    PeerPort => $perlvar{'londPort'},
    Proto    => "tcp",
    Type     => SOCK_STREAM)
      ) {
   
    &logthis(
    "<font color=blue>WARNING: Couldn't connect to $conserver ($st secs): </font>");
    sleep($st);
    exit;
       };
   # ----------------------------------------------------------------- Init dialog
   
       &logthis("<font color=green>INFO Connected to $conserver, initing</font>");
       &status("Init dialogue: $conserver");
   
       $answer = londtransaction($remotesock, "init", 60);
       chomp($answer);
       $answer = londtransaction($remotesock, $answer, 60);
       chomp($answer);
   
       if ($@=~/timeout/) {
    &logthis("Timed out during init.. exiting");
    exit;
       }
   
       if ($answer ne 'ok') {
    &logthis("Init reply: >$answer<");
    my $st=120+int(rand(240));
    &logthis("<font color=blue>WARNING: Init failed ($st secs)</font>");
    sleep($st);
    exit;
       }
   
       $answer = londtransaction($remotesock,"sethost:$conserver",60);
       chomp($answer);
       if ( $answer ne 'ok') {
    &logthis('<font color="blue">WARNING: unable to specify remote host'.
    $answer.'</font>');
       }
   
       $answer = londtransaction($remotesock,"version:$VERSION",60);
       chomp($answer);
       if ($answer =~ /^version:/) {
    $remoteVERSION=(split(/:/,$answer))[1];
       } else {
    &logthis('<font color="blue">WARNING: request remote version failed :'.
    $answer.': my version is :'.$VERSION.':</font>');
       }
   
       sleep 5;
       &status("Ponging $conserver");
       $answer= londtransaction($remotesock,"pong",60);
       chomp($answer);
       if ($answer!~/^$conserver/) {
    &logthis("Pong reply: >$answer<");
       }
   # ----------------------------------------------------------- Initialize cipher
   
       &status("Initialize cipher");
       my $buildkey=londtransaction($remotesock,"ekey",60);
       my $key=$conserver.$perlvar{'lonHostID'};
       $key=~tr/a-z/A-Z/;
       $key=~tr/G-P/0-9/;
       $key=~tr/Q-Z/0-9/;
       $key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
       $key=substr($key,0,32);
       my $cipherkey=pack("H32",$key);
       if ($cipher=new IDEA $cipherkey) {
    &logthis("Secure connection initialized");
       } else {
    my $st=120+int(rand(240));
    &logthis("<font color=blue>WARNING: ".
    "Could not establish secure connection ($st secs)!</font>");
    sleep($st);
    exit;
       }
       &logthis("<font color=green> Remote open success </font>");
   }
   
   
   
   # grabs exception and records it to log before exiting
   sub catchexception {
       my ($signal)=@_;
       $SIG{QUIT}='DEFAULT';
       $SIG{__DIE__}='DEFAULT';
       chomp($signal);
       &logthis("<font color=red>CRITICAL: "
        ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through "
        ."\"$signal\" with parameter </font>");
       die("Signal abend");
   }
   
   # -------------------------------------- Routines to see if other box available
   
   #sub online {
   #    my $host=shift;
   #    &status("Pinging ".$host);
   #    my $p=Net::Ping->new("tcp",20);
   #    my $online=$p->ping("$host");
   #    $p->close();
   #    undef ($p);
   #    return $online;
   #}
   
   sub connected {
       my ($local,$remote)=@_;
       &status("Checking connection $local to $remote");
       $local=~s/\W//g;
       $remote=~s/\W//g;
   
       unless ($hostname{$local}) { return 'local_unknown'; }
       unless ($hostname{$remote}) { return 'remote_unknown'; }
   
       #unless (&online($hostname{$local})) { return 'local_offline'; }
   
       my $ua=new LWP::UserAgent;
       
       my $request=new HTTP::Request('GET',
         "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote);
   
       my $response=$ua->request($request);
   
       unless ($response->is_success) { return 'local_error'; }
   
       my $reply=$response->content;
       $reply=(split("\n",$reply))[0];
       $reply=~s/\W//g;
       if ($reply ne $remote) { return $reply; }
       return 'ok';
   }
   
   
   
   sub hangup {
       foreach (keys %children) {
           $wasserver=$children{$_};
           &status("Closing $wasserver");
           &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));
           &status("Kill PID $_ for $wasserver");
    kill ('INT',$_);
       }
   }
   
   sub HUNTSMAN {                      # signal handler for SIGINT
       local($SIG{CHLD}) = 'IGNORE';   # we're going to kill our children
       &hangup();
       my $execdir=$perlvar{'lonDaemons'};
       unlink("$execdir/logs/lonc.pid");
       &logthis("<font color=red>CRITICAL: Shutting down</font>");
       exit;                           # clean up with dignity
   }
   
   sub HUPSMAN {                      # signal handler for SIGHUP
       local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children
       &hangup();
       &logthis("<font color=red>CRITICAL: Restarting</font>");
       my $execdir=$perlvar{'lonDaemons'};
       unlink("$execdir/logs/lonc.pid");
       exec("$execdir/lonc");         # here we go again
   }
   
   sub checkchildren {
       &initnewstatus();
       &logstatus();
       &logthis('Going to check on the children');
       foreach (sort keys %children) {
    sleep 1;
           unless (kill 'USR1' => $_) {
       &logthis ('<font color=red>CRITICAL: Child '.$_.' is dead</font>');
               &logstatus($$.' is dead');
           } 
       }
   }
   
   sub USRMAN {
       &logthis("USR1: Trying to establish connections again");
       #
       #  It is really important not to just clear the childatt hash or we will
       #  lose all memory of the children.  What we really want to do is this:
       #  For each index where childatt is >= $childmaxattempts
       #  Zero the associated counter and do a make_child for the host.
       #  Regardles, the childatt entry is zeroed:
       my $host;
       foreach $host (keys %childatt) {
    if ($childatt{$host} >= $childmaxattempts) {
       $childatt{$host} = 0;
       &logthis("<font color=green>INFO: Restarting child for server: "
        .$host."</font>\n");
       make_new_child($host);
    }
    else {
       $childatt{$host} = 0;
    }
       }
       &checkchildren(); # See if any children are still dead...
   }
   
   # -------------------------------------------------- Non-critical communication
   sub subreply { 
    my ($cmd,$server)=@_;
    my $answer='';
    if ($server ne $perlvar{'lonHostID'}) { 
       my $peerfile="$perlvar{'lonSockDir'}/$server";
       my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
                                         Type    => SOCK_STREAM,
                                         Timeout => 10)
          or return "con_lost";
   
   
       $answer = londtransaction($sclient, $cmd, 10);
   
       if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; }
       $SIG{ALRM}='DEFAULT';
       $SIG{__DIE__}=\&catchexception;
    } else { $answer='self_reply'; }
    return $answer;
   }
   
   # --------------------------------------------------------------------- Logging
   
   sub logthis {
       my $message=shift;
       my $execdir=$perlvar{'lonDaemons'};
       my $fh=IO::File->new(">>$execdir/logs/lonc.log");
       my $now=time;
       my $local=localtime($now);
       $lastlog=$local.': '.$message;
       print $fh "$local ($$) [$conserver] [$status]: $message\n";
   }
   
   #--------------------------------------  londtransaction:
   #  
   #  Performs a transaction with lond with timeout support.
   #    result = londtransaction(socket,request,timeout)
   #
   sub londtransaction {
       my ($socket, $request, $tmo) = @_;
   
       if($DEBUG) {
    &logthis("londtransaction request: $request");
       }
   
       # Set the signal handlers: ALRM for timeout and disble the others.
   
       $SIG{ALRM} = sub { die "timeout" };
       $SIG{__DIE__} = 'DEFAULT';
       
       # Disable all but alarm so that only that can interupt the
       # send /receive.
       #
       my $sigset = POSIX::SigSet->new(QUIT, USR1, HUP, INT, TERM);
       my $priorsigs = POSIX::SigSet->new;
       unless (defined sigprocmask(SIG_BLOCK, $sigset, $priorsigs)) {
    &logthis("<font color=red> CRITICAL -- londtransaction ".
    "failed to block signals </font>");
    die "could not block signals in londtransaction";
       }
       $answer = '';
       #
       #  Send request to lond.
       #
       eval { 
    alarm($tmo);
    print $socket "$request\n";
    alarm(0);
       };
       #  If request didn't timeout, try for the response.
       #
   
       if ($@!~/timeout/) {
    eval {
       alarm($tmo);
       $answer = <$socket>;
       if($DEBUG) {
    &logthis("Received $answer in londtransaction");
       }
       alarm(0);
    };
       } else {
    &logthis("lonc - suiciding on send Timeout");
    die("lonc - suiciding on send Timeout");
       }
       if ($@ =~ /timeout/) {
    &logthis("lonc - suiciding on read Timeout");
    die("lonc - suiciding on read Timeout");
       }
       #
       # Restore the initial sigmask set.
       #
       unless (defined sigprocmask(SIG_UNBLOCK, $priorsigs)) {
    &logthis("<font color=red> CRITICAL -- londtransaction ".
    "failed to re-enable signal processing. </font>");
    die "londtransaction failed to re-enable signals";
       }
       #
       # go back to the prior handler set.
       #
       $SIG{ALRM} = 'DEFAULT';
       $SIG{__DIE__} = \&cathcexception;
   
       #    chomp $answer;
       if ($DEBUG) {
    &logthis("Returning $answer in londtransaction");
       }
       return $answer;
   
   }
   
   sub logperm {
       my $message=shift;
       my $execdir=$perlvar{'lonDaemons'};
       my $now=time;
       my $local=localtime($now);
       my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
       print $fh "$now:$message:$local\n";
   }
   # ------------------------------------------------------------------ Log status
   
   sub logstatus {
       my $docdir=$perlvar{'lonDocRoot'};
       my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt");
       print $fh $$."\t".$conserver."\t".$status."\t".$lastlog."\n";
   }
   
   sub initnewstatus {
       my $docdir=$perlvar{'lonDocRoot'};
       my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt");
       my $now=time;
       my $local=localtime($now);
       print $fh "LONC status $local - parent $$\n\n";
   }
   
   # -------------------------------------------------------------- Status setting
   
   sub status {
       my $what=shift;
       my $now=time;
       my $local=localtime($now);
       $status=$local.': '.$what;
       $0='lonc: '.$what.' '.$local;
   }
   
   
   
   # ----------------------------------- POD (plain old documentation, CPAN style)
   
   =head1 NAME
   
   lonc - LON TCP-MySQL-Server Daemon for handling database requests.
   
   =head1 SYNOPSIS
   
   Usage: B<lonc>
   
   Should only be run as user=www.  This is a command-line script which
   is invoked by B<loncron>.  There is no expectation that a typical user
   will manually start B<lonc> from the command-line.  (In other words,
   DO NOT START B<lonc> YOURSELF.)
   
   =head1 OVERVIEW
   
   =head2 Physical Overview
   
   =begin latex 
   
   \begin{figure} 
     \begin{center}
       \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram}
     \end{center}
     \caption{\label{Overview_Of_Network}Overview of Network}
   \end{figure}
   
   =end latex
   
   Physically, the Network consists of relatively inexpensive
   upper-PC-class server machines which are linked through the commodity
   internet in a load-balancing, dynamically content-replicating and
   failover-secure way.
   
   All machines in the Network are connected with each other through
   two-way persistent TCP/IP connections. Clients (B<B>, B<F>, B<G> and
   B<H> in Fig. Overview of Network) connect to the servers via standard
   HTTP. There are two classes of servers, B<Library Servers> (B<A> and
   B<E> in Fig. Overview of Network) and B<Access Servers> (B<C>, B<D>,
   B<I> and B<J> in Fig. Overview of Network).
   
   B<Library Servers> X<library server> X<server, library> are used to
   store all personal records of a set of users, and are responsible for
   their initial authentication when a session is opened on any server in
   the Network. For Authors, Library Servers also hosts their
   construction area and the authoritative copy of the current and
   previous versions of every resource that was published by that
   author. Library servers can be used as backups to host sessions when
   all access servers in the Network are overloaded. Otherwise, for
   learners, access servers are used to host the sessions. Library
   servers need to have strong I/O capabilities.
   
   B<Access Servers> X<access server> X<server, access> provide LON-CAPA
   service to users, using the library servers as their data source. The
   network is designed so that the number of concurrent sessions can be
   increased over a wide range by simply adding additional access servers
   before having to add additional library servers. Preliminary tests
   showed that a library server could handle up to 10 access servers
   fully parallel. Access servers can generally be cheaper hardware then
   library servers require.
   
   The Network is divided into B<domains> X<domain>, which are logical
   boundaries between participating institutions. These domains can be
   used to limit the flow of personal user information across the
   network, set access privileges and enforce royalty schemes. LON-CAPA
   domains bear no relationship to any other domain, including domains
   used by the DNS system; LON-CAPA domains may be freely configured in
   any manner that suits your use pattern.
   
   =head2 Example Transactions
   
   Fig. Overview of Network also depicts examples for several kinds of
   transactions conducted across the Network.
   
   An instructor at client B<B> modifies and publishes a resource on her
   Home Server B<A>. Server B<A> has a record of all server machines
   currently subscribed to this resource, and replicates it to servers
   B<D> and B<I>. However, server B<D> is currently offline, so the
   update notification gets buffered on B<A> until B<D> comes online
   again. Servers B<C> and B<J> are currently not subscribed to this
   resource.
   
   Learners B<F> and B<G> have open sessions on server B<I>, and the new
   resource is immediately available to them.
   
   Learner B<H> tries to connect to server B<I> for a new session,
   however, the machine is not reachable, so he connects to another
   Access Server B<J> instead. This server currently does not have all
   necessary resources locally present to host learner B<H>, but
   subscribes to them and replicates them as they are accessed by B<H>.
   
   Learner B<H> solves a problem on server B<J>. Library Server B<E> is
   B<H>'s Home Server, so this information gets forwarded to B<E>, where
   the records of H are updated.
   
   =head2 lond, lonc, and lonnet
   
   =begin latex
   
   \begin{figure}
   \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram2}
     \caption{\label{Overview_Of_Network_Communication}Overview of
   Network Communication} \end{figure}
   
   =end latex
   
   Fig. Overview of Network Communication elaborates on the details of
   this network infrastructure. It depicts three servers (B<A>, B<B> and
   B<C>) and a client who has a session on server B<C>.
   
   As B<C> accesses different resources in the system, different
   handlers, which are incorporated as modules into the child processes
   of the web server software, process these requests.
   
   Our current implementation uses C<mod_perl> inside of the Apache web
   server software. As an example, server B<C> currently has four active
   web server software child processes. The chain of handlers dealing
   with a certain resource is determined by both the server content
   resource area (see below) and the MIME type, which in turn is
   determined by the URL extension. For most URL structures, both an
   authentication handler and a content handler are registered.
   
   Handlers use a common library C<lonnet> X<lonnet> to interact with
   both locally present temporary session data and data across the server
   network. For example, lonnet provides routines for finding the home
   server of a user, finding the server with the lowest loadavg, sending
   simple command-reply sequences, and sending critical messages such as
   a homework completion, etc. For a non-critical message, the routines
   reply with a simple "connection lost" if the message could not be
   delivered. For critical messages, lonnet tries to re-establish
   connections, re-send the command, etc. If no valid reply could be
   received, it answers "connection deferred" and stores the message in
   buffer space to be sent at a later point in time. Also, failed
   critical messages are logged.
   
   The interface between C<lonnet> and the Network is established by a
   multiplexed UNIX domain socket, denoted B<DS> in Fig. Overview of
   Network Communication. The rationale behind this rather involved
   architecture is that httpd processes (Apache children) dynamically
   come and go on the timescale of minutes, based on workload and number
   of processed requests. Over the lifetime of an httpd child, however,
   it has to establish several hundred connections to several different
   servers in the Network.
   
   On the other hand, establishing a TCP/IP connection is resource
   consuming for both ends of the line, and to optimize this connectivity
   between different servers, connections in the Network are designed to
   be persistent on the timescale of months, until either end is
   rebooted. This mechanism will be elaborated on below.
   
   =begin latex
   
   \begin{figure}
   \begin{lyxcode}
   msul1:msu:library:zaphod.lite.msu.edu:35.8.63.51
   
   msua1:msu:access:agrajag.lite.msu.edu:35.8.63.68
   
   msul2:msu:library:frootmig.lite.msu.edu:35.8.63.69
   
   msua2:msu:access:bistromath.lite.msu.edu:35.8.63.67
   
   hubl14:hub:library:hubs128-pc-14.cl.msu.edu:35.8.116.34
   
   hubl15:hub:library:hubs128-pc-15.cl.msu.edu:35.8.116.35
   
   hubl16:hub:library:hubs128-pc-16.cl.msu.edu:35.8.116.36
   
   huba20:hub:access:hubs128-pc-20.cl.msu.edu:35.8.116.40
   
   huba21:hub:access:hubs128-pc-21.cl.msu.edu:35.8.116.41
   
   huba22:hub:access:hubs128-pc-22.cl.msu.edu:35.8.116.42
   
   huba23:hub:access:hubs128-pc-23.cl.msu.edu:35.8.116.43
   
   hubl25:other:library:hubs128-pc-25.cl.msu.edu:35.8.116.45
   
   huba27:other:access:hubs128-pc-27.cl.msu.edu:35.8.116.47
   \end{lyxcode}
   
   \caption{\label{Example_Of_hosts.tab}Example of Hosts Lookup table\texttt{/home/httpd/lonTabs/hosts.tab}} 
   \end{figure}
   
   =end latex
   
   Establishing a connection to a UNIX domain socket is far less resource
   consuming than the establishing of a TCP/IP connection. C<lonc>
   X<lonc> is a proxy daemon that forks off a child for every server in
   the Network. Which servers are members of the Network is determined by
   a lookup table, such as the one in Fig. Examples of Hosts. In order,
   the entries denote an internal name for the server, the domain of the
   server, the type of the server, the host name and the IP address.
   
   The C<lonc> parent process maintains the population and listens for
   signals to restart or shutdown, as well as I<USR1>. Every child
   establishes a multiplexed UNIX domain socket for its server and opens
   a TCP/IP connection to the lond daemon (discussed below) on the remote
   machine, which it keeps alive. If the connection is interrupted, the
   child dies, whereupon the parent makes several attempts to fork
   another child for that server.
   
   When starting a new child (a new connection), first an init-sequence
   is carried out, which includes receiving the information from the
   remote C<lond> which is needed to establish the 128-bit encryption key
   - the key is different for every connection. Next, any buffered
   (delayed) messages for the server are sent.
   
   In normal operation, the child listens to the UNIX socket, forwards
   requests to the TCP connection, gets the reply from C<lond>, and sends
   it back to the UNIX socket. Also, C<lonc> takes care to the encryption
   and decryption of messages.
   
   C<lond> X<lond> is the remote end of the TCP/IP connection and acts as
   a remote command processor. It receives commands, executes them, and
   sends replies. In normal operation, a C<lonc> child is constantly
   connected to a dedicated C<lond> child on the remote server, and the
   same is true vice versa (two persistent connections per server
   combination).
   
   lond listens to a TCP/IP port (denoted B<P> in Fig. Overview of
   Network Communication) and forks off enough child processes to have
   one for each other server in the network plus two spare children. The
   parent process maintains the population and listens for signals to
   restart or shutdown. Client servers are authenticated by IP.
   
   When a new client server comes online, C<lond> sends a signal I<USR1>
   to lonc, whereupon C<lonc> tries again to reestablish all lost
   connections, even if it had given up on them before - a new client
   connecting could mean that that machine came online again after an
   interruption.
   
   The gray boxes in Fig. Overview of Network Communication denote the
   entities involved in an example transaction of the Network. The Client
   is logged into server B<C>, while server B<B> is her Home
   Server. Server B<C> can be an access server or a library server, while
   server B<B> is a library server. She submits a solution to a homework
   problem, which is processed by the appropriate handler for the MIME
   type "problem". Through C<lonnet>, the handler writes information
   about this transaction to the local session data. To make a permanent
   log entry, C<lonnet> establishes a connection to the UNIX domain
   socket for server B<B>. C<lonc> receives this command, encrypts it,
   and sends it through the persistent TCP/IP connection to the TCP/IP
   port of the remote C<lond>. C<lond> decrypts the command, executes it
   by writing to the permanent user data files of the client, and sends
   back a reply regarding the success of the operation. If the operation
   was unsuccessful, or the connection would have broken down, C<lonc>
   would write the command into a FIFO buffer stack to be sent again
   later. C<lonc> now sends a reply regarding the overall success of the
   operation to C<lonnet> via the UNIX domain port, which is eventually
   received back by the handler.
   
   =head2 Dynamic Resource Replication
   
   Since resources are assembled into higher order resources simply by
   reference, in principle it would be sufficient to retrieve them from
   the respective Home Servers of the authors. However, there are several
   problems with this simple approach: since the resource assembly
   mechanism is designed to facilitate content assembly from a large
   number of widely distributed sources, individual sessions would depend
   on a large number of machines and network connections to be available,
   thus be rather fragile. Also, frequently accessed resources could
   potentially drive individual machines in the network into overload
   situations.
   
   Finally, since most resources depend on content handlers on the Access
   Servers to be served to a client within the session context, the raw
   source would first have to be transferred across the Network from the
   respective Library Server to the Access Server, processed there, and
   then transferred on to the client.
   
   =begin latex
   
   \begin{figure}
   \includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Request}
     \caption{\label{Dynamic_Replication}Dynamic Replication} 
   \end{figure}
   
   =end latex
   
   To enable resource assembly in a reliable and scalable way, a dynamic
   resource replication scheme was developed. Fig. "Dynamic Replication"
   shows the details of this mechanism.
   
   Anytime a resource out of the resource space is requested, a handler
   routine is called which in turn calls the replication routine. As a
   first step, this routines determines whether or not the resource is
   currently in replication transfer (Step B<D1a>). During replication
   transfer, the incoming data is stored in a temporary file, and Step
   B<D1a> checks for the presence of that file. If transfer of a resource
   is actively going on, the controlling handler receives an error
   message, waits for a few seconds, and then calls the replication
   routine again. If the resource is still in transfer, the client will
   receive the message "Service currently not available".
   
   In the next step (Step B<D1b>), the replication routine checks if the
   URL is locally present. If it is, the replication routine returns OK
   to the controlling handler, which in turn passes the request on to the
   next handler in the chain.
   
   If the resource is not locally present, the Home Server of the
   resource author (as extracted from the URL) is determined (Step
   B<D2>). This is done by contacting all library servers in the author?s
   domain (as determined from the lookup table, see Fig. 1.1.2B). In Step
   B<D2b> a query is sent to the remote server whether or not it is the
   Home Server of the author (in our current implementation, an
   additional cache is used to store already identified Home Servers (not
   shown in the figure)). In Step B<D2c>, the remote server answers the
   query with True or False. If the Home Server was found, the routine
   continues, otherwise it contacts the next server (Step D2a). If no
   server could be found, a "File not Found" error message is issued. In
   our current implementation, in this step the Home Server is also
   written into a cache for faster access if resources by the same author
   are needed again (not shown in the figure).
   
   =begin latex
   
   \begin{figure}
   \includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Change}
     \caption{\label{Dynamic_Replication_Change}Dynamic Replication: Change} \end{figure}
   
   =end latex
   
   In Step B<D3a>, the routine sends a subscribe command for the URL to
   the Home Server of the author. The Home Server first determines if the
   resource is present, and if the access privileges allow it to be
   copied to the requesting server (B<D3b>). If this is true, the
   requesting server is added to the list of subscribed servers for that
   resource (Step B<D3c>). The Home Server will reply with either OK or
   an error message, which is determined in Step D4. If the remote
   resource was not present, the error message "File not Found" will be
   passed on to the client, if the access was not allowed, the error
   message "Access Denied" is passed on. If the operation succeeded, the
   requesting server sends an HTTP request for the resource out of the
   C</raw> server content resource area of the Home Server.
   
   The Home Server will then check if the requesting server is part of
   the network, and if it is subscribed to the resource (Step B<D5b>). If
   it is, it will send the resource via HTTP to the requesting server
   without any content handlers processing it (Step B<D5c>). The
   requesting server will store the incoming data in a temporary data
   file (Step B<D5a>) - this is the file that Step B<D1a> checks for. If
   the transfer could not complete, and appropriate error message is sent
   to the client (Step B<D6>). Otherwise, the transferred temporary file
   is renamed as the actual resource, and the replication routine returns
   OK to the controlling handler (Step B<D7>).
   
   Fig. "Dynamic Replication: Change" depicts the process of modifying a
   resource. When an author publishes a new version of a resource, the
   Home Server will contact every server currently subscribed to the
   resource (Step B<U1>), as determined from the list of subscribed
   servers for the resource generated in Step B<D3c>. The subscribing
   servers will receive and acknowledge the update message (Step
   B<U1c>). The update mechanism finishes when the last subscribed server
   has been contacted (messages to unreachable servers are buffered).
   
   Each subscribing server will check if the resource in question had
   been accessed recently, that is, within a configurable amount of time
   (Step B<U2>).
   
   If the resource had not been accessed recently, the local copy of the
   resource is deleted (Step B<U3a>) and an unsubscribe command is sent
   to the Home Server (Step B<U3b>). The Home Server will check if the
   server had indeed originally subscribed to the resource (Step B<U3c>)
   and then delete the server from the list of subscribed servers for the
   resource (Step B<U3d>).
   
   If the resource had been accessed recently, the modified resource will
   be copied over using the same mechanism as in Step B<D5a> through
   B<D7>, which represents steps Steps B<U4a> through B<U6> in the
   replication figure.
   
   =head2 Load Balancing 
   
   X<load balancing>C<lond> provides a function to query the server's current loadavg. As
   a configuration parameter, one can determine the value of loadavg,
   which is to be considered 100%, for example, 2.00.
   
   Access servers can have a list of spare access servers,
   C</home/httpd/lonTabs/spares.tab>, to offload sessions depending on
   own workload. This check happens is done by the login handler. It
   re-directs the login information and session to the least busy spare
   server if itself is overloaded. An additional round-robin IP scheme
   possible. See Fig. "Load Balancing Sample" for an example of a
   load-balancing scheme.
   
   =begin latex
   
   \begin{figure}
   \includegraphics[width=0.75\paperwidth,keepaspectratio]{Load_Balancing_Example}
     \caption{\label{Load_Balancing_Example}Load Balancing Example} \end{figure}
   
   =end latex
   
   =head1 DESCRIPTION
   
   Provides persistent TCP connections to the other servers in the network
   through multiplexed domain sockets
   
   B<lonc> forks off children processes that correspond to the other servers
   in the network.  Management of these processes can be done at the
   parent process level or the child process level.
   
   After forking off the children, B<lonc> the B<parent> executes a main
   loop which simply waits for processes to exit.  As a process exits, a
   new process managing a link to the same peer as the exiting process is
   created.
   
   B<logs/lonc.log> is the location of log messages.
   
   The process management is now explained in terms of linux shell commands,
   subroutines internal to this code, and signal assignments:
   
   =over 4
   
   =item *
   
   PID is stored in B<logs/lonc.pid>
   
   This is the process id number of the parent B<lonc> process.
   
   =item *
   
   SIGTERM and SIGINT
   
   Parent signal assignment:
    $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
   
   Child signal assignment:
    $SIG{INT}  = 'DEFAULT'; (and SIGTERM is DEFAULT also)
   (The child dies and a SIGALRM is sent to parent, awaking parent from slumber
    to restart a new child.)
   
   Command-line invocations:
    B<kill> B<-s> SIGTERM I<PID>
    B<kill> B<-s> SIGINT I<PID>
   
   Subroutine B<HUNTSMAN>:
    This is only invoked for the B<lonc> parent I<PID>.
   This kills all the children, and then the parent.
   The B<lonc.pid> file is cleared.
   
   =item *
   
   SIGHUP
   
   Current bug:
    This signal can only be processed the first time
   on the parent process.  Subsequent SIGHUP signals
   have no effect.
   
   Parent signal assignment:
    $SIG{HUP}  = \&HUPSMAN;
   
   Child signal assignment:
    none (nothing happens)
   
   Command-line invocations:
    B<kill> B<-s> SIGHUP I<PID>
   
   Subroutine B<HUPSMAN>:
    This is only invoked for the B<lonc> parent I<PID>,
   This kills all the children, and then the parent.
   The B<lonc.pid> file is cleared.
   
   =item *
   
   SIGUSR1
   
   Parent signal assignment:
    $SIG{USR1} = \&USRMAN;
   
   Child signal assignment:
    $SIG{USR1}= \&logstatus;
   
   Command-line invocations:
    B<kill> B<-s> SIGUSR1 I<PID>
   
   Subroutine B<USRMAN>:
    When invoked for the B<lonc> parent I<PID>,
   SIGUSR1 is sent to all the children, and the status of
   each connection is logged.
   
   
   =back
   
   =cut

Removed from v.1.18  
changed lines
  Added in v.1.54


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>