File:  [LON-CAPA] / loncom / Attic / lonc
Revision 1.56: download - view: text, annotated - select for diffs
Fri Oct 24 16:36:14 2003 UTC (20 years, 6 months ago) by albertel
Branches: MAIN
CVS tags: version_2_3_X, version_2_3_2, version_2_3_1, version_2_3_0, version_2_2_X, version_2_2_99_1, version_2_2_99_0, version_2_2_2, version_2_2_1, version_2_2_0, version_2_1_X, version_2_1_99_3, version_2_1_99_2, version_2_1_99_1, version_2_1_99_0, version_2_1_3, version_2_1_2, version_2_1_1, version_2_1_0, version_2_0_X, version_2_0_99_1, version_2_0_2, version_2_0_1, version_2_0_0, version_1_99_3, version_1_99_2, version_1_99_1_tmcc, version_1_99_1, version_1_99_0_tmcc, version_1_99_0, version_1_3_X, version_1_3_3, version_1_3_2, version_1_3_1, version_1_3_0, version_1_2_X, version_1_2_99_1, version_1_2_99_0, version_1_2_1, version_1_2_0, version_1_1_X, version_1_1_99_5, version_1_1_99_4, version_1_1_99_3, version_1_1_99_2, version_1_1_99_1, version_1_1_99_0, version_1_1_3, version_1_1_2, version_1_1_1, version_1_1_0, version_1_0_99_3, version_1_0_99_2, version_1_0_99_1, version_1_0_99, HEAD
- make connections using hostname

    1: #!/usr/bin/perl
    2: 
    3: # The LearningOnline Network
    4: # lonc - LON TCP-Client Domain-Socket-Server
    5: # provides persistent TCP connections to the other servers in the network
    6: # through multiplexed domain sockets
    7: #
    8: # $Id: lonc,v 1.56 2003/10/24 16:36:14 albertel Exp $
    9: #
   10: # Copyright Michigan State University Board of Trustees
   11: #
   12: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   13: #
   14: # LON-CAPA is free software; you can redistribute it and/or modify
   15: # it under the terms of the GNU General Public License as published by
   16: # the Free Software Foundation; either version 2 of the License, or
   17: # (at your option) any later version.
   18: #
   19: # LON-CAPA is distributed in the hope that it will be useful,
   20: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   21: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   22: # GNU General Public License for more details.
   23: #
   24: # You should have received a copy of the GNU General Public License
   25: # along with LON-CAPA; if not, write to the Free Software
   26: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   27: #
   28: # /home/httpd/html/adm/gpl.txt
   29: #
   30: # http://www.lon-capa.org/
   31: #
   32: # PID in subdir logs/lonc.pid
   33: # kill kills
   34: # HUP restarts
   35: # USR1 tries to open connections again
   36: 
   37: # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19,
   38: # 10/8,10/9,10/15,11/18,12/22,
   39: # 2/8,7/25 Gerd Kortemeyer
   40: # 12/05 Gerd Kortemeyer
   41: # YEAR=2001
   42: # 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer
   43: # YEAR=2002
   44: # 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer
   45: # 3/07/02 Ron Fox 
   46: # based on nonforker from Perl Cookbook
   47: # - server who multiplexes without forking
   48: 
   49: use lib '/home/httpd/lib/perl/';
   50: use LONCAPA::Configuration;
   51: 
   52: use POSIX;
   53: use IO::Socket;
   54: use IO::Select;
   55: use IO::File;
   56: use Socket;
   57: use Fcntl;
   58: use Tie::RefHash;
   59: use Crypt::IDEA;
   60: #use Net::Ping;
   61: use LWP::UserAgent();
   62: 
   63: $status='';
   64: $lastlog='';
   65: $conserver='SHELL';
   66: $DEBUG = 0;			# Set to 1 for annoyingly complete logs.
   67: $VERSION='$Revison$'; #' stupid emacs
   68: $remoteVERSION;
   69: # -------------------------------- Set signal handlers to record abnormal exits
   70: 
   71: &status("Init exception handlers");
   72: $SIG{QUIT}=\&catchexception;
   73: $SIG{__DIE__}=\&catchexception;
   74: 
   75: # ---------------------------------- Read loncapa_apache.conf and loncapa.conf
   76: &status("Read loncapa.conf and loncapa_apache.conf");
   77: my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
   78: my %perlvar=%{$perlvarref};
   79: undef $perlvarref;
   80: 
   81: # ----------------------------- Make sure this process is running from user=www
   82: &status("Check user ID");
   83: my $wwwid=getpwnam('www');
   84: if ($wwwid!=$<) {
   85:    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
   86:    $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
   87:    system("echo 'User ID mismatch.  lonc must be run as user www.' |\
   88:  mailto $emailto -s '$subj' > /dev/null");
   89:    exit 1;
   90: }
   91: 
   92: # --------------------------------------------- Check if other instance running
   93: 
   94: my $pidfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
   95: 
   96: if (-e $pidfile) {
   97:    my $lfh=IO::File->new("$pidfile");
   98:    my $pide=<$lfh>;
   99:    chomp($pide);
  100:    if (kill 0 => $pide) { die "already running"; }
  101: }
  102: 
  103: # ------------------------------------------------------------- Read hosts file
  104: 
  105: open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file";
  106: 
  107: while ($configline=<CONFIG>) {
  108:     my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
  109:     chomp($ip);
  110:     if ($ip) {
  111:      $hostip{$id}=$ip;
  112:      $hostname{$id}=$name;
  113:     }
  114: }
  115: 
  116: close(CONFIG);
  117: 
  118: # -------------------------------------------------------- Routines for forking
  119: 
  120: %children               = ();       # keys are current child process IDs,
  121:                                     # values are hosts
  122: %childpid               = ();       # the other way around
  123: 
  124: %childatt               = ();       # number of attempts to start server
  125:                                     # for ID
  126: 
  127: $childmaxattempts=15;
  128: 
  129: # ---------------------------------------------------- Fork once and dissociate
  130: &status("Fork and dissociate");
  131: $fpid=fork;
  132: exit if $fpid;
  133: die "Couldn't fork: $!" unless defined ($fpid);
  134: 
  135: POSIX::setsid() or die "Can't start new session: $!";
  136: 
  137: $conserver='PARENT';
  138: 
  139: # ------------------------------------------------------- Write our PID on disk
  140: &status("Write PID");
  141: $execdir=$perlvar{'lonDaemons'};
  142: open (PIDSAVE,">$execdir/logs/lonc.pid");
  143: print PIDSAVE "$$\n";
  144: close(PIDSAVE);
  145: &logthis("<font color=red>CRITICAL: ---------- Starting ----------</font>");
  146: 
  147: # ----------------------------- Ignore signals generated during initial startup
  148: $SIG{HUP}=$SIG{USR1}='IGNORE';
  149: # ------------------------------------------------------- Now we are on our own
  150:     
  151: # Fork off our children, one for every server
  152: 
  153: &status("Forking ...");
  154: 
  155: foreach $thisserver (keys %hostip) {
  156:     #if (&online($hostname{$thisserver})) {
  157:        make_new_child($thisserver);
  158:     #}
  159: }
  160: 
  161: &logthis("Done starting initial servers");
  162: # ----------------------------------------------------- Install signal handlers
  163: 
  164: 
  165: $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
  166: $SIG{HUP}  = \&HUPSMAN;
  167: $SIG{USR1} = \&USRMAN;
  168: 
  169: # And maintain the population.
  170: while (1) {
  171:     my $deadpid = wait;		# Wait for the next child to die.
  172:                                 # See who died and start new one
  173:                                 # or a signal (e.g. USR1 for restart).
  174:                                 # if a signal, the wait will fail
  175:                                 # This is ordinarily detected by
  176:                                 # checking for the existence of the
  177:                                 # pid index inthe children hash since
  178:                                 # the return value from a failed wait is -1
  179:                                 # which is an impossible PID.
  180:     &status("Woke up");
  181:     my $skipping='';
  182: 
  183:     if(exists($children{$deadpid})) {
  184: 
  185: 	$thisserver = $children{$deadpid}; # Look name of dead guy's peer.
  186: 
  187: 	delete($children{$deadpid}); # Get rid of dead hash entry.
  188: 
  189: 	if($childatt{$thisserver} < $childmaxattempts) {
  190: 	    $childatt{$thisserver}++;
  191: 	    &logthis(
  192: 	       "<font color=yellow>INFO: Trying to reconnect for $thisserver "
  193:             ."($childatt{$thisserver} of $childmaxattempts attempts)</font>"); 
  194: 	    make_new_child($thisserver);
  195: 	
  196: 	}
  197: 	else {
  198: 	    $skipping .= $thisserver.' ';
  199: 	}
  200: 	if($skipping) {
  201: 	    &logthis("<font color=blue>WARNING: Skipped $skipping</font>");
  202:   
  203: 	}
  204:     }
  205: 
  206: }
  207: 
  208: 
  209: 
  210: sub make_new_child {
  211:    
  212:     $newserver=shift;
  213:     my $pid;
  214:     my $sigset;
  215:     &logthis("Attempting to start child for server $newserver");
  216:     # block signal for fork
  217:     $sigset = POSIX::SigSet->new(SIGINT);
  218:     sigprocmask(SIG_BLOCK, $sigset)
  219:         or die "Can't block SIGINT for fork: $!\n";
  220:     
  221:     die "fork: $!" unless defined ($pid = fork);
  222:     
  223:     if ($pid) {
  224:         # Parent records the child's birth and returns.
  225:         sigprocmask(SIG_UNBLOCK, $sigset)
  226:             or die "Can't unblock SIGINT for fork: $!\n";
  227:         $children{$pid} = $newserver;
  228:         $childpid{$newserver} = $pid;
  229:         return;
  230:     } else {
  231:         $conserver=$newserver;
  232:         # Child can *not* return from this subroutine.
  233:         $SIG{INT} = 'DEFAULT';      # make SIGINT kill us as it did before
  234:         $SIG{USR1}= \&logstatus;
  235:    
  236:         # unblock signals
  237:         sigprocmask(SIG_UNBLOCK, $sigset)
  238:             or die "Can't unblock SIGINT for fork: $!\n";
  239: 
  240: # ----------------------------- This is the modified main program of non-forker
  241: 
  242: $port = "$perlvar{'lonSockDir'}/$conserver";
  243: 
  244: unlink($port);
  245: 
  246: # -------------------------------------------------------------- Open other end
  247: 
  248: &openremote($conserver);
  249: 	&logthis("<font color=green> Connection to $conserver open </font>");
  250: # ----------------------------------------- We're online, send delayed messages
  251:     &status("Checking for delayed messages");
  252: 
  253:     my @allbuffered;
  254:     my $path="$perlvar{'lonSockDir'}/delayed";
  255:     opendir(DIRHANDLE,$path);
  256:     @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE;
  257:     closedir(DIRHANDLE);
  258:     my $dfname;
  259:     foreach (sort @allbuffered) {
  260:         &status("Sending delayed: $_");
  261:         $dfname="$path/$_";
  262:         if($DEBUG) { &logthis('Sending '.$dfname); }
  263:         my $wcmd;
  264:         {
  265:          my $dfh=IO::File->new($dfname);
  266:          $cmd=<$dfh>;
  267:         }
  268:         chomp($cmd);
  269:         my $bcmd=$cmd;
  270:         if ($cmd =~ /^encrypt\:/) {
  271: 	    my $rcmd=$cmd;
  272:             $rcmd =~ s/^encrypt\://;
  273:             chomp($rcmd);
  274:             my $cmdlength=length($rcmd);
  275:             $rcmd.="         ";
  276:             my $encrequest='';
  277:             for (my $encidx=0;$encidx<=$cmdlength;$encidx+=8) {
  278:                 $encrequest.=
  279:                     unpack("H16",$cipher->encrypt(substr($rcmd,$encidx,8)));
  280:             }
  281:             $cmd="enc:$cmdlength:$encrequest\n";
  282:         }
  283: 	$answer = londtransaction($remotesock, $cmd, 60);
  284: 	chomp($answer);
  285: 
  286:         if (($answer ne '') && ($@!~/timeout/)) {
  287: 	    unlink("$dfname");
  288:             &logthis("Delayed $cmd: >$answer<");
  289:             &logperm("S:$conserver:$bcmd");
  290:         }        
  291:     }
  292: 	if($DEBUG) { &logthis("<font color=green> Delayed transactions sent"); }
  293: 
  294: # ------------------------------------------------------- Listen to UNIX socket
  295: &status("Opening socket");
  296: unless (
  297:   $server = IO::Socket::UNIX->new(Local  => $port,
  298:                                   Type   => SOCK_STREAM,
  299:                                   Listen => 10 )
  300:    ) { 
  301:        my $st=120+int(rand(240));
  302:        &logthis(
  303:          "<font color=blue>WARNING: ".
  304:          "Can't make server socket ($st secs):  .. exiting</font>");
  305:        sleep($st);
  306:        exit; 
  307:      };
  308:    
  309: # -----------------------------------------------------------------------------
  310: 
  311: &logthis("<font color=green>$conserver online</font>");
  312: 
  313: # -----------------------------------------------------------------------------
  314: # begin with empty buffers
  315: %inbuffer  = ();
  316: %outbuffer = ();
  317: %ready     = ();
  318: %servers   = ();	# To be compatible with make filevector.  indexed by
  319: 			# File ids, values are sockets.
  320: 			# note that the accept socket is omitted.
  321: 
  322: tie %ready, 'Tie::RefHash';
  323: 
  324: # nonblock($server);
  325: # $select = IO::Select->new($server);
  326: 
  327: # Main loop: check reads/accepts, check writes, check ready to process
  328: 
  329: status("Main loop $conserver");
  330: while (1) {
  331:     my $client;
  332:     my $rv;
  333:     my $data;
  334: 
  335:     my $infdset;		# bit vec of fd's to select on input.
  336: 
  337:     my $outfdset;		# Bit vec of fd's to select on output.
  338: 
  339: 
  340:     $infdset = MakeFileVector(\%servers);
  341:     $outfdset= MakeFileVector(\%outbuffer);
  342:     vec($infdset, $server->fileno, 1) = 1;
  343:     if($DEBUG) {
  344: 	&logthis("Adding ".$server->fileno.
  345: 		 " to input select vector (listner)".
  346: 		 unpack("b*",$infdset)."\n");
  347:     }
  348:     DoSelect(\$infdset, \$outfdset); # Wait for input.
  349:     if($DEBUG) {
  350: 	&logthis("Doselect completed!");
  351: 	&logthis("ins = ".unpack("b*",$infdset)."\n");
  352: 	&logthis("outs= ".unpack("b*",$outfdset)."\n");
  353: 		 
  354:     }
  355: 
  356:     # Checkfor new connections:
  357:     if (vec($infdset, $server->fileno, 1)) {
  358: 	if($DEBUG) {
  359: 	    &logthis("New connection established");
  360: 	}
  361: 	# accept a new connection
  362: 	&status("Accept new connection: $conserver");
  363: 	$client = $server->accept();
  364: 	if (!$client) {
  365: 	    &logthis("Got stupid nonexisent client on ".$server->fileno." $conserver \n");
  366: 	} else {
  367: 	    if($DEBUG) {
  368: 		&logthis("New client fd = ".$client->fileno."\n");
  369: 	    }
  370: 	    $servers{$client->fileno} = $client;
  371: 	    nonblock($client);
  372: 	    $client->sockopt(SO_KEEPALIVE, 1); # Enable monitoring of
  373: 	                                       # connection liveness.
  374: 	}
  375:     }
  376:     HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready);
  377:     HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer,
  378: 		 \%ready);
  379: # -------------------------------------------------------- Wow, connection lost
  380: 
  381: }
  382:    
  383:     }
  384: }
  385: 
  386: # ------------------------------------------------------- End of make_new_child
  387: 
  388: 
  389: #
  390: #  Make a vector of file descriptors to wait for in a select.
  391: #  parameters:
  392: #     \%fdhash  -reference to a hash which has IO::Socket's as indices.  
  393: #                We only care about the indices, not the values.
  394: #  A select vector is created from all indices of the hash.
  395: 
  396: sub MakeFileVector
  397: {
  398:     my $fdhash = shift;
  399:     my $selvar = "";
  400: 
  401:     foreach $socket (keys %$fdhash) {
  402: 	if($DEBUG) {
  403: 	    &logthis("Adding  ".$socket.
  404: 		     "to select vector. (client)\n");
  405: 	}
  406: 	vec($selvar, $socket, 1) = 1;
  407:     }
  408:     return $selvar;
  409: }
  410: 
  411: 
  412: #
  413: #  HandleOutput:
  414: #    Processes output on a buffered set of file descriptors which are
  415: #    ready to be read.
  416: #  Parameters:
  417: #    $selvector - Vector of file descriptors which are writable.
  418: #    \%sockets  - Vector of socket references indexed by socket.
  419: #    \%buffers  - Reference to a hash containing output buffers.
  420: #                 Hashes are indexed by sockets.  The file descriptors of some
  421: #                 of those sockets will be present in $selvector.
  422: #                 For each one of those, we will attempt to write the output
  423: #                 buffer to the socket.  Note that we will assume that
  424: #                 the sockets are being run in non blocking mode.
  425: #   \%inbufs    - Reference to hash containing input buffers.
  426: #   \%readys    - Reference to hash containing flags for items with complete
  427: #                 requests.
  428: #
  429: sub HandleOutput
  430: {
  431:     my $selvector = shift;
  432:     my $sockets   = shift;
  433:     my $buffers   = shift;
  434:     my $inbufs    = shift;
  435:     my $readys    = shift;
  436:     my $sock;
  437: 
  438:     if($DEBUG) {
  439: 	&logthis("HandleOutput entered\n");
  440:     }
  441: 
  442:     foreach $sock (keys %$sockets) {
  443: 	my $socket = $sockets->{$sock};
  444: 	if(vec($selvector, $sock, 1)) { # $socket is writable.
  445: 	    if($DEBUG) {
  446: 		&logthis("Sending $buffers->{$sock} \n");
  447: 	    }
  448: 	    my $rv = $socket->send($buffers->{$sock}, 0);
  449: 	    $errno = $!;
  450: 	    unless ($buffers->{$sock} eq "con_lost\n") {
  451: 		unless (defined $rv) { # Write failed... could be EINTR
  452: 		    unless ($errno == POSIX::EINTR) {
  453: 			&logthis("Write failed on writable socket");
  454: 		    }		# EINTR is not an error .. just retry.
  455: 		    next;
  456: 		}
  457: 		if( ($rv == length $buffers->{$sock})    ||
  458: 		    ($errno == POSIX::EWOULDBLOCK)       ||
  459: 		    ($errno == POSIX::EAGAIN)            || # same as above.
  460: 		    ($errno == POSIX::EINTR)             || # signal during IO
  461: 		    ($errno == 0)) {
  462: 		    substr($buffers->{$sock}, 0, $rv)=""; # delete written part
  463: 		    delete $buffers->{$sock} unless length $buffers->{$sock};
  464: 		} else {
  465: 		    # For some reason the write failed with an error code
  466: 		    # we didn't look for.  Shutdown the socket.
  467: 		    &logthis("Unable to write data with ".$errno.": ".
  468: 			     "Dropping data: ".length($buffers->{$sock}).
  469: 			     ", $rv");
  470: 		    #
  471: 		    # kill off the buffers in the hash:
  472: 
  473: 		    delete $buffers->{$sock};
  474: 		    delete $inbufs->{$sock};
  475: 		    delete $readys->{$sock};
  476: 
  477: 		    close($socket); # Close the client socket.
  478: 		    next;
  479: 		}
  480: 	    } else {		# Kludgy way to mark lond connection lost.
  481: 		&logthis(
  482: 		 "<font color=red>CRITICAL lond connection lost</font>");
  483: 		status("Connection lost");
  484: 		$remotesock->shutdown(2);
  485: 		&logthis("Attempting to open a new connection");
  486: 		&openremote($conserver);
  487: 	    }
  488: 		   
  489: 	}
  490:     }
  491: 
  492: }
  493: #
  494: #   HandleInput - Deals with input on client sockets.
  495: #                 Each socket has an associated input buffer.
  496: #                 For each readable socket, the currently available
  497: #                 data is appended to this buffer.
  498: #                 If necessary, the buffer is created.
  499: #                 On various failures, we may shutdown the client.
  500: #  Parameters:
  501: #     $selvec   - Vector of readable sockets.
  502: #     \%sockets - Refers to the  Hash of sockets indexed by sockets.  
  503: #                 Each of these may or may not have it's fd bit set 
  504: #                 in the $selvec.
  505: #     \%ibufs   - Refers to the hash of input buffers indexed by socket.
  506: #     \%obufs   - Hash of output buffers indexed by socket. 
  507: #     \%ready   - Hash of ready flags indicating the existence of a completed
  508: #                 Request.
  509: sub HandleInput 
  510: {
  511: 
  512:     # Marshall the parameters.   Note that the hashes are actually
  513:     # references not values.
  514: 
  515:     my $selvec  = shift;
  516:     my $sockets = shift;
  517:     my $ibufs   = shift;
  518:     my $obufs   = shift;
  519:     my $ready   = shift;
  520:     my $sock;
  521: 
  522:     if($DEBUG) {
  523: 	&logthis("Entered HandleInput\n");
  524:     }
  525:     foreach $sock (keys %$sockets) {
  526: 	my $socket = $sockets->{$sock};
  527: 	if(vec($selvec, $sock, 1)) { # Socket which is readable.
  528: 
  529: 	    #  Attempt to read the data and do error management.
  530: 	    my $data = '';
  531: 	    my $rv = $socket->recv($data, POSIX::BUFSIZ, 0);
  532: 	    if($DEBUG) {
  533: 		&logthis("Received $data from socket");
  534: 	    }
  535: 	    unless (defined($rv) && length $data) {
  536: 
  537: 		# Read an end of file.. this is a disconnect from the peer.
  538: 
  539: 		delete $sockets->{$sock};
  540: 		delete $ibufs->{$sock};
  541: 		delete $obufs->{$sock};
  542: 		delete $ready->{$sock};
  543: 
  544: 		status("Idle");
  545: 		close $socket;
  546: 		next;
  547: 	    }
  548: 	    #  Append the read data to the input buffer. If the buffer
  549: 	    # now contains a \n the request is complete and we can 
  550: 	    # mark this in the $ready hash (one request for each \n.)
  551: 
  552: 	    $ibufs->{$sock} .= $data;
  553: 	    while($ibufs->{$sock} =~ s/(.*\n)//) {
  554: 		push(@{$ready->{$sock}}, $1);
  555: 	    }
  556: 	    
  557: 	}
  558:     }
  559:     #  Now handle any requests which are ready:
  560: 
  561:     foreach $client (keys %ready) {
  562: 	handle($client);
  563:     }
  564: }
  565: 
  566: # DoSelect:  does a select with no timeout.  On signal (errno == EINTR), 
  567: #            the select is retried until there are items in the returned
  568: #            vectors.  
  569: #
  570: # Parameters:
  571: #   \$readvec   - Reference to a vector of file descriptors to 
  572: #                 check for readability.
  573: #   \$writevec  - Reference to a vector of file descriptors to check for
  574: #                 writability.
  575: #  On exit, the referents are modified with vectors indicating which 
  576: #  file handles are readable/writable.
  577: #
  578: sub DoSelect {
  579:     my $readvec = shift;
  580:     my $writevec= shift;
  581:     my $outs;
  582:     my $ins;
  583: 
  584:     while (1) {
  585: 	my $nfds = select( $ins = $$readvec, $outs = $$writevec, undef, undef);
  586: 	if($nfds) {
  587: 	    if($DEBUG) {
  588: 		&logthis("select exited with ".$nfds." fds\n");
  589: 		&logthis("ins = ".unpack("b*",$ins).
  590: 			 " readvec = ".unpack("b*",$$readvec)."\n");
  591: 		&logthis("outs = ".unpack("b*",$outs).
  592: 			 " writevec = ".unpack("b*",$$writevec)."\n");
  593: 	    }
  594: 	    $$readvec  = $ins;
  595: 	    $$writevec = $outs;
  596: 	    return;
  597: 	} else {
  598: 	    if($DEBUG) {
  599: 		&logthis("Select exited with no bits set in mask\n");
  600: 	    }
  601: 	    die "Select failed" unless $! == EINTR;
  602: 	}
  603:     }
  604: }
  605: 
  606: # handle($socket) deals with all pending requests for $client
  607: #
  608: sub handle {
  609:     # requests are in $ready{$client}
  610:     # send output to $outbuffer{$client}
  611:     my $client = shift;
  612:     my $request;
  613:     foreach $request (@{$ready{$client}}) {
  614: # ============================================================= Process request
  615:         # $request is the text of the request
  616:         # put text of reply into $outbuffer{$client}
  617: # ------------------------------------------------------------ Is this the end?
  618: 	chomp($request);
  619: 	if($DEBUG) {
  620:      &logthis("<font color=green> Request $request processing starts</font>");
  621:         }
  622:         if ($request eq "close_connection_exit\n") {
  623: 	    &status("Request close connection");
  624:            &logthis(
  625:      "<font color=red>CRITICAL: Request Close Connection ... exiting</font>");
  626:            $remotesock->shutdown(2);
  627:            $server->close();
  628:            exit;
  629:         }
  630: # -----------------------------------------------------------------------------
  631:         if ($request =~ /^encrypt\:/) {
  632: 	    my $cmd=$request;
  633:             $cmd =~ s/^encrypt\://;
  634:             chomp($cmd);
  635:             my $cmdlength=length($cmd);
  636:             $cmd.="         ";
  637:             my $encrequest='';
  638:             for (my $encidx=0;$encidx<=$cmdlength;$encidx+=8) {
  639:                 $encrequest.=
  640:                     unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8)));
  641:             }
  642:             $request="enc:$cmdlength:$encrequest";
  643:         }
  644: # --------------------------------------------------------------- Main exchange
  645: 	$answer = londtransaction($remotesock, $request, 60);
  646: 
  647: 	if($DEBUG) { 
  648: 	    &logthis("<font color=green> Request data exchange complete");
  649: 	}
  650: 	if ($@=~/timeout/) { 
  651: 	    $answer='';
  652: 	    &logthis(
  653: 		     "<font color=red>CRITICAL: Timeout: $request</font>");
  654: 	}  
  655: 
  656: 
  657:         if ($answer) {
  658: 	   if ($answer =~ /^enc/) {
  659:                my ($cmd,$cmdlength,$encinput)=split(/:/,$answer);
  660:                chomp($encinput);
  661: 	       $answer='';
  662:                for (my $encidx=0;$encidx<length($encinput);$encidx+=16) {
  663:                   $answer.=$cipher->decrypt(
  664:                    pack("H16",substr($encinput,$encidx,16))
  665:                   );
  666: 	       }
  667: 	      $answer=substr($answer,0,$cmdlength);
  668: 	      $answer.="\n";
  669: 	   }
  670: 	   if($DEBUG) {
  671: 	       &logthis("sending $answer to client\n");
  672: 	   }
  673:            $outbuffer{$client} .= $answer;
  674:         } else {
  675:            $outbuffer{$client} .= "con_lost\n";
  676:         }
  677: 
  678:      &status("Completed: $request");
  679: 	if($DEBUG) {
  680: 	    &logthis("<font color=green> Request processing complete</font>");
  681: 	}
  682: # ===================================================== Done processing request
  683:     }
  684:     delete $ready{$client};
  685: # -------------------------------------------------------------- End non-forker
  686:     if($DEBUG) {
  687: 	&logthis("<font color=green> requests for child handled</font>");
  688:     }
  689: }
  690: # ---------------------------------------------------------- End make_new_child
  691: 
  692: # nonblock($socket) puts socket into nonblocking mode
  693: sub nonblock {
  694:     my $socket = shift;
  695:     my $flags;
  696: 
  697:     
  698:     $flags = fcntl($socket, F_GETFL, 0)
  699:             or die "Can't get flags for socket: $!\n";
  700:     fcntl($socket, F_SETFL, $flags | O_NONBLOCK)
  701:             or die "Can't make socket nonblocking: $!\n";
  702: }
  703: 
  704: 
  705: sub openremote {
  706: # ---------------------------------------------------- Client to network server
  707: 
  708:     my $conserver=shift;
  709: 
  710:     &status("Opening TCP $conserver");
  711:     my $st=120+int(rand(240)); # Sleep before opening:
  712: 
  713:     unless (
  714: 	    $remotesock = IO::Socket::INET->new(PeerAddr => $hostname{$conserver},
  715: 						PeerPort => $perlvar{'londPort'},
  716: 						Proto    => "tcp",
  717: 						Type     => SOCK_STREAM)
  718: 	   ) {
  719: 
  720: 	&logthis(
  721: 		 "<font color=blue>WARNING: Couldn't connect to $conserver ($st secs): </font>");
  722: 	sleep($st);
  723: 	exit;
  724:     };
  725: # ----------------------------------------------------------------- Init dialog
  726: 
  727:     &logthis("<font color=green>INFO Connected to $conserver, initing</font>");
  728:     &status("Init dialogue: $conserver");
  729: 
  730:     $answer = londtransaction($remotesock, "init", 60);
  731:     chomp($answer);
  732:     $answer = londtransaction($remotesock, $answer, 60);
  733:     chomp($answer);
  734: 
  735:     if ($@=~/timeout/) {
  736: 	&logthis("Timed out during init.. exiting");
  737: 	exit;
  738:     }
  739: 
  740:     if ($answer ne 'ok') {
  741: 	&logthis("Init reply: >$answer<");
  742: 	my $st=120+int(rand(240));
  743: 	&logthis("<font color=blue>WARNING: Init failed ($st secs)</font>");
  744: 	sleep($st);
  745: 	exit;
  746:     }
  747: 
  748:     $answer = londtransaction($remotesock,"sethost:$conserver",60);
  749:     chomp($answer);
  750:     if ( $answer ne 'ok') {
  751: 	&logthis('<font color="blue">WARNING: unable to specify remote host'.
  752: 		 $answer.'</font>');
  753:     }
  754: 
  755:     $answer = londtransaction($remotesock,"version:$VERSION",60);
  756:     chomp($answer);
  757:     if ($answer =~ /^version:/) {
  758: 	$remoteVERSION=(split(/:/,$answer))[1];
  759:     } else {
  760: 	&logthis('<font color="blue">WARNING: request remote version failed :'.
  761: 		 $answer.': my version is :'.$VERSION.':</font>');
  762:     }
  763: 
  764:     sleep 5;
  765:     &status("Ponging $conserver");
  766:     $answer= londtransaction($remotesock,"pong",60);
  767:     chomp($answer);
  768:     if ($answer!~/^$conserver/) {
  769: 	&logthis("Pong reply: >$answer<");
  770:     }
  771: # ----------------------------------------------------------- Initialize cipher
  772: 
  773:     &status("Initialize cipher");
  774:     my $buildkey=londtransaction($remotesock,"ekey",60);
  775:     my $key=$conserver.$perlvar{'lonHostID'};
  776:     $key=~tr/a-z/A-Z/;
  777:     $key=~tr/G-P/0-9/;
  778:     $key=~tr/Q-Z/0-9/;
  779:     $key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
  780:     $key=substr($key,0,32);
  781:     my $cipherkey=pack("H32",$key);
  782:     if ($cipher=new IDEA $cipherkey) {
  783: 	&logthis("Secure connection initialized");
  784:     } else {
  785: 	my $st=120+int(rand(240));
  786: 	&logthis("<font color=blue>WARNING: ".
  787: 		 "Could not establish secure connection ($st secs)!</font>");
  788: 	sleep($st);
  789: 	exit;
  790:     }
  791:     &logthis("<font color=green> Remote open success </font>");
  792: }
  793: 
  794: 
  795: 
  796: # grabs exception and records it to log before exiting
  797: sub catchexception {
  798:     my ($signal)=@_;
  799:     $SIG{QUIT}='DEFAULT';
  800:     $SIG{__DIE__}='DEFAULT';
  801:     chomp($signal);
  802:     &logthis("<font color=red>CRITICAL: "
  803:      ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through "
  804:      ."\"$signal\" with parameter </font>");
  805:     die("Signal abend");
  806: }
  807: 
  808: # -------------------------------------- Routines to see if other box available
  809: 
  810: #sub online {
  811: #    my $host=shift;
  812: #    &status("Pinging ".$host);
  813: #    my $p=Net::Ping->new("tcp",20);
  814: #    my $online=$p->ping("$host");
  815: #    $p->close();
  816: #    undef ($p);
  817: #    return $online;
  818: #}
  819: 
  820: sub connected {
  821:     my ($local,$remote)=@_;
  822:     &status("Checking connection $local to $remote");
  823:     $local=~s/\W//g;
  824:     $remote=~s/\W//g;
  825: 
  826:     unless ($hostname{$local}) { return 'local_unknown'; }
  827:     unless ($hostname{$remote}) { return 'remote_unknown'; }
  828: 
  829:     #unless (&online($hostname{$local})) { return 'local_offline'; }
  830: 
  831:     my $ua=new LWP::UserAgent;
  832:     
  833:     my $request=new HTTP::Request('GET',
  834:       "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote);
  835: 
  836:     my $response=$ua->request($request);
  837: 
  838:     unless ($response->is_success) { return 'local_error'; }
  839: 
  840:     my $reply=$response->content;
  841:     $reply=(split("\n",$reply))[0];
  842:     $reply=~s/\W//g;
  843:     if ($reply ne $remote) { return $reply; }
  844:     return 'ok';
  845: }
  846: 
  847: 
  848: 
  849: sub hangup {
  850:     foreach (keys %children) {
  851:         $wasserver=$children{$_};
  852:         &status("Closing $wasserver");
  853:         &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));
  854:         &status("Kill PID $_ for $wasserver");
  855: 	kill ('INT',$_);
  856:     }
  857: }
  858: 
  859: sub HUNTSMAN {                      # signal handler for SIGINT
  860:     local($SIG{CHLD}) = 'IGNORE';   # we're going to kill our children
  861:     &hangup();
  862:     my $execdir=$perlvar{'lonDaemons'};
  863:     unlink("$execdir/logs/lonc.pid");
  864:     &logthis("<font color=red>CRITICAL: Shutting down</font>");
  865:     exit;                           # clean up with dignity
  866: }
  867: 
  868: sub HUPSMAN {                      # signal handler for SIGHUP
  869:     local($SIG{CHLD}) = 'IGNORE';  # we're going to kill our children
  870:     &hangup();
  871:     &logthis("<font color=red>CRITICAL: Restarting</font>");
  872:     my $execdir=$perlvar{'lonDaemons'};
  873:     unlink("$execdir/logs/lonc.pid");
  874:     exec("$execdir/lonc");         # here we go again
  875: }
  876: 
  877: sub checkchildren {
  878:     &initnewstatus();
  879:     &logstatus();
  880:     &logthis('Going to check on the children');
  881:     foreach (sort keys %children) {
  882: 	sleep 1;
  883:         unless (kill 'USR1' => $_) {
  884: 	    &logthis ('<font color=red>CRITICAL: Child '.$_.' is dead</font>');
  885:             &logstatus($$.' is dead');
  886:         } 
  887:     }
  888: }
  889: 
  890: sub USRMAN {
  891:     &logthis("USR1: Trying to establish connections again");
  892:     #
  893:     #  It is really important not to just clear the childatt hash or we will
  894:     #  lose all memory of the children.  What we really want to do is this:
  895:     #  For each index where childatt is >= $childmaxattempts
  896:     #  Zero the associated counter and do a make_child for the host.
  897:     #  Regardles, the childatt entry is zeroed:
  898:     my $host;
  899:     foreach $host (keys %childatt) {
  900: 	if ($childatt{$host} >= $childmaxattempts) {
  901: 	    $childatt{$host} = 0;
  902: 	    &logthis("<font color=green>INFO: Restarting child for server: "
  903: 		     .$host."</font>\n");
  904: 	    make_new_child($host);
  905: 	}
  906: 	else {
  907: 	    $childatt{$host} = 0;
  908: 	}
  909:     }
  910:     &checkchildren();		# See if any children are still dead...
  911: }
  912: 
  913: # -------------------------------------------------- Non-critical communication
  914: sub subreply { 
  915:  my ($cmd,$server)=@_;
  916:  my $answer='';
  917:  if ($server ne $perlvar{'lonHostID'}) { 
  918:     my $peerfile="$perlvar{'lonSockDir'}/$server";
  919:     my $sclient=IO::Socket::UNIX->new(Peer    =>"$peerfile",
  920:                                       Type    => SOCK_STREAM,
  921:                                       Timeout => 10)
  922:        or return "con_lost";
  923: 
  924: 
  925:     $answer = londtransaction($sclient, $cmd, 10);
  926: 
  927:     if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; }
  928:     $SIG{ALRM}='DEFAULT';
  929:     $SIG{__DIE__}=\&catchexception;
  930:  } else { $answer='self_reply'; }
  931:  return $answer;
  932: }
  933: 
  934: # --------------------------------------------------------------------- Logging
  935: 
  936: sub logthis {
  937:     my $message=shift;
  938:     my $execdir=$perlvar{'lonDaemons'};
  939:     my $fh=IO::File->new(">>$execdir/logs/lonc.log");
  940:     my $now=time;
  941:     my $local=localtime($now);
  942:     $lastlog=$local.': '.$message;
  943:     print $fh "$local ($$) [$conserver] [$status]: $message\n";
  944: }
  945: 
  946: #--------------------------------------  londtransaction:
  947: #  
  948: #  Performs a transaction with lond with timeout support.
  949: #    result = londtransaction(socket,request,timeout)
  950: #
  951: sub londtransaction {
  952:     my ($socket, $request, $tmo) = @_;
  953: 
  954:     if($DEBUG) {
  955: 	&logthis("londtransaction request: $request");
  956:     }
  957: 
  958:     # Set the signal handlers: ALRM for timeout and disble the others.
  959: 
  960:     $SIG{ALRM} = sub { die "timeout" };
  961:     $SIG{__DIE__} = 'DEFAULT';
  962:     
  963:     # Disable all but alarm so that only that can interupt the
  964:     # send /receive.
  965:     #
  966:     my $sigset = POSIX::SigSet->new(QUIT, USR1, HUP, INT, TERM);
  967:     my $priorsigs = POSIX::SigSet->new;
  968:     unless (defined sigprocmask(SIG_BLOCK, $sigset, $priorsigs)) {
  969: 	&logthis("<font color=red> CRITICAL -- londtransaction ".
  970: 		"failed to block signals </font>");
  971: 	die "could not block signals in londtransaction";
  972:     }
  973:     $answer = '';
  974:     #
  975:     #  Send request to lond.
  976:     #
  977:     eval { 
  978: 	alarm($tmo);
  979: 	print $socket "$request\n";
  980: 	alarm(0);
  981:     };
  982:     #  If request didn't timeout, try for the response.
  983:     #
  984: 
  985:     if ($@!~/timeout/) {
  986: 	eval {
  987: 	    alarm($tmo);
  988: 	    $answer = <$socket>;
  989: 	    if($DEBUG) {
  990: 		&logthis("Received $answer in londtransaction");
  991: 	    }
  992: 	    alarm(0);
  993: 	};
  994:     } else {
  995: 	&logthis("lonc - $conserver - suiciding on send Timeout");
  996: 	die("lonc - $conserver - suiciding on send Timeout");
  997:     }
  998:     if ($@ =~ /timeout/) {
  999: 	&logthis("lonc - $conserver - suiciding on read Timeout");
 1000: 	die("lonc - $conserver - suiciding on read Timeout");
 1001:     }
 1002:     #
 1003:     # Restore the initial sigmask set.
 1004:     #
 1005:     unless (defined sigprocmask(SIG_UNBLOCK, $priorsigs)) {
 1006: 	&logthis("<font color=red> CRITICAL -- londtransaction ".
 1007: 		"failed to re-enable signal processing. </font>");
 1008: 	die "londtransaction failed to re-enable signals";
 1009:     }
 1010:     #
 1011:     # go back to the prior handler set.
 1012:     #
 1013:     $SIG{ALRM} = 'DEFAULT';
 1014:     $SIG{__DIE__} = \&cathcexception;
 1015: 
 1016:     #    chomp $answer;
 1017:     if ($DEBUG) {
 1018: 	&logthis("Returning $answer in londtransaction");
 1019:     }
 1020:     return $answer;
 1021: 
 1022: }
 1023: 
 1024: sub logperm {
 1025:     my $message=shift;
 1026:     my $execdir=$perlvar{'lonDaemons'};
 1027:     my $now=time;
 1028:     my $local=localtime($now);
 1029:     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
 1030:     print $fh "$now:$message:$local\n";
 1031: }
 1032: # ------------------------------------------------------------------ Log status
 1033: 
 1034: sub logstatus {
 1035:     my $docdir=$perlvar{'lonDocRoot'};
 1036:     my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt");
 1037:     print $fh $$."\t".$conserver."\t".$status."\t".$lastlog."\n";
 1038: }
 1039: 
 1040: sub initnewstatus {
 1041:     my $docdir=$perlvar{'lonDocRoot'};
 1042:     my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt");
 1043:     my $now=time;
 1044:     my $local=localtime($now);
 1045:     print $fh "LONC status $local - parent $$\n\n";
 1046: }
 1047: 
 1048: # -------------------------------------------------------------- Status setting
 1049: 
 1050: sub status {
 1051:     my $what=shift;
 1052:     my $now=time;
 1053:     my $local=localtime($now);
 1054:     $status=$local.': '.$what;
 1055:     $0='lonc: '.$what.' '.$local;
 1056: }
 1057: 
 1058: 
 1059: 
 1060: # ----------------------------------- POD (plain old documentation, CPAN style)
 1061: 
 1062: =head1 NAME
 1063: 
  1064: lonc - LON TCP-Client Domain-Socket-Server providing persistent TCP connections to other servers.
 1065: 
 1066: =head1 SYNOPSIS
 1067: 
 1068: Usage: B<lonc>
 1069: 
 1070: Should only be run as user=www.  This is a command-line script which
 1071: is invoked by B<loncron>.  There is no expectation that a typical user
 1072: will manually start B<lonc> from the command-line.  (In other words,
 1073: DO NOT START B<lonc> YOURSELF.)
 1074: 
 1075: =head1 OVERVIEW
 1076: 
 1077: =head2 Physical Overview
 1078: 
 1079: =begin latex 
 1080: 
 1081: \begin{figure} 
 1082:   \begin{center}
 1083:     \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram}
 1084:   \end{center}
 1085:   \caption{\label{Overview_Of_Network}Overview of Network}
 1086: \end{figure}
 1087: 
 1088: =end latex
 1089: 
 1090: Physically, the Network consists of relatively inexpensive
 1091: upper-PC-class server machines which are linked through the commodity
 1092: internet in a load-balancing, dynamically content-replicating and
 1093: failover-secure way.
 1094: 
 1095: All machines in the Network are connected with each other through
 1096: two-way persistent TCP/IP connections. Clients (B<B>, B<F>, B<G> and
 1097: B<H> in Fig. Overview of Network) connect to the servers via standard
 1098: HTTP. There are two classes of servers, B<Library Servers> (B<A> and
 1099: B<E> in Fig. Overview of Network) and B<Access Servers> (B<C>, B<D>,
 1100: B<I> and B<J> in Fig. Overview of Network).
 1101: 
 1102: B<Library Servers> X<library server> X<server, library> are used to
 1103: store all personal records of a set of users, and are responsible for
 1104: their initial authentication when a session is opened on any server in
  1105: the Network. For Authors, Library Servers also host their
 1106: construction area and the authoritative copy of the current and
 1107: previous versions of every resource that was published by that
 1108: author. Library servers can be used as backups to host sessions when
 1109: all access servers in the Network are overloaded. Otherwise, for
 1110: learners, access servers are used to host the sessions. Library
 1111: servers need to have strong I/O capabilities.
 1112: 
 1113: B<Access Servers> X<access server> X<server, access> provide LON-CAPA
 1114: service to users, using the library servers as their data source. The
 1115: network is designed so that the number of concurrent sessions can be
 1116: increased over a wide range by simply adding additional access servers
 1117: before having to add additional library servers. Preliminary tests
 1118: showed that a library server could handle up to 10 access servers
  1119: fully parallel. Access servers can generally be cheaper hardware than
 1120: library servers require.
 1121: 
 1122: The Network is divided into B<domains> X<domain>, which are logical
 1123: boundaries between participating institutions. These domains can be
 1124: used to limit the flow of personal user information across the
 1125: network, set access privileges and enforce royalty schemes. LON-CAPA
 1126: domains bear no relationship to any other domain, including domains
 1127: used by the DNS system; LON-CAPA domains may be freely configured in
 1128: any manner that suits your use pattern.
 1129: 
 1130: =head2 Example Transactions
 1131: 
 1132: Fig. Overview of Network also depicts examples for several kinds of
 1133: transactions conducted across the Network.
 1134: 
 1135: An instructor at client B<B> modifies and publishes a resource on her
 1136: Home Server B<A>. Server B<A> has a record of all server machines
 1137: currently subscribed to this resource, and replicates it to servers
 1138: B<D> and B<I>. However, server B<D> is currently offline, so the
 1139: update notification gets buffered on B<A> until B<D> comes online
 1140: again. Servers B<C> and B<J> are currently not subscribed to this
 1141: resource.
 1142: 
 1143: Learners B<F> and B<G> have open sessions on server B<I>, and the new
 1144: resource is immediately available to them.
 1145: 
 1146: Learner B<H> tries to connect to server B<I> for a new session,
 1147: however, the machine is not reachable, so he connects to another
 1148: Access Server B<J> instead. This server currently does not have all
 1149: necessary resources locally present to host learner B<H>, but
 1150: subscribes to them and replicates them as they are accessed by B<H>.
 1151: 
 1152: Learner B<H> solves a problem on server B<J>. Library Server B<E> is
 1153: B<H>'s Home Server, so this information gets forwarded to B<E>, where
 1154: the records of H are updated.
 1155: 
 1156: =head2 lond, lonc, and lonnet
 1157: 
 1158: =begin latex
 1159: 
 1160: \begin{figure}
 1161: \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram2}
 1162:   \caption{\label{Overview_Of_Network_Communication}Overview of
 1163: Network Communication} \end{figure}
 1164: 
 1165: =end latex
 1166: 
 1167: Fig. Overview of Network Communication elaborates on the details of
 1168: this network infrastructure. It depicts three servers (B<A>, B<B> and
 1169: B<C>) and a client who has a session on server B<C>.
 1170: 
 1171: As B<C> accesses different resources in the system, different
 1172: handlers, which are incorporated as modules into the child processes
 1173: of the web server software, process these requests.
 1174: 
 1175: Our current implementation uses C<mod_perl> inside of the Apache web
 1176: server software. As an example, server B<C> currently has four active
 1177: web server software child processes. The chain of handlers dealing
 1178: with a certain resource is determined by both the server content
 1179: resource area (see below) and the MIME type, which in turn is
 1180: determined by the URL extension. For most URL structures, both an
 1181: authentication handler and a content handler are registered.
 1182: 
 1183: Handlers use a common library C<lonnet> X<lonnet> to interact with
 1184: both locally present temporary session data and data across the server
 1185: network. For example, lonnet provides routines for finding the home
 1186: server of a user, finding the server with the lowest loadavg, sending
 1187: simple command-reply sequences, and sending critical messages such as
 1188: a homework completion, etc. For a non-critical message, the routines
 1189: reply with a simple "connection lost" if the message could not be
 1190: delivered. For critical messages, lonnet tries to re-establish
 1191: connections, re-send the command, etc. If no valid reply could be
 1192: received, it answers "connection deferred" and stores the message in
 1193: buffer space to be sent at a later point in time. Also, failed
 1194: critical messages are logged.
 1195: 
 1196: The interface between C<lonnet> and the Network is established by a
 1197: multiplexed UNIX domain socket, denoted B<DS> in Fig. Overview of
 1198: Network Communication. The rationale behind this rather involved
 1199: architecture is that httpd processes (Apache children) dynamically
 1200: come and go on the timescale of minutes, based on workload and number
 1201: of processed requests. Over the lifetime of an httpd child, however,
 1202: it has to establish several hundred connections to several different
 1203: servers in the Network.
 1204: 
 1205: On the other hand, establishing a TCP/IP connection is resource
 1206: consuming for both ends of the line, and to optimize this connectivity
 1207: between different servers, connections in the Network are designed to
 1208: be persistent on the timescale of months, until either end is
 1209: rebooted. This mechanism will be elaborated on below.
 1210: 
 1211: =begin latex
 1212: 
 1213: \begin{figure}
 1214: \begin{lyxcode}
 1215: msul1:msu:library:zaphod.lite.msu.edu:35.8.63.51
 1216: 
 1217: msua1:msu:access:agrajag.lite.msu.edu:35.8.63.68
 1218: 
 1219: msul2:msu:library:frootmig.lite.msu.edu:35.8.63.69
 1220: 
 1221: msua2:msu:access:bistromath.lite.msu.edu:35.8.63.67
 1222: 
 1223: hubl14:hub:library:hubs128-pc-14.cl.msu.edu:35.8.116.34
 1224: 
 1225: hubl15:hub:library:hubs128-pc-15.cl.msu.edu:35.8.116.35
 1226: 
 1227: hubl16:hub:library:hubs128-pc-16.cl.msu.edu:35.8.116.36
 1228: 
 1229: huba20:hub:access:hubs128-pc-20.cl.msu.edu:35.8.116.40
 1230: 
 1231: huba21:hub:access:hubs128-pc-21.cl.msu.edu:35.8.116.41
 1232: 
 1233: huba22:hub:access:hubs128-pc-22.cl.msu.edu:35.8.116.42
 1234: 
 1235: huba23:hub:access:hubs128-pc-23.cl.msu.edu:35.8.116.43
 1236: 
 1237: hubl25:other:library:hubs128-pc-25.cl.msu.edu:35.8.116.45
 1238: 
 1239: huba27:other:access:hubs128-pc-27.cl.msu.edu:35.8.116.47
 1240: \end{lyxcode}
 1241: 
 1242: \caption{\label{Example_Of_hosts.tab}Example of Hosts Lookup table\texttt{/home/httpd/lonTabs/hosts.tab}} 
 1243: \end{figure}
 1244: 
 1245: =end latex
 1246: 
 1247: Establishing a connection to a UNIX domain socket is far less resource
 1248: consuming than the establishing of a TCP/IP connection. C<lonc>
 1249: X<lonc> is a proxy daemon that forks off a child for every server in
 1250: the Network. Which servers are members of the Network is determined by
 1251: a lookup table, such as the one in Fig. Examples of Hosts. In order,
 1252: the entries denote an internal name for the server, the domain of the
 1253: server, the type of the server, the host name and the IP address.
 1254: 
 1255: The C<lonc> parent process maintains the population and listens for
 1256: signals to restart or shutdown, as well as I<USR1>. Every child
 1257: establishes a multiplexed UNIX domain socket for its server and opens
 1258: a TCP/IP connection to the lond daemon (discussed below) on the remote
 1259: machine, which it keeps alive. If the connection is interrupted, the
 1260: child dies, whereupon the parent makes several attempts to fork
 1261: another child for that server.
 1262: 
 1263: When starting a new child (a new connection), first an init-sequence
 1264: is carried out, which includes receiving the information from the
 1265: remote C<lond> which is needed to establish the 128-bit encryption key
 1266: - the key is different for every connection. Next, any buffered
 1267: (delayed) messages for the server are sent.
 1268: 
 1269: In normal operation, the child listens to the UNIX socket, forwards
 1270: requests to the TCP connection, gets the reply from C<lond>, and sends
  1271: it back to the UNIX socket. Also, C<lonc> takes care of the encryption
 1272: and decryption of messages.
 1273: 
 1274: C<lond> X<lond> is the remote end of the TCP/IP connection and acts as
 1275: a remote command processor. It receives commands, executes them, and
 1276: sends replies. In normal operation, a C<lonc> child is constantly
 1277: connected to a dedicated C<lond> child on the remote server, and the
 1278: same is true vice versa (two persistent connections per server
 1279: combination).
 1280: 
 1281: lond listens to a TCP/IP port (denoted B<P> in Fig. Overview of
 1282: Network Communication) and forks off enough child processes to have
 1283: one for each other server in the network plus two spare children. The
 1284: parent process maintains the population and listens for signals to
 1285: restart or shutdown. Client servers are authenticated by IP.
 1286: 
 1287: When a new client server comes online, C<lond> sends a signal I<USR1>
 1288: to lonc, whereupon C<lonc> tries again to reestablish all lost
 1289: connections, even if it had given up on them before - a new client
 1290: connecting could mean that that machine came online again after an
 1291: interruption.
 1292: 
 1293: The gray boxes in Fig. Overview of Network Communication denote the
 1294: entities involved in an example transaction of the Network. The Client
 1295: is logged into server B<C>, while server B<B> is her Home
 1296: Server. Server B<C> can be an access server or a library server, while
 1297: server B<B> is a library server. She submits a solution to a homework
 1298: problem, which is processed by the appropriate handler for the MIME
 1299: type "problem". Through C<lonnet>, the handler writes information
 1300: about this transaction to the local session data. To make a permanent
 1301: log entry, C<lonnet> establishes a connection to the UNIX domain
 1302: socket for server B<B>. C<lonc> receives this command, encrypts it,
 1303: and sends it through the persistent TCP/IP connection to the TCP/IP
 1304: port of the remote C<lond>. C<lond> decrypts the command, executes it
 1305: by writing to the permanent user data files of the client, and sends
 1306: back a reply regarding the success of the operation. If the operation
 1307: was unsuccessful, or the connection would have broken down, C<lonc>
 1308: would write the command into a FIFO buffer stack to be sent again
 1309: later. C<lonc> now sends a reply regarding the overall success of the
 1310: operation to C<lonnet> via the UNIX domain port, which is eventually
 1311: received back by the handler.
 1312: 
 1313: =head2 Dynamic Resource Replication
 1314: 
 1315: Since resources are assembled into higher order resources simply by
 1316: reference, in principle it would be sufficient to retrieve them from
 1317: the respective Home Servers of the authors. However, there are several
 1318: problems with this simple approach: since the resource assembly
 1319: mechanism is designed to facilitate content assembly from a large
 1320: number of widely distributed sources, individual sessions would depend
 1321: on a large number of machines and network connections to be available,
 1322: thus be rather fragile. Also, frequently accessed resources could
 1323: potentially drive individual machines in the network into overload
 1324: situations.
 1325: 
 1326: Finally, since most resources depend on content handlers on the Access
 1327: Servers to be served to a client within the session context, the raw
 1328: source would first have to be transferred across the Network from the
 1329: respective Library Server to the Access Server, processed there, and
 1330: then transferred on to the client.
 1331: 
 1332: =begin latex
 1333: 
 1334: \begin{figure}
 1335: \includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Request}
 1336:   \caption{\label{Dynamic_Replication}Dynamic Replication} 
 1337: \end{figure}
 1338: 
 1339: =end latex
 1340: 
 1341: To enable resource assembly in a reliable and scalable way, a dynamic
 1342: resource replication scheme was developed. Fig. "Dynamic Replication"
 1343: shows the details of this mechanism.
 1344: 
 1345: Anytime a resource out of the resource space is requested, a handler
 1346: routine is called which in turn calls the replication routine. As a
  1347: first step, this routine determines whether or not the resource is
 1348: currently in replication transfer (Step B<D1a>). During replication
 1349: transfer, the incoming data is stored in a temporary file, and Step
 1350: B<D1a> checks for the presence of that file. If transfer of a resource
 1351: is actively going on, the controlling handler receives an error
 1352: message, waits for a few seconds, and then calls the replication
 1353: routine again. If the resource is still in transfer, the client will
 1354: receive the message "Service currently not available".
 1355: 
 1356: In the next step (Step B<D1b>), the replication routine checks if the
 1357: URL is locally present. If it is, the replication routine returns OK
 1358: to the controlling handler, which in turn passes the request on to the
 1359: next handler in the chain.
 1360: 
 1361: If the resource is not locally present, the Home Server of the
 1362: resource author (as extracted from the URL) is determined (Step
  1363: B<D2>). This is done by contacting all library servers in the author's
 1364: domain (as determined from the lookup table, see Fig. 1.1.2B). In Step
 1365: B<D2b> a query is sent to the remote server whether or not it is the
 1366: Home Server of the author (in our current implementation, an
 1367: additional cache is used to store already identified Home Servers (not
 1368: shown in the figure)). In Step B<D2c>, the remote server answers the
 1369: query with True or False. If the Home Server was found, the routine
 1370: continues, otherwise it contacts the next server (Step D2a). If no
 1371: server could be found, a "File not Found" error message is issued. In
 1372: our current implementation, in this step the Home Server is also
 1373: written into a cache for faster access if resources by the same author
 1374: are needed again (not shown in the figure).
 1375: 
 1376: =begin latex
 1377: 
 1378: \begin{figure}
 1379: \includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Change}
 1380:   \caption{\label{Dynamic_Replication_Change}Dynamic Replication: Change} \end{figure}
 1381: 
 1382: =end latex
 1383: 
 1384: In Step B<D3a>, the routine sends a subscribe command for the URL to
 1385: the Home Server of the author. The Home Server first determines if the
 1386: resource is present, and if the access privileges allow it to be
 1387: copied to the requesting server (B<D3b>). If this is true, the
 1388: requesting server is added to the list of subscribed servers for that
 1389: resource (Step B<D3c>). The Home Server will reply with either OK or
 1390: an error message, which is determined in Step D4. If the remote
 1391: resource was not present, the error message "File not Found" will be
 1392: passed on to the client, if the access was not allowed, the error
 1393: message "Access Denied" is passed on. If the operation succeeded, the
 1394: requesting server sends an HTTP request for the resource out of the
 1395: C</raw> server content resource area of the Home Server.
 1396: 
 1397: The Home Server will then check if the requesting server is part of
 1398: the network, and if it is subscribed to the resource (Step B<D5b>). If
 1399: it is, it will send the resource via HTTP to the requesting server
 1400: without any content handlers processing it (Step B<D5c>). The
 1401: requesting server will store the incoming data in a temporary data
 1402: file (Step B<D5a>) - this is the file that Step B<D1a> checks for. If
  1403: the transfer could not complete, an appropriate error message is sent
 1404: to the client (Step B<D6>). Otherwise, the transferred temporary file
 1405: is renamed as the actual resource, and the replication routine returns
 1406: OK to the controlling handler (Step B<D7>).
 1407: 
 1408: Fig. "Dynamic Replication: Change" depicts the process of modifying a
 1409: resource. When an author publishes a new version of a resource, the
 1410: Home Server will contact every server currently subscribed to the
 1411: resource (Step B<U1>), as determined from the list of subscribed
 1412: servers for the resource generated in Step B<D3c>. The subscribing
 1413: servers will receive and acknowledge the update message (Step
 1414: B<U1c>). The update mechanism finishes when the last subscribed server
 1415: has been contacted (messages to unreachable servers are buffered).
 1416: 
 1417: Each subscribing server will check if the resource in question had
 1418: been accessed recently, that is, within a configurable amount of time
 1419: (Step B<U2>).
 1420: 
 1421: If the resource had not been accessed recently, the local copy of the
 1422: resource is deleted (Step B<U3a>) and an unsubscribe command is sent
 1423: to the Home Server (Step B<U3b>). The Home Server will check if the
 1424: server had indeed originally subscribed to the resource (Step B<U3c>)
 1425: and then delete the server from the list of subscribed servers for the
 1426: resource (Step B<U3d>).
 1427: 
 1428: If the resource had been accessed recently, the modified resource will
 1429: be copied over using the same mechanism as in Step B<D5a> through
  1430: B<D7>, which represent Steps B<U4a> through B<U6> in the
 1431: replication figure.
 1432: 
 1433: =head2 Load Balancing 
 1434: 
 1435: X<load balancing>C<lond> provides a function to query the server's current loadavg. As
 1436: a configuration parameter, one can determine the value of loadavg,
 1437: which is to be considered 100%, for example, 2.00.
 1438: 
 1439: Access servers can have a list of spare access servers,
 1440: C</home/httpd/lonTabs/spares.tab>, to offload sessions depending on
  1441: own workload. This check is done by the login handler. It
  1442: re-directs the login information and session to the least busy spare
  1443: server if itself is overloaded. An additional round-robin IP scheme
  1444: is possible. See Fig. "Load Balancing Example" for an example of a
 1445: load-balancing scheme.
 1446: 
 1447: =begin latex
 1448: 
 1449: \begin{figure}
 1450: \includegraphics[width=0.75\paperwidth,keepaspectratio]{Load_Balancing_Example}
 1451:   \caption{\label{Load_Balancing_Example}Load Balancing Example} \end{figure}
 1452: 
 1453: =end latex
 1454: 
 1455: =head1 DESCRIPTION
 1456: 
 1457: Provides persistent TCP connections to the other servers in the network
 1458: through multiplexed domain sockets
 1459: 
 1460: B<lonc> forks off children processes that correspond to the other servers
 1461: in the network.  Management of these processes can be done at the
 1462: parent process level or the child process level.
 1463: 
 1464: After forking off the children, B<lonc> the B<parent> executes a main
 1465: loop which simply waits for processes to exit.  As a process exits, a
 1466: new process managing a link to the same peer as the exiting process is
 1467: created.
 1468: 
 1469: B<logs/lonc.log> is the location of log messages.
 1470: 
 1471: The process management is now explained in terms of linux shell commands,
 1472: subroutines internal to this code, and signal assignments:
 1473: 
 1474: =over 4
 1475: 
 1476: =item *
 1477: 
 1478: PID is stored in B<logs/lonc.pid>
 1479: 
 1480: This is the process id number of the parent B<lonc> process.
 1481: 
 1482: =item *
 1483: 
 1484: SIGTERM and SIGINT
 1485: 
 1486: Parent signal assignment:
 1487:  $SIG{INT}  = $SIG{TERM} = \&HUNTSMAN;
 1488: 
 1489: Child signal assignment:
 1490:  $SIG{INT}  = 'DEFAULT'; (and SIGTERM is DEFAULT also)
  1491: (The child dies and a SIGCHLD is sent to parent, awaking parent from slumber
 1492:  to restart a new child.)
 1493: 
 1494: Command-line invocations:
 1495:  B<kill> B<-s> SIGTERM I<PID>
 1496:  B<kill> B<-s> SIGINT I<PID>
 1497: 
 1498: Subroutine B<HUNTSMAN>:
 1499:  This is only invoked for the B<lonc> parent I<PID>.
 1500: This kills all the children, and then the parent.
 1501: The B<lonc.pid> file is cleared.
 1502: 
 1503: =item *
 1504: 
 1505: SIGHUP
 1506: 
 1507: Current bug:
 1508:  This signal can only be processed the first time
 1509: on the parent process.  Subsequent SIGHUP signals
 1510: have no effect.
 1511: 
 1512: Parent signal assignment:
 1513:  $SIG{HUP}  = \&HUPSMAN;
 1514: 
 1515: Child signal assignment:
 1516:  none (nothing happens)
 1517: 
 1518: Command-line invocations:
 1519:  B<kill> B<-s> SIGHUP I<PID>
 1520: 
  1521: Subroutine B<HUPSMAN>:
  1522:  This is only invoked for the B<lonc> parent I<PID>.
  1523: This kills all the children, removes the B<lonc.pid> file,
  1524: and then restarts B<lonc> by re-executing it.
 1525: 
 1526: =item *
 1527: 
 1528: SIGUSR1
 1529: 
 1530: Parent signal assignment:
 1531:  $SIG{USR1} = \&USRMAN;
 1532: 
 1533: Child signal assignment:
 1534:  $SIG{USR1}= \&logstatus;
 1535: 
 1536: Command-line invocations:
 1537:  B<kill> B<-s> SIGUSR1 I<PID>
 1538: 
 1539: Subroutine B<USRMAN>:
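  1540:  When invoked for the B<lonc> parent I<PID>, the connection attempt
  1541: counters are reset, children that had reached the maximum number of
  1542: attempts are restarted, SIGUSR1 is sent to all the children, and the
  1542: status of each connection is logged.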
 1543: 
 1544: 
 1545: =back
 1546: 
 1547: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>