--- loncom/Attic/lonc 2002/03/20 03:42:45 1.33 +++ loncom/Attic/lonc 2003/10/24 16:36:14 1.56 @@ -5,7 +5,7 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # -# $Id: lonc,v 1.33 2002/03/20 03:42:45 foxr Exp $ +# $Id: lonc,v 1.56 2003/10/24 16:36:14 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -37,18 +37,18 @@ # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19, # 10/8,10/9,10/15,11/18,12/22, # 2/8,7/25 Gerd Kortemeyer -# 12/05 Scott Harrison # 12/05 Gerd Kortemeyer # YEAR=2001 -# 01/10/01 Scott Harrison # 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer -# 12/20 Scott Harrison # YEAR=2002 # 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer # 3/07/02 Ron Fox # based on nonforker from Perl Cookbook # - server who multiplexes without forking +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration; + use POSIX; use IO::Socket; use IO::Select; @@ -64,25 +64,19 @@ $status=''; $lastlog=''; $conserver='SHELL'; $DEBUG = 0; # Set to 1 for annoyingly complete logs. 
- +$VERSION='$Revison$'; #' stupid emacs +$remoteVERSION; # -------------------------------- Set signal handlers to record abnormal exits &status("Init exception handlers"); $SIG{QUIT}=\&catchexception; $SIG{__DIE__}=\&catchexception; -# ------------------------------------ Read httpd access.conf and get variables -&status("Read access.conf"); -open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; - -while ($configline=) { - if ($configline =~ /PerlSetVar/) { - my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); - chomp($varvalue); - $perlvar{$varname}=$varvalue; - } -} -close(CONFIG); +# ---------------------------------- Read loncapa_apache.conf and loncapa.conf +&status("Read loncapa.conf and loncapa_apache.conf"); +my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); +my %perlvar=%{$perlvarref}; +undef $perlvarref; # ----------------------------- Make sure this process is running from user=www &status("Check user ID"); @@ -130,7 +124,7 @@ close(CONFIG); %childatt = (); # number of attempts to start server # for ID -$childmaxattempts=5; +$childmaxattempts=15; # ---------------------------------------------------- Fork once and dissociate &status("Fork and dissociate"); @@ -175,7 +169,14 @@ $SIG{USR1} = \&USRMAN; # And maintain the population. while (1) { my $deadpid = wait; # Wait for the next child to die. - # See who died and start new one + # See who died and start new one + # or a signal (e.g. USR1 for restart). + # if a signal, the wait will fail + # This is ordinarily detected by + # checking for the existence of the + # pid index inthe children hash since + # the return value from a failed wait is -1 + # which is an impossible PID. 
&status("Woke up"); my $skipping=''; @@ -255,7 +256,7 @@ unlink($port); @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE; closedir(DIRHANDLE); my $dfname; - foreach (@allbuffered) { + foreach (sort @allbuffered) { &status("Sending delayed: $_"); $dfname="$path/$_"; if($DEBUG) { &logthis('Sending '.$dfname); } @@ -314,114 +315,296 @@ unless ( %inbuffer = (); %outbuffer = (); %ready = (); +%servers = (); # To be compatible with make filevector. indexed by + # File ids, values are sockets. + # note that the accept socket is omitted. tie %ready, 'Tie::RefHash'; -nonblock($server); -$select = IO::Select->new($server); +# nonblock($server); +# $select = IO::Select->new($server); # Main loop: check reads/accepts, check writes, check ready to process + +status("Main loop $conserver"); while (1) { my $client; my $rv; my $data; - # check for new information on the connections we have + my $infdset; # bit vec of fd's to select on input. - # anything to read or accept? + my $outfdset; # Bit vec of fd's to select on output. - foreach $client ($select->can_read(100.0)) { - if ($client == $server) { - # accept a new connection - &status("Accept new connection: $conserver"); - $client = $server->accept(); - $select->add($client); - nonblock($client); - } else { - # read data - $data = ''; - $rv = $client->recv($data, POSIX::BUFSIZ, 0); - - unless (defined($rv) && length $data) { - # This would be the end of file, so close the client - delete $inbuffer{$client}; - delete $outbuffer{$client}; - delete $ready{$client}; - - &status("Idle"); - $select->remove($client); - close $client; - next; - } - $inbuffer{$client} .= $data; + $infdset = MakeFileVector(\%servers); + $outfdset= MakeFileVector(\%outbuffer); + vec($infdset, $server->fileno, 1) = 1; + if($DEBUG) { + &logthis("Adding ".$server->fileno. + " to input select vector (listner)". + unpack("b*",$infdset)."\n"); + } + DoSelect(\$infdset, \$outfdset); # Wait for input. 
+ if($DEBUG) { + &logthis("Doselect completed!"); + &logthis("ins = ".unpack("b*",$infdset)."\n"); + &logthis("outs= ".unpack("b*",$outfdset)."\n"); + + } + # Checkfor new connections: + if (vec($infdset, $server->fileno, 1)) { + if($DEBUG) { + &logthis("New connection established"); + } + # accept a new connection + &status("Accept new connection: $conserver"); + $client = $server->accept(); + if (!$client) { + &logthis("Got stupid nonexisent client on ".$server->fileno." $conserver \n"); + } else { + if($DEBUG) { + &logthis("New client fd = ".$client->fileno."\n"); + } + $servers{$client->fileno} = $client; + nonblock($client); + $client->sockopt(SO_KEEPALIVE, 1); # Enable monitoring of + # connection liveness. + } + } + HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready); + HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer, + \%ready); +# -------------------------------------------------------- Wow, connection lost - # test whether the data in the buffer or the data we - # just read means there is a complete request waiting - # to be fulfilled. If there is, set $ready{$client} - # to the requests waiting to be fulfilled. - while ($inbuffer{$client} =~ s/(.*\n)//) { - push( @{$ready{$client}}, $1 ); - } - } +} + } - - # Any complete requests to process? - foreach $client (keys %ready) { - handle($client); +} + +# ------------------------------------------------------- End of make_new_child + + +# +# Make a vector of file descriptors to wait for in a select. +# parameters: +# \%fdhash -reference to a hash which has IO::Socket's as indices. +# We only care about the indices, not the values. +# A select vector is created from all indices of the hash. + +sub MakeFileVector +{ + my $fdhash = shift; + my $selvar = ""; + + foreach $socket (keys %$fdhash) { + if($DEBUG) { + &logthis("Adding ".$socket. + "to select vector. (client)\n"); + } + vec($selvar, $socket, 1) = 1; } - - # Buffers to flush? 
- foreach $client ($select->can_write(1)) { - # Skip this client if we have nothing to say - next unless exists $outbuffer{$client}; - - $rv = $client->send($outbuffer{$client}, 0); - - unless ($outbuffer{$client} eq "con_lost\n") { - unless (defined $rv) { - # Whine, but move on. - &logthis("I was told I could write, but I can't.\n"); - next; - } - $errno=$!; - if (($rv == length $outbuffer{$client}) || - ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) { - substr($outbuffer{$client}, 0, $rv) = ''; - delete $outbuffer{$client} unless length $outbuffer{$client}; - } else { - # Couldn't write all the data, and it wasn't because - # it would have blocked. Shutdown and move on. + return $selvar; +} - &logthis("Dropping data with ".$errno.": ". - length($outbuffer{$client}).", $rv"); - delete $inbuffer{$client}; - delete $outbuffer{$client}; - delete $ready{$client}; - - $select->remove($client); - close($client); - next; - } - } else { -# -------------------------------------------------------- Wow, connection lost - &logthis( - "CRITICAL: Closing connection"); - &status("Connection lost"); - $remotesock->shutdown(2); - &logthis("Attempting to open new connection"); - &openremote($conserver); - } +# +# HandleOutput: +# Processes output on a buffered set of file descriptors which are +# ready to be read. +# Parameters: +# $selvector - Vector of file descriptors which are writable. +# \%sockets - Vector of socket references indexed by socket. +# \%buffers - Reference to a hash containing output buffers. +# Hashes are indexed by sockets. The file descriptors of some +# of those sockets will be present in $selvector. +# For each one of those, we will attempt to write the output +# buffer to the socket. Note that we will assume that +# the sockets are being run in non blocking mode. +# \%inbufs - Reference to hash containing input buffers. +# \%readys - Reference to hash containing flags for items with complete +# requests. 
+# +sub HandleOutput +{ + my $selvector = shift; + my $sockets = shift; + my $buffers = shift; + my $inbufs = shift; + my $readys = shift; + my $sock; + + if($DEBUG) { + &logthis("HandleOutput entered\n"); } - + + foreach $sock (keys %$sockets) { + my $socket = $sockets->{$sock}; + if(vec($selvector, $sock, 1)) { # $socket is writable. + if($DEBUG) { + &logthis("Sending $buffers->{$sock} \n"); + } + my $rv = $socket->send($buffers->{$sock}, 0); + $errno = $!; + unless ($buffers->{$sock} eq "con_lost\n") { + unless (defined $rv) { # Write failed... could be EINTR + unless ($errno == POSIX::EINTR) { + &logthis("Write failed on writable socket"); + } # EINTR is not an error .. just retry. + next; + } + if( ($rv == length $buffers->{$sock}) || + ($errno == POSIX::EWOULDBLOCK) || + ($errno == POSIX::EAGAIN) || # same as above. + ($errno == POSIX::EINTR) || # signal during IO + ($errno == 0)) { + substr($buffers->{$sock}, 0, $rv)=""; # delete written part + delete $buffers->{$sock} unless length $buffers->{$sock}; + } else { + # For some reason the write failed with an error code + # we didn't look for. Shutdown the socket. + &logthis("Unable to write data with ".$errno.": ". + "Dropping data: ".length($buffers->{$sock}). + ", $rv"); + # + # kill off the buffers in the hash: + + delete $buffers->{$sock}; + delete $inbufs->{$sock}; + delete $readys->{$sock}; + + close($socket); # Close the client socket. + next; + } + } else { # Kludgy way to mark lond connection lost. + &logthis( + "CRITICAL lond connection lost"); + status("Connection lost"); + $remotesock->shutdown(2); + &logthis("Attempting to open a new connection"); + &openremote($conserver); + } + + } + } + } +# +# HandleInput - Deals with input on client sockets. +# Each socket has an associated input buffer. +# For each readable socket, the currently available +# data is appended to this buffer. +# If necessary, the buffer is created. +# On various failures, we may shutdown the client. 
+# Parameters: +# $selvec - Vector of readable sockets. +# \%sockets - Refers to the Hash of sockets indexed by sockets. +# Each of these may or may not have it's fd bit set +# in the $selvec. +# \%ibufs - Refers to the hash of input buffers indexed by socket. +# \%obufs - Hash of output buffers indexed by socket. +# \%ready - Hash of ready flags indicating the existence of a completed +# Request. +sub HandleInput +{ + + # Marshall the parameters. Note that the hashes are actually + # references not values. + + my $selvec = shift; + my $sockets = shift; + my $ibufs = shift; + my $obufs = shift; + my $ready = shift; + my $sock; + + if($DEBUG) { + &logthis("Entered HandleInput\n"); + } + foreach $sock (keys %$sockets) { + my $socket = $sockets->{$sock}; + if(vec($selvec, $sock, 1)) { # Socket which is readable. + + # Attempt to read the data and do error management. + my $data = ''; + my $rv = $socket->recv($data, POSIX::BUFSIZ, 0); + if($DEBUG) { + &logthis("Received $data from socket"); + } + unless (defined($rv) && length $data) { + + # Read an end of file.. this is a disconnect from the peer. + + delete $sockets->{$sock}; + delete $ibufs->{$sock}; + delete $obufs->{$sock}; + delete $ready->{$sock}; + + status("Idle"); + close $socket; + next; + } + # Append the read data to the input buffer. If the buffer + # now contains a \n the request is complete and we can + # mark this in the $ready hash (one request for each \n.) + + $ibufs->{$sock} .= $data; + while($ibufs->{$sock} =~ s/(.*\n)//) { + push(@{$ready->{$sock}}, $1); + } + + } + } + # Now handle any requests which are ready: + + foreach $client (keys %ready) { + handle($client); + } } -# ------------------------------------------------------- End of make_new_child +# DoSelect: does a select with no timeout. On signal (errno == EINTR), +# the select is retried until there are items in the returned +# vectors. 
+# +# Parameters: +# \$readvec - Reference to a vector of file descriptors to +# check for readability. +# \$writevec - Reference to a vector of file descriptors to check for +# writability. +# On exit, the referents are modified with vectors indicating which +# file handles are readable/writable. +# +sub DoSelect { + my $readvec = shift; + my $writevec= shift; + my $outs; + my $ins; + + while (1) { + my $nfds = select( $ins = $$readvec, $outs = $$writevec, undef, undef); + if($nfds) { + if($DEBUG) { + &logthis("select exited with ".$nfds." fds\n"); + &logthis("ins = ".unpack("b*",$ins). + " readvec = ".unpack("b*",$$readvec)."\n"); + &logthis("outs = ".unpack("b*",$outs). + " writevec = ".unpack("b*",$$writevec)."\n"); + } + $$readvec = $ins; + $$writevec = $outs; + return; + } else { + if($DEBUG) { + &logthis("Select exited with no bits set in mask\n"); + } + die "Select failed" unless $! == EINTR; + } + } +} # handle($socket) deals with all pending requests for $client +# sub handle { # requests are in $ready{$client} # send output to $outbuffer{$client} @@ -459,7 +642,7 @@ sub handle { $request="enc:$cmdlength:$encrequest"; } # --------------------------------------------------------------- Main exchange - $answer = londtransaction($remotesock, $request, 300); + $answer = londtransaction($remotesock, $request, 60); if($DEBUG) { &logthis(" Request data exchange complete"); @@ -505,7 +688,6 @@ sub handle { } } # ---------------------------------------------------------- End make_new_child -} # nonblock($socket) puts socket into nonblocking mode sub nonblock { @@ -525,75 +707,87 @@ sub openremote { my $conserver=shift; -&status("Opening TCP"); + &status("Opening TCP $conserver"); my $st=120+int(rand(240)); # Sleep before opening: -unless ( - $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, - PeerPort => $perlvar{'londPort'}, - Proto => "tcp", - Type => SOCK_STREAM) - ) { - - &logthis( -"WARNING: Couldn't connect to $conserver ($st secs): "); - 
sleep($st); - exit; - }; + unless ( + $remotesock = IO::Socket::INET->new(PeerAddr => $hostname{$conserver}, + PeerPort => $perlvar{'londPort'}, + Proto => "tcp", + Type => SOCK_STREAM) + ) { + + &logthis( + "WARNING: Couldn't connect to $conserver ($st secs): "); + sleep($st); + exit; + }; # ----------------------------------------------------------------- Init dialog -&logthis("INFO Connected to $conserver, initing "); -&status("Init dialogue: $conserver"); + &logthis("INFO Connected to $conserver, initing"); + &status("Init dialogue: $conserver"); $answer = londtransaction($remotesock, "init", 60); chomp($answer); $answer = londtransaction($remotesock, $answer, 60); chomp($answer); - - if ($@=~/timeout/) { - &logthis("Timed out during init.. exiting"); - exit; - } -if ($answer ne 'ok') { - &logthis("Init reply: >$answer<"); - my $st=120+int(rand(240)); - &logthis( -"WARNING: Init failed ($st secs)"); - sleep($st); - exit; -} + if ($@=~/timeout/) { + &logthis("Timed out during init.. exiting"); + exit; + } -sleep 5; -&status("Ponging"); -print $remotesock "pong\n"; -$answer=<$remotesock>; -chomp($answer); -if ($answer!~/^$conserver/) { - &logthis("Pong reply: >$answer<"); -} + if ($answer ne 'ok') { + &logthis("Init reply: >$answer<"); + my $st=120+int(rand(240)); + &logthis("WARNING: Init failed ($st secs)"); + sleep($st); + exit; + } + + $answer = londtransaction($remotesock,"sethost:$conserver",60); + chomp($answer); + if ( $answer ne 'ok') { + &logthis('WARNING: unable to specify remote host'. + $answer.''); + } + + $answer = londtransaction($remotesock,"version:$VERSION",60); + chomp($answer); + if ($answer =~ /^version:/) { + $remoteVERSION=(split(/:/,$answer))[1]; + } else { + &logthis('WARNING: request remote version failed :'. 
+ $answer.': my version is :'.$VERSION.':'); + } + + sleep 5; + &status("Ponging $conserver"); + $answer= londtransaction($remotesock,"pong",60); + chomp($answer); + if ($answer!~/^$conserver/) { + &logthis("Pong reply: >$answer<"); + } # ----------------------------------------------------------- Initialize cipher -&status("Initialize cipher"); -print $remotesock "ekey\n"; -my $buildkey=<$remotesock>; -my $key=$conserver.$perlvar{'lonHostID'}; -$key=~tr/a-z/A-Z/; -$key=~tr/G-P/0-9/; -$key=~tr/Q-Z/0-9/; -$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; -$key=substr($key,0,32); -my $cipherkey=pack("H32",$key); -if ($cipher=new IDEA $cipherkey) { - &logthis("Secure connection initialized"); -} else { - my $st=120+int(rand(240)); - &logthis( - "WARNING: ". - "Could not establish secure connection ($st secs)!"); - sleep($st); - exit; -} + &status("Initialize cipher"); + my $buildkey=londtransaction($remotesock,"ekey",60); + my $key=$conserver.$perlvar{'lonHostID'}; + $key=~tr/a-z/A-Z/; + $key=~tr/G-P/0-9/; + $key=~tr/Q-Z/0-9/; + $key=$key.$buildkey.$key.$buildkey.$key.$buildkey; + $key=substr($key,0,32); + my $cipherkey=pack("H32",$key); + if ($cipher=new IDEA $cipherkey) { + &logthis("Secure connection initialized"); + } else { + my $st=120+int(rand(240)); + &logthis("WARNING: ". + "Could not establish secure connection ($st secs)!"); + sleep($st); + exit; + } &logthis(" Remote open success "); } @@ -675,8 +869,8 @@ sub HUPSMAN { # sig local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children &hangup(); &logthis("CRITICAL: Restarting"); - unlink("$execdir/logs/lonc.pid"); my $execdir=$perlvar{'lonDaemons'}; + unlink("$execdir/logs/lonc.pid"); exec("$execdir/lonc"); # here we go again } @@ -695,8 +889,25 @@ sub checkchildren { sub USRMAN { &logthis("USR1: Trying to establish connections again"); - %childatt=(); - &checkchildren(); + # + # It is really important not to just clear the childatt hash or we will + # lose all memory of the children. 
What we really want to do is this: + # For each index where childatt is >= $childmaxattempts + # Zero the associated counter and do a make_child for the host. + # Regardles, the childatt entry is zeroed: + my $host; + foreach $host (keys %childatt) { + if ($childatt{$host} >= $childmaxattempts) { + $childatt{$host} = 0; + &logthis("INFO: Restarting child for server: " + .$host."\n"); + make_new_child($host); + } + else { + $childatt{$host} = 0; + } + } + &checkchildren(); # See if any children are still dead... } # -------------------------------------------------- Non-critical communication @@ -781,12 +992,12 @@ sub londtransaction { alarm(0); }; } else { - if($DEBUG) { - &logthis("Timeout on send in londtransaction"); - } + &logthis("lonc - $conserver - suiciding on send Timeout"); + die("lonc - $conserver - suiciding on send Timeout"); } - if( ($@ =~ /timeout/) && ($DEBUG)) { - &logthis("Timeout on receive in londtransaction"); + if ($@ =~ /timeout/) { + &logthis("lonc - $conserver - suiciding on read Timeout"); + die("lonc - $conserver - suiciding on read Timeout"); } # # Restore the initial sigmask set. @@ -841,6 +1052,7 @@ sub status { my $now=time; my $local=localtime($now); $status=$local.': '.$what; + $0='lonc: '.$what.' '.$local; } @@ -860,6 +1072,386 @@ is invoked by B. There is no e will manually start B from the command-line. (In other words, DO NOT START B YOURSELF.) +=head1 OVERVIEW + +=head2 Physical Overview + +=begin latex + +\begin{figure} + \begin{center} + \includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram} + \end{center} + \caption{\label{Overview_Of_Network}Overview of Network} +\end{figure} + +=end latex + +Physically, the Network consists of relatively inexpensive +upper-PC-class server machines which are linked through the commodity +internet in a load-balancing, dynamically content-replicating and +failover-secure way. 
+ +All machines in the Network are connected with each other through +two-way persistent TCP/IP connections. Clients (B, B, B and +B in Fig. Overview of Network) connect to the servers via standard +HTTP. There are two classes of servers, B (B and +B in Fig. Overview of Network) and B (B, B, +B and B in Fig. Overview of Network). + +B X X are used to +store all personal records of a set of users, and are responsible for +their initial authentication when a session is opened on any server in +the Network. For Authors, Library Servers also host their +construction area and the authoritative copy of the current and +previous versions of every resource that was published by that +author. Library servers can be used as backups to host sessions when +all access servers in the Network are overloaded. Otherwise, for +learners, access servers are used to host the sessions. Library +servers need to have strong I/O capabilities. + +B X X provide LON-CAPA +service to users, using the library servers as their data source. The +network is designed so that the number of concurrent sessions can be +increased over a wide range by simply adding additional access servers +before having to add additional library servers. Preliminary tests +showed that a library server could handle up to 10 access servers +fully parallel. Access servers can generally be cheaper hardware than +library servers require. + +The Network is divided into B X, which are logical +boundaries between participating institutions. These domains can be +used to limit the flow of personal user information across the +network, set access privileges and enforce royalty schemes. LON-CAPA +domains bear no relationship to any other domain, including domains +used by the DNS system; LON-CAPA domains may be freely configured in +any manner that suits your use pattern. + +=head2 Example Transactions + +Fig. Overview of Network also depicts examples for several kinds of +transactions conducted across the Network. 
+ +An instructor at client B modifies and publishes a resource on her +Home Server B. Server B has a record of all server machines +currently subscribed to this resource, and replicates it to servers +B and B. However, server B is currently offline, so the +update notification gets buffered on B until B comes online +again. Servers B and B are currently not subscribed to this +resource. + +Learners B and B have open sessions on server B, and the new +resource is immediately available to them. + +Learner B tries to connect to server B for a new session, +however, the machine is not reachable, so he connects to another +Access Server B instead. This server currently does not have all +necessary resources locally present to host learner B, but +subscribes to them and replicates them as they are accessed by B. + +Learner B solves a problem on server B. Library Server B is +B's Home Server, so this information gets forwarded to B, where +the records of H are updated. + +=head2 lond, lonc, and lonnet + +=begin latex + +\begin{figure} +\includegraphics[width=0.65\paperwidth,keepaspectratio]{LONCAPA_Network_Diagram2} + \caption{\label{Overview_Of_Network_Communication}Overview of +Network Communication} \end{figure} + +=end latex + +Fig. Overview of Network Communication elaborates on the details of +this network infrastructure. It depicts three servers (B, B and +B) and a client who has a session on server B. + +As B accesses different resources in the system, different +handlers, which are incorporated as modules into the child processes +of the web server software, process these requests. + +Our current implementation uses C inside of the Apache web +server software. As an example, server B currently has four active +web server software child processes. The chain of handlers dealing +with a certain resource is determined by both the server content +resource area (see below) and the MIME type, which in turn is +determined by the URL extension. 
For most URL structures, both an +authentication handler and a content handler are registered. + +Handlers use a common library C X to interact with +both locally present temporary session data and data across the server +network. For example, lonnet provides routines for finding the home +server of a user, finding the server with the lowest loadavg, sending +simple command-reply sequences, and sending critical messages such as +a homework completion, etc. For a non-critical message, the routines +reply with a simple "connection lost" if the message could not be +delivered. For critical messages, lonnet tries to re-establish +connections, re-send the command, etc. If no valid reply could be +received, it answers "connection deferred" and stores the message in +buffer space to be sent at a later point in time. Also, failed +critical messages are logged. + +The interface between C and the Network is established by a +multiplexed UNIX domain socket, denoted B in Fig. Overview of +Network Communication. The rationale behind this rather involved +architecture is that httpd processes (Apache children) dynamically +come and go on the timescale of minutes, based on workload and number +of processed requests. Over the lifetime of an httpd child, however, +it has to establish several hundred connections to several different +servers in the Network. + +On the other hand, establishing a TCP/IP connection is resource +consuming for both ends of the line, and to optimize this connectivity +between different servers, connections in the Network are designed to +be persistent on the timescale of months, until either end is +rebooted. This mechanism will be elaborated on below. 
+ +=begin latex + +\begin{figure} +\begin{lyxcode} +msul1:msu:library:zaphod.lite.msu.edu:35.8.63.51 + +msua1:msu:access:agrajag.lite.msu.edu:35.8.63.68 + +msul2:msu:library:frootmig.lite.msu.edu:35.8.63.69 + +msua2:msu:access:bistromath.lite.msu.edu:35.8.63.67 + +hubl14:hub:library:hubs128-pc-14.cl.msu.edu:35.8.116.34 + +hubl15:hub:library:hubs128-pc-15.cl.msu.edu:35.8.116.35 + +hubl16:hub:library:hubs128-pc-16.cl.msu.edu:35.8.116.36 + +huba20:hub:access:hubs128-pc-20.cl.msu.edu:35.8.116.40 + +huba21:hub:access:hubs128-pc-21.cl.msu.edu:35.8.116.41 + +huba22:hub:access:hubs128-pc-22.cl.msu.edu:35.8.116.42 + +huba23:hub:access:hubs128-pc-23.cl.msu.edu:35.8.116.43 + +hubl25:other:library:hubs128-pc-25.cl.msu.edu:35.8.116.45 + +huba27:other:access:hubs128-pc-27.cl.msu.edu:35.8.116.47 +\end{lyxcode} + +\caption{\label{Example_Of_hosts.tab}Example of Hosts Lookup table\texttt{/home/httpd/lonTabs/hosts.tab}} +\end{figure} + +=end latex + +Establishing a connection to a UNIX domain socket is far less resource +consuming than the establishing of a TCP/IP connection. C +X is a proxy daemon that forks off a child for every server in +the Network. Which servers are members of the Network is determined by +a lookup table, such as the one in Fig. Examples of Hosts. In order, +the entries denote an internal name for the server, the domain of the +server, the type of the server, the host name and the IP address. + +The C parent process maintains the population and listens for +signals to restart or shutdown, as well as I. Every child +establishes a multiplexed UNIX domain socket for its server and opens +a TCP/IP connection to the lond daemon (discussed below) on the remote +machine, which it keeps alive. If the connection is interrupted, the +child dies, whereupon the parent makes several attempts to fork +another child for that server. 
+ +When starting a new child (a new connection), first an init-sequence +is carried out, which includes receiving the information from the +remote C which is needed to establish the 128-bit encryption key +- the key is different for every connection. Next, any buffered +(delayed) messages for the server are sent. + +In normal operation, the child listens to the UNIX socket, forwards +requests to the TCP connection, gets the reply from C, and sends +it back to the UNIX socket. Also, C takes care of the encryption +and decryption of messages. + +C X is the remote end of the TCP/IP connection and acts as +a remote command processor. It receives commands, executes them, and +sends replies. In normal operation, a C child is constantly +connected to a dedicated C child on the remote server, and the +same is true vice versa (two persistent connections per server +combination). + +lond listens to a TCP/IP port (denoted B

in Fig. Overview of +Network Communication) and forks off enough child processes to have +one for each other server in the network plus two spare children. The +parent process maintains the population and listens for signals to +restart or shutdown. Client servers are authenticated by IP. + +When a new client server comes online, C sends a signal I +to lonc, whereupon C tries again to reestablish all lost +connections, even if it had given up on them before - a new client +connecting could mean that that machine came online again after an +interruption. + +The gray boxes in Fig. Overview of Network Communication denote the +entities involved in an example transaction of the Network. The Client +is logged into server B, while server B is her Home +Server. Server B can be an access server or a library server, while +server B is a library server. She submits a solution to a homework +problem, which is processed by the appropriate handler for the MIME +type "problem". Through C, the handler writes information +about this transaction to the local session data. To make a permanent +log entry, C establishes a connection to the UNIX domain +socket for server B. C receives this command, encrypts it, +and sends it through the persistent TCP/IP connection to the TCP/IP +port of the remote C. C decrypts the command, executes it +by writing to the permanent user data files of the client, and sends +back a reply regarding the success of the operation. If the operation +was unsuccessful, or the connection would have broken down, C +would write the command into a FIFO buffer stack to be sent again +later. C now sends a reply regarding the overall success of the +operation to C via the UNIX domain port, which is eventually +received back by the handler. + +=head2 Dynamic Resource Replication + +Since resources are assembled into higher order resources simply by +reference, in principle it would be sufficient to retrieve them from +the respective Home Servers of the authors. 
However, there are several +problems with this simple approach: since the resource assembly +mechanism is designed to facilitate content assembly from a large +number of widely distributed sources, individual sessions would depend +on a large number of machines and network connections to be available, +thus be rather fragile. Also, frequently accessed resources could +potentially drive individual machines in the network into overload +situations. + +Finally, since most resources depend on content handlers on the Access +Servers to be served to a client within the session context, the raw +source would first have to be transferred across the Network from the +respective Library Server to the Access Server, processed there, and +then transferred on to the client. + +=begin latex + +\begin{figure} +\includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Request} + \caption{\label{Dynamic_Replication}Dynamic Replication} +\end{figure} + +=end latex + +To enable resource assembly in a reliable and scalable way, a dynamic +resource replication scheme was developed. Fig. "Dynamic Replication" +shows the details of this mechanism. + +Anytime a resource out of the resource space is requested, a handler +routine is called which in turn calls the replication routine. As a +first step, this routine determines whether or not the resource is +currently in replication transfer (Step B). During replication +transfer, the incoming data is stored in a temporary file, and Step +B checks for the presence of that file. If transfer of a resource +is actively going on, the controlling handler receives an error +message, waits for a few seconds, and then calls the replication +routine again. If the resource is still in transfer, the client will +receive the message "Service currently not available". + +In the next step (Step B), the replication routine checks if the +URL is locally present. 
If it is, the replication routine returns OK +to the controlling handler, which in turn passes the request on to the +next handler in the chain. + +If the resource is not locally present, the Home Server of the +resource author (as extracted from the URL) is determined (Step +B). This is done by contacting all library servers in the author's +domain (as determined from the lookup table, see Fig. 1.1.2B). In Step +B a query is sent to the remote server whether or not it is the +Home Server of the author (in our current implementation, an +additional cache is used to store already identified Home Servers (not +shown in the figure)). In Step B, the remote server answers the +query with True or False. If the Home Server was found, the routine +continues, otherwise it contacts the next server (Step D2a). If no +server could be found, a "File not Found" error message is issued. In +our current implementation, in this step the Home Server is also +written into a cache for faster access if resources by the same author +are needed again (not shown in the figure). + +=begin latex + +\begin{figure} +\includegraphics[width=0.75\paperwidth,keepaspectratio]{Dynamic_Replication_Change} + \caption{\label{Dynamic_Replication_Change}Dynamic Replication: Change} \end{figure} + +=end latex + +In Step B, the routine sends a subscribe command for the URL to +the Home Server of the author. The Home Server first determines if the +resource is present, and if the access privileges allow it to be +copied to the requesting server (B). If this is true, the +requesting server is added to the list of subscribed servers for that +resource (Step B). The Home Server will reply with either OK or +an error message, which is determined in Step D4. If the remote +resource was not present, the error message "File not Found" will be +passed on to the client; if the access was not allowed, the error +message "Access Denied" is passed on. 
If the operation succeeded, the +requesting server sends an HTTP request for the resource out of the +C server content resource area of the Home Server. + +The Home Server will then check if the requesting server is part of +the network, and if it is subscribed to the resource (Step B). If +it is, it will send the resource via HTTP to the requesting server +without any content handlers processing it (Step B). The +requesting server will store the incoming data in a temporary data +file (Step B) - this is the file that Step B checks for. If +the transfer could not complete, an appropriate error message is sent +to the client (Step B). Otherwise, the transferred temporary file +is renamed as the actual resource, and the replication routine returns +OK to the controlling handler (Step B). + +Fig. "Dynamic Replication: Change" depicts the process of modifying a +resource. When an author publishes a new version of a resource, the +Home Server will contact every server currently subscribed to the +resource (Step B), as determined from the list of subscribed +servers for the resource generated in Step B. The subscribing +servers will receive and acknowledge the update message (Step +B). The update mechanism finishes when the last subscribed server +has been contacted (messages to unreachable servers are buffered). + +Each subscribing server will check if the resource in question had +been accessed recently, that is, within a configurable amount of time +(Step B). + +If the resource had not been accessed recently, the local copy of the +resource is deleted (Step B) and an unsubscribe command is sent +to the Home Server (Step B). The Home Server will check if the +server had indeed originally subscribed to the resource (Step B) +and then delete the server from the list of subscribed servers for the +resource (Step B). 
+ +If the resource had been accessed recently, the modified resource will +be copied over using the same mechanism as in Step B through +B, which represents Steps B through B in the +replication figure. + +=head2 Load Balancing + +XC provides a function to query the server's current loadavg. As +a configuration parameter, one can determine the value of loadavg, +which is to be considered 100%, for example, 2.00. + +Access servers can have a list of spare access servers, +C, to offload sessions depending on +their own workload. This check is done by the login handler. It +re-directs the login information and session to the least busy spare +server if it is itself overloaded. An additional round-robin IP scheme is +possible. See Fig. "Load Balancing Sample" for an example of a +load-balancing scheme. + +=begin latex + +\begin{figure} +\includegraphics[width=0.75\paperwidth,keepaspectratio]{Load_Balancing_Example} + \caption{\label{Load_Balancing_Example}Load Balancing Example} \end{figure} + +=end latex + =head1 DESCRIPTION Provides persistent TCP connections to the other servers in the network @@ -869,10 +1461,10 @@ B forks off children processes tha in the network. Management of these processes can be done at the parent process level or the child process level. - After forking off the children, B the B -executes a main loop which simply waits for processes to exit. -As a process exits, a new process managing a link to the same -peer as the exiting process is created. +After forking off the children, B the B executes a main +loop which simply waits for processes to exit. As a process exits, a +new process managing a link to the same peer as the exiting process is +created. B is the location of log messages. @@ -952,25 +1544,4 @@ each connection is logged. 
=back -=head1 PREREQUISITES - -POSIX -IO::Socket -IO::Select -IO::File -Socket -Fcntl -Tie::RefHash -Crypt::IDEA - -=head1 COREQUISITES - -=head1 OSNAMES - -linux - -=head1 SCRIPT CATEGORIES - -Server/Process - =cut