--- loncom/Attic/lonc 2001/11/29 18:57:46 1.22 +++ loncom/Attic/lonc 2002/10/30 14:50:04 1.43 @@ -5,7 +5,7 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # -# $Id: lonc,v 1.22 2001/11/29 18:57:46 www Exp $ +# $Id: lonc,v 1.43 2002/10/30 14:50:04 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -39,11 +39,19 @@ # 2/8,7/25 Gerd Kortemeyer # 12/05 Scott Harrison # 12/05 Gerd Kortemeyer +# YEAR=2001 # 01/10/01 Scott Harrison # 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer -# +# 12/20 Scott Harrison +# YEAR=2002 +# 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer +# 3/07/02 Ron Fox # based on nonforker from Perl Cookbook # - server who multiplexes without forking +# 5/11/2002 Scott Harrison + +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration; use POSIX; use IO::Socket; @@ -53,42 +61,28 @@ use Socket; use Fcntl; use Tie::RefHash; use Crypt::IDEA; +#use Net::Ping; +use LWP::UserAgent(); -my $status=''; -my $lastlog=''; - -# grabs exception and records it to log before exiting -sub catchexception { - my ($signal)=@_; - $SIG{'QUIT'}='DEFAULT'; - $SIG{__DIE__}='DEFAULT'; - &logthis("CRITICAL: " - ."ABNORMAL EXIT. Child $$ for server $wasserver died through " - ."\"$signal\" with this parameter->[$@]"); - die($@); -} - -$childmaxattempts=5; +$status=''; +$lastlog=''; +$conserver='SHELL'; +$DEBUG = 0; # Set to 1 for annoyingly complete logs. # -------------------------------- Set signal handlers to record abnormal exits -$SIG{'QUIT'}=\&catchexception; +&status("Init exception handlers"); +$SIG{QUIT}=\&catchexception; $SIG{__DIE__}=\&catchexception; -# ------------------------------------ Read httpd access.conf and get variables - -open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; - -while ($configline=) { - if ($configline =~ /PerlSetVar/) { - my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); - chomp($varvalue); - $perlvar{$varname}=$varvalue; - } -} -close(CONFIG); +# ---------------------------------- Read loncapa_apache.conf and loncapa.conf +&status("Read loncapa.conf and loncapa_apache.conf"); +my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); +my %perlvar=%{$perlvarref}; +undef $perlvarref; # ----------------------------- Make sure this process is running from user=www +&status("Check user ID"); my $wwwid=getpwnam('www'); if ($wwwid!=$<) { $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; @@ -116,8 +110,12 @@ open (CONFIG,"$perlvar{'lonTabDir'}/host while ($configline=) { my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); chomp($ip); - $hostip{$id}=$ip; + if ($ip) { + $hostip{$id}=$ip; + $hostname{$id}=$name; + } } + close(CONFIG); # -------------------------------------------------------- Routines for forking @@ -129,158 +127,20 @@ close(CONFIG); %childatt = (); # number of attempts to start server # for ID -sub REAPER { # takes care of dead children - $SIG{CHLD} = \&REAPER; - my $pid = wait; - my $wasserver=$children{$pid}; - &logthis("CRITICAL: " - ."Child $pid for server $wasserver died ($childatt{$wasserver})"); - delete $children{$pid}; - delete $childpid{$wasserver}; - my $port = "$perlvar{'lonSockDir'}/$wasserver"; - unlink($port); -} - -sub HUNTSMAN { # signal handler for SIGINT - local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children - map { - $wasserver=$children{$_}; - &status("Closing $wasserver"); - &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); - &status("Kill PID $_ for $wasserver"); - kill ('INT',$_); - } keys %children; - my $execdir=$perlvar{'lonDaemons'}; - unlink("$execdir/logs/lonc.pid"); - &logthis("CRITICAL: Shutting down"); - exit; # clean up with dignity -} - -sub HUPSMAN { # signal handler for SIGHUP - local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children - map { - $wasserver=$children{$_}; - &status("Closing $wasserver"); - &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); - &status("Kill PID $_ for $wasserver"); - kill ('INT',$_); - } keys %children; - &logthis("CRITICAL: Restarting"); - unlink("$execdir/logs/lonc.pid"); - my $execdir=$perlvar{'lonDaemons'}; - exec("$execdir/lonc"); # here we go again -} - -sub checkchildren { - &initnewstatus(); - &logstatus(); - &logthis('Going to check on the children'); - map { - sleep 1; - unless (kill 'USR1' => $_) { - &logthis ('Child '.$_.' is dead'); - &logstatus($$.' is dead'); - } - } sort keys %children; -} - -sub USRMAN { - &logthis("USR1: Trying to establish connections again"); - foreach $thisserver (keys %hostip) { - $answer=subreply("ping",$thisserver); - &logthis("USR1: Ping $thisserver " - ."(pid >$childpid{$thisserver}<, $childatt{thisserver} attempts): " - ." >$answer<"); - } - %childatt=(); - &checkchildren(); -} - -# -------------------------------------------------- Non-critical communication -sub subreply { - my ($cmd,$server)=@_; - my $answer=''; - if ($server ne $perlvar{'lonHostID'}) { - my $peerfile="$perlvar{'lonSockDir'}/$server"; - my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", - Type => SOCK_STREAM, - Timeout => 10) - or return "con_lost"; - - - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(10); - print $sclient "$cmd\n"; - $answer=<$sclient>; - chomp($answer); - alarm(0); - }; - if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; } - $SIG{ALRM}='DEFAULT'; - $SIG{__DIE__}=\&catchexception; - } else { $answer='self_reply'; } - return $answer; -} - -# --------------------------------------------------------------------- Logging - -sub logthis { - my $message=shift; - my $execdir=$perlvar{'lonDaemons'}; - my $fh=IO::File->new(">>$execdir/logs/lonc.log"); - my $now=time; - my $local=localtime($now); - $lastlog=$local.': '.$message; - print $fh "$local ($$): $message\n"; -} - - -sub logperm { - my $message=shift; - my $execdir=$perlvar{'lonDaemons'}; - my $now=time; - my $local=localtime($now); - my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); - print $fh "$now:$message:$local\n"; -} -# ------------------------------------------------------------------ Log status - -sub logstatus { - my $docdir=$perlvar{'lonDocRoot'}; - my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt"); - print $fh $$."\t".$status."\t".$lastlog."\n"; -} - -sub initnewstatus { - my $docdir=$perlvar{'lonDocRoot'}; - my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt"); - my $now=time; - my $local=localtime($now); - print $fh "LONC status $local - parent $$\n\n"; -} - -# -------------------------------------------------------------- Status setting - -sub status { - my $what=shift; - my $now=time; - my $local=localtime($now); - $status=$local.': '.$what; -} - +$childmaxattempts=5; # ---------------------------------------------------- Fork once and dissociate - +&status("Fork and dissociate"); $fpid=fork; exit if $fpid; die "Couldn't fork: $!" unless defined ($fpid); POSIX::setsid() or die "Can't start new session: $!"; -# ------------------------------------------------------- Write our PID on disk +$conserver='PARENT'; +# ------------------------------------------------------- Write our PID on disk +&status("Write PID"); $execdir=$perlvar{'lonDaemons'}; open (PIDSAVE,">$execdir/logs/lonc.pid"); print PIDSAVE "$$\n"; @@ -296,43 +156,66 @@ $SIG{HUP}=$SIG{USR1}='IGNORE'; &status("Forking ..."); foreach $thisserver (keys %hostip) { - make_new_child($thisserver); + #if (&online($hostname{$thisserver})) { + make_new_child($thisserver); + #} } &logthis("Done starting initial servers"); # ----------------------------------------------------- Install signal handlers -$SIG{CHLD} = \&REAPER; + $SIG{INT} = $SIG{TERM} = \&HUNTSMAN; $SIG{HUP} = \&HUPSMAN; $SIG{USR1} = \&USRMAN; # And maintain the population. while (1) { - &status("Sleeping"); - sleep; # wait for a signal (i.e., child's death) - # See who died and start new one + my $deadpid = wait; # Wait for the next child to die. + # See who died and start new one + # or a signal (e.g. USR1 for restart). + # if a signal, the wait will fail + # This is ordinarily detected by + # checking for the existence of the + # pid index inthe children hash since + # the return value from a failed wait is -1 + # which is an impossible PID. &status("Woke up"); - foreach $thisserver (keys %hostip) { - if (!$childpid{$thisserver}) { - if ($childatt{$thisserver}<$childmaxattempts) { - $childatt{$thisserver}++; - &logthis( - "INFO: Trying to reconnect for $thisserver " - ."($childatt{$thisserver} of $childmaxattempts attempts)"); - make_new_child($thisserver); - } - } + my $skipping=''; + + if(exists($children{$deadpid})) { + + $thisserver = $children{$deadpid}; # Look name of dead guy's peer. + + delete($children{$deadpid}); # Get rid of dead hash entry. + + if($childatt{$thisserver} < $childmaxattempts) { + $childatt{$thisserver}++; + &logthis( + "INFO: Trying to reconnect for $thisserver " + ."($childatt{$thisserver} of $childmaxattempts attempts)"); + make_new_child($thisserver); + + } + else { + $skipping .= $thisserver.' '; + } + if($skipping) { + &logthis("WARNING: Skipped $skipping"); + + } } + } + sub make_new_child { - my $conserver=shift; + $newserver=shift; my $pid; my $sigset; - &logthis("Attempting to start child for server $conserver"); + &logthis("Attempting to start child for server $newserver"); # block signal for fork $sigset = POSIX::SigSet->new(SIGINT); sigprocmask(SIG_BLOCK, $sigset) @@ -344,10 +227,11 @@ sub make_new_child { # Parent records the child's birth and returns. sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; - $children{$pid} = $conserver; - $childpid{$conserver} = $pid; + $children{$pid} = $newserver; + $childpid{$newserver} = $pid; return; } else { + $conserver=$newserver; # Child can *not* return from this subroutine. $SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before $SIG{USR1}= \&logstatus; @@ -362,95 +246,23 @@ $port = "$perlvar{'lonSockDir'}/$conserv unlink($port); -# ---------------------------------------------------- Client to network server - -&status("Opening TCP: $conserver"); - -unless ( - $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, - PeerPort => $perlvar{'londPort'}, - Proto => "tcp", - Type => SOCK_STREAM) - ) { - my $st=120+int(rand(240)); - &logthis( -"WARNING: Couldn't connect $conserver ($st secs): $@"); - sleep($st); - exit; - }; -# ----------------------------------------------------------------- Init dialog - -&status("Init dialogue: $conserver"); - - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(60); -print $remotesock "init\n"; -$answer=<$remotesock>; -print $remotesock "$answer"; -$answer=<$remotesock>; -chomp($answer); - alarm(0); - }; - $SIG{ALRM}='DEFAULT'; - $SIG{__DIE__}=\&catchexception; - - if ($@=~/timeout/) { - &logthis("Timed out during init: $conserver"); - exit; - } - - -&logthis("Init reply for $conserver: >$answer<"); -if ($answer ne 'ok') { - my $st=120+int(rand(240)); - &logthis( -"WARNING: Init failed $conserver ($st secs)"); - sleep($st); - exit; -} -sleep 5; -&status("Ponging $conserver"); -print $remotesock "pong\n"; -$answer=<$remotesock>; -chomp($answer); -&logthis("Pong reply for $conserver: >$answer<"); -# ----------------------------------------------------------- Initialize cipher - -&status("Initialize cipher: $conserver"); -print $remotesock "ekey\n"; -my $buildkey=<$remotesock>; -my $key=$conserver.$perlvar{'lonHostID'}; -$key=~tr/a-z/A-Z/; -$key=~tr/G-P/0-9/; -$key=~tr/Q-Z/0-9/; -$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; -$key=substr($key,0,32); -my $cipherkey=pack("H32",$key); -if ($cipher=new IDEA $cipherkey) { - &logthis("Secure connection initialized: $conserver"); -} else { - my $st=120+int(rand(240)); - &logthis( - "WARNING: ". - "Could not establish secure connection, $conserver ($st secs)!"); - sleep($st); - exit; -} +# -------------------------------------------------------------- Open other end +&openremote($conserver); + &logthis(" Connection to $conserver open "); # ----------------------------------------- We're online, send delayed messages &status("Checking for delayed messages"); + my @allbuffered; my $path="$perlvar{'lonSockDir'}/delayed"; opendir(DIRHANDLE,$path); @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE; closedir(DIRHANDLE); my $dfname; - map { - &status("Sending delayed $conserver $_"); + foreach (@allbuffered) { + &status("Sending delayed: $_"); $dfname="$path/$_"; - &logthis($dfname); + if($DEBUG) { &logthis('Sending '.$dfname); } my $wcmd; { my $dfh=IO::File->new($dfname); @@ -471,27 +283,19 @@ if ($cipher=new IDEA $cipherkey) { } $cmd="enc:$cmdlength:$encrequest\n"; } - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(60); - print $remotesock "$cmd\n"; - $answer=<$remotesock>; + $answer = londtransaction($remotesock, $cmd, 60); chomp($answer); - alarm(0); - }; - $SIG{ALRM}='DEFAULT'; - $SIG{__DIE__}=\&catchexception; if (($answer ne '') && ($@!~/timeout/)) { unlink("$dfname"); - &logthis("Delayed $cmd to $conserver: >$answer<"); + &logthis("Delayed $cmd: >$answer<"); &logperm("S:$conserver:$bcmd"); } - } @allbuffered; + } + if($DEBUG) { &logthis(" Delayed transactions sent"); } # ------------------------------------------------------- Listen to UNIX socket -&status("Opening socket $conserver"); +&status("Opening socket"); unless ( $server = IO::Socket::UNIX->new(Local => $port, Type => SOCK_STREAM, @@ -500,11 +304,11 @@ unless ( my $st=120+int(rand(240)); &logthis( "WARNING: ". - "Can't make server socket $conserver ($st secs): $@"); + "Can't make server socket ($st secs): .. exiting"); sleep($st); exit; }; - + # ----------------------------------------------------------------------------- &logthis("$conserver online"); @@ -514,111 +318,312 @@ unless ( %inbuffer = (); %outbuffer = (); %ready = (); +%servers = (); # To be compatible with make filevector. indexed by + # File ids, values are sockets. + # note that the accept socket is omitted. tie %ready, 'Tie::RefHash'; -nonblock($server); -$select = IO::Select->new($server); +# nonblock($server); +# $select = IO::Select->new($server); # Main loop: check reads/accepts, check writes, check ready to process + +status("Main loop"); while (1) { my $client; my $rv; my $data; - # check for new information on the connections we have + my $infdset; # bit vec of fd's to select on input. - # anything to read or accept? - foreach $client ($select->can_read(0.1)) { + my $outfdset; # Bit vec of fd's to select on output. - if ($client == $server) { - # accept a new connection - &status("Accept new connection: $conserver"); - $client = $server->accept(); - $select->add($client); - nonblock($client); - } else { - # read data - $data = ''; - $rv = $client->recv($data, POSIX::BUFSIZ, 0); - - unless (defined($rv) && length $data) { - # This would be the end of file, so close the client - delete $inbuffer{$client}; - delete $outbuffer{$client}; - delete $ready{$client}; - - &status("Idle $conserver"); - $select->remove($client); - close $client; - next; - } - $inbuffer{$client} .= $data; + $infdset = MakeFileVector(\%servers); + $outfdset= MakeFileVector(\%outbuffer); + vec($infdset, $server->fileno, 1) = 1; + if($DEBUG) { + &logthis("Adding ".$server->fileno. + " to input select vector (listner)". + unpack("b*",$infdset)."\n"); + } + DoSelect(\$infdset, \$outfdset); # Wait for input. + if($DEBUG) { + &logthis("Doselect completed!"); + &logthis("ins = ".unpack("b*",$infdset)."\n"); + &logthis("outs= ".unpack("b*",$outfdset)."\n"); + + } - # test whether the data in the buffer or the data we - # just read means there is a complete request waiting - # to be fulfilled. If there is, set $ready{$client} - # to the requests waiting to be fulfilled. - while ($inbuffer{$client} =~ s/(.*\n)//) { - push( @{$ready{$client}}, $1 ); - } - } + # Checkfor new connections: + if (vec($infdset, $server->fileno, 1)) { + if($DEBUG) { + &logthis("New connection established"); + } + # accept a new connection + &status("Accept new connection: $conserver"); + $client = $server->accept(); + if($DEBUG) { + &logthis("New client fd = ".$client->fileno."\n"); + } + $servers{$client->fileno} = $client; + nonblock($client); } + HandleInput($infdset, \%servers, \%inbuffer, \%outbuffer, \%ready); + HandleOutput($outfdset, \%servers, \%outbuffer, \%inbuffer, + \%ready); +# -------------------------------------------------------- Wow, connection lost - # Any complete requests to process? - foreach $client (keys %ready) { - handle($client); +} + } +} - # Buffers to flush? - foreach $client ($select->can_write(1)) { - # Skip this client if we have nothing to say - next unless exists $outbuffer{$client}; - - $rv = $client->send($outbuffer{$client}, 0); - unless (defined $rv) { - # Whine, but move on. - &logthis("I was told I could write, but I can't.\n"); - next; - } - $errno=$!; - if (($rv == length $outbuffer{$client}) || - ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) { - substr($outbuffer{$client}, 0, $rv) = ''; - delete $outbuffer{$client} unless length $outbuffer{$client}; - } else { - # Couldn't write all the data, and it wasn't because - # it would have blocked. Shutdown and move on. +# ------------------------------------------------------- End of make_new_child - &logthis("Dropping data with ".$errno.": ". - length($outbuffer{$client}).", $rv"); - delete $inbuffer{$client}; - delete $outbuffer{$client}; - delete $ready{$client}; - - $select->remove($client); - close($client); - next; - } +# +# Make a vector of file descriptors to wait for in a select. +# parameters: +# \%fdhash -reference to a hash which has IO::Socket's as indices. +# We only care about the indices, not the values. +# A select vector is created from all indices of the hash. + +sub MakeFileVector +{ + my $fdhash = shift; + my $selvar = ""; + + foreach $socket (keys %$fdhash) { + if($DEBUG) { + &logthis("Adding ".$socket. + "to select vector. (client)\n"); + } + vec($selvar, $socket, 1) = 1; } + return $selvar; } + + +# +# HandleOutput: +# Processes output on a buffered set of file descriptors which are +# ready to be read. +# Parameters: +# $selvector - Vector of file descriptors which are writable. +# \%sockets - Vector of socket references indexed by socket. +# \%buffers - Reference to a hash containing output buffers. +# Hashes are indexed by sockets. The file descriptors of some +# of those sockets will be present in $selvector. +# For each one of those, we will attempt to write the output +# buffer to the socket. Note that we will assume that +# the sockets are being run in non blocking mode. +# \%inbufs - Reference to hash containing input buffers. +# \%readys - Reference to hash containing flags for items with complete +# requests. +# +sub HandleOutput +{ + my $selvector = shift; + my $sockets = shift; + my $buffers = shift; + my $inbufs = shift; + my $readys = shift; + my $sock; + + if($DEBUG) { + &logthis("HandleOutput entered\n"); + } + + foreach $sock (keys %$sockets) { + my $socket = $sockets->{$sock}; + if(vec($selvector, $sock, 1)) { # $socket is writable. + if($DEBUG) { + &logthis("Sending $buffers->{$sock} \n"); + } + my $rv = $socket->send($buffers->{$sock}, 0); + $errno = $!; + unless ($buffers->{$sock} eq "con_lost\n") { + unless (defined $rv) { # Write failed... could be EINTR + unless ($errno == POSIX::EINTR) { + &logthis("Write failed on writable socket"); + } # EINTR is not an error .. just retry. + next; + } + if( ($rv == length $buffers->{$sock}) || + ($errno == POSIX::EWOULDBLOCK) || + ($errno == POSIX::EAGAIN) || # same as above. + ($errno == POSIX::EINTR) || # signal during IO + ($errno == 0)) { + substr($buffers->{$sock}, 0, $rv)=""; # delete written part + delete $buffers->{$sock} unless length $buffers->{$sock}; + } else { + # For some reason the write failed with an error code + # we didn't look for. Shutdown the socket. + &logthis("Unable to write data with ".$errno.": ". + "Dropping data: ".length($buffers->{$sock}). + ", $rv"); + # + # kill off the buffers in the hash: + + delete $buffers->{$sock}; + delete $inbufs->{$sock}; + delete $readys->{$sock}; + + close($socket); # Close the client socket. + next; + } + } else { # Kludgy way to mark lond connection lost. + &logthis( + "CRITICAL lond connection lost"); + status("Connection lost"); + $remotesock->shutdown(2); + &logthis("Attempting to open a new connection"); + &openremote($conserver); + } + + } + } + } +# +# HandleInput - Deals with input on client sockets. +# Each socket has an associated input buffer. +# For each readable socket, the currently available +# data is appended to this buffer. +# If necessary, the buffer is created. +# On various failures, we may shutdown the client. +# Parameters: +# $selvec - Vector of readable sockets. +# \%sockets - Refers to the Hash of sockets indexed by sockets. +# Each of these may or may not have it's fd bit set +# in the $selvec. +# \%ibufs - Refers to the hash of input buffers indexed by socket. +# \%obufs - Hash of output buffers indexed by socket. +# \%ready - Hash of ready flags indicating the existence of a completed +# Request. +sub HandleInput +{ + + # Marshall the parameters. Note that the hashes are actually + # references not values. + + my $selvec = shift; + my $sockets = shift; + my $ibufs = shift; + my $obufs = shift; + my $ready = shift; + my $sock; -# ------------------------------------------------------- End of make_new_child + if($DEBUG) { + &logthis("Entered HandleInput\n"); + } + foreach $sock (keys %$sockets) { + my $socket = $sockets->{$sock}; + if(vec($selvec, $sock, 1)) { # Socket which is readable. + + # Attempt to read the data and do error management. + my $data = ''; + my $rv = $socket->recv($data, POSIX::BUFSIZ, 0); + if($DEBUG) { + &logthis("Received $data from socket"); + } + unless (defined($rv) && length $data) { + + # Read an end of file.. this is a disconnect from the peer. + + delete $sockets->{$sock}; + delete $ibufs->{$sock}; + delete $obufs->{$sock}; + delete $ready->{$sock}; + + status("Idle"); + close $socket; + next; + } + # Append the read data to the input buffer. If the buffer + # now contains a \n the request is complete and we can + # mark this in the $ready hash (one request for each \n.) + + $ibufs->{$sock} .= $data; + while($ibufs->{$sock} =~ s/(.*\n)//) { + push(@{$ready->{$sock}}, $1); + } + + } + } + # Now handle any requests which are ready: + + foreach $client (keys %ready) { + handle($client); + } +} + +# DoSelect: does a select with no timeout. On signal (errno == EINTR), +# the select is retried until there are items in the returned +# vectors. +# +# Parameters: +# \$readvec - Reference to a vector of file descriptors to +# check for readability. +# \$writevec - Reference to a vector of file descriptors to check for +# writability. +# On exit, the referents are modified with vectors indicating which +# file handles are readable/writable. +# +sub DoSelect { + my $readvec = shift; + my $writevec= shift; + my $outs; + my $ins; + + while (1) { + my $nfds = select( $ins = $$readvec, $outs = $$writevec, undef, undef); + if($nfds) { + if($DEBUG) { + &logthis("select exited with ".$nfds." fds\n"); + &logthis("ins = ".unpack("b*",$ins). + " readvec = ".unpack("b*",$$readvec)."\n"); + &logthis("outs = ".unpack("b*",$outs). + " writevec = ".unpack("b*",$$writevec)."\n"); + } + $$readvec = $ins; + $$writevec = $outs; + return; + } else { + if($DEBUG) { + &logthis("Select exited with no bits set in mask\n"); + } + die "Select failed" unless $! == EINTR; + } + } +} # handle($socket) deals with all pending requests for $client +# sub handle { # requests are in $ready{$client} # send output to $outbuffer{$client} my $client = shift; my $request; - foreach $request (@{$ready{$client}}) { # ============================================================= Process request # $request is the text of the request # put text of reply into $outbuffer{$client} +# ------------------------------------------------------------ Is this the end? + chomp($request); + if($DEBUG) { + &logthis(" Request $request processing starts"); + } + if ($request eq "close_connection_exit\n") { + &status("Request close connection"); + &logthis( + "CRITICAL: Request Close Connection ... exiting"); + $remotesock->shutdown(2); + $server->close(); + exit; + } # ----------------------------------------------------------------------------- if ($request =~ /^encrypt\:/) { my $cmd=$request; @@ -631,27 +636,19 @@ sub handle { $encrequest.= unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8))); } - $request="enc:$cmdlength:$encrequest\n"; + $request="enc:$cmdlength:$encrequest"; } # --------------------------------------------------------------- Main exchange - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(300); - &status("Sending $conserver: $request"); - print $remotesock "$request"; - &status("Waiting for reply from $conserver: $request"); - $answer=<$remotesock>; - &status("Received reply: $request"); - alarm(0); - }; - if ($@=~/timeout/) { - $answer=''; - &logthis( - "CRITICAL: Timeout $conserver: $request"); - } - $SIG{ALRM}='DEFAULT'; - $SIG{__DIE__}=\&catchexception; + $answer = londtransaction($remotesock, $request, 300); + + if($DEBUG) { + &logthis(" Request data exchange complete"); + } + if ($@=~/timeout/) { + $answer=''; + &logthis( + "CRITICAL: Timeout: $request"); + } if ($answer) { @@ -667,19 +664,27 @@ sub handle { $answer=substr($answer,0,$cmdlength); $answer.="\n"; } + if($DEBUG) { + &logthis("sending $answer to client\n"); + } $outbuffer{$client} .= $answer; } else { $outbuffer{$client} .= "con_lost\n"; } + &status("Completed: $request"); + if($DEBUG) { + &logthis(" Request processing complete"); + } # ===================================================== Done processing request } delete $ready{$client}; - &status("Completed $conserver: $request"); # -------------------------------------------------------------- End non-forker + if($DEBUG) { + &logthis(" requests for child handled"); + } } # ---------------------------------------------------------- End make_new_child -} # nonblock($socket) puts socket into nonblocking mode sub nonblock { @@ -693,3 +698,476 @@ sub nonblock { or die "Can't make socket nonblocking: $!\n"; } + +sub openremote { +# ---------------------------------------------------- Client to network server + + my $conserver=shift; + +&status("Opening TCP"); + my $st=120+int(rand(240)); # Sleep before opening: + +unless ( + $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, + PeerPort => $perlvar{'londPort'}, + Proto => "tcp", + Type => SOCK_STREAM) + ) { + + &logthis( +"WARNING: Couldn't connect to $conserver ($st secs): "); + sleep($st); + exit; + }; +# ----------------------------------------------------------------- Init dialog + +&logthis("INFO Connected to $conserver, initing "); +&status("Init dialogue: $conserver"); + + $answer = londtransaction($remotesock, "init", 60); + chomp($answer); + $answer = londtransaction($remotesock, $answer, 60); + chomp($answer); + + if ($@=~/timeout/) { + &logthis("Timed out during init.. exiting"); + exit; + } + +if ($answer ne 'ok') { + &logthis("Init reply: >$answer<"); + my $st=120+int(rand(240)); + &logthis( +"WARNING: Init failed ($st secs)"); + sleep($st); + exit; +} + +sleep 5; +&status("Ponging"); +print $remotesock "pong\n"; +$answer=<$remotesock>; +chomp($answer); +if ($answer!~/^$conserver/) { + &logthis("Pong reply: >$answer<"); +} +# ----------------------------------------------------------- Initialize cipher + +&status("Initialize cipher"); +print $remotesock "ekey\n"; +my $buildkey=<$remotesock>; +my $key=$conserver.$perlvar{'lonHostID'}; +$key=~tr/a-z/A-Z/; +$key=~tr/G-P/0-9/; +$key=~tr/Q-Z/0-9/; +$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; +$key=substr($key,0,32); +my $cipherkey=pack("H32",$key); +if ($cipher=new IDEA $cipherkey) { + &logthis("Secure connection initialized"); +} else { + my $st=120+int(rand(240)); + &logthis( + "WARNING: ". + "Could not establish secure connection ($st secs)!"); + sleep($st); + exit; +} + &logthis(" Remote open success "); +} + + + +# grabs exception and records it to log before exiting +sub catchexception { + my ($signal)=@_; + $SIG{QUIT}='DEFAULT'; + $SIG{__DIE__}='DEFAULT'; + chomp($signal); + &logthis("CRITICAL: " + ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through " + ."\"$signal\" with parameter "); + die("Signal abend"); +} + +# -------------------------------------- Routines to see if other box available + +#sub online { +# my $host=shift; +# &status("Pinging ".$host); +# my $p=Net::Ping->new("tcp",20); +# my $online=$p->ping("$host"); +# $p->close(); +# undef ($p); +# return $online; +#} + +sub connected { + my ($local,$remote)=@_; + &status("Checking connection $local to $remote"); + $local=~s/\W//g; + $remote=~s/\W//g; + + unless ($hostname{$local}) { return 'local_unknown'; } + unless ($hostname{$remote}) { return 'remote_unknown'; } + + #unless (&online($hostname{$local})) { return 'local_offline'; } + + my $ua=new LWP::UserAgent; + + my $request=new HTTP::Request('GET', + "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote); + + my $response=$ua->request($request); + + unless ($response->is_success) { return 'local_error'; } + + my $reply=$response->content; + $reply=(split("\n",$reply))[0]; + $reply=~s/\W//g; + if ($reply ne $remote) { return $reply; } + return 'ok'; +} + + + +sub hangup { + foreach (keys %children) { + $wasserver=$children{$_}; + &status("Closing $wasserver"); + &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); + &status("Kill PID $_ for $wasserver"); + kill ('INT',$_); + } +} + +sub HUNTSMAN { # signal handler for SIGINT + local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children + &hangup(); + my $execdir=$perlvar{'lonDaemons'}; + unlink("$execdir/logs/lonc.pid"); + &logthis("CRITICAL: Shutting down"); + exit; # clean up with dignity +} + +sub HUPSMAN { # signal handler for SIGHUP + local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children + &hangup(); + &logthis("CRITICAL: Restarting"); + unlink("$execdir/logs/lonc.pid"); + my $execdir=$perlvar{'lonDaemons'}; + exec("$execdir/lonc"); # here we go again +} + +sub checkchildren { + &initnewstatus(); + &logstatus(); + &logthis('Going to check on the children'); + foreach (sort keys %children) { + sleep 1; + unless (kill 'USR1' => $_) { + &logthis ('CRITICAL: Child '.$_.' is dead'); + &logstatus($$.' is dead'); + } + } +} + +sub USRMAN { + &logthis("USR1: Trying to establish connections again"); + # + # It is really important not to just clear the childatt hash or we will + # lose all memory of the children. What we really want to do is this: + # For each index where childatt is >= $childmaxattempts + # Zero the associated counter and do a make_child for the host. + # Regardles, the childatt entry is zeroed: + my $host; + foreach $host (keys %childatt) { + if ($childatt{$host} >= $childmaxattempts) { + $childatt{$host} = 0; + &logthis("INFO: Restarting child for server: " + .$host."\n"); + make_new_child($host); + } + else { + $childatt{$host} = 0; + } + } + &checkchildren(); # See if any children are still dead... +} + +# -------------------------------------------------- Non-critical communication +sub subreply { + my ($cmd,$server)=@_; + my $answer=''; + if ($server ne $perlvar{'lonHostID'}) { + my $peerfile="$perlvar{'lonSockDir'}/$server"; + my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", + Type => SOCK_STREAM, + Timeout => 10) + or return "con_lost"; + + + $answer = londtransaction($sclient, $cmd, 10); + + if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; } + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; + } else { $answer='self_reply'; } + return $answer; +} + +# --------------------------------------------------------------------- Logging + +sub logthis { + my $message=shift; + my $execdir=$perlvar{'lonDaemons'}; + my $fh=IO::File->new(">>$execdir/logs/lonc.log"); + my $now=time; + my $local=localtime($now); + $lastlog=$local.': '.$message; + print $fh "$local ($$) [$conserver] [$status]: $message\n"; +} + +#-------------------------------------- londtransaction: +# +# Performs a transaction with lond with timeout support. +# result = londtransaction(socket,request,timeout) +# +sub londtransaction { + my ($socket, $request, $tmo) = @_; + + if($DEBUG) { + &logthis("londtransaction request: $request"); + } + + # Set the signal handlers: ALRM for timeout and disble the others. + + $SIG{ALRM} = sub { die "timeout" }; + $SIG{__DIE__} = 'DEFAULT'; + + # Disable all but alarm so that only that can interupt the + # send /receive. + # + my $sigset = POSIX::SigSet->new(QUIT, USR1, HUP, INT, TERM); + my $priorsigs = POSIX::SigSet->new; + unless (defined sigprocmask(SIG_BLOCK, $sigset, $priorsigs)) { + &logthis(" CRITICAL -- londtransaction ". + "failed to block signals "); + die "could not block signals in londtransaction"; + } + $answer = ''; + # + # Send request to lond. + # + eval { + alarm($tmo); + print $socket "$request\n"; + alarm(0); + }; + # If request didn't timeout, try for the response. + # + + if ($@!~/timeout/) { + eval { + alarm($tmo); + $answer = <$socket>; + if($DEBUG) { + &logthis("Received $answer in londtransaction"); + } + alarm(0); + }; + } else { + if($DEBUG) { + &logthis("Timeout on send in londtransaction"); + } + } + if( ($@ =~ /timeout/) && ($DEBUG)) { + &logthis("Timeout on receive in londtransaction"); + } + # + # Restore the initial sigmask set. + # + unless (defined sigprocmask(SIG_UNBLOCK, $priorsigs)) { + &logthis(" CRITICAL -- londtransaction ". + "failed to re-enable signal processing. "); + die "londtransaction failed to re-enable signals"; + } + # + # go back to the prior handler set. + # + $SIG{ALRM} = 'DEFAULT'; + $SIG{__DIE__} = \&cathcexception; + + # chomp $answer; + if ($DEBUG) { + &logthis("Returning $answer in londtransaction"); + } + return $answer; + +} + +sub logperm { + my $message=shift; + my $execdir=$perlvar{'lonDaemons'}; + my $now=time; + my $local=localtime($now); + my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); + print $fh "$now:$message:$local\n"; +} +# ------------------------------------------------------------------ Log status + +sub logstatus { + my $docdir=$perlvar{'lonDocRoot'}; + my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt"); + print $fh $$."\t".$conserver."\t".$status."\t".$lastlog."\n"; +} + +sub initnewstatus { + my $docdir=$perlvar{'lonDocRoot'}; + my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt"); + my $now=time; + my $local=localtime($now); + print $fh "LONC status $local - parent $$\n\n"; +} + +# -------------------------------------------------------------- Status setting + +sub status { + my $what=shift; + my $now=time; + my $local=localtime($now); + $status=$local.': '.$what; + $0='lonc: '.$what.' '.$local; +} + + + +# ----------------------------------- POD (plain old documentation, CPAN style) + +=head1 NAME + +lonc - LON TCP-MySQL-Server Daemon for handling database requests. + +=head1 SYNOPSIS + +Usage: B + +Should only be run as user=www. This is a command-line script which +is invoked by B. There is no expectation that a typical user +will manually start B from the command-line. (In other words, +DO NOT START B YOURSELF.) + +=head1 DESCRIPTION + +Provides persistent TCP connections to the other servers in the network +through multiplexed domain sockets + +B forks off children processes that correspond to the other servers +in the network. Management of these processes can be done at the +parent process level or the child process level. + + After forking off the children, B the B +executes a main loop which simply waits for processes to exit. +As a process exits, a new process managing a link to the same +peer as the exiting process is created. + +B is the location of log messages. + +The process management is now explained in terms of linux shell commands, +subroutines internal to this code, and signal assignments: + +=over 4 + +=item * + +PID is stored in B + +This is the process id number of the parent B process. + +=item * + +SIGTERM and SIGINT + +Parent signal assignment: + $SIG{INT} = $SIG{TERM} = \&HUNTSMAN; + +Child signal assignment: + $SIG{INT} = 'DEFAULT'; (and SIGTERM is DEFAULT also) +(The child dies and a SIGALRM is sent to parent, awaking parent from slumber + to restart a new child.) + +Command-line invocations: + B B<-s> SIGTERM I + B B<-s> SIGINT I + +Subroutine B: + This is only invoked for the B parent I. +This kills all the children, and then the parent. +The B file is cleared. + +=item * + +SIGHUP + +Current bug: + This signal can only be processed the first time +on the parent process. Subsequent SIGHUP signals +have no effect. + +Parent signal assignment: + $SIG{HUP} = \&HUPSMAN; + +Child signal assignment: + none (nothing happens) + +Command-line invocations: + B B<-s> SIGHUP I + +Subroutine B: + This is only invoked for the B parent I, +This kills all the children, and then the parent. +The B file is cleared. + +=item * + +SIGUSR1 + +Parent signal assignment: + $SIG{USR1} = \&USRMAN; + +Child signal assignment: + $SIG{USR1}= \&logstatus; + +Command-line invocations: + B B<-s> SIGUSR1 I + +Subroutine B: + When invoked for the B parent I, +SIGUSR1 is sent to all the children, and the status of +each connection is logged. + + +=back + +=head1 PREREQUISITES + +POSIX +IO::Socket +IO::Select +IO::File +Socket +Fcntl +Tie::RefHash +Crypt::IDEA + +=head1 COREQUISITES + +=head1 OSNAMES + +linux + +=head1 SCRIPT CATEGORIES + +Server/Process + +=cut