--- loncom/Attic/lonc 2002/02/25 20:43:15 1.30 +++ loncom/Attic/lonc 2002/03/20 03:42:45 1.33 @@ -5,7 +5,7 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # -# $Id: lonc,v 1.30 2002/02/25 20:43:15 www Exp $ +# $Id: lonc,v 1.33 2002/03/20 03:42:45 foxr Exp $ # # Copyright Michigan State University Board of Trustees # @@ -45,7 +45,7 @@ # 12/20 Scott Harrison # YEAR=2002 # 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer -# +# 3/07/02 Ron Fox # based on nonforker from Perl Cookbook # - server who multiplexes without forking @@ -57,12 +57,13 @@ use Socket; use Fcntl; use Tie::RefHash; use Crypt::IDEA; -use Net::Ping; +#use Net::Ping; use LWP::UserAgent(); $status=''; $lastlog=''; $conserver='SHELL'; +$DEBUG = 0; # Set to 1 for annoyingly complete logs. # -------------------------------- Set signal handlers to record abnormal exits @@ -158,47 +159,53 @@ $SIG{HUP}=$SIG{USR1}='IGNORE'; &status("Forking ..."); foreach $thisserver (keys %hostip) { - if (&online($hostname{$thisserver})) { + #if (&online($hostname{$thisserver})) { make_new_child($thisserver); - } + #} } &logthis("Done starting initial servers"); # ----------------------------------------------------- Install signal handlers -$SIG{CHLD} = \&REAPER; + $SIG{INT} = $SIG{TERM} = \&HUNTSMAN; $SIG{HUP} = \&HUPSMAN; $SIG{USR1} = \&USRMAN; # And maintain the population. while (1) { - &status("Sleeping"); - sleep; # wait for a signal (i.e., child's death) + my $deadpid = wait; # Wait for the next child to die. # See who died and start new one &status("Woke up"); my $skipping=''; - foreach $thisserver (keys %hostip) { - if (!$childpid{$thisserver}) { - if (($childatt{$thisserver}<$childmaxattempts) && - (&online($hostname{$thisserver}))) { - $childatt{$thisserver}++; - &logthis( - "INFO: Trying to reconnect for $thisserver " - ."($childatt{$thisserver} of $childmaxattempts attempts)"); - make_new_child($thisserver); - } else { - $skipping.=$thisserver.' 
'; - } - - } - } - if ($skipping) { - &logthis("WARNING: Skipped $skipping"); + + if(exists($children{$deadpid})) { + + $thisserver = $children{$deadpid}; # Look name of dead guy's peer. + + delete($children{$deadpid}); # Get rid of dead hash entry. + + if($childatt{$thisserver} < $childmaxattempts) { + $childatt{$thisserver}++; + &logthis( + "INFO: Trying to reconnect for $thisserver " + ."($childatt{$thisserver} of $childmaxattempts attempts)"); + make_new_child($thisserver); + + } + else { + $skipping .= $thisserver.' '; + } + if($skipping) { + &logthis("WARNING: Skipped $skipping"); + + } } + } + sub make_new_child { $newserver=shift; @@ -217,7 +224,7 @@ sub make_new_child { sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; $children{$pid} = $newserver; - $childpid{$conserver} = $pid; + $childpid{$newserver} = $pid; return; } else { $conserver=$newserver; @@ -238,9 +245,10 @@ unlink($port); # -------------------------------------------------------------- Open other end &openremote($conserver); - + &logthis(" Connection to $conserver open "); # ----------------------------------------- We're online, send delayed messages &status("Checking for delayed messages"); + my @allbuffered; my $path="$perlvar{'lonSockDir'}/delayed"; opendir(DIRHANDLE,$path); @@ -250,7 +258,7 @@ unlink($port); foreach (@allbuffered) { &status("Sending delayed: $_"); $dfname="$path/$_"; - &logthis('Sending '.$dfname); + if($DEBUG) { &logthis('Sending '.$dfname); } my $wcmd; { my $dfh=IO::File->new($dfname); @@ -271,17 +279,8 @@ unlink($port); } $cmd="enc:$cmdlength:$encrequest\n"; } - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(60); - print $remotesock "$cmd\n"; - $answer=<$remotesock>; + $answer = londtransaction($remotesock, $cmd, 60); chomp($answer); - alarm(0); - }; - $SIG{ALRM}='DEFAULT'; - $SIG{__DIE__}=\&catchexception; if (($answer ne '') && ($@!~/timeout/)) { unlink("$dfname"); @@ -289,6 +288,7 @@ unlink($port); 
&logperm("S:$conserver:$bcmd"); } } + if($DEBUG) { &logthis(" Delayed transactions sent"); } # ------------------------------------------------------- Listen to UNIX socket &status("Opening socket"); @@ -300,11 +300,11 @@ unless ( my $st=120+int(rand(240)); &logthis( "WARNING: ". - "Can't make server socket ($st secs): $@"); + "Can't make server socket ($st secs): .. exiting"); sleep($st); exit; }; - + # ----------------------------------------------------------------------------- &logthis("$conserver online"); @@ -329,8 +329,8 @@ while (1) { # check for new information on the connections we have # anything to read or accept? - foreach $client ($select->can_read(0.1)) { + foreach $client ($select->can_read(100.0)) { if ($client == $server) { # accept a new connection &status("Accept new connection: $conserver"); @@ -356,6 +356,7 @@ while (1) { $inbuffer{$client} .= $data; + # test whether the data in the buffer or the data we # just read means there is a complete request waiting # to be fulfilled. If there is, set $ready{$client} @@ -365,12 +366,12 @@ while (1) { } } } - + # Any complete requests to process? foreach $client (keys %ready) { handle($client); } - + # Buffers to flush? foreach $client ($select->can_write(1)) { # Skip this client if we have nothing to say @@ -426,16 +427,19 @@ sub handle { # send output to $outbuffer{$client} my $client = shift; my $request; - foreach $request (@{$ready{$client}}) { # ============================================================= Process request # $request is the text of the request # put text of reply into $outbuffer{$client} # ------------------------------------------------------------ Is this the end? + chomp($request); + if($DEBUG) { + &logthis(" Request $request processing starts"); + } if ($request eq "close_connection_exit\n") { &status("Request close connection"); &logthis( - "CRITICAL: Request Close Connection"); + "CRITICAL: Request Close Connection ... 
exiting"); $remotesock->shutdown(2); $server->close(); exit; @@ -452,27 +456,19 @@ sub handle { $encrequest.= unpack("H16",$cipher->encrypt(substr($cmd,$encidx,8))); } - $request="enc:$cmdlength:$encrequest\n"; + $request="enc:$cmdlength:$encrequest"; } # --------------------------------------------------------------- Main exchange - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(300); - &status("Sending: $request"); - print $remotesock "$request"; - &status("Waiting for reply from $conserver: $request"); - $answer=<$remotesock>; - &status("Received reply: $request"); - alarm(0); - }; - if ($@=~/timeout/) { - $answer=''; - &logthis( - "CRITICAL: Timeout: $request"); - } - $SIG{ALRM}='DEFAULT'; - $SIG{__DIE__}=\&catchexception; + $answer = londtransaction($remotesock, $request, 300); + + if($DEBUG) { + &logthis(" Request data exchange complete"); + } + if ($@=~/timeout/) { + $answer=''; + &logthis( + "CRITICAL: Timeout: $request"); + } if ($answer) { @@ -488,17 +484,25 @@ sub handle { $answer=substr($answer,0,$cmdlength); $answer.="\n"; } + if($DEBUG) { + &logthis("sending $answer to client\n"); + } $outbuffer{$client} .= $answer; } else { $outbuffer{$client} .= "con_lost\n"; } &status("Completed: $request"); - + if($DEBUG) { + &logthis(" Request processing complete"); + } # ===================================================== Done processing request } delete $ready{$client}; # -------------------------------------------------------------- End non-forker + if($DEBUG) { + &logthis(" requests for child handled"); + } } # ---------------------------------------------------------- End make_new_child } @@ -522,6 +526,7 @@ sub openremote { my $conserver=shift; &status("Opening TCP"); + my $st=120+int(rand(240)); # Sleep before opening: unless ( $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, @@ -529,32 +534,24 @@ unless ( Proto => "tcp", Type => SOCK_STREAM) ) { - my $st=120+int(rand(240)); + &logthis( -"WARNING: 
Couldn't connect ($st secs): $@"); +"WARNING: Couldn't connect to $conserver ($st secs): "); sleep($st); exit; }; # ----------------------------------------------------------------- Init dialog +&logthis("INFO Connected to $conserver, initing "); &status("Init dialogue: $conserver"); - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(60); -print $remotesock "init\n"; -$answer=<$remotesock>; -print $remotesock "$answer"; -$answer=<$remotesock>; -chomp($answer); - alarm(0); - }; - $SIG{ALRM}='DEFAULT'; - $SIG{__DIE__}=\&catchexception; + $answer = londtransaction($remotesock, "init", 60); + chomp($answer); + $answer = londtransaction($remotesock, $answer, 60); + chomp($answer); if ($@=~/timeout/) { - &logthis("Timed out during init"); + &logthis("Timed out during init.. exiting"); exit; } @@ -597,7 +594,7 @@ if ($cipher=new IDEA $cipherkey) { sleep($st); exit; } - + &logthis(" Remote open success "); } @@ -610,21 +607,21 @@ sub catchexception { chomp($signal); &logthis("CRITICAL: " ."ABNORMAL EXIT. 
Child $$ for server [$wasserver] died through " - ."\"$signal\" with parameter [$@]"); - die($@); + ."\"$signal\" with parameter "); + die("Signal abend"); } # -------------------------------------- Routines to see if other box available -sub online { - my $host=shift; - &status("Pinging ".$host); - my $p=Net::Ping->new("tcp",20); - my $online=$p->ping("$host"); - $p->close(); - undef ($p); - return $online; -} +#sub online { +# my $host=shift; +# &status("Pinging ".$host); +# my $p=Net::Ping->new("tcp",20); +# my $online=$p->ping("$host"); +# $p->close(); +# undef ($p); +# return $online; +#} sub connected { my ($local,$remote)=@_; @@ -635,7 +632,7 @@ sub connected { unless ($hostname{$local}) { return 'local_unknown'; } unless ($hostname{$remote}) { return 'remote_unknown'; } - unless (&online($hostname{$local})) { return 'local_offline'; } + #unless (&online($hostname{$local})) { return 'local_offline'; } my $ua=new LWP::UserAgent; @@ -654,17 +651,6 @@ sub connected { } -sub REAPER { # takes care of dead children - $SIG{CHLD} = \&REAPER; - my $pid = wait; - my $wasserver=$children{$pid}; - &logthis("CRITICAL: " - ."Child $pid for server $wasserver died ($childatt{$wasserver})"); - delete $children{$pid}; - delete $childpid{$wasserver}; - my $port = "$perlvar{'lonSockDir'}/$wasserver"; - unlink($port); -} sub hangup { foreach (keys %children) { @@ -725,15 +711,8 @@ sub subreply { or return "con_lost"; - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(10); - print $sclient "$cmd\n"; - $answer=<$sclient>; - chomp($answer); - alarm(0); - }; + $answer = londtransaction($sclient, $cmd, 10); + if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; } $SIG{ALRM}='DEFAULT'; $SIG{__DIE__}=\&catchexception; @@ -753,6 +732,83 @@ sub logthis { print $fh "$local ($$) [$conserver] [$status]: $message\n"; } +#-------------------------------------- londtransaction: +# +# Performs a transaction with lond with timeout support: the request plus a newline is written to the socket, then one reply line is read, each step guarded by its own alarm of `timeout` seconds. 
+# result = londtransaction(socket,request,timeout) -- the reply as read (trailing newline kept), or '' after a timeout. +# +sub londtransaction { + my ($socket, $request, $tmo) = @_; + + if($DEBUG) { + &logthis("londtransaction request: $request"); + } + + # Set the signal handlers: ALRM for timeout and disable the others. + + $SIG{ALRM} = sub { die "timeout" }; + $SIG{__DIE__} = 'DEFAULT'; + + # Disable all but alarm so that only that can interrupt the + # send /receive. + # The set is built from POSIX SIG* constants (a bare QUIT is only the string "QUIT"). + my $sigset = POSIX::SigSet->new(POSIX::SIGQUIT(), POSIX::SIGUSR1(), POSIX::SIGHUP(), POSIX::SIGINT(), POSIX::SIGTERM()); + my $priorsigs = POSIX::SigSet->new; + unless (defined sigprocmask(SIG_BLOCK, $sigset, $priorsigs)) { + &logthis(" CRITICAL -- londtransaction ". + "failed to block signals "); + die "could not block signals in londtransaction"; + } + $answer = ''; + # + # Send request to lond. + # + eval { + alarm($tmo); + print $socket "$request\n"; + alarm(0); + }; + # If request didn't timeout, try for the response. + # + + if ($@!~/timeout/) { + eval { + alarm($tmo); + $answer = <$socket>; + if($DEBUG) { + &logthis("Received $answer in londtransaction"); + } + alarm(0); + }; + } else { + if($DEBUG) { + &logthis("Timeout on send in londtransaction"); + } + } + if( ($@ =~ /timeout/) && ($DEBUG)) { + &logthis("Timeout on receive in londtransaction"); + } + # + # Restore the initial sigmask set. + # SIG_SETMASK reinstates exactly the mask saved in $priorsigs; SIG_UNBLOCK would only unblock the signals that mask contained. + unless (defined sigprocmask(POSIX::SIG_SETMASK(), $priorsigs)) { + &logthis(" CRITICAL -- londtransaction ". + "failed to re-enable signal processing. "); + die "londtransaction failed to re-enable signals"; + } + # + # go back to the prior handler set. + # + $SIG{ALRM} = 'DEFAULT'; + $SIG{__DIE__} = \&catchexception; + + # chomp $answer; + if ($DEBUG) { + &logthis("Returning $answer in londtransaction"); + } + return $answer; + +} sub logperm { my $message=shift; @@ -797,22 +853,104 @@ lonc - LON TCP-MySQL-Server Daemon for h =head1 SYNOPSIS +Usage: B<lonc> + Should only be run as user=www. This is a command-line script which -is invoked by loncron. +is invoked by B<loncron>. 
There is no expectation that a typical user +will manually start B<lonc> from the command-line. (In other words, +DO NOT START B<lonc> YOURSELF.) =head1 DESCRIPTION Provides persistent TCP connections to the other servers in the network through multiplexed domain sockets - PID in subdir logs/lonc.pid - kill kills - HUP restarts - USR1 tries to open connections again +B<lonc> forks off child processes that correspond to the other servers +in the network. Management of these processes can be done at the +parent process level or the child process level. + + After forking off the children, the B<lonc> parent +executes a main loop which simply waits for processes to exit. +As a process exits, a new process managing a link to the same +peer as the exiting process is created. + +B<logs/lonc.log> is the location of log messages. + +The process management is now explained in terms of linux shell commands, +subroutines internal to this code, and signal assignments: + +=over 4 + +=item * + +PID is stored in B<logs/lonc.pid> + +This is the process id number of the parent B<lonc> process. + +=item * + +SIGTERM and SIGINT + +Parent signal assignment: + $SIG{INT} = $SIG{TERM} = \&HUNTSMAN; + +Child signal assignment: + $SIG{INT} = 'DEFAULT'; (and SIGTERM is DEFAULT also) +(The child dies and a SIGCHLD is sent to parent, whose wait then returns so that it can + restart a new child.) + +Command-line invocations: + B<kill> B<-s> SIGTERM I<PID> + B<kill> B<-s> SIGINT I<PID> + +Subroutine B<HUNTSMAN>: + This is only invoked for the B<lonc> parent I<PID>. +This kills all the children, and then the parent. +The B<lonc.pid> file is cleared. + +=item * + +SIGHUP + +Current bug: + This signal can only be processed the first time +on the parent process. Subsequent SIGHUP signals +have no effect. + +Parent signal assignment: + $SIG{HUP} = \&HUPSMAN; + +Child signal assignment: + none (nothing happens) + +Command-line invocations: + B<kill> B<-s> SIGHUP I<PID> + +Subroutine B<HUPSMAN>: + This is only invoked for the B<lonc> parent I<PID>. +This kills all the children, and then the parent. +The B<lonc.pid> file is cleared. 
+ +=item * + +SIGUSR1 + +Parent signal assignment: + $SIG{USR1} = \&USRMAN; + +Child signal assignment: + $SIG{USR1}= \&logstatus; + +Command-line invocations: + B<kill> B<-s> SIGUSR1 I<PID> + +Subroutine B<USRMAN>: + When invoked for the B<lonc> parent I<PID>, +SIGUSR1 is sent to all the children, and the status of +each connection is logged. -=head1 README -Not yet written. +=back =head1 PREREQUISITES