--- loncom/Attic/lonc 2002/02/25 15:48:11 1.29 +++ loncom/Attic/lonc 2002/02/25 20:43:15 1.30 @@ -5,7 +5,7 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # -# $Id: lonc,v 1.29 2002/02/25 15:48:11 www Exp $ +# $Id: lonc,v 1.30 2002/02/25 20:43:15 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -60,62 +60,9 @@ use Crypt::IDEA; use Net::Ping; use LWP::UserAgent(); -my $status=''; -my $lastlog=''; - -# grabs exception and records it to log before exiting -sub catchexception { - my ($signal)=@_; - $SIG{QUIT}='DEFAULT'; - $SIG{__DIE__}='DEFAULT'; - chomp($signal); - &logthis("CRITICAL: " - ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through " - ."\"$signal\" with parameter [$@]"); - die($@); -} - -$childmaxattempts=5; - -# -------------------------------------- Routines to see if other box available - -sub online { - my $host=shift; - &status("Pinging ".$host); - my $p=Net::Ping->new("tcp",20); - my $online=$p->ping("$host"); - $p->close(); - undef ($p); - return $online; -} - -sub connected { - my ($local,$remote)=@_; - &status("Checking connection $local to $remote"); - $local=~s/\W//g; - $remote=~s/\W//g; - - unless ($hostname{$local}) { return 'local_unknown'; } - unless ($hostname{$remote}) { return 'remote_unknown'; } - - unless (&online($hostname{$local})) { return 'local_offline'; } - - my $ua=new LWP::UserAgent; - - my $request=new HTTP::Request('GET', - "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote); - - my $response=$ua->request($request); - - unless ($response->is_success) { return 'local_error'; } - - my $reply=$response->content; - $reply=(split("\n",$reply))[0]; - $reply=~s/\W//g; - if ($reply ne $remote) { return $reply; } - return 'ok'; -} - +$status=''; +$lastlog=''; +$conserver='SHELL'; # -------------------------------- Set signal handlers to record abnormal exits @@ -182,139 +129,7 @@ close(CONFIG); %childatt = (); # number of attempts to start server # for ID -sub REAPER { # takes care of dead children - $SIG{CHLD} = \&REAPER; - my $pid = wait; - my $wasserver=$children{$pid}; - &logthis("CRITICAL: " - ."Child $pid for server $wasserver died ($childatt{$wasserver})"); - delete $children{$pid}; - delete $childpid{$wasserver}; - my $port = "$perlvar{'lonSockDir'}/$wasserver"; - unlink($port); -} - -sub hangup { - foreach (keys %children) { - $wasserver=$children{$_}; - &status("Closing $wasserver"); - &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); - &status("Kill PID $_ for $wasserver"); - kill ('INT',$_); - } -} - -sub HUNTSMAN { # signal handler for SIGINT - local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children - &hangup(); - my $execdir=$perlvar{'lonDaemons'}; - unlink("$execdir/logs/lonc.pid"); - &logthis("CRITICAL: Shutting down"); - exit; # clean up with dignity -} - -sub HUPSMAN { # signal handler for SIGHUP - local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children - &hangup(); - &logthis("CRITICAL: Restarting"); - unlink("$execdir/logs/lonc.pid"); - my $execdir=$perlvar{'lonDaemons'}; - exec("$execdir/lonc"); # here we go again -} - -sub checkchildren { - &initnewstatus(); - &logstatus(); - &logthis('Going to check on the children'); - foreach (sort keys %children) { - sleep 1; - unless (kill 'USR1' => $_) { - &logthis ('CRITICAL: Child '.$_.' is dead'); - &logstatus($$.' is dead'); - } - } -} - -sub USRMAN { - &logthis("USR1: Trying to establish connections again"); - %childatt=(); - &checkchildren(); -} - -# -------------------------------------------------- Non-critical communication -sub subreply { - my ($cmd,$server)=@_; - my $answer=''; - if ($server ne $perlvar{'lonHostID'}) { - my $peerfile="$perlvar{'lonSockDir'}/$server"; - my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", - Type => SOCK_STREAM, - Timeout => 10) - or return "con_lost"; - - - $SIG{ALRM}=sub { die "timeout" }; - $SIG{__DIE__}='DEFAULT'; - eval { - alarm(10); - print $sclient "$cmd\n"; - $answer=<$sclient>; - chomp($answer); - alarm(0); - }; - if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; } - $SIG{ALRM}='DEFAULT'; - $SIG{__DIE__}=\&catchexception; - } else { $answer='self_reply'; } - return $answer; -} - -# --------------------------------------------------------------------- Logging - -sub logthis { - my $message=shift; - my $execdir=$perlvar{'lonDaemons'}; - my $fh=IO::File->new(">>$execdir/logs/lonc.log"); - my $now=time; - my $local=localtime($now); - $lastlog=$local.': '.$message; - print $fh "$local ($$) [$status]: $message\n"; -} - - -sub logperm { - my $message=shift; - my $execdir=$perlvar{'lonDaemons'}; - my $now=time; - my $local=localtime($now); - my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); - print $fh "$now:$message:$local\n"; -} -# ------------------------------------------------------------------ Log status - -sub logstatus { - my $docdir=$perlvar{'lonDocRoot'}; - my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt"); - print $fh $$."\t".$status."\t".$lastlog."\n"; -} - -sub initnewstatus { - my $docdir=$perlvar{'lonDocRoot'}; - my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt"); - my $now=time; - my $local=localtime($now); - print $fh "LONC status $local - parent $$\n\n"; -} - -# -------------------------------------------------------------- Status setting - -sub status { - my $what=shift; - my $now=time; - my $local=localtime($now); - $status=$local.': '.$what; -} - +$childmaxattempts=5; # ---------------------------------------------------- Fork once and dissociate &status("Fork and dissociate"); @@ -324,6 +139,8 @@ die "Couldn't fork: $!" unless defined ( POSIX::setsid() or die "Can't start new session: $!"; +$conserver='PARENT'; + # ------------------------------------------------------- Write our PID on disk &status("Write PID"); $execdir=$perlvar{'lonDaemons'}; @@ -360,6 +177,7 @@ while (1) { sleep; # wait for a signal (i.e., child's death) # See who died and start new one &status("Woke up"); + my $skipping=''; foreach $thisserver (keys %hostip) { if (!$childpid{$thisserver}) { if (($childatt{$thisserver}<$childmaxattempts) && @@ -367,26 +185,26 @@ while (1) { $childatt{$thisserver}++; &logthis( "INFO: Trying to reconnect for $thisserver " - ."(".($childatt{$thisserver}?$childatt{$thisserver}:'none'). - " of $childmaxattempts attempts)"); + ."($childatt{$thisserver} of $childmaxattempts attempts)"); make_new_child($thisserver); } else { - &logthis( - "INFO: Skipping $thisserver " - ."($childatt{$thisserver} of $childmaxattempts attempts)"); + $skipping.=$thisserver.' '; } } } + if ($skipping) { + &logthis("WARNING: Skipped $skipping"); + } } sub make_new_child { - my $conserver=shift; + $newserver=shift; my $pid; my $sigset; - &logthis("Attempting to start child for server $conserver"); + &logthis("Attempting to start child for server $newserver"); # block signal for fork $sigset = POSIX::SigSet->new(SIGINT); sigprocmask(SIG_BLOCK, $sigset) @@ -398,10 +216,11 @@ sub make_new_child { # Parent records the child's birth and returns. sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; - $children{$pid} = $conserver; + $children{$pid} = $newserver; $childpid{$conserver} = $pid; return; } else { + $conserver=$newserver; # Child can *not* return from this subroutine. $SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before $SIG{USR1}= \&logstatus; @@ -429,7 +248,7 @@ unlink($port); closedir(DIRHANDLE); my $dfname; foreach (@allbuffered) { - &status("Sending delayed $conserver $_"); + &status("Sending delayed: $_"); $dfname="$path/$_"; &logthis('Sending '.$dfname); my $wcmd; @@ -466,13 +285,13 @@ unlink($port); if (($answer ne '') && ($@!~/timeout/)) { unlink("$dfname"); - &logthis("Delayed $cmd to $conserver: >$answer<"); + &logthis("Delayed $cmd: >$answer<"); &logperm("S:$conserver:$bcmd"); } } # ------------------------------------------------------- Listen to UNIX socket -&status("Opening socket $conserver"); +&status("Opening socket"); unless ( $server = IO::Socket::UNIX->new(Local => $port, Type => SOCK_STREAM, @@ -481,7 +300,7 @@ unless ( my $st=120+int(rand(240)); &logthis( "WARNING: ". - "Can't make server socket $conserver ($st secs): $@"); + "Can't make server socket ($st secs): $@"); sleep($st); exit; }; @@ -529,7 +348,7 @@ while (1) { delete $outbuffer{$client}; delete $ready{$client}; - &status("Idle $conserver"); + &status("Idle"); $select->remove($client); close $client; next; @@ -559,7 +378,7 @@ while (1) { $rv = $client->send($outbuffer{$client}, 0); - unless ($outbuffer{$client}=~/con_lost\n$/) { + unless ($outbuffer{$client} eq "con_lost\n") { unless (defined $rv) { # Whine, but move on. &logthis("I was told I could write, but I can't.\n"); @@ -588,8 +407,8 @@ while (1) { } else { # -------------------------------------------------------- Wow, connection lost &logthis( - "CRITICAL: Closing connection $conserver"); - &status("Connection lost $conserver"); + "CRITICAL: Closing connection"); + &status("Connection lost"); $remotesock->shutdown(2); &logthis("Attempting to open new connection"); &openremote($conserver); @@ -614,9 +433,9 @@ sub handle { # put text of reply into $outbuffer{$client} # ------------------------------------------------------------ Is this the end? if ($request eq "close_connection_exit\n") { - &status("Request close connection: $conserver"); + &status("Request close connection"); &logthis( - "CRITICAL: Request Close Connection $conserver"); + "CRITICAL: Request Close Connection"); $remotesock->shutdown(2); $server->close(); exit; @@ -640,7 +459,7 @@ sub handle { $SIG{__DIE__}='DEFAULT'; eval { alarm(300); - &status("Sending $conserver: $request"); + &status("Sending: $request"); print $remotesock "$request"; &status("Waiting for reply from $conserver: $request"); $answer=<$remotesock>; @@ -650,7 +469,7 @@ sub handle { if ($@=~/timeout/) { $answer=''; &logthis( - "CRITICAL: Timeout $conserver: $request"); + "CRITICAL: Timeout: $request"); } $SIG{ALRM}='DEFAULT'; $SIG{__DIE__}=\&catchexception; @@ -674,10 +493,11 @@ sub handle { $outbuffer{$client} .= "con_lost\n"; } + &status("Completed: $request"); + # ===================================================== Done processing request } delete $ready{$client}; - &status("Completed $conserver: $request"); # -------------------------------------------------------------- End non-forker } # ---------------------------------------------------------- End make_new_child @@ -701,7 +521,7 @@ sub openremote { my $conserver=shift; -&status("Opening TCP: $conserver"); +&status("Opening TCP"); unless ( $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, @@ -711,7 +531,7 @@ unless ( ) { my $st=120+int(rand(240)); &logthis( -"WARNING: Couldn't connect $conserver ($st secs): $@"); +"WARNING: Couldn't connect ($st secs): $@"); sleep($st); exit; }; @@ -734,30 +554,30 @@ chomp($answer); $SIG{__DIE__}=\&catchexception; if ($@=~/timeout/) { - &logthis("Timed out during init: $conserver"); + &logthis("Timed out during init"); exit; } if ($answer ne 'ok') { - &logthis("Init reply for $conserver: >$answer<"); + &logthis("Init reply: >$answer<"); my $st=120+int(rand(240)); &logthis( -"WARNING: Init failed $conserver ($st secs)"); +"WARNING: Init failed ($st secs)"); sleep($st); exit; } sleep 5; -&status("Ponging $conserver"); +&status("Ponging"); print $remotesock "pong\n"; $answer=<$remotesock>; chomp($answer); -if ($answer!~/^$converver/) { - &logthis("Pong reply for $conserver: >$answer<"); +if ($answer!~/^$conserver/) { + &logthis("Pong reply: >$answer<"); } # ----------------------------------------------------------- Initialize cipher -&status("Initialize cipher: $conserver"); +&status("Initialize cipher"); print $remotesock "ekey\n"; my $buildkey=<$remotesock>; my $key=$conserver.$perlvar{'lonHostID'}; @@ -768,18 +588,207 @@ $key=$key.$buildkey.$key.$buildkey.$key. $key=substr($key,0,32); my $cipherkey=pack("H32",$key); if ($cipher=new IDEA $cipherkey) { - &logthis("Secure connection initialized: $conserver"); + &logthis("Secure connection initialized"); } else { my $st=120+int(rand(240)); &logthis( "WARNING: ". - "Could not establish secure connection, $conserver ($st secs)!"); + "Could not establish secure connection ($st secs)!"); sleep($st); exit; } } + + +# grabs exception and records it to log before exiting +sub catchexception { + my ($signal)=@_; + $SIG{QUIT}='DEFAULT'; + $SIG{__DIE__}='DEFAULT'; + chomp($signal); + &logthis("CRITICAL: " + ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through " + ."\"$signal\" with parameter [$@]"); + die($@); +} + +# -------------------------------------- Routines to see if other box available + +sub online { + my $host=shift; + &status("Pinging ".$host); + my $p=Net::Ping->new("tcp",20); + my $online=$p->ping("$host"); + $p->close(); + undef ($p); + return $online; +} + +sub connected { + my ($local,$remote)=@_; + &status("Checking connection $local to $remote"); + $local=~s/\W//g; + $remote=~s/\W//g; + + unless ($hostname{$local}) { return 'local_unknown'; } + unless ($hostname{$remote}) { return 'remote_unknown'; } + + unless (&online($hostname{$local})) { return 'local_offline'; } + + my $ua=new LWP::UserAgent; + + my $request=new HTTP::Request('GET', + "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote); + + my $response=$ua->request($request); + + unless ($response->is_success) { return 'local_error'; } + + my $reply=$response->content; + $reply=(split("\n",$reply))[0]; + $reply=~s/\W//g; + if ($reply ne $remote) { return $reply; } + return 'ok'; +} + + +sub REAPER { # takes care of dead children + $SIG{CHLD} = \&REAPER; + my $pid = wait; + my $wasserver=$children{$pid}; + &logthis("CRITICAL: " + ."Child $pid for server $wasserver died ($childatt{$wasserver})"); + delete $children{$pid}; + delete $childpid{$wasserver}; + my $port = "$perlvar{'lonSockDir'}/$wasserver"; + unlink($port); +} + +sub hangup { + foreach (keys %children) { + $wasserver=$children{$_}; + &status("Closing $wasserver"); + &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); + &status("Kill PID $_ for $wasserver"); + kill ('INT',$_); + } +} + +sub HUNTSMAN { # signal handler for SIGINT + local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children + &hangup(); + my $execdir=$perlvar{'lonDaemons'}; + unlink("$execdir/logs/lonc.pid"); + &logthis("CRITICAL: Shutting down"); + exit; # clean up with dignity +} + +sub HUPSMAN { # signal handler for SIGHUP + local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children + &hangup(); + &logthis("CRITICAL: Restarting"); + unlink("$execdir/logs/lonc.pid"); + my $execdir=$perlvar{'lonDaemons'}; + exec("$execdir/lonc"); # here we go again +} + +sub checkchildren { + &initnewstatus(); + &logstatus(); + &logthis('Going to check on the children'); + foreach (sort keys %children) { + sleep 1; + unless (kill 'USR1' => $_) { + &logthis ('CRITICAL: Child '.$_.' is dead'); + &logstatus($$.' is dead'); + } + } +} + +sub USRMAN { + &logthis("USR1: Trying to establish connections again"); + %childatt=(); + &checkchildren(); +} + +# -------------------------------------------------- Non-critical communication +sub subreply { + my ($cmd,$server)=@_; + my $answer=''; + if ($server ne $perlvar{'lonHostID'}) { + my $peerfile="$perlvar{'lonSockDir'}/$server"; + my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", + Type => SOCK_STREAM, + Timeout => 10) + or return "con_lost"; + + + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(10); + print $sclient "$cmd\n"; + $answer=<$sclient>; + chomp($answer); + alarm(0); + }; + if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; } + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; + } else { $answer='self_reply'; } + return $answer; +} + +# --------------------------------------------------------------------- Logging + +sub logthis { + my $message=shift; + my $execdir=$perlvar{'lonDaemons'}; + my $fh=IO::File->new(">>$execdir/logs/lonc.log"); + my $now=time; + my $local=localtime($now); + $lastlog=$local.': '.$message; + print $fh "$local ($$) [$conserver] [$status]: $message\n"; +} + + +sub logperm { + my $message=shift; + my $execdir=$perlvar{'lonDaemons'}; + my $now=time; + my $local=localtime($now); + my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); + print $fh "$now:$message:$local\n"; +} +# ------------------------------------------------------------------ Log status + +sub logstatus { + my $docdir=$perlvar{'lonDocRoot'}; + my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt"); + print $fh $$."\t".$conserver."\t".$status."\t".$lastlog."\n"; +} + +sub initnewstatus { + my $docdir=$perlvar{'lonDocRoot'}; + my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt"); + my $now=time; + my $local=localtime($now); + print $fh "LONC status $local - parent $$\n\n"; +} + +# -------------------------------------------------------------- Status setting + +sub status { + my $what=shift; + my $now=time; + my $local=localtime($now); + $status=$local.': '.$what; +} + + + # ----------------------------------- POD (plain old documentation, CPAN style) =head1 NAME