--- loncom/Attic/lonc 1999/12/22 17:18:04 1.4 +++ loncom/Attic/lonc 2000/12/05 19:03:55 1.10 @@ -11,7 +11,11 @@ # USR1 tries to open connections again # 6/4/99,6/5,6/7,6/8,6/9,6/10,6/11,6/12,7/14,7/19, -# 10/8,10/9,10/15,11/18,12/22 Gerd Kortemeyer +# 10/8,10/9,10/15,11/18,12/22, +# 2/8,7/25 Gerd Kortemeyer +# 12/05 Scott Harrison +# 12/05 Gerd Kortemeyer +# # based on nonforker from Perl Cookbook # - server who multiplexes without forking @@ -24,9 +28,42 @@ use Fcntl; use Tie::RefHash; use Crypt::IDEA; +# grabs exception and records it to log before exiting +sub catchexception { + my ($signal)=@_; + $SIG{'QUIT'}='DEFAULT'; + $SIG{__DIE__}='DEFAULT'; + &logthis("CRITICAL: " + ."ABNORMAL EXIT. Child $$ for server $wasserver died through " + ."$signal with this parameter->[$@]"); + die($@); +} + +# grabs exception and records it to log before exiting +# NOTE: we must NOT use the regular (non-overrided) die function in +# the code because a handler CANNOT be attached to it +# (despite what some of the documentation says about SIG{__DIE__}. +sub catchdie { + my ($message)=@_; + $SIG{'QUIT'}='DEFAULT'; + $SIG{__DIE__}='DEFAULT'; + &logthis("CRITICAL: " + ."ABNORMAL EXIT. Child $$ for server $wasserver died through " + ."\_\_DIE\_\_ with this parameter->[$message]"); + die($message); +} + +$childmaxattempts=10; + +# -------------------------------- Set signal handlers to record abnormal exits + +$SIG{'QUIT'}=\&catchexception; +$SIG{__DIE__}=\&catchexception; + # ------------------------------------ Read httpd access.conf and get variables -open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; +open (CONFIG,"/etc/httpd/conf/access.conf") + || catchdie "Can't read access.conf"; while ($configline=) { if ($configline =~ /PerlSetVar/) { @@ -37,9 +74,21 @@ while ($configline=) { } close(CONFIG); +# --------------------------------------------- Check if other instance running + +my $pidfile="$perlvar{'lonDaemons'}/logs/lonc.pid"; + +if (-e $pidfile) { + my $lfh=IO::File->new("$pidfile"); + my $pide=<$lfh>; + chomp($pide); + if (kill 0 => $pide) { catchdie "already running"; } +} + # ------------------------------------------------------------- Read hosts file -open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file"; +open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") + || catchdie "Can't read host file"; while ($configline=) { my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); @@ -61,7 +110,8 @@ sub REAPER { # ta $SIG{CHLD} = \&REAPER; my $pid = wait; my $wasserver=$children{$pid}; - &logthis("Child $pid for server $wasserver died"); + &logthis("CRITICAL: " + ."Child $pid for server $wasserver died ($childatt{$wasserver})"); delete $children{$pid}; delete $childpid{$wasserver}; my $port = "$perlvar{'lonSockDir'}/$wasserver"; @@ -73,31 +123,33 @@ sub HUNTSMAN { # si kill 'INT' => keys %children; my $execdir=$perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); - &logthis("Shutting down"); + &logthis("CRITICAL: Shutting down"); exit; # clean up with dignity } sub HUPSMAN { # signal handler for SIGHUP local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children kill 'INT' => keys %children; - &logthis("Restarting"); + &logthis("CRITICAL: Restarting"); my $execdir=$perlvar{'lonDaemons'}; exec("$execdir/lonc"); # here we go again } sub USRMAN { - %childatt=(); &logthis("USR1: Trying to establish connections again"); foreach $thisserver (keys %hostip) { $answer=subreply("ping",$thisserver); - &logthis( - "USR1: Ping $thisserver (pid >$childpid{$thisserver}<): >$answer<"); + &logthis("USR1: Ping $thisserver " + ."(pid >$childpid{$thisserver}<, $childatt{thisserver} attempts): " + ." >$answer<"); } + %childatt=(); } # -------------------------------------------------- Non-critical communication sub subreply { my ($cmd,$server)=@_; + my $answer=''; if ($server ne $perlvar{'lonHostID'}) { my $peerfile="$perlvar{'lonSockDir'}/$server"; my $sclient=IO::Socket::UNIX->new(Peer =>"$peerfile", @@ -137,9 +189,9 @@ sub logperm { $fpid=fork; exit if $fpid; -die "Couldn't fork: $!" unless defined ($fpid); +catchdie "Couldn't fork: $!" unless defined ($fpid); -POSIX::setsid() or die "Can't start new session: $!"; +POSIX::setsid() or catchdie "Can't start new session: $!"; # ------------------------------------------------------- Write our PID on disk @@ -147,7 +199,7 @@ $execdir=$perlvar{'lonDaemons'}; open (PIDSAVE,">$execdir/logs/lonc.pid"); print PIDSAVE "$$\n"; close(PIDSAVE); -&logthis("---------- Starting ----------"); +&logthis("CRITICAL: ---------- Starting ----------"); # ----------------------------- Ignore signals generated during initial startup $SIG{HUP}=$SIG{USR1}='IGNORE'; @@ -173,9 +225,12 @@ while (1) { # See who died and start new one foreach $thisserver (keys %hostip) { if (!$childpid{$thisserver}) { - if ($childatt{$thisserver}<5) { + if ($childatt{$thisserver}<=$childmaxattempts) { + $childatt{$thisserver}++; + &logthis( + "INFO: Trying to reconnect for $thisserver " + ."($childatt{$thisserver} of $childmaxattempts attempts)"); make_new_child($thisserver); - $childatt{$thisserver}++; } } } @@ -191,14 +246,14 @@ sub make_new_child { # block signal for fork $sigset = POSIX::SigSet->new(SIGINT); sigprocmask(SIG_BLOCK, $sigset) - or die "Can't block SIGINT for fork: $!\n"; + or catchdie "Can't block SIGINT for fork: $!\n"; - die "fork: $!" unless defined ($pid = fork); + catchdie "fork: $!" unless defined ($pid = fork); if ($pid) { # Parent records the child's birth and returns. sigprocmask(SIG_UNBLOCK, $sigset) - or die "Can't unblock SIGINT for fork: $!\n"; + or catchdie "Can't unblock SIGINT for fork: $!\n"; $children{$pid} = $conserver; $childpid{$conserver} = $pid; return; @@ -208,7 +263,7 @@ sub make_new_child { # unblock signals sigprocmask(SIG_UNBLOCK, $sigset) - or die "Can't unblock SIGINT for fork: $!\n"; + or catchdie "Can't unblock SIGINT for fork: $!\n"; # ----------------------------- This is the modified main program of non-forker @@ -221,8 +276,11 @@ unless ( PeerPort => $perlvar{'londPort'}, Proto => "tcp", Type => SOCK_STREAM) - ) { &logthis("Couldn't connect $conserver: $@"); - sleep(5); + ) { + my $st=120+int(rand(240)); + &logthis( +"WARNING: Couldn't connect $conserver ($st secs): $@"); + sleep($st); exit; }; # --------------------------------------- Send a ping to make other end do USR1 @@ -251,7 +309,12 @@ my $cipherkey=pack("H32",$key); if ($cipher=new IDEA $cipherkey) { &logthis("Secure connection inititalized: $conserver"); } else { - &logthis("Error: Could not establish secure connection, $conserver!"); + my $st=120+int(rand(240)); + &logthis( + "WARNING: ". + "Could not establish secure connection, $conserver ($st secs)!"); + sleep($st); + exit; } # ----------------------------------------- We're online, send delayed messages @@ -301,13 +364,20 @@ unless ( $server = IO::Socket::UNIX->new(Local => $port, Type => SOCK_STREAM, Listen => 10 ) - ) { &logthis("Can't make server socket $conserver: $@"); - sleep(5); + ) { + my $st=120+int(rand(240)); + &logthis( + "WARNING: ". + "Can't make server socket $conserver ($st secs): $@"); + sleep($st); exit; }; # ----------------------------------------------------------------------------- +&logthis("$conserver online"); + +# ----------------------------------------------------------------------------- # begin with empty buffers %inbuffer = (); %outbuffer = (); @@ -460,12 +530,8 @@ sub nonblock { $flags = fcntl($socket, F_GETFL, 0) - or die "Can't get flags for socket: $!\n"; + or catchdie "Can't get flags for socket: $!\n"; fcntl($socket, F_SETFL, $flags | O_NONBLOCK) - or die "Can't make socket nonblocking: $!\n"; + or catchdie "Can't make socket nonblocking: $!\n"; } - - - -