--- loncom/Attic/lonc 2000/12/05 19:03:55 1.10 +++ loncom/Attic/lonc 2002/02/06 14:17:50 1.23.2.1 @@ -5,6 +5,30 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # +# $Id: lonc,v 1.23.2.1 2002/02/06 14:17:50 albertel Exp $ +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ +# # PID in subdir logs/lonc.pid # kill kills # HUP restarts @@ -15,6 +39,10 @@ # 2/8,7/25 Gerd Kortemeyer # 12/05 Scott Harrison # 12/05 Gerd Kortemeyer +# YEAR=2001 +# 01/10/01 Scott Harrison +# 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer +# 12/20 Scott Harrison # # based on nonforker from Perl Cookbook # - server who multiplexes without forking @@ -28,6 +56,9 @@ use Fcntl; use Tie::RefHash; use Crypt::IDEA; +my $status=''; +my $lastlog=''; + # grabs exception and records it to log before exiting sub catchexception { my ($signal)=@_; @@ -35,25 +66,11 @@ sub catchexception { $SIG{__DIE__}='DEFAULT'; &logthis("CRITICAL: " ."ABNORMAL EXIT. Child $$ for server $wasserver died through " - ."$signal with this parameter->[$@]"); + ."\"$signal\" with this parameter->[$@]"); die($@); } -# grabs exception and records it to log before exiting -# NOTE: we must NOT use the regular (non-overrided) die function in -# the code because a handler CANNOT be attached to it -# (despite what some of the documentation says about SIG{__DIE__}. -sub catchdie { - my ($message)=@_; - $SIG{'QUIT'}='DEFAULT'; - $SIG{__DIE__}='DEFAULT'; - &logthis("CRITICAL: " - ."ABNORMAL EXIT. Child $$ for server $wasserver died through " - ."\_\_DIE\_\_ with this parameter->[$message]"); - die($message); -} - -$childmaxattempts=10; +$childmaxattempts=5; # -------------------------------- Set signal handlers to record abnormal exits @@ -62,8 +79,7 @@ $SIG{__DIE__}=\&catchexception; # ------------------------------------ Read httpd access.conf and get variables -open (CONFIG,"/etc/httpd/conf/access.conf") - || catchdie "Can't read access.conf"; +open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; while ($configline=) { if ($configline =~ /PerlSetVar/) { @@ -74,6 +90,16 @@ while ($configline=) { } close(CONFIG); +# ----------------------------- Make sure this process is running from user=www +my $wwwid=getpwnam('www'); +if ($wwwid!=$<) { + $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. lonc must be run as user www.' |\ + mailto $emailto -s '$subj' > /dev/null"); + exit 1; +} + # --------------------------------------------- Check if other instance running my $pidfile="$perlvar{'lonDaemons'}/logs/lonc.pid"; @@ -82,13 +108,12 @@ if (-e $pidfile) { my $lfh=IO::File->new("$pidfile"); my $pide=<$lfh>; chomp($pide); - if (kill 0 => $pide) { catchdie "already running"; } + if (kill 0 => $pide) { die "already running"; } } # ------------------------------------------------------------- Read hosts file -open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") - || catchdie "Can't read host file"; +open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file"; while ($configline=) { my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); @@ -120,7 +145,13 @@ sub REAPER { # ta sub HUNTSMAN { # signal handler for SIGINT local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children - kill 'INT' => keys %children; + foreach (keys %children) { + $wasserver=$children{$_}; + &status("Closing $wasserver"); + &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); + &status("Kill PID $_ for $wasserver"); + kill ('INT',$_); + } my $execdir=$perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); &logthis("CRITICAL: Shutting down"); @@ -129,12 +160,32 @@ sub HUNTSMAN { # si sub HUPSMAN { # signal handler for SIGHUP local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children - kill 'INT' => keys %children; + foreach (keys %children) { + $wasserver=$children{$_}; + &status("Closing $wasserver"); + &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); + &status("Kill PID $_ for $wasserver"); + kill ('INT',$_); + } &logthis("CRITICAL: Restarting"); + unlink("$execdir/logs/lonc.pid"); my $execdir=$perlvar{'lonDaemons'}; exec("$execdir/lonc"); # here we go again } +sub checkchildren { + &initnewstatus(); + &logstatus(); + &logthis('Going to check on the children'); + foreach (sort keys %children) { + sleep 1; + unless (kill 'USR1' => $_) { + &logthis ('Child '.$_.' is dead'); + &logstatus($$.' is dead'); + } + } +} + sub USRMAN { &logthis("USR1: Trying to establish connections again"); foreach $thisserver (keys %hostip) { @@ -144,6 +195,7 @@ sub USRMAN { ." >$answer<"); } %childatt=(); + &checkchildren(); } # -------------------------------------------------- Non-critical communication @@ -156,10 +208,20 @@ sub subreply { Type => SOCK_STREAM, Timeout => 10) or return "con_lost"; - print $sclient "$cmd\n"; - my $answer=<$sclient>; - chomp($answer); - if (!$answer) { $answer="con_lost"; } + + + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(10); + print $sclient "$cmd\n"; + $answer=<$sclient>; + chomp($answer); + alarm(0); + }; + if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; } + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; } else { $answer='self_reply'; } return $answer; } @@ -172,6 +234,7 @@ sub logthis { my $fh=IO::File->new(">>$execdir/logs/lonc.log"); my $now=time; my $local=localtime($now); + $lastlog=$local.': '.$message; print $fh "$local ($$): $message\n"; } @@ -184,14 +247,39 @@ sub logperm { my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); print $fh "$now:$message:$local\n"; } +# ------------------------------------------------------------------ Log status + +sub logstatus { + my $docdir=$perlvar{'lonDocRoot'}; + my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt"); + print $fh $$."\t".$status."\t".$lastlog."\n"; +} + +sub initnewstatus { + my $docdir=$perlvar{'lonDocRoot'}; + my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt"); + my $now=time; + my $local=localtime($now); + print $fh "LONC status $local - parent $$\n\n"; +} + +# -------------------------------------------------------------- Status setting + +sub status { + my $what=shift; + my $now=time; + my $local=localtime($now); + $status=$local.': '.$what; +} + # ---------------------------------------------------- Fork once and dissociate $fpid=fork; exit if $fpid; -catchdie "Couldn't fork: $!" unless defined ($fpid); +die "Couldn't fork: $!" unless defined ($fpid); -POSIX::setsid() or catchdie "Can't start new session: $!"; +POSIX::setsid() or die "Can't start new session: $!"; # ------------------------------------------------------- Write our PID on disk @@ -207,6 +295,8 @@ $SIG{HUP}=$SIG{USR1}='IGNORE'; # Fork off our children, one for every server +&status("Forking ..."); + foreach $thisserver (keys %hostip) { make_new_child($thisserver); } @@ -221,11 +311,13 @@ $SIG{USR1} = \&USRMAN; # And maintain the population. while (1) { + &status("Sleeping"); sleep; # wait for a signal (i.e., child's death) # See who died and start new one + &status("Woke up"); foreach $thisserver (keys %hostip) { if (!$childpid{$thisserver}) { - if ($childatt{$thisserver}<=$childmaxattempts) { + if ($childatt{$thisserver}<$childmaxattempts) { $childatt{$thisserver}++; &logthis( "INFO: Trying to reconnect for $thisserver " @@ -246,31 +338,36 @@ sub make_new_child { # block signal for fork $sigset = POSIX::SigSet->new(SIGINT); sigprocmask(SIG_BLOCK, $sigset) - or catchdie "Can't block SIGINT for fork: $!\n"; + or die "Can't block SIGINT for fork: $!\n"; - catchdie "fork: $!" unless defined ($pid = fork); + die "fork: $!" unless defined ($pid = fork); if ($pid) { # Parent records the child's birth and returns. sigprocmask(SIG_UNBLOCK, $sigset) - or catchdie "Can't unblock SIGINT for fork: $!\n"; + or die "Can't unblock SIGINT for fork: $!\n"; $children{$pid} = $conserver; $childpid{$conserver} = $pid; return; } else { # Child can *not* return from this subroutine. $SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before - + $SIG{USR1}= \&logstatus; + # unblock signals sigprocmask(SIG_UNBLOCK, $sigset) - or catchdie "Can't unblock SIGINT for fork: $!\n"; + or die "Can't unblock SIGINT for fork: $!\n"; # ----------------------------- This is the modified main program of non-forker $port = "$perlvar{'lonSockDir'}/$conserver"; unlink($port); + # ---------------------------------------------------- Client to network server + +&status("Opening TCP: $conserver"); + unless ( $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, PeerPort => $perlvar{'londPort'}, @@ -283,20 +380,47 @@ unless ( sleep($st); exit; }; -# --------------------------------------- Send a ping to make other end do USR1 +# ----------------------------------------------------------------- Init dialog + +&status("Init dialogue: $conserver"); + + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(60); print $remotesock "init\n"; $answer=<$remotesock>; print $remotesock "$answer"; $answer=<$remotesock>; chomp($answer); + alarm(0); + }; + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; + + if ($@=~/timeout/) { + &logthis("Timed out during init: $conserver"); + exit; + } + + &logthis("Init reply for $conserver: >$answer<"); +if ($answer ne 'ok') { + my $st=120+int(rand(240)); + &logthis( +"WARNING: Init failed $conserver ($st secs)"); + sleep($st); + exit; +} sleep 5; +&status("Ponging $conserver"); print $remotesock "pong\n"; $answer=<$remotesock>; chomp($answer); &logthis("Pong reply for $conserver: >$answer<"); # ----------------------------------------------------------- Initialize cipher +&status("Initialize cipher: $conserver"); print $remotesock "ekey\n"; my $buildkey=<$remotesock>; my $key=$conserver.$perlvar{'lonHostID'}; @@ -307,7 +431,7 @@ $key=$key.$buildkey.$key.$buildkey.$key. $key=substr($key,0,32); my $cipherkey=pack("H32",$key); if ($cipher=new IDEA $cipherkey) { - &logthis("Secure connection inititalized: $conserver"); + &logthis("Secure connection initialized: $conserver"); } else { my $st=120+int(rand(240)); &logthis( @@ -318,14 +442,15 @@ if ($cipher=new IDEA $cipherkey) { } # ----------------------------------------- We're online, send delayed messages - + &status("Checking for delayed messages"); my @allbuffered; my $path="$perlvar{'lonSockDir'}/delayed"; opendir(DIRHANDLE,$path); @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE; closedir(DIRHANDLE); my $dfname; - map { + foreach (@allbuffered) { + &status("Sending delayed $conserver $_"); $dfname="$path/$_"; &logthis($dfname); my $wcmd; @@ -348,18 +473,27 @@ if ($cipher=new IDEA $cipherkey) { } $cmd="enc:$cmdlength:$encrequest\n"; } - + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(60); print $remotesock "$cmd\n"; $answer=<$remotesock>; chomp($answer); - if ($answer ne '') { + alarm(0); + }; + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; + + if (($answer ne '') && ($@!~/timeout/)) { unlink("$dfname"); &logthis("Delayed $cmd to $conserver: >$answer<"); &logperm("S:$conserver:$bcmd"); } - } @allbuffered; + } # ------------------------------------------------------- Listen to UNIX socket +&status("Opening socket $conserver"); unless ( $server = IO::Socket::UNIX->new(Local => $port, Type => SOCK_STREAM, @@ -397,11 +531,11 @@ while (1) { # check for new information on the connections we have # anything to read or accept? - foreach $client ($select->can_read(1)) { + foreach $client ($select->can_read(0.1)) { if ($client == $server) { # accept a new connection - + &status("Accept new connection: $conserver"); $client = $server->accept(); $select->add($client); nonblock($client); @@ -416,6 +550,7 @@ while (1) { delete $outbuffer{$client}; delete $ready{$client}; + &status("Idle $conserver"); $select->remove($client); close $client; next; @@ -435,27 +570,31 @@ while (1) { # Any complete requests to process? foreach $client (keys %ready) { - handle($client); + handle($client,$conserver); } # Buffers to flush? foreach $client ($select->can_write(1)) { # Skip this client if we have nothing to say next unless exists $outbuffer{$client}; - $rv = $client->send($outbuffer{$client}, 0); unless (defined $rv) { # Whine, but move on. - warn "I was told I could write, but I can't.\n"; + &logthis("I was told I could write, but I can't.\n"); next; } + $errno=$!; if (($rv == length $outbuffer{$client}) || - ($! == POSIX::EWOULDBLOCK)) { + ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) { substr($outbuffer{$client}, 0, $rv) = ''; delete $outbuffer{$client} unless length $outbuffer{$client}; } else { # Couldn't write all the data, and it wasn't because # it would have blocked. Shutdown and move on. + + &logthis("Dropping data with ".$errno.": ". + length($outbuffer{$client}).", $rv"); + delete $inbuffer{$client}; delete $outbuffer{$client}; delete $ready{$client}; @@ -467,7 +606,7 @@ while (1) { } } } - +} # ------------------------------------------------------- End of make_new_child # handle($socket) deals with all pending requests for $client @@ -475,6 +614,7 @@ sub handle { # requests are in $ready{$client} # send output to $outbuffer{$client} my $client = shift; + my $conserver = shift; my $request; foreach $request (@{$ready{$client}}) { @@ -495,8 +635,30 @@ sub handle { } $request="enc:$cmdlength:$encrequest\n"; } +# --------------------------------------------------------------- Main exchange + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(300); + &status("Sending $conserver: $request"); + &logthis("Sending $conserver: $request"); print $remotesock "$request"; + &status("Waiting for reply from $conserver: $request"); + &logthis("Waiting for reply from $conserver: $request"); $answer=<$remotesock>; + &status("Received reply: $request"); + &logthis("Received reply $conserver: $answer"); + alarm(0); + }; + if ($@=~/timeout/) { + $answer=''; + &logthis( + "CRITICAL: Timeout $conserver: $request"); + } + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; + + if ($answer) { if ($answer =~ /^enc/) { my ($cmd,$cmdlength,$encinput)=split(/:/,$answer); @@ -516,12 +678,13 @@ sub handle { } # ===================================================== Done processing request + &logthis("Completed $conserver: $request"); } delete $ready{$client}; + &status("Completed $conserver: $request"); # -------------------------------------------------------------- End non-forker } # ---------------------------------------------------------- End make_new_child -} # nonblock($socket) puts socket into nonblocking mode sub nonblock { @@ -530,8 +693,55 @@ sub nonblock { $flags = fcntl($socket, F_GETFL, 0) - or catchdie "Can't get flags for socket: $!\n"; + or die "Can't get flags for socket: $!\n"; fcntl($socket, F_SETFL, $flags | O_NONBLOCK) - or catchdie "Can't make socket nonblocking: $!\n"; + or die "Can't make socket nonblocking: $!\n"; } +# ----------------------------------- POD (plain old documentation, CPAN style) + +=head1 NAME + +lonc - LON TCP-MySQL-Server Daemon for handling database requests. + +=head1 SYNOPSIS + +Should only be run as user=www. This is a command-line script which +is invoked by loncron. + +=head1 DESCRIPTION + +Provides persistent TCP connections to the other servers in the network +through multiplexed domain sockets + + PID in subdir logs/lonc.pid + kill kills + HUP restarts + USR1 tries to open connections again + +=head1 README + +Not yet written. + +=head1 PREREQUISITES + +POSIX +IO::Socket +IO::Select +IO::File +Socket +Fcntl +Tie::RefHash +Crypt::IDEA + +=head1 COREQUISITES + +=head1 OSNAMES + +linux + +=head1 SCRIPT CATEGORIES + +Server/Process + +=cut