--- loncom/lond 2003/12/02 10:37:59 1.164 +++ loncom/lond 2004/03/08 20:13:07 1.165.2.3 @@ -2,7 +2,7 @@ # The LearningOnline Network # lond "LON Daemon" Server (port "LOND" 5663) # -# $Id: lond,v 1.164 2003/12/02 10:37:59 foxr Exp $ +# $Id: lond,v 1.165.2.3 2004/03/08 20:13:07 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -52,7 +52,7 @@ my $DEBUG = 0; # Non zero to ena my $status=''; my $lastlog=''; -my $VERSION='$Revision: 1.164 $'; #' stupid emacs +my $VERSION='$Revision: 1.165.2.3 $'; #' stupid emacs my $remoteVERSION; my $currenthostid; my $currentdomainid; @@ -513,6 +513,7 @@ sub catchexception { my ($error)=@_; $SIG{'QUIT'}='DEFAULT'; $SIG{__DIE__}='DEFAULT'; + &status("Catching exception"); &logthis("CRITICAL: " ."ABNORMAL EXIT. Child $$ for server $thisserver died through " ."a crash with this error msg->[$error]"); @@ -523,6 +524,7 @@ sub catchexception { } sub timeout { + &status("Handling Timeout"); &logthis("CRITICAL: TIME OUT ".$$.""); &catchexception('Timeout'); } @@ -575,37 +577,51 @@ $server = IO::Socket::INET->new(LocalPor # global variables my %children = (); # keys are current child process IDs -my $children = 0; # current number of children sub REAPER { # takes care of dead children $SIG{CHLD} = \&REAPER; - my $pid = wait; - if (defined($children{$pid})) { - &logthis("Child $pid died"); - $children --; - delete $children{$pid}; - } else { - &logthis("Unknown Child $pid died"); + &status("Handling child death"); + my $pid; + do { + $pid = waitpid(-1,&WNOHANG()); + if (defined($children{$pid})) { + &logthis("Child $pid died"); + delete($children{$pid}); + } else { + &logthis("Unknown Child $pid died"); + } + } while ( $pid > 0 ); + foreach my $child (keys(%children)) { + $pid = waitpid($child,&WNOHANG()); + if ($pid > 0) { + &logthis("Child $child - $pid looks like we missed it's death"); + delete($children{$pid}); + } } + &status("Finished Handling child death"); } sub HUNTSMAN { # signal handler for SIGINT + &status("Killing children (INT)"); local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children kill 'INT' => keys %children; &logthis("Free socket: ".shutdown($server,2)); # free up socket my $execdir=$perlvar{'lonDaemons'}; unlink("$execdir/logs/lond.pid"); &logthis("CRITICAL: Shutting down"); + &status("Done killing children"); exit; # clean up with dignity } sub HUPSMAN { # signal handler for SIGHUP local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children + &status("Killing children for restart (HUP)"); kill 'INT' => keys %children; &logthis("Free socket: ".shutdown($server,2)); # free up socket &logthis("CRITICAL: Restarting"); my $execdir=$perlvar{'lonDaemons'}; unlink("$execdir/logs/lond.pid"); + &status("Restarting self (HUP)"); exec("$execdir/lond"); # here we go again } @@ -636,12 +652,14 @@ sub ReadHostTable { open (CONFIG,"$perlvar{'lonTabDir'}/hosts.tab") || die "Can't read host file"; while (my $configline=) { - my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); - chomp($ip); $ip=~s/\D+$//; - $hostid{$ip}=$id; - $hostdom{$id}=$domain; - $hostip{$id}=$ip; - if ($id eq $perlvar{'lonHostID'}) { $thisserver=$name; } + if (!($configline =~ /^\s*\#/)) { + my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); + chomp($ip); $ip=~s/\D+$//; + $hostid{$ip}=$id; + $hostdom{$id}=$domain; + $hostip{$id}=$ip; + if ($id eq $perlvar{'lonHostID'}) { $thisserver=$name; } + } } close(CONFIG); } @@ -667,6 +685,7 @@ sub ReloadApache { # now be honored. # sub UpdateHosts { + &status("Reload hosts.tab"); logthis(' Updating connections '); # # The %children hash has the set of IP's we currently have children @@ -691,10 +710,12 @@ sub UpdateHosts { } } ReloadApache; + &status("Finished reloading hosts.tab"); } sub checkchildren { + &status("Checking on the children (sending signals)"); &initnewstatus(); &logstatus(); &logthis('Going to check on the children'); @@ -709,6 +730,7 @@ sub checkchildren { sleep 5; $SIG{ALRM} = sub { die "timeout" }; $SIG{__DIE__} = 'DEFAULT'; + &status("Checking on the children (waiting for reports)"); foreach (sort keys %children) { unless (-e "$docdir/lon-status/londchld/$_.txt") { eval { @@ -726,6 +748,7 @@ sub checkchildren { } $SIG{ALRM} = 'DEFAULT'; $SIG{__DIE__} = \&catchexception; + &status("Finished checking children"); } # --------------------------------------------------------------------- Logging @@ -768,17 +791,20 @@ sub Reply { # ------------------------------------------------------------------ Log status sub logstatus { + &status("Doing logging"); my $docdir=$perlvar{'lonDocRoot'}; { my $fh=IO::File->new(">>$docdir/lon-status/londstatus.txt"); - print $fh $$."\t".$currenthostid."\t".$status."\t".$lastlog."\n"; + print $fh $$."\t".$clientname."\t".$currenthostid."\t".$status."\t".$lastlog."\n"; $fh->close(); } + &status("Finished londstatus.txt"); { my $fh=IO::File->new(">$docdir/lon-status/londchld/$$.txt"); print $fh $status."\n".$lastlog."\n".time; $fh->close(); } + &status("Finished logging"); } sub initnewstatus { @@ -966,8 +992,11 @@ ReadHostTable; # along the connection. while (1) { + &status('Starting accept'); $client = $server->accept() or next; + &status('Accepted '.$client.' off to spawn'); make_new_child($client); + &status('Finished spawning'); } sub make_new_child { @@ -976,6 +1005,7 @@ sub make_new_child { my $sigset; $client = shift; + &status('Starting new child '.$client); &logthis(' Attempting to start child ('.$client. ")"); # block signal for fork @@ -1001,7 +1031,6 @@ sub make_new_child { sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; $children{$pid} = $clientip; - $children++; &status('Started child '.$pid); return; } else { @@ -1419,7 +1448,7 @@ sub make_new_child { unless (mkdir($fpnow,0777)) { $fperror="error: ".($!+0) ." mkdir failed while attempting " - ."makeuser\n"; + ."makeuser"; } } }