--- loncom/loncron 2004/05/11 20:19:46 1.48 +++ loncom/loncron 2006/11/01 21:08:06 1.69 @@ -2,7 +2,7 @@ # Housekeeping program, started by cron, loncontrol and loncron.pl # -# $Id: loncron,v 1.48 2004/05/11 20:19:46 albertel Exp $ +# $Id: loncron,v 1.69 2006/11/01 21:08:06 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -36,6 +36,7 @@ use LONCAPA::Configuration; use IO::File; use IO::Socket; use HTML::Entities; +use Getopt::Long; #globals use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount); @@ -44,13 +45,13 @@ my $statusdir="/home/httpd/html/lon-stat # -------------------------------------------------- Non-critical communication sub reply { - my ($cmd,$server)=@_; - my $peerfile="$perlvar{'lonSockDir'}/$server"; + my ($cmd,$server,$hostname)=@_; + my $peerfile="$perlvar{'lonSockDir'}/".$hostname->{$server}; my $client=IO::Socket::UNIX->new(Peer =>"$peerfile", Type => SOCK_STREAM, Timeout => 10) or return "con_lost"; - print $client "$cmd\n"; + print $client "sethost:$server:$cmd\n"; my $answer=<$client>; chomp($answer); if (!$answer) { $answer="con_lost"; } @@ -76,21 +77,28 @@ ENDERROUT } sub start_daemon { - my ($fh,$daemon,$pidfile) = @_; + my ($fh,$daemon,$pidfile,$args) = @_; my $progname=$daemon; - if ($daemon eq 'lonc' && $ARGV[0] eq 'new') { + if ($daemon eq 'lonc' && $args eq 'new') { $progname='loncnew'; print "new "; } - system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); - sleep 2; + my $error_fname="$perlvar{'lonDaemons'}/logs/${daemon}_errors"; + my $size=(stat($error_fname))[7]; + if ($size>40000) { + &log($fh,"
Rotating error logs ...
"); + rename("$error_fname.2","$error_fname.3"); + rename("$error_fname.1","$error_fname.2"); + rename("$error_fname","$error_fname.1"); + } + system("$perlvar{'lonDaemons'}/$progname 2>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); + sleep 1; if (-e $pidfile) { &log($fh,"Seems like it started ...
"); my $lfh=IO::File->new("$pidfile"); my $daemonpid=<$lfh>; chomp($daemonpid); - sleep 2; - if (kill 0 => $daemonpid) { + if ($daemonpid =~ /^\d+$/ && kill 0 => $daemonpid) { return 1; } else { return 0; @@ -102,10 +110,11 @@ sub start_daemon { } sub checkon_daemon { - my ($fh,$daemon,$maxsize,$sendusr1)=@_; + my ($fh,$daemon,$maxsize,$send,$args)=@_; + my $result; &log($fh,'');
- printf("%-10s ",$daemon);
+ printf("%-15s ",$daemon);
if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|");
while (my $line= Give it one more try ... The server encountered an internal error or
misconfiguration and was unable to complete
your request. Please contact the server administrator at
root@localhost to inform them of the time this error occurred,
and the actions you performed just before this error. More information about this error may be available
in the server error log.$daemon at pid $daemonpid responding");
- if ($sendusr1) { &log($fh,", sending USR1"); }
+ if ($send) { &log($fh,", sending $send"); }
&log($fh,"
");
- if ($sendusr1) { kill USR1 => $daemonpid; }
+ if ($send eq 'USR1') { kill USR1 => $daemonpid; }
+ if ($send eq 'USR2') { kill USR2 => $daemonpid; }
$restartflag=0;
- print "running\n";
+ if ($send eq 'USR2') {
+ $result = 'reloaded';
+ print "reloaded\n";
+ } else {
+ $result = 'running';
+ print "running\n";
+ }
} else {
$errors++;
&log($fh,"$daemon at pid $daemonpid not responding
");
@@ -143,28 +159,33 @@ sub checkon_daemon {
if ($restartflag==1) {
$simplestatus{$daemon}='off';
$errors++;
+ my $kadaemon=$daemon;
+ if ($kadaemon eq 'lonmemcached') { $kadaemon='memcached'; }
&log($fh,'
Killall '.$daemon.': '.
- `killall $daemon 2>&1`.' - ');
- sleep 2;
+ `killall $kadaemon 2>&1`.' - ');
+ sleep 1;
&log($fh,unlink($pidfile).' - '.
- `killall -9 $daemon 2>&1`.
+ `killall -9 $kadaemon 2>&1`.
'
');
&log($fh,"$daemon not running, trying to start
");
- if (&start_daemon($fh,$daemon,$pidfile)) {
+ if (&start_daemon($fh,$daemon,$pidfile,$args)) {
&log($fh,"$daemon at pid $daemonpid responding
");
$simplestatus{$daemon}='restarted';
+ $result = 'started';
print "started\n";
} else {
$errors++;
&log($fh,"$daemon at pid $daemonpid not responding
");
&log($fh,"$daemon at pid $daemonpid responding
");
$simplestatus{$daemon}='restarted';
+ $result = 'started';
print "started\n";
} else {
+ $result = 'failed';
print " failed\n";
$simplestatus{$daemon}='failed';
$errors++; $errors++;
@@ -201,6 +222,7 @@ sub checkon_daemon {
}
&errout($fh);
+ return $result;
}
# --------------------------------------------------------------------- Machine
@@ -251,7 +273,7 @@ sub log_machine_info {
&log($fh,"");
my $psproc=0;
- open (PSH,"ps -aux --cols 140 |");
+ open (PSH,"ps aux --cols 140 |");
while (my $line=
distprobe
");
+ &log($fh,"");
+ open(DSH,"$perlvar{'lonDaemons'}/distprobe |");
+ while (my $line=
");
+
&errout($fh);
}
@@ -450,29 +482,36 @@ sub test_connections {
&log($fh,'Connections
');
print "testing connections\n";
&log($fh,"");
+ my ($good,$bad)=(0,0);
foreach my $tryserver (sort(keys(%{$hostname}))) {
print(".");
my $result;
- my $answer=reply("pong",$tryserver);
+ my $answer=&reply("ping",$tryserver,$hostname);
if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
$result="ok";
+ $good++;
} else {
$result=$answer;
$warnings++;
- if ($answer eq 'con_lost') { $warnings++; }
+ if ($answer eq 'con_lost') {
+ $bad++;
+ $warnings++;
+ } else {
+ $good++; #self connection
+ }
}
if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
&log($fh,"
");
-
+ print "\n$good good, $bad bad connections\n";
&errout($fh);
}
# ------------------------------------------------------------ Delayed messages
sub check_delayed_msg {
- my ($fh)=@_;
+ my ($fh,$hostname)=@_;
&log($fh,' \n");
}
&log($fh,"$tryserver $result Delayed Messages
');
print "checking buffers\n";
@@ -497,13 +536,22 @@ sub check_delayed_msg {
if ($unsend) { $simplestatus{'unsend'}=$unsend; }
&log($fh,"Outgoing Buffer
\n");
-
+# list directory with delayed messages and remember offline servers
+ my %servers=();
open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
- while (my $line=
\n");
close (DFH);
+# pong to all servers that have delayed messages
+# this will trigger a reverse connection, which should flush the buffers
+ foreach my $tryserver (keys %servers) {
+ my $answer=&reply("pong",$tryserver,$hostname);
+ &log($fh,"Pong to $tryserver: $answer
");
+ }
}
sub finish_logging {
@@ -538,15 +586,53 @@ sub log_simplestatus {
sub send_mail {
print "sending mail\n";
my $emailto="$perlvar{'lonAdmEMail'}";
- if ($totalcount>1000) {
+ if ($totalcount>2500) {
$emailto.=",$perlvar{'lonSysEMail'}";
}
my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
- system("metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html");
+
+ my $result=system("metasend -b -S 4000000 -t $emailto -s '$subj' -f $statusdir/index.html -m text/html >& /dev/null");
+ if ($result != 0) {
+ $result=system("mail -s '$subj' $emailto < $statusdir/index.html");
+ }
+}
+
+sub usage {
+ print(<Internal Server Error