#!/usr/bin/perl
# Housekeeping program, started by cron, loncontrol and loncron.pl
#
# $Id: loncron,v 1.49 2004/05/11 21:08:55 albertel Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
# NOTE(review): this file was recovered from a copy in which the HTML tags
# inside string literals, the heredoc bodies and the usage text had been
# stripped.  The markup below is a reconstruction; the visible words and all
# messages printed to stdout are preserved.  Confirm against version control.

$|=1;
use strict;

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;

use IO::File;
use IO::Socket;
use HTML::Entities;
use Getopt::Long;

#globals
use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount);

my $statusdir="/home/httpd/html/lon-status";

# -------------------------------------------------- Non-critical communication
# Send $cmd to the local UNIX socket named after $server (below lonSockDir)
# and return the first line of the reply without its newline.
# Returns "con_lost" when the socket cannot be opened or the reply is empty.
sub reply {
    my ($cmd,$server)=@_;
    my $peerfile="$perlvar{'lonSockDir'}/$server";
    my $client=IO::Socket::UNIX->new(Peer    => $peerfile,
                                     Type    => SOCK_STREAM,
                                     Timeout => 10)
        or return "con_lost";
    print $client "$cmd\n";
    my $answer=<$client>;
    # The peer may close the socket without answering; do not chomp undef.
    if (defined($answer)) { chomp($answer); }
    if (!$answer) { $answer="con_lost"; }
    return $answer;
}

# --------------------------------------------------------- Output error status
# Print the remaining arguments to $fh; silently does nothing when $fh is
# false (the "just check" modes run without a report file).
# Named like the builtin, so it must always be called as &log(...).
sub log {
    my $fh=shift;
    if ($fh) { print $fh @_ }
}

# Write the current notice/warning/error counters to the report.
sub errout {
    my $fh=shift;
    &log($fh,(<<ENDERROUT));
<table border="2" bgcolor="#CCCCCC">
<tr><td>Notices</td><td>$notices</td></tr>
<tr><td>Warnings</td><td>$warnings</td></tr>
<tr><td>Errors</td><td>$errors</td></tr>
</table>
<p><a href="#top">Top</a></p>
ENDERROUT
}

# --------------------------------------------------------------------- Helpers
# Run a command without a shell (list-form pipe open) and return its output
# lines.  Returns an empty list when the command cannot be started.
sub command_output {
    my @command=@_;
    my @lines=();
    if (open(my $cmdfh,'-|',@command)) {
        @lines=<$cmdfh>;
        close($cmdfh);
    }
    return @lines;
}

# Read a process id from $pidfile.  Returns undef when the file cannot be
# read or does not hold a positive integer.  The validation matters:
# kill() treats an empty string as pid 0, i.e. our own process group.
sub read_pid {
    my ($pidfile)=@_;
    my $lfh=IO::File->new($pidfile,'r');
    if (!$lfh) { return undef; }
    my $pid=<$lfh>;
    $lfh->close();
    if (!defined($pid)) { return undef; }
    $pid=~s/^\s+//;
    $pid=~s/\s+$//;
    if ($pid!~/^\d+$/ || $pid<1) { return undef; }
    return $pid;
}

# Return 1 when $pid is defined and accepts signal 0, otherwise 0.
sub pid_alive {
    my ($pid)=@_;
    if (defined($pid) && kill(0 => $pid)) { return 1; }
    return 0;
}

# Text used for a pid in report messages.
sub pid_text {
    my ($pid)=@_;
    return defined($pid) ? $pid : 'unknown';
}

# Rotate $fname -> $fname.1 -> $fname.2 -> $fname.3 once it is larger than
# $maxsize bytes.  A missing file is left alone.
sub rotate_log {
    my ($fh,$fname,$maxsize)=@_;
    my $size=(stat($fname))[7];
    if (!defined($size) || $size<=$maxsize) { return; }
    &log($fh,"<p>Rotating logs ...</p>");
    rename("$fname.2","$fname.3");
    rename("$fname.1","$fname.2");
    rename("$fname","$fname.1");
    return;
}

# --------------------------------------------------------------------- Daemons
# Launch $daemon from lonDaemons (stderr appended to logs/${daemon}_errors)
# and report whether the pid written to $pidfile answers signal 0.
# 'lonc' is started as 'loncnew' when the first remaining command line
# argument is 'new'.  Returns 1 on success, 0 otherwise; a missing pid file
# also increments $errors.
sub start_daemon {
    my ($fh,$daemon,$pidfile) = @_;
    my $progname=$daemon;
    if ($daemon eq 'lonc' && defined($ARGV[0]) && $ARGV[0] eq 'new') {
        $progname='loncnew';
        print "new ";
    }
    system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors");
    sleep 2;
    if (-e $pidfile) {
        &log($fh,"<p>Seems like it started ...</p>");
        my $daemonpid=&read_pid($pidfile);
        sleep 2;
        return &pid_alive($daemonpid);
    }
    &log($fh,"<p>Seems like that did not work!</p>");
    $errors++;
    return 0;
}

# Check that $daemon is running and restart it (two attempts) if it is not.
#   $fh       report file handle, may be undef
#   $daemon   daemon name; log and pid file are logs/$daemon.{log,pid}
#   $maxsize  rotate the daemon log once it exceeds this many bytes
#   $sendusr1 when true, send USR1 to a daemon that is alive
# Updates $notices/$warnings/$errors and $simplestatus{$daemon}, and prints
# a one-line status to stdout.
sub checkon_daemon {
    my ($fh,$daemon,$maxsize,$sendusr1)=@_;

    my $logfile="$perlvar{'lonDaemons'}/logs/$daemon.log";
    my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";

    &log($fh,'<hr /><a name="'.$daemon.'"></a><h2>'.$daemon.
         '</h2><h3>Log</h3><pre>');
    printf("%-10s ",$daemon);
    if (-e $logfile) {
        # Daemon log lines are written to the report unescaped, as before.
        foreach my $line (&command_output('tail','-n25',$logfile)) {
            &log($fh,"$line");
            if ($line=~/INFO/) { $notices++; }
            if ($line=~/WARNING/) { $notices++; }
            if ($line=~/CRITICAL/) { $warnings++; }
        }
    }
    &log($fh,"</pre>");

    my $restartflag=1;
    my $daemonpid;
    if (-e $pidfile) {
        $daemonpid=&read_pid($pidfile);
        if (&pid_alive($daemonpid)) {
            &log($fh,"<h3>$daemon at pid $daemonpid responding");
            if ($sendusr1) { &log($fh,", sending USR1"); }
            &log($fh,"</h3>");
            if ($sendusr1) { kill USR1 => $daemonpid; }
            $restartflag=0;
            print "running\n";
        } else {
            $errors++;
            &log($fh,"<h3>$daemon at pid ".&pid_text($daemonpid).
                 " not responding</h3>");
            $restartflag=1;
            &log($fh,"<h3>Decided to clean up stale .pid file and restart $daemon</h3>");
        }
    }

    if ($restartflag==1) {
        $simplestatus{$daemon}='off';
        $errors++;
        &log($fh,'<br /><font color="red">Killall '.$daemon.': '.
             `killall $daemon 2>&1`.' - ');
        sleep 2;
        &log($fh,unlink($pidfile).' - '.
             `killall -9 $daemon 2>&1`.
             '</font><br />');
        &log($fh,"<h3>$daemon not running, trying to start</h3>");

        # Re-read the pid file after every attempt so the report names the
        # new process rather than the stale pid read above.
        my $started=&start_daemon($fh,$daemon,$pidfile);
        $daemonpid=&read_pid($pidfile);
        if ($started) {
            &log($fh,"<h3>$daemon at pid ".&pid_text($daemonpid).
                 " responding</h3>");
            $simplestatus{$daemon}='restarted';
            print "started\n";
        } else {
            $errors++;
            &log($fh,"<h3>$daemon at pid ".&pid_text($daemonpid).
                 " not responding</h3>");
            &log($fh,"<p>Give it one more try ...</p>");
            print " ";
            $started=&start_daemon($fh,$daemon,$pidfile);
            $daemonpid=&read_pid($pidfile);
            if ($started) {
                &log($fh,"<h3>$daemon at pid ".&pid_text($daemonpid).
                     " responding</h3>");
                $simplestatus{$daemon}='restarted';
                print "started\n";
            } else {
                print " failed\n";
                $simplestatus{$daemon}='failed';
                $errors+=2;
                &log($fh,"<h3>$daemon at pid ".&pid_text($daemonpid).
                     " not responding</h3>");
                &log($fh,"<p>Unable to start $daemon</p>");
            }
        }

        if (-e $logfile) {
            &log($fh,"<pre>");
            foreach my $line (&command_output('tail','-n100',$logfile)) {
                &log($fh,"$line");
                if ($line=~/WARNING/) { $notices++; }
                if ($line=~/CRITICAL/) { $notices++; }
            }
            &log($fh,"</pre>");
        }
    }

    &rotate_log($fh,$logfile,$maxsize);

    &errout($fh);
}

# --------------------------------------------------------------------- Machine
# Report load average, disk usage (df) and the process list (ps), raising
# the counters when load, disk usage or process count are high.
sub log_machine_info {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="machine"></a><h2>Machine Information</h2>');
    &log($fh,"<h3>loadavg</h3>");

    my $loadavg='';
    if (open(my $loadfh,'<','/proc/loadavg')) {
        my $line=<$loadfh>;
        close($loadfh);
        if (defined($line)) { $loadavg=$line; }
    }
    &log($fh,"<tt>$loadavg</tt>");

    # Second field of /proc/loadavg.
    my @parts=split(/\s+/,$loadavg);
    my $load=defined($parts[1]) ? $parts[1] : 0;
    if ($load>4.0) {
        $errors++;
    } elsif ($load>2.0) {
        $warnings++;
    } elsif ($load>1.0) {
        $notices++;
    }

    &log($fh,"<h3>df</h3>");
    &log($fh,"<pre>");
    foreach my $line (&command_output('df')) {
        &log($fh,&encode_entities($line,'<>&"'));
        my @fields=split(/\s+/,$line);
        my $usage=defined($fields[4]) ? $fields[4] : '';
        $usage=~s/\W//g;
        # The header row and wrapped lines have no number in this column.
        next unless ($usage=~/^\d+$/);
        if ($usage>90) {
            $warnings++;
            $notices++;
        } elsif ($usage>80) {
            $warnings++;
        } elsif ($usage>60) {
            $notices++;
        }
        if ($usage>95) {
            $warnings+=2;
            $simplestatus{'diskfull'}++;
        }
    }
    &log($fh,"</pre>");

    &log($fh,"<h3>ps</h3>");
    &log($fh,"<pre>");
    my $psproc=0;
    foreach my $line (&command_output('ps','-aux','--cols','140')) {
        &log($fh,&encode_entities($line,'<>&"'));
        $psproc++;
    }
    &log($fh,"</pre>");

    if ($psproc>200) { $notices++; }
    if ($psproc>250) { $notices++; }

    &errout($fh);
}

# ---------------------------------------------------------------- Start report
# Open $statusdir/newstatus.html and write the report header, the perl
# variables, the host table and the spare host list.
# Arguments are hash references keyed by host id.  Returns the report file
# handle, or undef when the file cannot be opened.
sub start_logging {
    my ($hostdom,$hostrole,$hostname,$spareid)=@_;
    my $fh=IO::File->new(">$statusdir/newstatus.html");
    my $now=time;
    my $date=localtime($now);

    &log($fh,(<<ENDHEADERS));
<html>
<head>
<title>LON Status Report $perlvar{'lonHostID'}</title>
</head>
<body bgcolor="#AAAAAA">
<a name="top"></a>
<h1>LON Status Report $perlvar{'lonHostID'}</h1>
<h2>$date ($now)</h2>
<ol>
<li><a href="#configuration">Configuration</a></li>
<li><a href="#machine">Machine Information</a></li>
<li><a href="#tmp">Temporary Files</a></li>
<li><a href="#tokens">Session Tokens</a></li>
<li><a href="#httpd">httpd</a></li>
<li><a href="#lonsql">lonsql</a></li>
<li><a href="#lond">lond</a></li>
<li><a href="#lonc">lonc</a></li>
<li><a href="#lonhttpd">lonhttpd</a></li>
<li><a href="#lonnet">lonnet</a></li>
<li><a href="#connections">Connections</a></li>
<li><a href="#delayed">Delayed Messages</a></li>
<li><a href="#errcount">Error Count</a></li>
</ol>
<hr />
<a name="configuration"></a>
<h2>Configuration</h2>
<h3>PerlVars</h3>
<table border="2">
ENDHEADERS

    foreach my $varname (sort(keys(%perlvar))) {
        &log($fh,"<tr><td>$varname</td><td>".
             &encode_entities($perlvar{$varname},'<>&"')."</td></tr>\n");
    }
    &log($fh,"</table><h3>Hosts</h3><table border=\"2\">");
    foreach my $id (sort(keys(%{$hostname}))) {
        &log($fh,
             "<tr><td>$id</td><td>".$hostdom->{$id}.
             "</td><td>".$hostrole->{$id}.
             "</td><td>".$hostname->{$id}."</td></tr>\n");
    }
    &log($fh,"</table><h3>Spare Hosts</h3><ol>");
    foreach my $id (sort(keys(%{$spareid}))) {
        &log($fh,"<li>$id\n</li>");
    }
    &log($fh,"</ol>\n");
    return $fh;
}

# --------------------------------------------------------------- clean out tmp
# Delete files in lonDaemons/tmp older than lonExpire seconds.  Files whose
# first line starts with "CHECKOUTTOKEN&" are kept until they are older than
# 365*lonExpire and are counted as old checkout tokens in the meantime.
sub clean_tmp {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="tmp"></a><h2>Temporary Files</h2>');
    my $cleaned=0;
    my $old=0;
    foreach my $fname (glob("$perlvar{'lonDaemons'}/tmp/*")) {
        my $mtime=(stat($fname))[9];
        next unless (defined($mtime));    # vanished since the glob
        my $since=time-$mtime;
        next unless ($since>$perlvar{'lonExpire'});

        my $line='';
        if (open(my $probe,'<',$fname)) {
            my $first=<$probe>;
            close($probe);
            if (defined($first)) { $line=$first; }
        }
        if ($line!~/^CHECKOUTTOKEN\&/) {
            $cleaned++;
            unlink("$fname");
        } elsif ($since>365*$perlvar{'lonExpire'}) {
            $cleaned++;
            unlink("$fname");
        } else {
            $old++;
        }
    }
    &log($fh,"Cleaned up ".$cleaned." files (".$old." old checkout tokens).");
}

# ------------------------------------------------------------ clean out lonIDs
# Delete session files in lonIDsDir that have not been modified for more
# than lonExpire seconds and report how many sessions remain.
sub clean_lonIDs {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="tokens"></a><h2>Session Tokens</h2>');
    my $cleaned=0;
    my $active=0;
    foreach my $fname (glob("$perlvar{'lonIDsDir'}/*")) {
        my $mtime=(stat($fname))[9];
        next unless (defined($mtime));    # vanished since the glob
        my $since=time-$mtime;
        if ($since>$perlvar{'lonExpire'}) {
            $cleaned++;
            &log($fh,"Unlinking $fname<br />");
            unlink("$fname");
        } else {
            $active++;
        }
    }
    &log($fh,"<p>Cleaned up ".$cleaned." stale session token(s).</p>");
    &log($fh,"<h3>$active open session(s)</h3>");
}

# ----------------------------------------------------------------------- httpd
# Copy the tail of the httpd access and error logs into the report; every
# "[error]" line in the error log counts as a notice.
sub check_httpd_logs {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="httpd"></a><h2>httpd</h2><h3>Access Log</h3><pre>');

    foreach my $line (&command_output('tail','-n25',
                                      '/etc/httpd/logs/access_log')) {
        &log($fh,&encode_entities($line,'<>&"'));
    }

    &log($fh,"</pre><h3>Error Log</h3><pre>");

    foreach my $line (&command_output('tail','-n25',
                                      '/etc/httpd/logs/error_log')) {
        # Error log lines can contain request data; escape them like the
        # access log lines above.
        &log($fh,&encode_entities($line,'<>&"'));
        if ($line=~/\[error\]/) { $notices++; }
    }
    &log($fh,"</pre>");
    &errout($fh);
}

# ---------------------------------------------------------------------- lonnet
# Copy the tail of lonnet.log and lonnet.perm.log into the report and
# rotate lonnet.log once it exceeds 40000 bytes.
sub rotate_lonnet_logs {
    my ($fh)=@_;
    my $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";
    my $permlog="$perlvar{'lonDaemons'}/logs/lonnet.perm.log";

    &log($fh,'<hr /><a name="lonnet"></a><h2>lonnet</h2><h3>Temp Log</h3><pre>');
    print "checking logs\n";
    if (-e $fname) {
        foreach my $line (&command_output('tail','-n50',$fname)) {
            &log($fh,&encode_entities($line,'<>&"'));
        }
    }
    &log($fh,"</pre><h3>Perm Log</h3><pre>");

    if (-e $permlog) {
        foreach my $line (&command_output('tail','-n10',$permlog)) {
            &log($fh,&encode_entities($line,'<>&"'));
        }
    } else {
        &log($fh,"No perm log\n");
    }

    &rotate_log($fh,$fname,40000);

    &log($fh,"</pre>");
    &errout($fh);
}

# ----------------------------------------------------------------- Connections
# Send "pong" to every host in %$hostname through reply() and tabulate the
# answers.  Any unexpected answer is a warning; "con_lost" counts twice and
# is reported as a bad connection.
sub test_connections {
    my ($fh,$hostname)=@_;
    &log($fh,'<hr /><a name="connections"></a><h2>Connections</h2>');
    print "testing connections\n";
    &log($fh,"<table border=\"2\">");
    my ($good,$bad)=(0,0);
    foreach my $tryserver (sort(keys(%{$hostname}))) {
        print(".");
        my $result;
        my $answer=reply("pong",$tryserver);
        if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
            $result="ok";
            $good++;
        } else {
            # The answer comes from a remote host; escape it for the report.
            $result=&encode_entities($answer,'<>&"');
            $warnings++;
            if ($answer eq 'con_lost') {
                $bad++;
                $warnings++;
            }
        }
        if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
        &log($fh,"<tr><td>$tryserver</td><td>$result</td></tr>\n");
    }
    &log($fh,"</table>");
    print "\n$good good, $bad bad connections\n";
    &errout($fh);
}

# ------------------------------------------------------------ Delayed messages
# Scan lonnet.perm.log (colon separated: time, status, server, command).
# Status 'F' is reported as a failure and counts as a warning, 'D' adds one
# to and 'S' removes one from the number of unsent messages.  Also lists
# the lonSockDir/delayed directory.
sub check_delayed_msg {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="delayed"></a><h2>Delayed Messages</h2>');
    print "checking buffers\n";

    &log($fh,'<h3>Scanning Permanent Log</h3>');

    my $unsend=0;

    my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
    if ($dfh) {
        while (my $line=<$dfh>) {
            my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
            next unless (defined($sdf));
            if ($sdf eq 'F') {
                my $what=join(', ',map { defined($_) ? $_ : '' }
                              ($time,$dserv,$dcmd));
                &log($fh,"<b>Failed: ".&encode_entities($what,'<>&"').
                     "</b><br />");
                $warnings++;
            }
            if ($sdf eq 'S') { $unsend--; }
            if ($sdf eq 'D') { $unsend++; }
        }
        $dfh->close();
    }

    &log($fh,"<p>Total unsend messages: <b>$unsend</b></p>\n");
    # A negative balance (more sends than delays in the log) must not
    # lower the warning count.
    if ($unsend>0) { $warnings=$warnings+5*$unsend; }

    if ($unsend) { $simplestatus{'unsend'}=$unsend; }
    &log($fh,"<h3>Outgoing Buffer</h3>\n<pre>");

    foreach my $line (&command_output('ls','-lF',
                                      "$perlvar{'lonSockDir'}/delayed")) {
        &log($fh,&encode_entities($line,'<>&"'));
    }
    &log($fh,"</pre>\n");
}

# ----------------------------------------------------------------- Finish up
# Compute $totalcount (notices + 4*warnings + 100*errors), write the report
# footer, close the report and record the counters in %simplestatus.
sub finish_logging {
    my ($fh)=@_;
    &log($fh,"<a name=\"errcount\"></a>\n");
    $totalcount=$notices+4*$warnings+100*$errors;
    &errout($fh);
    &log($fh,"<h1>Total Error Count: $totalcount</h1>");
    my $now=time;
    my $date=localtime($now);
    &log($fh,"<hr />$date ($now)</body></html>\n");
    print "lon-status webpage updated\n";
    # start_logging returns undef when the report could not be opened.
    if ($fh) { $fh->close(); }

    if ($errors) { $simplestatus{'errors'}=$errors; }
    if ($warnings) { $simplestatus{'warnings'}=$warnings; }
    if ($notices) { $simplestatus{'notices'}=$notices; }
    $simplestatus{'time'}=time;
}

# Publish the finished report as index.html and write %simplestatus as a
# single line of key=value& pairs to loncron_simple.txt.
sub log_simplestatus {
    rename ("$statusdir/newstatus.html","$statusdir/index.html");

    my $sfh=IO::File->new(">$statusdir/loncron_simple.txt");
    if (!$sfh) {
        print "unable to write $statusdir/loncron_simple.txt\n";
        return;
    }
    foreach my $key (sort(keys(%simplestatus))) {
        print $sfh $key.'='.$simplestatus{$key}.'&';
    }
    print $sfh "\n";
    $sfh->close();
}

# Mail the report to lonAdmEMail, and also to lonSysEMail when the total
# count is above 1000.  Uses list-form system so no shell is involved.
sub send_mail {
    print "sending mail\n";
    my $emailto="$perlvar{'lonAdmEMail'}";
    if ($totalcount>1000) {
        $emailto.=",$perlvar{'lonSysEMail'}";
    }
    my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
    system('metasend','-b','-t',$emailto,'-s',$subj,
           '-f',"$statusdir/index.html",'-m','text/html');
}

# Print the command line help.
sub usage {
    print(<<USAGE);
loncron - housekeeping program that checks up on various parts of Lon-CAPA

Options:
   --help                  Display help
   --oldlonc               Accepted for compatibility (currently not used;
                           'loncnew' is started when the first argument
                           is 'new')
   --noemail               Do not send the status email
   --justcheckconnections  Only check the current status of connections
   --justcheckdaemons      Only check that all of the Lon-CAPA daemons are
                           running; no report is written and no email is
                           sent

USAGE
}

# ================================================================ Main Program
sub main {
    my ($oldlonc,$help,$justcheckdaemons,$noemail,$justcheckconnections);
    &GetOptions("help"                 => \$help,
                "oldlonc"              => \$oldlonc,
                "justcheckdaemons"     => \$justcheckdaemons,
                "noemail"              => \$noemail,
                "justcheckconnections" => \$justcheckconnections
                );
    if ($help) { &usage(); return; }

# --------------------------------- Read loncapa_apache.conf and loncapa.conf
    my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
    %perlvar=%{$perlvarref};
    undef $perlvarref;
    delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
    delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed

# --------------------------------------- Make sure that LON-CAPA is configured
# I only test for one thing here (lonHostID).  This is just a safeguard.
    if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
        print("Unconfigured machine.\n");
        my $emailto=$perlvar{'lonSysEMail'};
        my $hostname=`/bin/hostname`;
        chomp($hostname);
        $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
        my $subj="LON: Unconfigured machine $hostname";
        system("echo 'Unconfigured machine $hostname.' | mailto $emailto -s '$subj' > /dev/null");
        exit 1;
    }

# ----------------------------- Make sure this process is running from user=www
    my $wwwid=getpwnam('www');
    if (!defined($wwwid) || $wwwid!=$<) {
        print("User ID mismatch.  This program must be run as user 'www'\n");
        my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
        my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
        system("echo 'User ID mismatch.  loncron must be run as user www.' | mailto $emailto -s '$subj' > /dev/null");
        exit 1;
    }

# ------------------------------------------------------------- Read hosts file
    my (%hostname,%hostdom,%hostrole,%spareid);
    my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
    if ($config) {
        while (my $configline=<$config>) {
            chomp($configline);
            # skip comments and blank lines
            next if ($configline =~ /^(\#|\s*$)/);
            my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline);
            if ($id && $domain && $role && $name && $ip) {
                $hostname{$id}=$name;
                $hostdom{$id}=$domain;
                $hostrole{$id}=$role;
            }
        }
        $config->close();
    }
    undef $config;

# ------------------------------------------------------ Read spare server file
    $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");
    if ($config) {
        while (my $configline=<$config>) {
            chomp($configline);
            if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
                $spareid{$configline}=1;
            }
        }
        $config->close();
    }
    undef $config;

# ---------------------------------------------------------------- Start report
    $errors=0;
    $warnings=0;
    $notices=0;

    my $fh;
    if (!$justcheckdaemons && !$justcheckconnections) {
        $fh=&start_logging(\%hostdom,\%hostrole,\%hostname,\%spareid);
        &log_machine_info($fh);
        &clean_tmp($fh);
        &clean_lonIDs($fh);
        &check_httpd_logs($fh);
        &rotate_lonnet_logs($fh);
    }
    if (!$justcheckconnections) {
        &checkon_daemon($fh,'lonsql',200000);
        &checkon_daemon($fh,'lond',40000,1);
        &checkon_daemon($fh,'lonc',40000,1);
        &checkon_daemon($fh,'lonhttpd',40000);
    }
    if (!$justcheckdaemons) {
        &test_connections($fh,\%hostname);
    }
    if (!$justcheckdaemons && !$justcheckconnections) {
        &check_delayed_msg($fh);
        &finish_logging($fh);
        &log_simplestatus();

        if ($totalcount>200 && !$noemail) { &send_mail(); }
    }
}

&main();
1;