#!/usr/bin/perl

# The LearningOnline Network
# Housekeeping program, started by cron
#
# (TCP networking package
# 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
# 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
#
# 7/14,7/15,7/19,7/21,7/22,11/18,
# 2/8 Gerd Kortemeyer
# 12/23 Gerd Kortemeyer
# YEAR=2001
# 09/04,09/06,11/26 Gerd Kortemeyer
#
# On every run this script
#   * refuses to run on an unconfigured machine or as a user other than 'www'
#     (mailing the administrators in both cases),
#   * writes an HTML status report to $statusdir/newstatus.html and renames it
#     to index.html when finished,
#   * removes expired files from the daemon tmp directory and the session
#     directory,
#   * checks the lonsql, lond, lonc and lonhttpd daemons through their pid
#     files, restarts them if they do not respond, and rotates their logs,
#   * pings every host from hosts.tab through the local socket directory,
#   * writes a one-line key=value& summary to loncron_simple.txt,
#   * mails the report when the weighted error count exceeds 200.
#
# NOTE(review): the copy of this file that was reviewed had lost every
# angle-bracket construct (HTML tags, heredoc openers, <HANDLE> reads).  The
# readline operators and heredocs below are restored from context; the HTML
# markup is a reconstruction that wraps the surviving text -- confirm it
# against version control before deploying.

use strict;
use warnings;

$|=1;

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;

use IO::File;
use IO::Socket;

# ------------------------------------------------------------- Shared state
# Package globals because the subroutines below update them while the main
# program reports them.
our %perlvar;                                    # configuration from read_conf
our ($errors, $warnings, $notices) = (0, 0, 0);  # running problem counters
our %simplestatus;                               # summary for loncron_simple.txt
our (%hostname, %hostdom, %hostip, %hostrole);   # hosts.tab, keyed by host id
our (%domaindescription, %libserv, %spareid);

# -------------------------------------------------- Non-critical communication

# Send $cmd to the local socket for $server and return the one-line answer.
# Returns "con_lost" when the socket cannot be opened or the answer is empty
# (or otherwise false, e.g. "0").
sub reply {
    my ($cmd,$server)=@_;
    my $peerfile="$perlvar{'lonSockDir'}/$server";
    my $client=IO::Socket::UNIX->new(Peer    => $peerfile,
                                     Type    => SOCK_STREAM,
                                     Timeout => 10)
        or return "con_lost";
    print $client "$cmd\n";
    my $answer=<$client>;
    close($client);
    # The peer may close the connection without answering.
    return "con_lost" unless defined $answer;
    chomp($answer);
    return $answer ? $answer : "con_lost";
}

# --------------------------------------------------------- Output error status

# Print the current notice/warning/error counters as a table to $fh.
sub errout {
    my $fh=shift;
    print $fh (<<ENDERROUT);
     <p><table border=2 bgcolor="#CCCCCC">
     <tr><td>Notices</td><td>$notices</td></tr>
     <tr><td>Warnings</td><td>$warnings</td></tr>
     <tr><td>Errors</td><td>$errors</td></tr>
     </table><p><a href="#top">Top</a><p>
ENDERROUT
    return;
}

# ------------------------------------------------------------ Small utilities

# Return the process id stored on the first line of $pidfile, or nothing when
# the file is unreadable or does not hold a positive integer.  Rejecting
# anything else matters: an empty string numifies to 0, and signalling pid 0
# targets this script's own process group.
sub read_pid {
    my ($pidfile) = @_;
    open(my $lfh, '<', $pidfile) or return;
    my $pid = <$lfh>;
    close($lfh);
    return unless defined $pid;
    chomp($pid);
    $pid =~ s/^\s+|\s+$//g;
    return unless $pid =~ /^[1-9]\d*$/;
    return $pid;
}

# True (1) when signal 0 can be delivered to $pid, false (0) otherwise or when
# $pid is undefined.
sub pid_alive {
    my ($pid) = @_;
    return 0 unless defined $pid;
    return kill(0, $pid) ? 1 : 0;
}

# Run $command through the shell and return its output; '' when it could not
# be run at all.
sub run_capture {
    my ($command) = @_;
    my $output = `$command`;
    return defined $output ? $output : '';
}

# Copy the last $count lines of $file to $fh.  When $tally is a code
# reference it is called with every line (used to bump the counters).
# tail is started without a shell, so $file is never interpreted.
sub copy_log_tail {
    my ($fh, $file, $count, $tally) = @_;
    open(my $tail, '-|', 'tail', "-n$count", $file) or return;
    while (my $line = <$tail>) {
        print $fh $line;
        $tally->($line) if $tally;
    }
    close($tail);
    return;
}

# Rotate $fname to $fname.1 (.1 to .2, .2 to .3) when it is larger than
# $maxsize bytes, noting the rotation in the report.  Does nothing when the
# file does not exist.
sub rotate_log {
    my ($fh, $fname, $maxsize) = @_;
    my $size = (stat($fname))[7];
    return unless defined $size && $size > $maxsize;
    print $fh "Rotating logs ...<p>";
    rename("$fname.2","$fname.3");
    rename("$fname.1","$fname.2");
    rename($fname,"$fname.1");
    return;
}

# Mail a one-line alert through the shell.  All three arguments come from the
# local configuration or from literals in this file.
sub mail_alert {
    my ($to, $subject, $body) = @_;
    system("echo '$body' | mailto $to -s '$subject' > /dev/null");
    return;
}

# ------------------------------------------------------------ Daemon handling

# Launch $daemon from the lonDaemons directory (loncnew instead of lonc when
# the script was called with the argument 'new'), appending its stderr to
# logs/${daemon}_errors.  Returns 1 when a pid file appeared and the process
# named in it answers signal 0, otherwise 0.
sub start_daemon {
    my ($fh,$daemon,$pidfile) = @_;
    my $progname=$daemon;
    if ($daemon eq 'lonc' && defined $ARGV[0] && $ARGV[0] eq 'new') {
        $progname='loncnew';
        print "new ";
    }
    system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors");
    sleep 2;
    if (-e $pidfile) {
        print $fh "Seems like it started ...<p>";
        my $daemonpid=read_pid($pidfile);
        sleep 2;    # give the daemon a moment before probing it
        return pid_alive($daemonpid);
    }
    print $fh "Seems like that did not work!<p>";
    $errors++;
    return 0;
}

# Report on one daemon and keep it running.
#   $fh       - report file handle
#   $daemon   - daemon name; logs/$daemon.log and logs/$daemon.pid are used
#   $maxsize  - log size in bytes above which the log is rotated
#   $sendusr1 - when true, a responding daemon is sent USR1
# Updates $notices, $warnings, $errors and $simplestatus{$daemon}.
sub checkon_daemon {
    my ($fh,$daemon,$maxsize,$sendusr1)=@_;

    my $logfile="$perlvar{'lonDaemons'}/logs/$daemon.log";
    my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";

    # Pid text for the report, read fresh so it reflects a restarted daemon.
    my $pid_label = sub {
        my $pid = read_pid($pidfile);
        return defined $pid ? $pid : 'unknown';
    };

    print $fh '<hr><a name="'.$daemon.'"><h2>'.$daemon.'</h2><h3>Log</h3><p><pre>';
    printf("%-10s ",$daemon);
    if (-e $logfile) {
        copy_log_tail($fh, $logfile, 25, sub {
            my ($line) = @_;
            $notices++  if $line =~ /INFO/;
            $notices++  if $line =~ /WARNING/;
            $warnings++ if $line =~ /CRITICAL/;
        });
    }
    print $fh "</pre>";

    my $restartflag=1;

    if (-e $pidfile) {
        my $daemonpid=read_pid($pidfile);
        my $shown=defined $daemonpid ? $daemonpid : 'unknown';
        if (pid_alive($daemonpid)) {
            print $fh "<h3>$daemon at pid $shown responding";
            if ($sendusr1) { print $fh ", sending USR1"; }
            print $fh "</h3>";
            if ($sendusr1) { kill('USR1', $daemonpid); }
            $restartflag=0;
            print "running\n";
        } else {
            $errors++;
            print $fh "<h3>$daemon at pid $shown not responding</h3>";
            print $fh
                "<h3>Decided to clean up stale .pid file and restart $daemon</h3>";
        }
    }

    if ($restartflag==1) {
        $simplestatus{$daemon}='off';
        $errors++;
        print $fh '<br><font color="red">Killall '.$daemon.': '.
            run_capture("killall $daemon 2>&1").' - ';
        sleep 2;
        my $removed=unlink($pidfile);
        print $fh $removed.' - '.
            run_capture("killall -9 $daemon 2>&1").
            '</font><br>';
        print $fh "<h3>$daemon not running, trying to start</h3>";

        if (start_daemon($fh,$daemon,$pidfile)) {
            print $fh "<h3>$daemon at pid ".$pid_label->()." responding</h3>";
            $simplestatus{$daemon}='restarted';
            print "started\n";
        } else {
            $errors++;
            print $fh "<h3>$daemon at pid ".$pid_label->()." not responding</h3>";
            print $fh "Give it one more try ...<p>";
            print " ";
            if (start_daemon($fh,$daemon,$pidfile)) {
                print $fh "<h3>$daemon at pid ".$pid_label->()." responding</h3>";
                $simplestatus{$daemon}='restarted';
                print "started\n";
            } else {
                print " failed\n";
                $simplestatus{$daemon}='failed';
                $errors++; $errors++;
                print $fh "<h3>$daemon at pid ".$pid_label->()." not responding</h3>";
                print $fh "Unable to start $daemon<p>";
            }
        }

        if (-e $logfile) {
            print $fh "<p><pre>";
            # NOTE(review): CRITICAL counts as a notice here but as a warning
            # in the first scan above; kept as found -- confirm the intent.
            copy_log_tail($fh, $logfile, 100, sub {
                my ($line) = @_;
                $notices++ if $line =~ /WARNING/;
                $notices++ if $line =~ /CRITICAL/;
            });
            print $fh "</pre>";
        }
    }

    rotate_log($fh, $logfile, $maxsize);

    errout($fh);
    return;
}

# ================================================================ Main Program

# --------------------------------- Read loncapa_apache.conf and loncapa.conf
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
%perlvar=%{$perlvarref};
undef $perlvarref;
delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed

# --------------------------------------- Make sure that LON-CAPA is configured
# I only test for one thing here (lonHostID).  This is just a safeguard.
if (!defined $perlvar{'lonHostID'}
    || '{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
    print("Unconfigured machine.\n");
    my $host=run_capture('/bin/hostname');
    chomp($host);
    $host=~s/[^\w\.]//g; # make sure is safe to pass through shell
    mail_alert($perlvar{'lonSysEMail'},
               "LON: Unconfigured machine $host",
               "Unconfigured machine $host.");
    exit 1;
}

# ----------------------------- Make sure this process is running from user=www
my $wwwid=getpwnam('www');
if (!defined $wwwid || $wwwid!=$<) {
    print("User ID mismatch. This program must be run as user 'www'\n");
    mail_alert("$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}",
               "LON: $perlvar{'lonHostID'} User ID mismatch",
               'User ID mismatch. loncron must be run as user www.');
    exit 1;
}

# ------------------------------------------------------------- Read hosts file
{
    my $hostsfile="$perlvar{'lonTabDir'}/hosts.tab";
    if (open(my $config, '<', $hostsfile)) {
        while (my $configline=<$config>) {
            # Without the chomp the last field would keep its newline.
            chomp($configline);
            my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline);
            next unless ($id && $domain && $role && $name && $ip);
            $hostname{$id}=$name;
            $hostdom{$id}=$domain;
            $hostip{$id}=$ip;
            $hostrole{$id}=$role;
            if ($domdescr) { $domaindescription{$domain}=$domdescr; }
            if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
                $libserv{$id}=$name;
            }
        }
        close($config);
    } else {
        print "Unable to read $hostsfile: $!\n";
    }
}

# ------------------------------------------------------ Read spare server file
{
    my $sparefile="$perlvar{'lonTabDir'}/spare.tab";
    if (open(my $config, '<', $sparefile)) {
        while (my $configline=<$config>) {
            chomp($configline);
            if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
                $spareid{$configline}=1;
            }
        }
        close($config);
    } else {
        print "Unable to read $sparefile: $!\n";
    }
}

# ---------------------------------------------------------------- Start report

my $statusdir="/home/httpd/html/lon-status";

$errors=0;
$warnings=0;
$notices=0;
%simplestatus=();

my $totalcount=0;
my $now=time;
my $date=localtime($now);

{
    my $fh=IO::File->new("$statusdir/newstatus.html", 'w')
        or die "Unable to write $statusdir/newstatus.html: $!\n";

    print $fh (<<ENDHEADERS);
<html>
<head>
<title>LON Status Report $perlvar{'lonHostID'}</title>
</head>
<body bgcolor="#AAAAAA">
<a name="top" />
<h1>LON Status Report $perlvar{'lonHostID'}</h1>
<h2>$date ($now)</h2>
<ol>
<li><a href="#configuration">Configuration</a>
<li><a href="#machine">Machine Information</a>
<li><a href="#tmp">Temporary Files</a>
<li><a href="#tokens">Session Tokens</a>
<li><a href="#httpd">httpd</a>
<li><a href="#lonsql">lonsql</a>
<li><a href="#lond">lond</a>
<li><a href="#lonc">lonc</a>
<li><a href="#lonhttpd">lonhttpd</a>
<li><a href="#lonnet">lonnet</a>
<li><a href="#connections">Connections</a>
<li><a href="#delayed">Delayed Messages</a>
<li><a href="#errcount">Error Count</a>
</ol>
<hr>
<a name="configuration">
<h2>Configuration</h2>
<h3>PerlVars</h3>
<table border=2>
ENDHEADERS

    foreach my $varname (sort(keys(%perlvar))) {
        print $fh "<tr><td>$varname</td><td>$perlvar{$varname}</td></tr>\n";
    }
    print $fh "</table><h3>Hosts</h3><table border=2>";
    foreach my $id (sort(keys(%hostname))) {
        print $fh
            "<tr><td>$id</td><td>$hostdom{$id}</td><td>$hostrole{$id}</td>";
        print $fh "<td>$hostname{$id}</td><td>$hostip{$id}</td></tr>\n";
    }
    print $fh "</table><h3>Spare Hosts</h3><ol>";
    foreach my $id (sort(keys(%spareid))) {
        print $fh "<li>$id\n";
    }
    print $fh "</ol>\n";

# --------------------------------------------------------------------- Machine

    print $fh '<hr><a name="machine"><h2>Machine Information</h2>';
    print $fh "<h3>loadavg</h3>";

    my $loadavg='';
    if (open(my $loadfh, '<', '/proc/loadavg')) {
        my $first=<$loadfh>;
        $loadavg=$first if defined $first;
        close($loadfh);
    }
    print $fh "$loadavg";

    # The second field of /proc/loadavg is graded against fixed thresholds.
    my @loadparts=split(/\s+/,$loadavg);
    my $load=$loadparts[1];
    $load=0 unless defined $load && $load=~/^\d+(?:\.\d+)?$/;
    if ($load>4.0) {
        $errors++;
    } elsif ($load>2.0) {
        $warnings++;
    } elsif ($load>1.0) {
        $notices++;
    }

    print $fh "<h3>df</h3>";
    print $fh "<pre>";

    if (open(my $df, '-|', 'df')) {
        while (my $line=<$df>) {
            print $fh $line;
            my @parts=split(/\s+/,$line);
            my $usage=defined $parts[4] ? $parts[4] : '';
            $usage=~s/\W//g;
            # The header row and wrapped lines carry no percentage.
            next unless $usage=~/^\d+$/;
            if ($usage>90) {
                $warnings++;
                $notices++;
            } elsif ($usage>80) {
                $warnings++;
            } elsif ($usage>60) {
                $notices++;
            }
            if ($usage>95) {
                $warnings++; $warnings++;
                $simplestatus{'diskfull'}++;
            }
        }
        close($df);
    }
    print $fh "</pre>";

    print $fh "<h3>ps</h3>";
    print $fh "<pre>";
    my $psproc=0;

    if (open(my $ps, '-|', 'ps', '-aux')) {
        while (my $line=<$ps>) {
            print $fh $line;
            $psproc++;
        }
        close($ps);
    }
    print $fh "</pre>";

    if ($psproc>200) { $notices++; }
    if ($psproc>250) { $notices++; }

    errout($fh);

# --------------------------------------------------------------- clean out tmp

    print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
    my $cleaned=0;
    my $old=0;
    foreach my $tmpfile (glob("$perlvar{'lonDaemons'}/tmp/*")) {
        # An entry that cannot be stat'ed is treated as infinitely old.
        my $mtime=(stat($tmpfile))[9];
        $mtime=0 unless defined $mtime;
        my $since=time-$mtime;
        next unless $since>$perlvar{'lonExpire'};

        my $firstline='';
        if (open(my $probe, '<', $tmpfile)) {
            my $read=<$probe>;
            $firstline=$read if defined $read;
            close($probe);
        }
        # Checkout tokens are kept for 365 expiry periods; anything else is
        # removed as soon as it has expired.
        if ($firstline=~/^CHECKOUTTOKEN\&/
            && $since<=365*$perlvar{'lonExpire'}) {
            $old++;
        } else {
            $cleaned++;
            unlink($tmpfile);
        }
    }
    print $fh "Cleaned up ".$cleaned." files (".$old." old checkout tokens).";

# ------------------------------------------------------------ clean out lonIDs

    print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
    $cleaned=0;
    my $active=0;
    foreach my $idfile (glob("$perlvar{'lonIDsDir'}/*")) {
        my $mtime=(stat($idfile))[9];
        $mtime=0 unless defined $mtime;
        my $since=time-$mtime;
        if ($since>$perlvar{'lonExpire'}) {
            $cleaned++;
            print $fh "Unlinking $idfile<br>";
            unlink($idfile);
        } else {
            $active++;
        }
    }
    print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
    print $fh "<h3>$active open session(s)</h3>";

# ----------------------------------------------------------------------- httpd

    print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
    copy_log_tail($fh, '/etc/httpd/logs/access_log', 25);

    print $fh "</pre><h3>Error Log</h3><pre>";
    copy_log_tail($fh, '/etc/httpd/logs/error_log', 25, sub {
        my ($line) = @_;
        $notices++ if $line =~ /\[error\]/;
    });
    print $fh "</pre>";
    errout($fh);

# ---------------------------------------------------------------------- lonsql

    checkon_daemon($fh,'lonsql',200000);

# ------------------------------------------------------------------------ lond

    checkon_daemon($fh,'lond',40000,1);

# ------------------------------------------------------------------------ lonc

    checkon_daemon($fh,'lonc',40000,1);

# -------------------------------------------------------------------- lonhttpd

    checkon_daemon($fh,'lonhttpd',40000);

# ---------------------------------------------------------------------- lonnet

    my $lonnetlog="$perlvar{'lonDaemons'}/logs/lonnet.log";
    my $permlog="$perlvar{'lonDaemons'}/logs/lonnet.perm.log";

    print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
    print "checking logs\n";
    if (-e $lonnetlog) {
        copy_log_tail($fh, $lonnetlog, 50);
    }
    print $fh "</pre><h3>Perm Log</h3><pre>";

    if (-e $permlog) {
        copy_log_tail($fh, $permlog, 10);
    } else {
        print $fh "No perm log\n";
    }

    rotate_log($fh, $lonnetlog, 40000);

    print $fh "</pre><p>";
    errout($fh);

# ----------------------------------------------------------------- Connections

    print $fh '<hr><a name="connections"><h2>Connections</h2>';
    print "testing connections\n";
    print $fh "<table border=2>";
    foreach my $tryserver (sort(keys(%hostname))) {
        print(".");
        my $answer=reply("pong",$tryserver);
        my $result;
        if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
            $result="ok";
        } else {
            $result=$answer;
            $warnings++;
            if ($answer eq 'con_lost') { $warnings++; }
        }
        if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
        print $fh "<tr><td>$tryserver</td><td>$result</td></tr>\n";
    }
    print $fh "</table>";

    errout($fh);

# ------------------------------------------------------------ Delayed messages

    print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
    print "checking buffers\n";

    print $fh '<h3>Scanning Permanent Log</h3>';

    # Each perm-log line is time:flag:server:command.  D raises and S lowers
    # the count of messages still waiting; F is reported as a failure.
    my $unsend=0;
    if (open(my $dfh, '<', $permlog)) {
        while (my $line=<$dfh>) {
            my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
            next unless defined $sdf;
            if ($sdf eq 'F') {
                $dserv='' unless defined $dserv;
                $dcmd='' unless defined $dcmd;
                print $fh "<b>Failed: $time, $dserv, $dcmd</b><br>";
                $warnings++;
            }
            if ($sdf eq 'S') { $unsend--; }
            if ($sdf eq 'D') { $unsend++; }
        }
        close($dfh);
    }

    print $fh "Total unsend messages: <b>$unsend</b><p>\n";
    $warnings=$warnings+5*$unsend;

    if ($unsend) { $simplestatus{'unsend'}=$unsend; }
    print $fh "<h3>Outgoing Buffer</h3>";

    if (open(my $ls, '-|', 'ls', '-lF', "$perlvar{'lonSockDir'}/delayed")) {
        while (my $line=<$ls>) {
            print $fh "$line<br>";
        }
        close($ls);
    }

# ------------------------------------------------------------------------- End
    print $fh "<a name=errcount>\n";
    $totalcount=$notices+4*$warnings+100*$errors;
    errout($fh);
    print $fh "<h1>Total Error Count: $totalcount</h1>";
    $now=time;
    $date=localtime($now);
    print $fh "<hr>$date ($now)</body></html>\n";
    print "lon-status webpage updated\n";
    $fh->close();
}

if ($errors) { $simplestatus{'errors'}=$errors; }
if ($warnings) { $simplestatus{'warnings'}=$warnings; }
if ($notices) { $simplestatus{'notices'}=$notices; }
$simplestatus{'time'}=time;

rename ("$statusdir/newstatus.html","$statusdir/index.html");

# One line of key=value& pairs.
if (my $sfh=IO::File->new("$statusdir/loncron_simple.txt", 'w')) {
    foreach my $key (sort keys %simplestatus) {
        print $sfh $key.'='.$simplestatus{$key}.'&';
    }
    print $sfh "\n";
    $sfh->close();
} else {
    print "Unable to write $statusdir/loncron_simple.txt: $!\n";
}

if ($totalcount>200) {
    print "sending mail\n";
    my $emailto="$perlvar{'lonAdmEMail'}";
    if ($totalcount>1000) {
        $emailto.=",$perlvar{'lonSysEMail'}";
    }
    my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
    # List form: the subject contains spaces and must not be re-parsed by a
    # shell.
    system('metasend', '-b', '-t', $emailto, '-s', $subj,
           '-f', "$statusdir/index.html", '-m', 'text/html');
}
1;