#!/usr/bin/perl

# The LearningOnline Network
# Housekeeping program, started by cron
#
# (TCP networking package
# 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
# 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
#
# 7/14,7/15,7/19,7/21,7/22,11/18,
# 2/8 Gerd Kortemeyer
# 12/6/2000,12/8 Scott Harrison
# 12/23 Gerd Kortemeyer
# YEAR=2001
# 1/10/2001, 2/12/, 2/26, 3/15, 04/11, 04/21,8/27 Scott Harrison
# 09/04,09/06,11/26 Gerd Kortemeyer
# YEAR=2002
# 5/11/2002 Scott Harrison

# NOTE(review): the HTML markup in this file was reconstructed from a
# rendered copy of the report text; confirm tag/attribute details against
# the upstream source if exact output matters.

$|=1;

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;

use IO::File;
use IO::Socket;

# -------------------------------------------------- Non-critical communication
# Send one command line to the UNIX socket "$perlvar{'lonSockDir'}/$server"
# and return the first line of the answer (without its newline).
# Returns the string "con_lost" if the socket cannot be opened or if the
# answer is missing or empty.
sub reply {
    my ($cmd,$server)=@_;
    my $peerfile="$perlvar{'lonSockDir'}/$server";
    my $client=IO::Socket::UNIX->new(Peer    =>"$peerfile",
                                     Type    => SOCK_STREAM,
                                     Timeout => 10)
       or return "con_lost";
    print $client "$cmd\n";
    my $answer=<$client>;
    # A closed connection yields undef; do not chomp that
    if (defined($answer)) { chomp($answer); }
    if (!$answer) { $answer="con_lost"; }
    return $answer;
}

# --------------------------------------------------------- Output error status
# Print the current global $notices/$warnings/$errors counters as a small
# table, followed by a link back to the top of the report, to filehandle $fh.
sub errout {
   my $fh=shift;
   print $fh (<<ENDERROUT);
<p><table border="2">
<tr><td>Notices</td><td>$notices</td></tr>
<tr><td>Warnings</td><td>$warnings</td></tr>
<tr><td>Errors</td><td>$errors</td></tr>
</table><p><a href="#top">Top</a><p>
ENDERROUT
}

# ================================================================ Main Program

# --------------------------------- Read loncapa_apache.conf and loncapa.conf
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
%perlvar=%{$perlvarref};
undef $perlvarref;
delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed

# --------------------------------------- Make sure that LON-CAPA is configured
# I only test for one thing here (lonHostID).  This is just a safeguard.
if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
   print("Unconfigured machine.\n");
   $emailto=$perlvar{'lonSysEMail'};
   $hostname=`/bin/hostname`;
   chop $hostname;
   $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
   $subj="LON: Unconfigured machine $hostname";
   system("echo 'Unconfigured machine $hostname.' | ".
          "mailto $emailto -s '$subj' > /dev/null");
   exit 1;
}

# ----------------------------- Make sure this process is running from user=www
my $wwwid=getpwnam('www');
# getpwnam returns undef if there is no such account; undef would compare
# numerically equal to uid 0, so test for it explicitly.
if (!defined($wwwid) || $wwwid!=$<) {
   print("User ID mismatch.  This program must be run as user 'www'\n");
   $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
   $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
   system("echo 'User ID mismatch.  loncron must be run as user www.' | ".
          "mailto $emailto -s '$subj' > /dev/null");
   exit 1;
}

# ------------------------------------------------------------- Read hosts file
# Each usable line is id:domain:role:name:ip[:domain description]
{
    my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
    if ($config) {
        while (my $configline=<$config>) {
            # strip the newline so it does not end up in the last field
            chomp($configline);
            my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline);
            if ($id && $domain && $role && $name && $ip) {
                $hostname{$id}=$name;
                $hostdom{$id}=$domain;
                $hostip{$id}=$ip;
                $hostrole{$id}=$role;
                if ($domdescr) { $domaindescription{$domain}=$domdescr; }
                if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
                    $libserv{$id}=$name;
                }
            } else {
                if ($configline) {
#                   &logthis("Skipping hosts.tab line -$configline-");
                }
            }
        }
        $config->close;
    } else {
        print("Could not read $perlvar{'lonTabDir'}/hosts.tab\n");
    }
}

# ------------------------------------------------------ Read spare server file
{
    my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");
    if ($config) {
        while (my $configline=<$config>) {
            chomp($configline);
            if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
                $spareid{$configline}=1;
            }
        }
        $config->close;
    }
}

# ---------------------------------------------------------------- Start report

$statusdir="/home/httpd/html/lon-status";

$errors=0;
$warnings=0;
$notices=0;

$now=time;
$date=localtime($now);

# This brace opens the scope of the report filehandle $fh; it is closed
# only after the whole report has been written (near the end of the file).
{
my $fh=IO::File->new(">$statusdir/newstatus.html")
    or die "Cannot write $statusdir/newstatus.html: $!\n";

print $fh (<<ENDHEADERS);
<html>
<head>
<title>LON Status Report $perlvar{'lonHostID'}</title>
</head>
<body>
<a name="top"></a>
<h1>LON Status Report $perlvar{'lonHostID'}</h1>
<h2>$date ($now)</h2>
<ol>
<li><a href="#configuration">Configuration</a>
<li><a href="#machine">Machine Information</a>
<li><a href="#tmp">Temporary Files</a>
<li><a href="#tokens">Session Tokens</a>
<li><a href="#httpd">httpd</a>
<li><a href="#lonsql">lonsql</a>
<li><a href="#lond">lond</a>
<li><a href="#lonc">lonc</a>
<li><a href="#lonhttpd">lonhttpd</a>
<li><a href="#lonnet">lonnet</a>
<li><a href="#connections">Connections</a>
<li><a href="#delayed">Delayed Messages</a>
<li><a href="#errcount">Error Count</a>
</ol>
<hr>
<a name="configuration">
<h2>Configuration</h2>
<h3>PerlVars</h3>
<table border="2">
ENDHEADERS

foreach $varname (sort(keys(%perlvar))) {
    print $fh "<tr><td>$varname</td><td>$perlvar{$varname}</td></tr>\n";
}
print $fh "</table><h3>Hosts</h3><table border=\"2\">";
foreach $id (sort(keys(%hostname))) {
    print $fh "<tr><td>$id</td><td>$hostdom{$id}</td><td>$hostrole{$id}</td>";
    print $fh "<td>$hostname{$id}</td><td>$hostip{$id}</td></tr>\n";
}
print $fh "</table><h3>Spare Hosts</h3><ol>";
foreach $id (sort(keys(%spareid))) {
    print $fh "<li>$id\n";
}

print $fh "</ol>\n";

# --------------------------------------------------------------------- Machine

print $fh '<hr><a name="machine"><h2>Machine Information</h2>';
print $fh "<h3>loadavg</h3>";

# The second field of /proc/loadavg drives the counters below
$loadavg='';
if (open (LOADAVGH,"/proc/loadavg")) {
    $loadavg=<LOADAVGH>;
    $loadavg='' unless defined($loadavg);
    close (LOADAVGH);
}

print $fh "$loadavg";

@parts=split(/\s+/,$loadavg);
if ($parts[1]>4.0) {
    $errors++;
} elsif ($parts[1]>2.0) {
    $warnings++;
} elsif ($parts[1]>1.0) {
    $notices++;
}

print $fh "<h3>df</h3>";
print $fh "<pre>";

# Copy the output of df into the report and raise counters by the usage
# percentage found in the fifth column of each line:
#   >60: notice, >80: warning, >90: warning+notice, >95: two more warnings.
# Lines whose fifth column is not a number (e.g. the header) count as 0.
if (open (DFH,"df|")) {
    while ($line=<DFH>) {
        print $fh "$line";
        @parts=split(/\s+/,$line);
        $usage=$parts[4];
        $usage='' unless defined($usage);
        $usage=~s/\W//g;             # drop the trailing percent sign
        if ($usage>90) {
            $warnings++;
            $notices++;
        } elsif ($usage>80) {
            $warnings++;
        } elsif ($usage>60) {
            $notices++;
        }
        if ($usage>95) { $warnings++; $warnings++ }
    }
    close (DFH);
}
# Close the df listing and start the process listing.
print $fh "</pre>";

print $fh "<h3>ps</h3>";
print $fh "<pre>";
$psproc=0;

# Copy the process table into the report; $psproc counts the output lines
# (including the header line printed by ps).
if (open (PSH,"ps -aux|")) {
    while ($line=<PSH>) {
        print $fh "$line";
        $psproc++;
    }
    close (PSH);
}
# Close the ps listing; many processes only raise notices.
print $fh "</pre>";

if ($psproc>200) { $notices++; }
if ($psproc>250) { $notices++; }

&errout($fh);

# --------------------------------------------------------------- clean out tmp
# Remove files in lonDaemons/tmp older than lonExpire seconds.  Files whose
# first line starts with "CHECKOUTTOKEN&" are kept for 365 times as long.
print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
$cleaned=0;
$old=0;
while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
    my $mtime=(stat($fname))[9];
    $now=time;
    $since=$now-$mtime;
    if ($since>$perlvar{'lonExpire'}) {
        $line='';
        # 3-arg open: the name comes from a directory listing and must not
        # be interpreted as an open mode or a pipe
        if (open(PROBE,'<',$fname)) {
            $line=<PROBE>;
            $line='' unless defined($line);
            close(PROBE);
        }
        unless ($line=~/^CHECKOUTTOKEN\&/) {
            $cleaned++;
            unlink("$fname");
        } else {
            if ($since>365*$perlvar{'lonExpire'}) {
                $cleaned++;
                unlink("$fname");
            } else {
                $old++;
            }
        }
    }
}
print $fh "Cleaned up ".$cleaned." files (".$old." old checkout tokens).";

# ------------------------------------------------------------ clean out lonIDs
# Remove session files in lonIDsDir older than lonExpire seconds and count
# the remaining ones as open sessions.
print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
$cleaned=0;
$active=0;
while ($fname=<$perlvar{'lonIDsDir'}/*>) {
    my $mtime=(stat($fname))[9];
    $now=time;
    $since=$now-$mtime;
    if ($since>$perlvar{'lonExpire'}) {
        $cleaned++;
        print $fh "Unlinking $fname<br>";
        unlink("$fname");
    } else {
        $active++;
    }
}
print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
print $fh "<h3>$active open session(s)</h3>";

# ----------------------------------------------------------------------- httpd

print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';

# Last 25 lines of the web server access log, copied verbatim.
if (open (DFH,"tail -n25 /etc/httpd/logs/access_log|")) {
    while ($line=<DFH>) { print $fh "$line" };
    close (DFH);
}

print $fh "</pre><h3>Error Log</h3><pre>";

# Last 25 lines of the error log; every "[error]" line raises a notice.
if (open (DFH,"tail -n25 /etc/httpd/logs/error_log|")) {
    while ($line=<DFH>) {
        print $fh "$line";
        if ($line=~/\[error\]/) { $notices++; }
    };
    close (DFH);
}
# Close the httpd error log listing.
print $fh "</pre>";
&errout($fh);

# ---------------------------------------------------------------------- lonsql
# Only on library servers: show the lonsql log, check that the process named
# in lonsql.pid is alive, restart lonsql if it is not, and rotate the log.

my $restartflag=1;

if ($perlvar{'lonRole'} eq "library") {

    print $fh '<hr><a name="lonsql"><h2>lonsql</h2><h3>Log</h3><pre>';
    print "lonsql\n";
    if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
        if (open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|")) {
            while ($line=<DFH>) {
                print $fh "$line";
                if ($line=~/INFO/) { $notices++; }
                if ($line=~/WARNING/) { $notices++; }
                if ($line=~/CRITICAL/) { $warnings++; }
            };
            close (DFH);
        }
    }
    print $fh "</pre>";

    my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";

    $restartflag=1;

    if (-e $lonsqlfile) {
        my $lonsqlpid='';
        if (my $lfh=IO::File->new("$lonsqlfile")) {
            my $pidline=<$lfh>;
            $lonsqlpid=$pidline if (defined($pidline));
            $lfh->close;
        }
        chomp($lonsqlpid);
        # An empty or non-numeric pid would be treated as 0 by kill(), which
        # addresses our own process group and always "responds".
        if (($lonsqlpid=~/^\d+$/) && ($lonsqlpid>0) && (kill 0 => $lonsqlpid)) {
            print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
            $restartflag=0;
        } else {
            $errors++; $errors++;
            print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
            $restartflag=1;
            print $fh
              "<h3>Decided to clean up stale .pid file and restart lonsql</h3>";
        }
    }
    if ($restartflag==1) {
        $errors++;
        # The numbers printed are the raw return values of system()/unlink()
        print $fh '<br><font color="red">Killall lonsql: '.
            system('killall lonsql').' - ';
        sleep 2;
        print $fh unlink($lonsqlfile).' - '.
            system('killall -9 lonsql').
            '</font><br>';
        print $fh "<h3>lonsql not running, trying to start</h3>";
        system(
 "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
        sleep 2;
        if (-e $lonsqlfile) {
            print $fh "Seems like it started ...<p>";
            my $lonsqlpid='';
            if (my $lfh=IO::File->new("$lonsqlfile")) {
                my $pidline=<$lfh>;
                $lonsqlpid=$pidline if (defined($pidline));
                $lfh->close;
            }
            chomp($lonsqlpid);
            sleep 2;
            if (($lonsqlpid=~/^\d+$/) && ($lonsqlpid>0) &&
                (kill 0 => $lonsqlpid)) {
                print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
            } else {
                $errors++; $errors++;
                print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
                print $fh "Give it one more try ...<p>";
                system(
 "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
                sleep 2;
            }
        } else {
            print $fh "Seems like that did not work!<p>";
            $errors++;
        }
        if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
            print $fh "<p><pre>";
            if (open (DFH,
                  "tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|")) {
                while ($line=<DFH>) {
                    print $fh "$line";
                    if ($line=~/WARNING/) { $notices++; }
                    if ($line=~/CRITICAL/) { $notices++; }
                };
                close (DFH);
            }
            print $fh "</pre>";
        }
    }

    $fname="$perlvar{'lonDaemons'}/logs/lonsql.log";

    # Keep up to three old generations once the log exceeds 40000 bytes
    my $logsize=(stat($fname))[7];
    if (defined($logsize) && $logsize>40000) {
        print $fh "Rotating logs ...<p>";
        rename("$fname.2","$fname.3");
        rename("$fname.1","$fname.2");
        rename("$fname","$fname.1");
    }

    &errout($fh);
}

# ------------------------------------------------------------------------ lond

print $fh '<hr><a name="lond"><h2>lond</h2><h3>Log</h3><pre>';
# Show the lond log, check that the process named in lond.pid is alive
# (sending it USR1 if so), restart lond if it is not, and rotate the log.
print "lond\n";

if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
    if (open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lond.log|")) {
        while ($line=<DFH>) {
            print $fh "$line";
            if ($line=~/INFO/) { $notices++; }
            if ($line=~/WARNING/) { $notices++; }
            if ($line=~/CRITICAL/) { $warnings++; }
        };
        close (DFH);
    }
}
print $fh "</pre>";

my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid";

$restartflag=1;

if (-e $londfile) {
    my $londpid='';
    if (my $lfh=IO::File->new("$londfile")) {
        my $pidline=<$lfh>;
        $londpid=$pidline if (defined($pidline));
        $lfh->close;
    }
    chomp($londpid);
    # An empty or non-numeric pid would be treated as 0 by kill(), i.e. the
    # signal (including USR1 below) would go to our own process group.
    if (($londpid=~/^\d+$/) && ($londpid>0) && (kill 0 => $londpid)) {
        print $fh "<h3>lond at pid $londpid responding, sending USR1</h3>";
        kill USR1 => $londpid;
        $restartflag=0;
    } else {
        $errors++;
        print $fh "<h3>lond at pid $londpid not responding</h3>";
        $restartflag=1;
        print $fh
            "<h3>Decided to clean up stale .pid file and restart lond</h3>";
    }
}
if ($restartflag==1) {
    $errors++;
    # The numbers printed are the raw return values of system()/unlink()
    print $fh '<br><font color="red">Killall lond: '.
        system('killall lond').' - ';
    sleep 2;
    print $fh unlink($londfile).' - '.system('killall -9 lond').
        '</font><br>';
    print $fh "<h3>lond not running, trying to start</h3>";
    system(
      "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
    sleep 2;
    if (-e $londfile) {
        print $fh "Seems like it started ...<p>";
        my $londpid='';
        if (my $lfh=IO::File->new("$londfile")) {
            my $pidline=<$lfh>;
            $londpid=$pidline if (defined($pidline));
            $lfh->close;
        }
        chomp($londpid);
        sleep 2;
        if (($londpid=~/^\d+$/) && ($londpid>0) && (kill 0 => $londpid)) {
            print $fh "<h3>lond at pid $londpid responding</h3>";
        } else {
            $errors++; $errors++;
            print $fh "<h3>lond at pid $londpid not responding</h3>";
            print $fh "Give it one more try ...<p>";
            system(
      "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
            sleep 2;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
        $errors++;
    }
    if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
        print $fh "<p><pre>";
        if (open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|")) {
            while ($line=<DFH>) {
                print $fh "$line";
                if ($line=~/WARNING/) { $notices++; }
                if ($line=~/CRITICAL/) { $notices++; }
            };
            close (DFH);
        }
        print $fh "</pre>";
    }
}

$fname="$perlvar{'lonDaemons'}/logs/lond.log";

# Keep up to three old generations once the log exceeds 40000 bytes
{
    my $logsize=(stat($fname))[7];
    if (defined($logsize) && $logsize>40000) {
        print $fh "Rotating logs ...<p>";
        rename("$fname.2","$fname.3");
        rename("$fname.1","$fname.2");
        rename("$fname","$fname.1");
    }
}

&errout($fh);

# ------------------------------------------------------------------------ lonc

print $fh '<hr><a name="lonc"><h2>lonc</h2><h3>Log</h3><pre>';
# Show the lonc log, check that the process named in lonc.pid is alive
# (sending it USR1 if so), restart lonc if it is not, and rotate the log.
print "lonc\n";

if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
    if (open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lonc.log|")) {
        while ($line=<DFH>) {
            print $fh "$line";
            if ($line=~/INFO/) { $notices++; }
            if ($line=~/WARNING/) { $notices++; }
            if ($line=~/CRITICAL/) { $warnings++; }
        };
        close (DFH);
    }
}
print $fh "</pre>";

my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid";

$restartflag=1;

if (-e $loncfile) {
    my $loncpid='';
    if (my $lfh=IO::File->new("$loncfile")) {
        my $pidline=<$lfh>;
        $loncpid=$pidline if (defined($pidline));
        $lfh->close;
    }
    chomp($loncpid);
    # An empty or non-numeric pid would be treated as 0 by kill(), i.e. the
    # signal (including USR1 below) would go to our own process group.
    if (($loncpid=~/^\d+$/) && ($loncpid>0) && (kill 0 => $loncpid)) {
        print $fh "<h3>lonc at pid $loncpid responding, sending USR1</h3>";
        kill USR1 => $loncpid;
        $restartflag=0;
    } else {
        $errors++;
        print $fh "<h3>lonc at pid $loncpid not responding</h3>";
        # Solution: kill parent and children processes, remove .pid and restart
        $restartflag=1;
        print $fh
            "<h3>Decided to clean up stale .pid file and restart lonc</h3>";
    }
}
if ($restartflag==1) {
    $errors++;
    # The numbers printed are the raw return values of system()/unlink()
    print $fh '<br><font color="red">Killall lonc: '.
        system('killall lonc').' - ';
    sleep 2;
    print $fh unlink($loncfile).' - '.system('killall -9 lonc').
        '</font><br>';
    print $fh "<h3>lonc not running, trying to start</h3>";
    system(
      "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
    sleep 2;
    if (-e $loncfile) {
        print $fh "Seems like it started ...<p>";
        my $loncpid='';
        if (my $lfh=IO::File->new("$loncfile")) {
            my $pidline=<$lfh>;
            $loncpid=$pidline if (defined($pidline));
            $lfh->close;
        }
        chomp($loncpid);
        sleep 2;
        if (($loncpid=~/^\d+$/) && ($loncpid>0) && (kill 0 => $loncpid)) {
            print $fh "<h3>lonc at pid $loncpid responding</h3>";
        } else {
            $errors++; $errors++;
            print $fh "<h3>lonc at pid $loncpid not responding</h3>";
            print $fh "Give it one more try ...<p>";
            system(
      "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
            sleep 2;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
        $errors++;
    }
    if (-e "$perlvar{'lonDaemons'}/logs/lonc.log") {
        print $fh "<p><pre>";
        if (open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|")) {
            while ($line=<DFH>) {
                print $fh "$line";
                if ($line=~/WARNING/) { $notices++; }
                if ($line=~/CRITICAL/) { $notices++; }
            };
            close (DFH);
        }
        print $fh "</pre>";
    }
}

$fname="$perlvar{'lonDaemons'}/logs/lonc.log";

# Keep up to three old generations once the log exceeds 40000 bytes
{
    my $logsize=(stat($fname))[7];
    if (defined($logsize) && $logsize>40000) {
        print $fh "Rotating logs ...<p>";
        rename("$fname.2","$fname.3");
        rename("$fname.1","$fname.2");
        rename("$fname","$fname.1");
    }
}

&errout($fh);

# -------------------------------------------------------------------- lonhttpd

print $fh '<hr><a name="lonhttpd"><h2>lonhttpd</h2><h3>Log</h3><pre>';
# Show the lonhttpd log, check that the process named in lonhttpd.pid is
# alive, restart lonhttpd if it is not, and rotate the log.
print "lonhttpd\n";

if (-e "$perlvar{'lonDaemons'}/logs/lonhttpd.log"){
    if (open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lonhttpd.log|")) {
        while ($line=<DFH>) {
            print $fh "$line";
            if ($line=~/INFO/) { $notices++; }
            if ($line=~/WARNING/) { $notices++; }
            if ($line=~/CRITICAL/) { $warnings++; }
        };
        close (DFH);
    }
}
print $fh "</pre>";

my $lonhttpdfile="$perlvar{'lonDaemons'}/logs/lonhttpd.pid";

$restartflag=1;

if (-e $lonhttpdfile) {
    my $lonhttpdpid='';
    if (my $lfh=IO::File->new("$lonhttpdfile")) {
        my $pidline=<$lfh>;
        $lonhttpdpid=$pidline if (defined($pidline));
        $lfh->close;
    }
    chomp($lonhttpdpid);
    # An empty or non-numeric pid would be treated as 0 by kill(), which
    # addresses our own process group and always "responds".
    if (($lonhttpdpid=~/^\d+$/) && ($lonhttpdpid>0) &&
        (kill 0 => $lonhttpdpid)) {
        print $fh "<h3>lonhttpd at pid $lonhttpdpid responding</h3>";
        $restartflag=0;
    } else {
        $errors++;
        print $fh "<h3>lonhttpd at pid $lonhttpdpid not responding</h3>";
        # Solution: kill parent and children processes, remove .pid and restart
        $restartflag=1;
        print $fh
          "<h3>Decided to clean up stale .pid file and restart lonhttpd</h3>";
    }
}
if ($restartflag==1) {
    $errors++;
    # The numbers printed are the raw return values of system()/unlink()
    print $fh '<br><font color="red">Killall lonhttpd: '.
        system('killall lonhttpd').' - ';
    sleep 2;
    print $fh unlink($lonhttpdfile).' - '.system('killall -9 lonhttpd').
        '</font><br>';
    print $fh "<h3>lonhttpd not running, trying to start</h3>";
    system(
 "$perlvar{'lonDaemons'}/lonhttpd 2>>$perlvar{'lonDaemons'}/logs/lonhttpd_errors");
    sleep 2;
    if (-e $lonhttpdfile) {
        print $fh "Seems like it started ...<p>";
        my $lonhttpdpid='';
        if (my $lfh=IO::File->new("$lonhttpdfile")) {
            my $pidline=<$lfh>;
            $lonhttpdpid=$pidline if (defined($pidline));
            $lfh->close;
        }
        chomp($lonhttpdpid);
        sleep 2;
        if (($lonhttpdpid=~/^\d+$/) && ($lonhttpdpid>0) &&
            (kill 0 => $lonhttpdpid)) {
            print $fh "<h3>lonhttpd at pid $lonhttpdpid responding</h3>";
        } else {
            $errors++; $errors++;
            print $fh "<h3>lonhttpd at pid $lonhttpdpid not responding</h3>";
            print $fh "Give it one more try ...<p>";
            system(
 "$perlvar{'lonDaemons'}/lonhttpd 2>>$perlvar{'lonDaemons'}/logs/lonhttpd_errors");
            sleep 2;
        }
    } else {
        print $fh "Seems like that did not work!<p>";
        $errors++;
    }
    if (-e "$perlvar{'lonDaemons'}/logs/lonhttpd.log") {
        print $fh "<p><pre>";
        if (open (DFH,
              "tail -n100 $perlvar{'lonDaemons'}/logs/lonhttpd.log|")) {
            while ($line=<DFH>) {
                print $fh "$line";
                if ($line=~/WARNING/) { $notices++; }
                if ($line=~/CRITICAL/) { $notices++; }
            };
            close (DFH);
        }
        print $fh "</pre>";
    }
}

$fname="$perlvar{'lonDaemons'}/logs/lonhttpd.log";

# Keep up to three old generations once the log exceeds 40000 bytes
{
    my $logsize=(stat($fname))[7];
    if (defined($logsize) && $logsize>40000) {
        print $fh "Rotating logs ...<p>";
        rename("$fname.2","$fname.3");
        rename("$fname.1","$fname.2");
        rename("$fname","$fname.1");
    }
}

&errout($fh);

# ---------------------------------------------------------------------- lonnet

print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
# Copy the tail of the temporary and the permanent lonnet log into the
# report.  These logs do not influence the notice/warning/error counters.
print "lonnet\n";
if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
    if (open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|")) {
        while ($line=<DFH>) {
            print $fh "$line";
        };
        close (DFH);
    }
}
print $fh "</pre><h3>Perm Log</h3><pre>";

if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
    if (open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|")) {
        while ($line=<DFH>) {
            print $fh "$line";
        };
        close (DFH);
    }
} else { print $fh "No perm log\n" }

$fname="$perlvar{'lonDaemons'}/logs/lonnet.log";

# Keep up to three old generations once the log exceeds 40000 bytes
{
    my $logsize=(stat($fname))[7];
    if (defined($logsize) && $logsize>40000) {
        print $fh "Rotating logs ...<p>";
        rename("$fname.2","$fname.3");
        rename("$fname.1","$fname.2");
        rename("$fname","$fname.1");
    }
}

print $fh "</pre><p>";
&errout($fh);

# ----------------------------------------------------------------- Connections
# Send "pong" to every known host; the expected answer is
# "<host id>:<own host id>".  Anything else is a warning, a lost
# connection counts twice.

print $fh '<hr><a name="connections"><h2>Connections</h2>';

print $fh "<table border=\"2\">";
foreach $tryserver (sort(keys(%hostname))) {

    $answer=reply("pong",$tryserver);
    if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
        $result="ok";
    } else {
        $result=$answer;
        $warnings++;
        if ($answer eq 'con_lost') { $warnings++; }
    }
    print $fh "<tr><td>$tryserver</td><td>$result</td></tr>\n";

}
print $fh "</table>";

&errout($fh);

# ------------------------------------------------------------ Delayed messages
# Each permanent log line is time:flag:server:command.  Flag F is reported
# as a failure, D increments and S decrements the count of unsent messages.

print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
print "buffers\n";

print $fh '<h3>Scanning Permanent Log</h3>';

$unsend=0;
{
    my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
    if ($dfh) {
        while ($line=<$dfh>) {
            ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
            if ($sdf eq 'F') {
                $local=localtime($time);
                # Goes into the report (not STDOUT), with the readable time
                print $fh "<b>Failed: $local, $dserv, $dcmd</b><br>";
                $warnings++;
            }
            if ($sdf eq 'S') { $unsend--; }
            if ($sdf eq 'D') { $unsend++; }
        }
        $dfh->close;
    }
}
print $fh "Total unsend messages: <b>$unsend</b><p>\n";
$warnings=$warnings+5*$unsend;

print $fh "<h3>Outgoing Buffer</h3>";

if (open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|")) {
    while ($line=<DFH>) {
        print $fh "$line<br>";
    };
    close (DFH);
}

# ------------------------------------------------------------------------- End
print $fh "<hr><a name=\"errcount\">\n";
# Weighted total: a warning counts 4 notices, an error counts 100
$totalcount=$notices+4*$warnings+100*$errors;
&errout($fh);
print $fh "<h1>Total Error Count: $totalcount</h1>";
$now=time;
$date=localtime($now);
print $fh "<hr>$date ($now)</body></html>\n";
print "writing done\n";
# Flush the report to disk before it is renamed into place
$fh->close;
}   # end of the scope in which the report filehandle $fh was opened

rename ("$statusdir/newstatus.html","$statusdir/index.html");

# Mail the finished report when the weighted total is high
if ($totalcount>200) {
    print "mailing\n";
    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
    # List form: no shell is involved, so no quoting issues in the arguments
    system('metasend','-b','-t',$emailto,'-s',$subj,
           '-f',"$statusdir/index.html",'-m','text/html');
}
1;