File:  [LON-CAPA] / loncom / loncron
Revision 1.41: download - view: text, annotated - select for diffs
Wed Jul 30 16:49:27 2003 UTC (20 years, 9 months ago) by www
Branches: MAIN
CVS tags: version_1_0_3, version_1_0_2, version_1_0_1, version_1_0_0, version_0_99_5, version_0_99_4, HEAD
Simple one-line status for cluster-wide automated reporting

    1: #!/usr/bin/perl
    2: 
    3: # The LearningOnline Network
    4: # Housekeeping program, started by cron
    5: #
    6: # (TCP networking package
    7: # 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
    8: # 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
    9: #
   10: # 7/14,7/15,7/19,7/21,7/22,11/18,
   11: # 2/8 Gerd Kortemeyer
   12: # 12/23 Gerd Kortemeyer
   13: # YEAR=2001
   14: # 09/04,09/06,11/26 Gerd Kortemeyer
   15: 
   16: $|=1;
   17: 
   18: use lib '/home/httpd/lib/perl/';
   19: use LONCAPA::Configuration;
   20: 
   21: use IO::File;
   22: use IO::Socket;
   23: 
   24: # -------------------------------------------------- Non-critical communication
   # Send one command line to $server and return its one-line answer.
   #
   # The peer is the UNIX-domain socket "$perlvar{'lonSockDir'}/$server"
   # (presumably served by the local lonc daemon -- confirm).  The command is
   # written followed by a newline and exactly one line is read back.
   #
   # Returns the answer without its trailing newline, or the string
   # "con_lost" when the socket cannot be opened or the answer is empty/false.
   sub reply {
       my ($cmd,$server)=@_;
       my $socketpath="$perlvar{'lonSockDir'}/$server";
       my $conn=IO::Socket::UNIX->new(Peer    => "$socketpath",
                                      Type    => SOCK_STREAM,
                                      Timeout => 10);
       # No connection at all: report it the same way as an empty answer.
       return "con_lost" unless ($conn);

       print $conn "$cmd\n";
       my $response=<$conn>;
       chomp($response);
       return ($response ? $response : "con_lost");
   }
   38: 
   39: # --------------------------------------------------------- Output error status
   40: 
   # Write the running tallies held in the globals $notices, $warnings and
   # $errors as a small HTML table, followed by a link back to the "top"
   # anchor, to the filehandle passed as the only argument.
   sub errout {
       my ($out)=@_;
       # Each element is one output line; the trailing '' supplies the final
       # newline so the bytes match the original here-document exactly.
       print $out join("\n",
           '     <p><table border=2 bgcolor="#CCCCCC">',
           "     <tr><td>Notices</td><td>$notices</td></tr>",
           "     <tr><td>Warnings</td><td>$warnings</td></tr>",
           "     <tr><td>Errors</td><td>$errors</td></tr>",
           '     </table><p><a href="#top">Top</a><p>',
           '');
   }
   51: 
   52: # ================================================================ Main Program
   53: 
   54: # --------------------------------- Read loncapa_apache.conf and loncapa.conf
   # Load the LON-CAPA configuration into the global %perlvar.  The two
   # entries below are dropped straight away because they are sensitive and
   # nothing in this script needs them.
   my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
   %perlvar=%{$perlvarref};
   undef $perlvarref;
   delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
   delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed

   # --------------------------------------- Make sure that LON-CAPA is configured
   # Only one thing is tested here (lonHostID still holding its template
   # placeholder).  This is just a safeguard.
   if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
      print("Unconfigured machine.\n");
      $emailto=$perlvar{'lonSysEMail'};
      $hostname=`/bin/hostname`;
      # chomp, not chop: only strip a trailing newline, never a real character.
      chomp($hostname);
      $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
      $subj="LON: Unconfigured machine $hostname";
      system("echo 'Unconfigured machine $hostname.' |\n".
             " mailto $emailto -s '$subj' > /dev/null");
      exit 1;
   }

   # ----------------------------- Make sure this process is running from user=www
   # In scalar context getpwnam returns the uid, or undef when the account does
   # not exist.  undef compares numerically equal to 0, so without the defined()
   # test a machine lacking a 'www' account would let root pass this check.
   my $wwwid=getpwnam('www');
   if (!defined($wwwid) || $wwwid!=$<) {
      print("User ID mismatch.  This program must be run as user 'www'\n");
      $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
      $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
      system("echo 'User ID mismatch.  loncron must be run as user www.' |\n".
             " mailto $emailto -s '$subj' > /dev/null");
      exit 1;
   }
   85: 
   86: # ------------------------------------------------------------- Read hosts file
   # Parse "$perlvar{'lonTabDir'}/hosts.tab".  Each usable line has the form
   #   id:domain:role:name:ip[:domain description]
   # and fills the globals %hostname, %hostdom, %hostip, %hostrole and
   # (when a description is present) %domaindescription.  Library servers
   # other than this host are additionally recorded in %libserv.
   # Lines missing any of the first five fields are skipped silently.
   {
       my $hoststab="$perlvar{'lonTabDir'}/hosts.tab";
       my $config=IO::File->new($hoststab);

       if ($config) {
           while (my $configline=<$config>) {
               # Strip the line terminator first; otherwise the last field
               # ($ip or $domdescr) would carry a trailing newline.
               chomp($configline);
               my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline);
               next unless ($id && $domain && $role && $name && $ip);
               $hostname{$id}=$name;
               $hostdom{$id}=$domain;
               $hostip{$id}=$ip;
               $hostrole{$id}=$role;
               if ($domdescr) { $domaindescription{$domain}=$domdescr; }
               if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
                   $libserv{$id}=$name;
               }
           }
           $config->close();
       } else {
           # Keep going (the report is still useful) but say why the host
           # tables will be empty.
           warn("loncron: cannot read $hoststab: $!\n");
       }
   }
  108: 
  109: # ------------------------------------------------------ Read spare server file
  # Read "$perlvar{'lonTabDir'}/spare.tab", one host id per line, and mark
  # every non-empty id other than this host's own in the global %spareid.
  {
      my $sparetab=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");

      while (my $entry=<$sparetab>) {
          chomp($entry);
          next if (!$entry);
          next if ($entry eq $perlvar{'lonHostID'});
          $spareid{$entry}=1;
      }
  }
  120: 
  121: # ---------------------------------------------------------------- Start report
  122: 
  # Directory the HTML report and the one-line status file are written to.
  $statusdir="/home/httpd/html/lon-status";

  # Running tallies, bumped by every check below and printed by errout().
  $errors=0;
  $warnings=0;
  $notices=0;

  $now=time;
  $date=localtime($now);

  # Collected key=value facts for loncron_simple.txt.  This must be declared
  # OUTSIDE the report block: the hash is filled inside the block (diskfull,
  # lonsql, lond, ...) but written out after the block has closed.  Declared
  # inside, those entries would be lost and the later code would silently use
  # an unrelated package variable of the same name.
  my %simplestatus=();

  {
  # The report is built under a temporary name and renamed into place at the
  # very end.  Printing to a failed handle would abort anyway; fail here with
  # a message that names the file.
  my $fh=IO::File->new(">$statusdir/newstatus.html")
      or die("loncron: cannot write $statusdir/newstatus.html: $!\n");
  135: 
  # Page header, table of contents and the start of the "Configuration"
  # section.  The page is assembled as a list of lines and written in one go.
  {
      # [ anchor name, link text ] for every section of the report, in order.
      my @toc=(
          ['configuration','Configuration'],
          ['machine','Machine Information'],
          ['tmp','Temporary Files'],
          ['tokens','Session Tokens'],
          ['httpd','httpd'],
          ['lonsql','lonsql'],
          ['lond','lond'],
          ['lonc','lonc'],
          ['lonhttpd','lonhttpd'],
          ['lonnet','lonnet'],
          ['connections','Connections'],
          ['delayed','Delayed Messages'],
          ['errcount','Error Count'],
      );
      my @page=(
          '<html>',
          '<head>',
          "<title>LON Status Report $perlvar{'lonHostID'}</title>",
          '</head>',
          '<body bgcolor="#AAAAAA">',
          '<a name="top">',
          "<h1>LON Status Report $perlvar{'lonHostID'}</h1>",
          "<h2>$date ($now)</h2>",
          '<ol>',
          (map { '<li><a href="#'.$_->[0].'">'.$_->[1].'</a>' } @toc),
          '</ol>',
          '<hr>',
          '<a name="configuration">',
          '<h2>Configuration</h2>',
          '<h3>PerlVars</h3>',
          '<table border=2>',
      );
      print $fh join("\n",@page)."\n";
  }

  # Every remaining %perlvar entry, sorted by name.
  foreach my $key (sort(keys(%perlvar))) {
      print $fh "<tr><td>$key</td><td>$perlvar{$key}</td></tr>\n";
  }

  # Hosts read from hosts.tab: id, domain, role, name, ip.
  print $fh "</table><h3>Hosts</h3><table border=2>";
  foreach my $hostid (sort(keys(%hostname))) {
      print $fh "<tr><td>$hostid</td><td>$hostdom{$hostid}</td>".
                "<td>$hostrole{$hostid}</td>".
                "<td>$hostname{$hostid}</td><td>$hostip{$hostid}</td></tr>\n";
  }

  # Spare hosts read from spare.tab.
  print $fh "</table><h3>Spare Hosts</h3><ol>";
  foreach my $spare (sort(keys(%spareid))) {
      print $fh "<li>$spare\n";
  }

  print $fh "</ol>\n";
  182: 
  183: # --------------------------------------------------------------------- Machine
  184: 
  # "Machine Information" section: load average, disk usage and process list.
  print $fh '<hr><a name="machine"><h2>Machine Information</h2>';
  print $fh "<h3>loadavg</h3>";

  # Load average: the SECOND whitespace-separated field of /proc/loadavg is
  # graded (>4 error, >2 warning, >1 notice).
  {
      open(my $loadfh,'<','/proc/loadavg');
      my $loadline=<$loadfh>;
      close($loadfh);

      print $fh "<tt>$loadline</tt>";

      my $load=(split(/\s+/,$loadline))[1];
      if    ($load>4.0) { $errors++; }
      elsif ($load>2.0) { $warnings++; }
      elsif ($load>1.0) { $notices++; }
  }

  # Disk usage: the fifth column of each "df" line, reduced to its word
  # characters, is graded.  Above 95 the 'diskfull' simple-status counter is
  # bumped as well.
  print $fh "<h3>df</h3>";
  print $fh "<pre>";
  {
      open(my $dfpipe,'-|','df');
      while (my $dfline=<$dfpipe>) {
          print $fh "$dfline";
          my $pct=(split(/\s+/,$dfline))[4];
          $pct=~s/\W//g;
          if    ($pct>90) { $warnings++; $notices++; }
          elsif ($pct>80) { $warnings++; }
          elsif ($pct>60) { $notices++; }
          if ($pct>95) {
              $warnings+=2;
              $simplestatus{'diskfull'}++;
          }
      }
      close($dfpipe);
  }
  print $fh "</pre>";

  # Process list: copied verbatim; one notice for more than 200 output lines
  # and another for more than 250.
  print $fh "<h3>ps</h3>";
  print $fh "<pre>";
  {
      my $pslines=0;
      open(my $pspipe,'-|','ps -aux');
      while (my $psline=<$pspipe>) {
          print $fh "$psline";
          $pslines++;
      }
      close($pspipe);
      print $fh "</pre>";

      foreach my $limit (200,250) {
          if ($pslines>$limit) { $notices++; }
      }
  }

  &errout($fh);
  242: 
  243: # --------------------------------------------------------------- clean out tmp
  # "Temporary Files" section: remove files under $perlvar{'lonDaemons'}/tmp
  # whose modification time is more than $perlvar{'lonExpire'} seconds ago.
  # Files whose first line starts with "CHECKOUTTOKEN&" are kept until they
  # are 365 times older than that; the ones kept are counted in $old.
  print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
  $cleaned=0;
  $old=0;
  foreach my $tmpfile (glob("$perlvar{'lonDaemons'}/tmp/*")) {
      my $mtime=(stat($tmpfile))[9];
      my $age=time-$mtime;
      next unless ($age>$perlvar{'lonExpire'});

      # Peek at the first line.  Three-argument open is required here: the
      # name comes straight from a directory listing, and with two-argument
      # open a name such as "cmd|" or ">file" would be interpreted as a mode.
      my $firstline='';
      if (open(my $probe,'<',$tmpfile)) {
          $firstline=<$probe>;
          close($probe);
      }

      if ($firstline=~/^CHECKOUTTOKEN\&/) {
          if ($age>365*$perlvar{'lonExpire'}) {
              $cleaned++;
              unlink("$tmpfile");
          } else {
              $old++;
          }
      } else {
          $cleaned++;
          unlink("$tmpfile");
      }
  }
  print $fh "Cleaned up ".$cleaned." files (".$old." old checkout tokens).";
  273: 
  274: # ------------------------------------------------------------ clean out lonIDs
  # "Session Tokens" section: every file under $perlvar{'lonIDsDir'} whose
  # modification time is more than $perlvar{'lonExpire'} seconds ago is
  # reported and unlinked; all others are counted as open sessions.
  print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
  $cleaned=0;
  $active=0;
  foreach my $tokenfile (glob("$perlvar{'lonIDsDir'}/*")) {
      my $idle=time-(stat($tokenfile))[9];
      if ($idle<=$perlvar{'lonExpire'}) {
          $active++;
          next;
      }
      $cleaned++;
      print $fh "Unlinking $tokenfile<br>";
      unlink("$tokenfile");
  }
  print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
  print $fh "<h3>$active open session(s)</h3>";
  296: 
  297: # ----------------------------------------------------------------------- httpd
  298: 
  # "httpd" section: the last 25 lines of the Apache access and error logs.
  # Each error-log line containing "[error]" adds one notice.
  print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
  {
      open(my $access,'-|','tail -n25 /etc/httpd/logs/access_log');
      while (my $row=<$access>) {
          print $fh "$row";
      }
      close($access);
  }

  print $fh "</pre><h3>Error Log</h3><pre>";
  {
      open(my $errlog,'-|','tail -n25 /etc/httpd/logs/error_log');
      while (my $row=<$errlog>) {
          print $fh "$row";
          $notices++ if ($row=~/\[error\]/);
      }
      close($errlog);
  }
  print $fh "</pre>";
  &errout($fh);
  315: 
  316: 
  317: # ---------------------------------------------------------------------- lonsql
  318: 
  # ------------------------------------------- lonsql, lond, lonc and lonhttpd
  #
  # The four daemons are checked by the same procedure; only a few numbers
  # differ, so the procedure lives in check_daemon() and is called once per
  # daemon below.

  # Copy the last $count lines of $logfile to the report and grade them.
  # Before a restart attempt ($after_restart false): INFO and WARNING add a
  # notice each, CRITICAL adds a warning.  After a restart attempt: WARNING
  # and CRITICAL add a notice each and INFO is ignored.
  sub scan_daemon_log {
      my ($fh,$logfile,$count,$after_restart)=@_;
      open(my $tail,"tail -n$count $logfile|");
      while (my $entry=<$tail>) {
          print $fh "$entry";
          if ($after_restart) {
              if ($entry=~/WARNING/)  { $notices++; }
              if ($entry=~/CRITICAL/) { $notices++; }
          } else {
              if ($entry=~/INFO/)     { $notices++; }
              if ($entry=~/WARNING/)  { $notices++; }
              if ($entry=~/CRITICAL/) { $warnings++; }
          }
      }
      close($tail);
  }

  # Return the first line of $pidfile without its newline (undef if the file
  # cannot be read).
  sub read_daemon_pid {
      my ($pidfile)=@_;
      my $lfh=IO::File->new("$pidfile");
      my $pid=<$lfh>;
      chomp($pid);
      return $pid;
  }

  # True when $pid is a positive integer and signal 0 can be delivered to it.
  # The format test matters: an empty or garbled pid file must never reach
  # kill(), where pid 0 would address this script's own process group.
  sub daemon_alive {
      my ($pid)=@_;
      return 0 unless (defined($pid) && $pid=~/^\d+$/ && $pid>0);
      return kill(0 => $pid);
  }

  # Report on one daemon, restart it if it is not running, rotate its log.
  #   $fh      report filehandle
  #   $status  reference to the simple-status hash; $status->{$daemon} is set
  #            to 'off' when a restart is needed and 'restarted' on success
  #   $daemon  program name; log, pid file and error file names derive from it
  #   %opt     loglines   => lines of log shown before the liveness check
  #            deaderrors => errors added when the recorded pid is dead
  #            usr1       => true to send USR1 to a live daemon
  #            maxlogsize => log size in bytes above which logs are rotated
  sub check_daemon {
      my ($fh,$status,$daemon,%opt)=@_;
      my $logfile="$perlvar{'lonDaemons'}/logs/$daemon.log";
      my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";
      my $startcmd="$perlvar{'lonDaemons'}/$daemon ".
                   "2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors";

      print $fh '<hr><a name="'.$daemon.'"><h2>'.$daemon.'</h2><h3>Log</h3><pre>';
      print "$daemon\n";
      if (-e $logfile) {
          scan_daemon_log($fh,$logfile,$opt{'loglines'},0);
      }
      print $fh "</pre>";

      # No pid file at all also means "restart".
      my $restartflag=1;
      if (-e $pidfile) {
          my $pid=read_daemon_pid($pidfile);
          if (daemon_alive($pid)) {
              if ($opt{'usr1'}) {
                  print $fh "<h3>$daemon at pid $pid responding, sending USR1</h3>";
                  kill('USR1',$pid);
              } else {
                  print $fh "<h3>$daemon at pid $pid responding</h3>";
              }
              $restartflag=0;
          } else {
              $errors+=$opt{'deaderrors'};
              print $fh "<h3>$daemon at pid $pid not responding</h3>";
              print $fh
                  "<h3>Decided to clean up stale .pid file and restart $daemon</h3>";
          }
      }

      if ($restartflag==1) {
          $status->{$daemon}='off';
          $errors++;
          # The numbers printed are the raw return values of system()/unlink().
          print $fh '<br><font color="red">Killall '.$daemon.': '.
                    system("killall $daemon").' - ';
          sleep 2;
          print $fh unlink($pidfile).' - '.system("killall -9 $daemon").
                    '</font><br>';
          print $fh "<h3>$daemon not running, trying to start</h3>";
          system($startcmd);
          sleep 2;
          if (-e $pidfile) {
              print $fh "Seems like it started ...<p>";
              my $pid=read_daemon_pid($pidfile);
              sleep 2;
              if (daemon_alive($pid)) {
                  print $fh "<h3>$daemon at pid $pid responding</h3>";
                  $status->{$daemon}='restarted';
              } else {
                  $errors+=2;
                  print $fh "<h3>$daemon at pid $pid not responding</h3>";
                  print $fh "Give it one more try ...<p>";
                  system($startcmd);
                  sleep 2;
              }
          } else {
              print $fh "Seems like that did not work!<p>";
              $errors++;
          }
          if (-e $logfile) {
              print $fh "<p><pre>";
              scan_daemon_log($fh,$logfile,100,1);
              print $fh "</pre>";
          }
      }

      # Keep at most three old generations of the log.
      my $size=(stat($logfile))[7];
      if ($size>$opt{'maxlogsize'}) {
          print $fh "Rotating logs ...<p>";
          rename("$logfile.2","$logfile.3");
          rename("$logfile.1","$logfile.2");
          rename("$logfile","$logfile.1");
      }
  }

  check_daemon($fh,\%simplestatus,'lonsql',
               loglines=>100, deaderrors=>2, usr1=>0, maxlogsize=>200000);
  &errout($fh);

  check_daemon($fh,\%simplestatus,'lond',
               loglines=>25, deaderrors=>1, usr1=>1, maxlogsize=>40000);
  &errout($fh);

  check_daemon($fh,\%simplestatus,'lonc',
               loglines=>25, deaderrors=>1, usr1=>1, maxlogsize=>40000);
  &errout($fh);

  check_daemon($fh,\%simplestatus,'lonhttpd',
               loglines=>25, deaderrors=>1, usr1=>0, maxlogsize=>40000);
  &errout($fh);
  709: # ---------------------------------------------------------------------- lonnet
  710: 
  # "lonnet" section: last 50 lines of lonnet.log, last 10 lines of
  # lonnet.perm.log (or a note that it is missing), then rotation of
  # lonnet.log once it exceeds 40000 bytes.  Nothing here is graded.
  my $lonnetlog="$perlvar{'lonDaemons'}/logs/lonnet.log";
  my $lonnetperm="$perlvar{'lonDaemons'}/logs/lonnet.perm.log";

  print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
  print "lonnet\n";
  if (-e $lonnetlog) {
      open(my $tail,"tail -n50 $lonnetlog|");
      while (my $row=<$tail>) {
          print $fh "$row";
      }
      close($tail);
  }
  print $fh "</pre><h3>Perm Log</h3><pre>";

  if (-e $lonnetperm) {
      open(my $tail,"tail -n10 $lonnetperm|");
      while (my $row=<$tail>) {
          print $fh "$row";
      }
      close($tail);
  } else {
      print $fh "No perm log\n";
  }

  # Note: the rotation message is emitted before the closing </pre>.
  if ((stat($lonnetlog))[7]>40000) {
      print $fh "Rotating logs ...<p>";
      rename("$lonnetlog.2","$lonnetlog.3");
      rename("$lonnetlog.1","$lonnetlog.2");
      rename("$lonnetlog","$lonnetlog.1");
  }

  print $fh "</pre>";
  &errout($fh);
  746: # ----------------------------------------------------------------- Connections
  747: 
  # "Connections" section: send "pong" to every host from hosts.tab via
  # reply().  The expected answer is "<host id>:<own host id>"; anything else
  # is shown verbatim and costs one warning, two if it is 'con_lost'.
  print $fh '<hr><a name="connections"><h2>Connections</h2>';

  print $fh "<table border=2>";
  foreach my $peer (sort(keys(%hostname))) {
      my $pong=reply("pong",$peer);
      my $cell="<b>ok</b>";
      unless ($pong eq "$peer:$perlvar{'lonHostID'}") {
          $cell=$pong;
          $warnings+=($pong eq 'con_lost') ? 2 : 1;
      }
      print $fh "<tr><td>$peer</td><td>$cell</td></tr>\n";
  }
  print $fh "</table>";

  &errout($fh);
  767: # ------------------------------------------------------------ Delayed messages
  768: 
  # "Delayed Messages" section.
  print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
  print "buffers\n";

  print $fh '<h3>Scanning Permanent Log</h3>';

  # Each line of lonnet.perm.log is "time:flag:server:command...".  Flag 'D'
  # raises and flag 'S' lowers the count of unsent messages (presumably
  # "delayed" and "sent" -- confirm against the writer of this log); flag 'F'
  # is reported as a failure and costs one warning.
  $unsend=0;
  {
      my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
      # The log does not exist until something has been delayed; that is not
      # an error, there is simply nothing to scan.
      if ($dfh) {
          while (my $entry=<$dfh>) {
              my ($time,$sdf,$dserv,$dcmd)=split(/:/,$entry);
              if ($sdf eq 'F') {
                  # Goes into the report, not onto STDOUT.
                  print $fh "<b>Failed: $time, $dserv, $dcmd</b><br>";
                  $warnings++;
              }
              if ($sdf eq 'S') { $unsend--; }
              if ($sdf eq 'D') { $unsend++; }
          }
          $dfh->close();
      }
  }
  print $fh "Total unsend messages: <b>$unsend</b><p>\n";
  $warnings=$warnings+5*$unsend;

  if ($unsend) { $simplestatus{'unsend'}=$unsend; }
  print $fh "<h3>Outgoing Buffer</h3>";

  # Directory listing of the delayed-message buffer.
  open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
  while (my $entry=<DFH>) {
      print $fh "$entry<br>";
  }
  close (DFH);
  799: 
  800: # ------------------------------------------------------------------------- End
  # End of the report: overall score, closing timestamp, close the file.
  # The score weighs a warning as 4 notices and an error as 100.
  print $fh "<a name=errcount>\n";
  $totalcount=$notices+4*$warnings+100*$errors;
  &errout($fh);
  $now=time;
  $date=localtime($now);
  print $fh "<h1>Total Error Count: $totalcount</h1>".
            "<hr>$date ($now)</body></html>\n";
  print "writing done\n";
  $fh->close();
  }

  # Non-zero tallies and the current time go into the simple status.
  $simplestatus{'errors'}=$errors     if ($errors);
  $simplestatus{'warnings'}=$warnings if ($warnings);
  $simplestatus{'notices'}=$notices   if ($notices);
  $simplestatus{'time'}=time;

  # Publish the finished report under its final name.
  rename("$statusdir/newstatus.html","$statusdir/index.html");

  # One line of "key=value&" pairs for cluster-wide automated reporting.
  {
      my $sfh=IO::File->new(">$statusdir/loncron_simple.txt");
      print $sfh join('',map { $_.'='.$simplestatus{$_}.'&' } keys(%simplestatus));
      print $sfh "\n";
      $sfh->close();
  }

  # Mail the report to the administrator above a score of 200, and to the
  # system address as well above 1000.
  if ($totalcount>200) {
      print "mailing\n";
      $emailto=($totalcount>1000)
          ? "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"
          : "$perlvar{'lonAdmEMail'}";
      $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
      system("metasend -b -t $emailto -s '$subj' ".
             "-f $statusdir/index.html -m text/html");
  }
  1;
  836: 
  837: 
  838: 
  839: 
  840: 
  841: 
  842: 
  843: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>