Annotation of loncom/loncron, revision 1.25

1.1       albertel    1: #!/usr/bin/perl
                      2: 
                      3: # The LearningOnline Network
                      4: # Housekeeping program, started by cron
                      5: #
                      6: # (TCP networking package
                      7: # 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
                      8: # 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
                      9: #
1.3       www        10: # 7/14,7/15,7/19,7/21,7/22,11/18,
                     11: # 2/8 Gerd Kortemeyer
1.20      harris41   12: # 12/6/2000,12/8 Scott Harrison
1.11      www        13: # 12/23 Gerd Kortemeyer
1.22      harris41   14: # YEAR=2001
                     15: # 1/10/2001, 2/12/, 2/26, 3/15, 04/11, 04/21,8/27 Scott Harrison
1.25    ! www        16: # 09/04,09/06,11/26 Gerd Kortemeyer
1.24      www        17: 
                     18: $|=1;
1.1       albertel   19: 
                     20: use IO::File;
                     21: use IO::Socket;
                     22: 
                     23: # -------------------------------------------------- Non-critical communication
                         # reply($cmd,$server)
                         #   Sends "$cmd\n" to the UNIX-domain stream socket
                         #   "$perlvar{'lonSockDir'}/$server" and returns the first line read back
                         #   with its trailing newline removed.
                         #   Returns the string "con_lost" when the socket cannot be opened or
                         #   when the answer is false (undef, empty string, or "0").
                         #   %perlvar is a package global filled from access.conf at file scope.
                         # NOTE(review): the "!$answer" test also maps a literal "0" reply to
                         #   "con_lost"; testing defined()/length() would keep them apart --
                         #   confirm whether "0" can be a legitimate reply.
                         # NOTE(review): Timeout => 10 presumably bounds only the connect; the
                         #   <$client> read has no visible timeout -- confirm against IO::Socket.
                     24: sub reply {
                     25:     my ($cmd,$server)=@_;
                     26:     my $peerfile="$perlvar{'lonSockDir'}/$server";
                     27:     my $client=IO::Socket::UNIX->new(Peer    =>"$peerfile",
                     28:                                      Type    => SOCK_STREAM,
                     29:                                      Timeout => 10)
                     30:        or return "con_lost";
                     31:     print $client "$cmd\n";
                     32:     my $answer=<$client>;
                     33:     chomp($answer);
                     34:     if (!$answer) { $answer="con_lost"; }
                     35:     return $answer;
                     36: }
                     37: 
                     38: # --------------------------------------------------------- Output error status
                         # errout($fh)
                         #   Writes an HTML table to filehandle $fh showing the current values of
                         #   the package globals $notices, $warnings and $errors, followed by a
                         #   link back to the "#top" anchor. The counters are read, not reset.
                     39: 
                     40: sub errout {
                     41:    my $fh=shift;
                     42:    print $fh (<<ENDERROUT);
                     43:      <p><table border=2 bgcolor="#CCCCCC">
                     44:      <tr><td>Notices</td><td>$notices</td></tr>
                     45:      <tr><td>Warnings</td><td>$warnings</td></tr>
                     46:      <tr><td>Errors</td><td>$errors</td></tr>
                     47:      </table><p><a href="#top">Top</a><p>
                     48: ENDERROUT
                     49: }
                     50: 
                     51: # ================================================================ Main Program
                     52: 
                     53: # ------------------------------------------------------------ Read access.conf
                         # Fills the package global %perlvar from /etc/httpd/conf/access.conf:
                         # every line containing "PerlSetVar" is split on whitespace and the
                         # second field becomes the key, the third field the value.
                         # lonReceipt and lonSqlAccess are removed again afterwards.
                         # NOTE(review): the IO::File->new result is not checked; if the file
                         #   cannot be opened the loop reads nothing and %perlvar stays empty.
                         # NOTE(review): split(/\s+/,...) yields a leading empty field when the
                         #   line starts with whitespace, so an indented "PerlSetVar name value"
                         #   stores $perlvar{'PerlSetVar'}='name'. split(' ',$configline) skips
                         #   leading whitespace and would avoid that.
                     54: {
                     55:     my $config=IO::File->new("/etc/httpd/conf/access.conf");
                     56: 
                     57:     while (my $configline=<$config>) {
                     58:         if ($configline =~ /PerlSetVar/) {
                     59: 	   my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
                     60:            $perlvar{$varname}=$varvalue;
                     61:         }
                     62:     }
1.19      harris41   63:     delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
                     64:     delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
1.13      harris41   65: }
                     66: 
1.14      harris41   67: # --------------------------------------- Make sure that LON-CAPA is configured
                     68: # I only test for one thing here (lonHostID).  This is just a safeguard.
                         # If lonHostID still holds the literal placeholder '{[[[[lonHostID]]]]}',
                         # print a message, mail $perlvar{'lonSysEMail'} through the external
                         # "mailto" command, and exit with status 1.
                     69: if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
1.15      harris41   70:    print("Unconfigured machine.\n");
1.14      harris41   71:    $emailto=$perlvar{'lonSysEMail'};
                     72:    $hostname=`/bin/hostname`;
                         # NOTE(review): chop removes the last character whatever it is; chomp
                         #   would only remove a trailing newline.
                     73:    chop $hostname;
                     74:    $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
                     75:    $subj="LON: Unconfigured machine $hostname";
                         # NOTE(review): $hostname is sanitised above, but $emailto is placed
                         #   into the shell command line as read from the configuration file.
                     76:    system("echo 'Unconfigured machine $hostname.' |\
                     77:  mailto $emailto -s '$subj' > /dev/null");
                     78:     exit 1;
                     79: }
                     80: 
1.13      harris41   81: # ----------------------------- Make sure this process is running from user=www
                         # Compares the uid of account 'www' (getpwnam in scalar context) with
                         # the real uid of this process ($<). On mismatch: print a message, mail
                         # lonAdmEMail and lonSysEMail through "mailto", and exit with status 1.
                         # NOTE(review): if no 'www' account exists getpwnam returns undef, which
                         #   compares numerically as 0; a run as uid 0 would then pass this
                         #   check. Testing defined($wwwid) first would close that gap.
                     82: my $wwwid=getpwnam('www');
                     83: if ($wwwid!=$<) {
1.14      harris41   84:    print("User ID mismatch.  This program must be run as user 'www'\n");
1.13      harris41   85:    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
                     86:    $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
                     87:    system("echo 'User ID mismatch.  loncron must be run as user www.' |\
                     88:  mailto $emailto -s '$subj' > /dev/null");
1.14      harris41   89:    exit 1;
1.1       albertel   90: }
                     91: 
                     92: # ------------------------------------------------------------- Read hosts file
                         # Reads "$perlvar{'lonTabDir'}/hosts.tab", one colon-separated record
                         # per line (id:domain:role:name:ip), into the package hashes %hostname,
                         # %hostdom, %hostrole and %hostip, all keyed by id. Hosts with role
                         # 'library' other than this machine are also stored in %libserv.
                         # NOTE(review): lines are not chomp()ed, so the last field of each
                         #   record keeps its trailing newline (normally $ip).
                         # NOTE(review): the IO::File->new result is not checked, and blank
                         #   lines are not skipped (they create an entry under the key "\n").
                     93: {
                     94:     my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
                     95: 
                     96:     while (my $configline=<$config>) {
                     97:        my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
                     98:        $hostname{$id}=$name;
                     99:        $hostdom{$id}=$domain;
                    100:        $hostrole{$id}=$role;
                    101:        $hostip{$id}=$ip;
                    102:        if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
                    103: 	   $libserv{$id}=$name;
                    104:        }
                    105:     }
                    106: }
                    107: 
                    108: # ------------------------------------------------------ Read spare server file
                         # Reads "$perlvar{'lonTabDir'}/spare.tab"; every non-empty line that
                         # differs from this machine's lonHostID becomes a key of %spareid.
                         # NOTE(review): the IO::File->new result is not checked; a line
                         #   consisting of "0" is treated as empty by the truth test.
                    109: {
                    110:     my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");
                    111: 
                    112:     while (my $configline=<$config>) {
                    113:        chomp($configline);
                    114:        if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
                    115:           $spareid{$configline}=1;
                    116:        }
                    117:     }
                    118: }
                    119: 
                    120: # ---------------------------------------------------------------- Start report
                         # Package globals used by the rest of the script: the output directory,
                         # the three severity counters printed by errout(), and the start time
                         # both as epoch seconds ($now) and as a localtime string ($date).
                    121: 
                    122: $statusdir="/home/httpd/html/lon-status";
                    123: 
                    124: $errors=0;
                    125: $warnings=0;
                    126: $notices=0;
                    127: 
                    128: $now=time;
                    129: $date=localtime($now);
                    130: 
                    131: {
                         # Opens "$statusdir/newstatus.html" for writing and prints the page
                         # head, the table of contents and the start of the PerlVars table.
                         # $fh is lexical to the bare block opened on the line above; that block
                         # is not closed within the part of the file shown here.
                         # NOTE(review): the IO::File->new result is not checked; if the file
                         #   cannot be created every later "print $fh" writes to an undefined
                         #   handle.
                    132: my $fh=IO::File->new(">$statusdir/newstatus.html");
                    133: 
                    134: print $fh (<<ENDHEADERS);
                    135: <html>
                    136: <head>
                    137: <title>LON Status Report $perlvar{'lonHostID'}</title>
                    138: </head>
1.3       www       139: <body bgcolor="#AAAAAA">
1.1       albertel  140: <a name="top">
                    141: <h1>LON Status Report $perlvar{'lonHostID'}</h1>
                    142: <h2>$date ($now)</h2>
                    143: <ol>
                    144: <li><a href="#configuration">Configuration</a>
                    145: <li><a href="#machine">Machine Information</a>
1.11      www       146: <li><a href="#tmp">Temporary Files</a>
                    147: <li><a href="#tokens">Session Tokens</a>
1.1       albertel  148: <li><a href="#httpd">httpd</a>
1.11      www       149: <li><a href="#lonsql">lonsql</a>
1.1       albertel  150: <li><a href="#lond">lond</a>
                    151: <li><a href="#lonc">lonc</a>
                    152: <li><a href="#lonnet">lonnet</a>
                    153: <li><a href="#connections">Connections</a>
                    154: <li><a href="#delayed">Delayed Messages</a>
                    155: <li><a href="#errcount">Error Count</a>
                    156: </ol>
                    157: <hr>
                    158: <a name="configuration">
                    159: <h2>Configuration</h2>
                    160: <h3>PerlVars</h3>
                    161: <table border=2>
                    162: ENDHEADERS
                    163: 
                         # Configuration section: one table row per %perlvar entry, one row per
                         # host read from hosts.tab, and a list of the spare host ids.
                         # Hash order is whatever keys() returns; nothing is sorted.
                         # NOTE(review): values are written into the HTML without escaping.
                    164: foreach $varname (keys %perlvar) {
                    165:     print $fh "<tr><td>$varname</td><td>$perlvar{$varname}</td></tr>\n";
                    166: }
                    167: print $fh "</table><h3>Hosts</h3><table border=2>";
                    168: foreach $id (keys %hostname) {
                    169: print $fh 
                    170:     "<tr><td>$id</td><td>$hostdom{$id}</td><td>$hostrole{$id}</td>";
                    171: print $fh "<td>$hostname{$id}</td><td>$hostip{$id}</td></tr>\n";
                    172: }
                    173: print $fh "</table><h3>Spare Hosts</h3><ol>";
                    174: foreach $id (keys %spareid) {
                    175:     print $fh "<li>$id\n";
                    176: }
                    177: 
                    178: print $fh "</ol>\n";
                    179: 
                    180: # --------------------------------------------------------------------- Machine
                         # Prints the first line of /proc/loadavg and grades its second
                         # whitespace-separated field: above 4.0 counts one error, above 2.0 one
                         # warning, above 1.0 one notice.
                         # NOTE(review): the open() result is not checked; if it fails $loadavg
                         #   is undef and no counter changes.
                    181: 
                    182: print $fh '<hr><a name="machine"><h2>Machine Information</h2>';
                    183: print $fh "<h3>loadavg</h3>";
                    184: 
                    185: open (LOADAVGH,"/proc/loadavg");
                    186: $loadavg=<LOADAVGH>;
                    187: close (LOADAVGH);
                    188: 
                    189: print $fh "<tt>$loadavg</tt>";
                    190: 
                    191: @parts=split(/\s+/,$loadavg);
1.4       www       192: if ($parts[1]>4.0) {
1.1       albertel  193:     $errors++;
                    194: } elsif ($parts[1]>2.0) {
                    195:     $warnings++;
                    196: } elsif ($parts[1]>1.0) {
                    197:     $notices++;
                    198: }
                    199: 
                         # Copies the output of "df" into the report and grades the fifth
                         # whitespace-separated field of every line after stripping non-word
                         # characters (which removes a trailing "%"):
                         #   above 90: one warning and one notice; above 80: one warning;
                         #   above 60: one notice; above 95: two further warnings.
                         # The header line yields a non-numeric field that compares as 0.
                         # NOTE(review): if df wraps a long device name onto its own line the
                         #   field positions shift; "df -P" presumably keeps one line per
                         #   filesystem -- confirm before changing the command.
                    200: print $fh "<h3>df</h3>";
                    201: print $fh "<pre>";
                    202: 
                    203: open (DFH,"df|");
                    204: while ($line=<DFH>) { 
                    205:    print $fh "$line"; 
                    206:    @parts=split(/\s+/,$line);
                    207:    $usage=$parts[4];
                    208:    $usage=~s/\W//g;
                    209:    if ($usage>90) { 
1.24      www       210:       $warnings++;
                    211:       $notices++; 
1.1       albertel  212:    } elsif ($usage>80) {
                    213:       $warnings++;
                    214:    } elsif ($usage>60) {
                    215:       $notices++;
                    216:    }
1.4       www       217:    if ($usage>95) { $warnings++; $warnings++ }
1.1       albertel  218: }
                    219: close (DFH);
                    220: print $fh "</pre>";
1.24      www       221: 
                         # Copies the output of "ps -aux" into the report and counts its lines
                         # in $psproc (the count includes any header line ps prints).
                         # More than 200 lines adds one notice, more than 250 adds a second.
                         # The section ends with the running totals table from errout().
                    222: 
                    223: print $fh "<h3>ps</h3>";
                    224: print $fh "<pre>";
                    225: $psproc=0;
                    226: 
                    227: open (PSH,"ps -aux|");
                    228: while ($line=<PSH>) { 
                    229:    print $fh "$line"; 
                    230:    $psproc++;
                    231: }
                    232: close (PSH);
                    233: print $fh "</pre>";
                    234: 
                    235: if ($psproc>200) { $notices++; }
                    236: if ($psproc>250) { $notices++; }
                    237: 
1.1       albertel  238: &errout($fh);
1.11      www       239: 
                    240: # --------------------------------------------------------------- clean out tmp
                         # Globs "$perlvar{'lonDaemons'}/tmp/*" and unlinks every entry whose
                         # modification time is more than $perlvar{'lonExpire'} seconds old.
                         # Only $mtime is used from the stat() list. The global $now is
                         # refreshed on every iteration.
                         # NOTE(review): $cleaned is incremented before unlink() and the unlink
                         #   result is ignored, so the reported number counts attempts.
                    241: print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
                    242: $cleaned=0;
                    243: while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
                    244:                           my ($dev,$ino,$mode,$nlink,
                    245:                               $uid,$gid,$rdev,$size,
                    246:                               $atime,$mtime,$ctime,
                    247:                               $blksize,$blocks)=stat($fname);
                    248:                           $now=time;
                    249:                           $since=$now-$mtime;
                    250:                           if ($since>$perlvar{'lonExpire'}) {
                    251:                               $cleaned++;
                    252:                               unlink("$fname");
                    253:                           }
                    254:     
                    255: }
                    256: print $fh "Cleaned up ".$cleaned." files.";
                    257: 
                    258: # ------------------------------------------------------------ clean out lonIDs
                         # Same expiry rule as the tmp cleanup, applied to
                         # "$perlvar{'lonIDsDir'}/*": entries older than lonExpire seconds are
                         # listed in the report and unlinked, all others are counted in $active.
                         # NOTE(review): as above, $cleaned counts unlink attempts, not
                         #   successes; file names are written to the page unescaped.
                    259: print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
                    260: $cleaned=0;
                    261: $active=0;
                    262: while ($fname=<$perlvar{'lonIDsDir'}/*>) {
                    263:                           my ($dev,$ino,$mode,$nlink,
                    264:                               $uid,$gid,$rdev,$size,
                    265:                               $atime,$mtime,$ctime,
                    266:                               $blksize,$blocks)=stat($fname);
                    267:                           $now=time;
                    268:                           $since=$now-$mtime;
                    269:                           if ($since>$perlvar{'lonExpire'}) {
                    270:                               $cleaned++;
                    271:                               print $fh "Unlinking $fname<br>";
                    272:                               unlink("$fname");
                    273:                           } else {
                    274:                               $active++;
                    275:                           }
                    276:     
                    277: }
                    278: print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
                    279: print $fh "<h3>$active open session(s)</h3>";
                    280: 
1.1       albertel  281: # ----------------------------------------------------------------------- httpd
                         # Copies the last 25 lines of the httpd access log and error log into
                         # <pre> sections. Every error-log line containing "[error]" adds one
                         # notice. The handle name DFH is reused from the df section.
                         # NOTE(review): log lines are written verbatim. Request lines in an
                         #   access log are supplied by remote clients, so markup in a request
                         #   presumably ends up live in the status page; escaping &, < and >
                         #   before printing would prevent that.
                         # NOTE(review): neither open() result is checked.
                    282: 
                    283: print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
                    284: 
1.23      www       285: open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
1.1       albertel  286: while ($line=<DFH>) { print $fh "$line" };
                    287: close (DFH);
                    288: 
                    289: print $fh "</pre><h3>Error Log</h3><pre>";
                    290: 
1.23      www       291: open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
1.1       albertel  292: while ($line=<DFH>) { 
                    293:    print $fh "$line";
                    294:    if ($line=~/\[error\]/) { $notices++; } 
                    295: };
                    296: close (DFH);
                    297: print $fh "</pre>";
                    298: &errout($fh);
1.5       harris41  299: 
                    300: 
1.11      www       301: # ---------------------------------------------------------------------- lonsql
                         # Runs only when $perlvar{'lonRole'} is "library". Steps:
                         #   1. dump the last 100 lines of lonsql.log (INFO and WARNING each add
                         #      a notice, CRITICAL adds a warning);
                         #   2. probe the pid stored in lonsql.pid with signal 0;
                         #   3. if the pid file is missing or the probe fails, kill and restart
                         #      the daemon and dump the log again;
                         #   4. rotate lonsql.log once it is larger than 40000 bytes.
                         # $restartflag is declared here and reused by the lond and lonc checks.
1.22      harris41  302: 
                    303: my $restartflag=1;
1.18      harris41  304: if ($perlvar{'lonRole'} eq "library") {
1.5       harris41  305: 
1.11      www       306:     print $fh '<hr><a name="lonsql"><h2>lonsql</h2><h3>Log</h3><pre>';
1.23      www       307:     print "lonsql\n";
1.5       harris41  308:     if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
                    309: 	open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
                    310: 	while ($line=<DFH>) { 
                    311: 	    print $fh "$line";
                    312: 	    if ($line=~/INFO/) { $notices++; }
                    313: 	    if ($line=~/WARNING/) { $notices++; }
                    314: 	    if ($line=~/CRITICAL/) { $warnings++; }
                    315: 	};
                    316: 	close (DFH);
                    317:     }
                    318:     print $fh "</pre>";
                    319:     
                    320:     my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
1.23      www       321:  
                    322:     $restartflag=1;
                    323:    
                         # NOTE(review): an empty or unreadable pid file leaves $lonsqlpid
                         #   numerically 0, and "kill 0 => 0" probes the caller's own process
                         #   group, which presumably succeeds and reports the daemon as
                         #   responding -- verify and reject non-numeric pids.
1.5       harris41  324:     if (-e $lonsqlfile) {
                    325: 	my $lfh=IO::File->new("$lonsqlfile");
                    326: 	my $lonsqlpid=<$lfh>;
                    327: 	chomp($lonsqlpid);
                    328: 	if (kill 0 => $lonsqlpid) {
                    329: 	    print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
1.22      harris41  330: 	    $restartflag=0;
1.5       harris41  331: 	} else {
                    332: 	    $errors++; $errors++;
                    333: 	    print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
1.22      harris41  334: 		$restartflag=1;
1.23      www       335: 	print $fh 
                    336: 	    "<h3>Decided to clean up stale .pid file and restart lonsql</h3>";
1.5       harris41  337: 	}
1.22      harris41  338:     }
                         # Restart path: killall, wait 60 s, remove the pid file, killall -9,
                         # start lonsql, wait 10 s, then (if a pid file appeared) wait 30 s and
                         # probe again, retrying the start once. The values written between
                         # the <font> tags are the raw return values of system() and unlink().
                         # NOTE(review): this branch can sleep for 60+10+30+10 seconds.
                    339:     if ($restartflag==1) {
1.5       harris41  340: 	$errors++;
1.23      www       341: 	         print $fh '<br><font color="red">Killall lonsql: '.
                    342:                     system('killall lonsql').' - ';
                    343:                     sleep 60;
                    344:                     print $fh unlink($lonsqlfile).' - '.
                    345:                               system('killall -9 lonsql').
                    346:                     '</font><br>';
1.5       harris41  347: 	print $fh "<h3>lonsql not running, trying to start</h3>";
1.16      harris41  348: 	system(
                    349:  "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
1.24      www       350: 	sleep 10;
1.5       harris41  351: 	if (-e $lonsqlfile) {
                    352: 	    print $fh "Seems like it started ...<p>";
                    353: 	    my $lfh=IO::File->new("$lonsqlfile");
                    354: 	    my $lonsqlpid=<$lfh>;
                    355: 	    chomp($lonsqlpid);
1.24      www       356: 	    sleep 30;
1.5       harris41  357: 	    if (kill 0 => $lonsqlpid) {
                    358: 		print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
                    359: 	    } else {
                    360: 		$errors++; $errors++;
                    361: 		print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
                    362: 		print $fh "Give it one more try ...<p>";
1.16      harris41  363: 		system(
                    364:  "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
1.24      www       365: 		sleep 10;
1.5       harris41  366: 	    }
                    367: 	} else {
                    368: 	    print $fh "Seems like that did not work!<p>";
                    369: 	    $errors++;
                    370: 	}
                         # Second log dump after the restart attempt. Here WARNING and CRITICAL
                         # both add a notice, whereas the first scan counts CRITICAL as a
                         # warning. NOTE(review): confirm the difference is intended.
                    371: 	if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
                    372: 	    print $fh "<p><pre>";
                    373: 	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
                    374: 	    while ($line=<DFH>) { 
                    375: 		print $fh "$line";
                    376: 		if ($line=~/WARNING/) { $notices++; }
                    377: 		if ($line=~/CRITICAL/) { $notices++; }
                    378: 	    };
                    379: 	    close (DFH);
                    380: 	    print $fh "</pre>";
                    381: 	}
                    382:     }
                    383: 
                         # Log rotation: only $size is used from the stat() list. When the log
                         # does not exist stat() returns an empty list and nothing is renamed.
                         # The rename() results are not checked.
                    384:     $fname="$perlvar{'lonDaemons'}/logs/lonsql.log";
                    385: 
                    386:     my ($dev,$ino,$mode,$nlink,
                    387: 	$uid,$gid,$rdev,$size,
                    388: 	$atime,$mtime,$ctime,
                    389: 	$blksize,$blocks)=stat($fname);
                    390: 
                    391:     if ($size>40000) {
                    392: 	print $fh "Rotating logs ...<p>";
                    393: 	rename("$fname.2","$fname.3");
                    394: 	rename("$fname.1","$fname.2");
                    395: 	rename("$fname","$fname.1");
                    396:     }
                    397: 
                    398:     &errout($fh);
                    399: }
1.1       albertel  400: # ------------------------------------------------------------------------ lond
                         # Same supervision sequence as the lonsql section, run unconditionally
                         # for lond: dump the last 25 lines of lond.log (INFO and WARNING each
                         # add a notice, CRITICAL adds a warning), probe the pid from lond.pid
                         # with signal 0, restart on failure, rotate the log above 40000 bytes.
                         # A responding daemon is additionally sent USR1; what lond does on
                         # USR1 is not visible in this file.
                    401: 
                    402: print $fh '<hr><a name="lond"><h2>lond</h2><h3>Log</h3><pre>';
1.23      www       403: print "lond\n";
1.1       albertel  404: 
                    405: if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
1.23      www       406: open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lond.log|");
1.1       albertel  407: while ($line=<DFH>) { 
                    408:    print $fh "$line";
1.3       www       409:    if ($line=~/INFO/) { $notices++; }
1.4       www       410:    if ($line=~/WARNING/) { $notices++; }
                    411:    if ($line=~/CRITICAL/) { $warnings++; }
1.1       albertel  412: };
                    413: close (DFH);
                    414: }
                    415: print $fh "</pre>";
                    416: 
                    417: my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid";
                    418: 
                         # NOTE(review): an empty pid file makes $londpid numerically 0;
                         #   "kill 0 => 0" and "kill USR1 => 0" then address this script's own
                         #   process group -- verify and reject non-numeric pids.
1.22      harris41  419: $restartflag=1;
1.7       harris41  420: if (-e $londfile) {    
1.1       albertel  421:    my $lfh=IO::File->new("$londfile");
                    422:    my $londpid=<$lfh>;
                    423:    chomp($londpid);
                    424:    if (kill 0 => $londpid) {
1.25    ! www       425:       print $fh "<h3>lond at pid $londpid responding, sending USR1</h3>";
        !           426:       kill USR1 => $londpid;
1.7       harris41  427:       $restartflag=0;
1.1       albertel  428:    } else {
1.8       harris41  429:       $errors++;
1.1       albertel  430:       print $fh "<h3>lond at pid $londpid not responding</h3>";
1.23      www       431:       $restartflag=1;
1.8       harris41  432:       print $fh 
1.23      www       433: 	  "<h3>Decided to clean up stale .pid file and restart lond</h3>";
1.1       albertel  434:    }
1.7       harris41  435: } 
                         # Restart path: killall, wait 60 s, remove the pid file, killall -9,
                         # start lond, wait 10 s, then wait 30 s and probe again, retrying the
                         # start once. A dead pid adds one error here; the lonsql section adds
                         # two for the same condition.
                    436: if ($restartflag==1) {
1.1       albertel  437:    $errors++;
1.23      www       438: 	  print $fh '<br><font color="red">Killall lond: '.
                    439:                     system('killall lond').' - ';
                    440:           sleep 60;
                    441:           print $fh unlink($londfile).' - '.system('killall -9 lond').
                    442:                     '</font><br>';
1.1       albertel  443:    print $fh "<h3>lond not running, trying to start</h3>";
1.16      harris41  444:    system(
                    445:      "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
1.24      www       446:    sleep 10;
1.1       albertel  447:    if (-e $londfile) {
                    448:        print $fh "Seems like it started ...<p>";
                    449:        my $lfh=IO::File->new("$londfile");
                    450:        my $londpid=<$lfh>;
                    451:        chomp($londpid);
1.24      www       452:        sleep 30;
1.1       albertel  453:        if (kill 0 => $londpid) {
                    454:           print $fh "<h3>lond at pid $londpid responding</h3>";
                    455:        } else {
                    456:           $errors++; $errors++;
                    457:           print $fh "<h3>lond at pid $londpid not responding</h3>";
                    458:           print $fh "Give it one more try ...<p>";
1.16      harris41  459: 	  system(
                    460:  "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
1.24      www       461:           sleep 10;
1.1       albertel  462:        }
                    463:    } else {
                    464:        print $fh "Seems like that did not work!<p>";
                    465:        $errors++;
                    466:    }
                         # Post-restart log dump (100 lines): WARNING and CRITICAL both add a
                         # notice here, unlike the first scan. NOTE(review): confirm intent.
1.3       www       467:    if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
                    468:     print $fh "<p><pre>";
                    469:     open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
                    470:     while ($line=<DFH>) { 
                    471:       print $fh "$line";
1.4       www       472:       if ($line=~/WARNING/) { $notices++; }
                    473:       if ($line=~/CRITICAL/) { $notices++; }
1.3       www       474:     };
                    475:     close (DFH);
                    476:     print $fh "</pre>";
                    477:    }
1.1       albertel  478: }
                    479: 
                         # Log rotation above 40000 bytes; only $size is used from stat().
                    480: $fname="$perlvar{'lonDaemons'}/logs/lond.log";
                    481: 
                    482:                           my ($dev,$ino,$mode,$nlink,
                    483:                               $uid,$gid,$rdev,$size,
                    484:                               $atime,$mtime,$ctime,
                    485:                               $blksize,$blocks)=stat($fname);
                    486: 
                    487: if ($size>40000) {
                    488:     print $fh "Rotating logs ...<p>";
                    489:     rename("$fname.2","$fname.3");
                    490:     rename("$fname.1","$fname.2");
                    491:     rename("$fname","$fname.1");
                    492: }
                    493: 
                    494: &errout($fh);
                    495: # ------------------------------------------------------------------------ lonc
                         # Supervision of lonc, following the same sequence as the lond section:
                         # dump the last 25 lines of lonc.log (INFO and WARNING each add a
                         # notice, CRITICAL adds a warning), probe the pid from lonc.pid with
                         # signal 0 and send USR1 when it responds, otherwise kill and restart,
                         # then rotate the log above 40000 bytes.
                    496: 
                    497: print $fh '<hr><a name="lonc"><h2>lonc</h2><h3>Log</h3><pre>';
1.23      www       498: print "lonc\n";
1.1       albertel  499: 
                    500: if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
1.23      www       501: open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lonc.log|");
1.1       albertel  502: while ($line=<DFH>) { 
                    503:    print $fh "$line";
1.3       www       504:    if ($line=~/INFO/) { $notices++; }
1.4       www       505:    if ($line=~/WARNING/) { $notices++; }
                    506:    if ($line=~/CRITICAL/) { $warnings++; }
1.1       albertel  507: };
                    508: close (DFH);
                    509: }
                    510: print $fh "</pre>";
                    511: 
                    512: my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
                    513: 
                         # NOTE(review): an empty pid file makes $loncpid numerically 0, so the
                         #   two kill calls below would address this script's own process
                         #   group -- verify and reject non-numeric pids.
1.7       harris41  514: $restartflag=1;
1.1       albertel  515: if (-e $loncfile) {
                    516:    my $lfh=IO::File->new("$loncfile");
                    517:    my $loncpid=<$lfh>;
                    518:    chomp($loncpid);
                    519:    if (kill 0 => $loncpid) {
                    520:       print $fh "<h3>lonc at pid $loncpid responding, sending USR1</h3>";
                    521:       kill USR1 => $loncpid;
1.7       harris41  522:       $restartflag=0;
1.1       albertel  523:    } else {
1.8       harris41  524:       $errors++;
1.1       albertel  525:       print $fh "<h3>lonc at pid $loncpid not responding</h3>";
1.10      harris41  526:       # Solution: kill parent and children processes, remove .pid and restart
1.8       harris41  527: 	  $restartflag=1;
                    528:       print $fh 
1.23      www       529: 	  "<h3>Decided to clean up stale .pid file and restart lonc</h3>";
1.1       albertel  530:    }
1.7       harris41  531: } 
                         # Restart path: killall, wait 60 s, remove the pid file, killall -9,
                         # start lonc, wait 10 s, then wait 30 s and probe again, retrying the
                         # start once. The printed numbers are raw system()/unlink() results.
                    532: if ($restartflag==1) {
1.1       albertel  533:    $errors++;
1.23      www       534: 	  print $fh '<br><font color="red">Killall lonc: '.
                    535: 	            system('killall lonc').' - ';
                    536:           sleep 60;
                    537:           print $fh unlink($loncfile).' - '.system('killall -9 lonc').
                    538:                     '</font><br>';
1.1       albertel  539:    print $fh "<h3>lonc not running, trying to start</h3>";
1.16      harris41  540: 	system(
1.17      harris41  541:  "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
1.24      www       542:    sleep 10;
1.1       albertel  543:    if (-e $loncfile) {
                    544:        print $fh "Seems like it started ...<p>";
                    545:        my $lfh=IO::File->new("$loncfile");
                    546:        my $loncpid=<$lfh>;
                    547:        chomp($loncpid);
1.24      www       548:        sleep 30;
1.1       albertel  549:        if (kill 0 => $loncpid) {
                    550:           print $fh "<h3>lonc at pid $loncpid responding</h3>";
                    551:        } else {
                    552:           $errors++; $errors++;
                    553:           print $fh "<h3>lonc at pid $loncpid not responding</h3>";
                    554:           print $fh "Give it one more try ...<p>";
1.16      harris41  555:  	  system(
1.17      harris41  556:  "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
1.24      www       557:           sleep 10;
1.1       albertel  558:        }
                    559:    } else {
                    560:        print $fh "Seems like that did not work!<p>";
                    561:        $errors++;
                    562:    }
                         # Post-restart log dump (100 lines): WARNING and CRITICAL both add a
                         # notice here, unlike the first scan. NOTE(review): confirm intent.
1.3       www       563:    if (-e "$perlvar{'lonDaemons'}/logs/lonc.log") {
                    564:     print $fh "<p><pre>";
                    565:     open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
                    566:     while ($line=<DFH>) { 
                    567:       print $fh "$line";
1.4       www       568:       if ($line=~/WARNING/) { $notices++; }
                    569:       if ($line=~/CRITICAL/) { $notices++; }
1.3       www       570:     };
                    571:     close (DFH);
                    572:     print $fh "</pre>";
                    573:    }
1.1       albertel  574: }
                    575: 
                         # Log rotation above 40000 bytes; only $size is used from stat().
                         # NOTE(review): this "my" list repeats the names declared by the same
                         #   list in the lond section within the same enclosing block, which
                         #   draws "masks earlier declaration" warnings when warnings are on.
                    576: $fname="$perlvar{'lonDaemons'}/logs/lonc.log";
                    577: 
                    578:                           my ($dev,$ino,$mode,$nlink,
                    579:                               $uid,$gid,$rdev,$size,
                    580:                               $atime,$mtime,$ctime,
                    581:                               $blksize,$blocks)=stat($fname);
                    582: 
                    583: if ($size>40000) {
                    584:     print $fh "Rotating logs ...<p>";
                    585:     rename("$fname.2","$fname.3");
                    586:     rename("$fname.1","$fname.2");
                    587:     rename("$fname","$fname.1");
                    588: }
                    589: 
                    590:    
                    591: &errout($fh);
                    # ---------------------------------------------------------------------- lonnet
                    # Copy the last 50 lines of lonnet's temporary log and the last 10 lines of
                    # its permanent log into the report, rotate the temporary log once it
                    # exceeds 40000 bytes, and close the section with the error summary.

                    $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";

                    print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
                    print "lonnet\n";
                    if (-e $fname) {
                        open (DFH,"tail -n50 $fname|");
                        print $fh $line while ($line=<DFH>);
                        close (DFH);
                    }
                    print $fh "</pre><h3>Perm Log</h3><pre>";

                    if (! -e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
                        print $fh "No perm log\n";
                    } else {
                        open (DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
                        print $fh $line while ($line=<DFH>);
                        close (DFH);
                    }

                    # -s yields nothing for a missing or empty file, which compares as 0.
                    if ((-s $fname) > 40000) {
                        print $fh "Rotating logs ...<p>";
                        # Each pair is (from, to); the oldest generation is moved first.
                        foreach my $move (["$fname.2","$fname.3"],
                                          ["$fname.1","$fname.2"],
                                          [$fname,"$fname.1"]) {
                            rename($move->[0],$move->[1]);
                        }
                    }

                    print $fh "</pre>";
                    &errout($fh);
                    # ----------------------------------------------------------------- Connections
                    # Send a "pong" request to every host in %hostname via reply() and list
                    # the answers in a table.  The expected answer is
                    # "<host>:<our lonHostID>"; any other answer counts as one warning, and
                    # the answer 'con_lost' counts as two.

                    print $fh '<hr><a name="connections"><h2>Connections</h2>'
                             ."<table border=2>";
                    foreach $tryserver (keys %hostname) {

                        $answer=reply("pong",$tryserver);
                        # Show the raw answer unless it turns out to be the expected one.
                        $result=$answer;
                        if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
                            $result="<b>ok</b>";
                        } elsif ($answer eq 'con_lost') {
                            $warnings+=2;
                        } else {
                            $warnings++;
                        }
                        print $fh "<tr><td>$tryserver</td><td>$result</td></tr>\n";

                    }
                    print $fh "</table>";

                    &errout($fh);
                    # ------------------------------------------------------------ Delayed messages
                    # Scan lonnet's permanent log and report undelivered messages, then list
                    # the outgoing buffer directory.
                    #
                    # The first four colon-separated fields of each log line are read as
                    # time, status flag, server and command.  A flag of 'F' is reported as a
                    # failed message and counts as one warning.  'D' increments and 'S'
                    # decrements the count of unsent messages (presumably "delayed" and
                    # "sent" -- confirm against the code that writes the log).  Every message
                    # still unsent at the end adds five warnings.

                    print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
                    print "buffers\n";

                    print $fh '<h3>Scanning Permanent Log</h3>';

                    $unsend=0;
                    {
                        my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
                        # The permanent log may not exist; then there is nothing to scan.
                        if ($dfh) {
                            while ($line=<$dfh>) {
                                ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
                                if ($sdf eq 'F') {
                                    $local=localtime($time);
                                    # Written to the report file like the rest of this
                                    # section, with the readable time next to the raw one.
                                    print $fh
                                      "<b>Failed: $local ($time), $dserv, $dcmd</b><br>";
                                    $warnings++;
                                }
                                if ($sdf eq 'S') { $unsend--; }
                                if ($sdf eq 'D') { $unsend++; }
                            }
                            $dfh->close;
                        }
                    }
                    print $fh "Total unsend messages: <b>$unsend</b><p>\n";
                    $warnings=$warnings+5*$unsend;

                    print $fh "<h3>Outgoing Buffer</h3>";

                    # One report line per line of the directory listing.
                    open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
                    while ($line=<DFH>) {
                        print $fh "$line<br>";
                    };
                    close (DFH);
                    681: 
                    # ------------------------------------------------------------------------- End
                    # Finish the report: error summary, weighted total, timestamp and the
                    # closing HTML tags.
                    print $fh "<a name=errcount>\n";
                    # An error weighs 100, a warning 4 and a notice 1.
                    $totalcount=100*$errors+4*$warnings+$notices;
                    &errout($fh);
                    print $fh "<h1>Total Error Count: ".$totalcount."</h1>";
                    $now=time;
                    $date=localtime($now);
                    print $fh "<hr>".$date." (".$now.")</body></html>\n";
                    print "writing done\n";
1.1       albertel  691: }
                    692: 
                    # Publish the finished report under its final name.  When the weighted
                    # error count is above 200, also mail it to the addresses configured as
                    # lonAdmEMail and lonSysEMail.
                    rename("$statusdir/newstatus.html","$statusdir/index.html")
                        or warn "loncron: cannot rename newstatus.html to index.html: $!\n";

                    if ($totalcount>200) {
                        print "mailing\n";
                        $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
                        $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
                        # List form runs metasend without a shell, so quotes or blanks in
                        # the configured values cannot break or extend the command line.
                        system('metasend','-b','-t',$emailto,'-s',$subj,
                               '-f',"$statusdir/index.html",'-m','text/html');
                    }
                    702: 1;
                    703: 
                    704: 
                    705: 
                    706: 
                    707: 
                    708: 
                    709: 
                    710: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>