Annotation of loncom/loncron, revision 1.22

1.1       albertel    1: #!/usr/bin/perl
                      2: 
                      3: # The LearningOnline Network
                      4: # Housekeeping program, started by cron
                      5: #
                      6: # (TCP networking package
                      7: # 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
                      8: # 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
                      9: #
1.3       www        10: # 7/14,7/15,7/19,7/21,7/22,11/18,
                     11: # 2/8 Gerd Kortemeyer
1.20      harris41   12: # 12/6/2000,12/8 Scott Harrison
1.11      www        13: # 12/23 Gerd Kortemeyer
1.22    ! harris41   14: # YEAR=2001
        !            15: # 1/10/2001, 2/12/, 2/26, 3/15, 04/11, 04/21,8/27 Scott Harrison
1.1       albertel   16: 
                     17: use IO::File;
                     18: use IO::Socket;
                     19: 
# "quick" as the first command-line argument sets $qflag, which suppresses
# the sleeps that follow daemon restart attempts further down.
my $qflag=0;
if (scalar(@ARGV) > 0) {
    my $mode=shift(@ARGV);
    if ($mode eq 'quick') {
        $qflag=1;
    }
}
                     25: 
1.1       albertel   26: # -------------------------------------------------- Non-critical communication
                     27: sub reply {
                     28:     my ($cmd,$server)=@_;
                     29:     my $peerfile="$perlvar{'lonSockDir'}/$server";
                     30:     my $client=IO::Socket::UNIX->new(Peer    =>"$peerfile",
                     31:                                      Type    => SOCK_STREAM,
                     32:                                      Timeout => 10)
                     33:        or return "con_lost";
                     34:     print $client "$cmd\n";
                     35:     my $answer=<$client>;
                     36:     chomp($answer);
                     37:     if (!$answer) { $answer="con_lost"; }
                     38:     return $answer;
                     39: }
                     40: 
                     41: # --------------------------------------------------------- Output error status
                     42: 
                     43: sub errout {
                     44:    my $fh=shift;
                     45:    print $fh (<<ENDERROUT);
                     46:      <p><table border=2 bgcolor="#CCCCCC">
                     47:      <tr><td>Notices</td><td>$notices</td></tr>
                     48:      <tr><td>Warnings</td><td>$warnings</td></tr>
                     49:      <tr><td>Errors</td><td>$errors</td></tr>
                     50:      </table><p><a href="#top">Top</a><p>
                     51: ENDERROUT
                     52: }
                     53: 
                     54: # ================================================================ Main Program
                     55: 
                     56: # ------------------------------------------------------------ Read access.conf
                     57: {
                     58:     my $config=IO::File->new("/etc/httpd/conf/access.conf");
                     59: 
                     60:     while (my $configline=<$config>) {
                     61:         if ($configline =~ /PerlSetVar/) {
                     62: 	   my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
                     63:            $perlvar{$varname}=$varvalue;
                     64:         }
                     65:     }
1.19      harris41   66:     delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
                     67:     delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed
1.13      harris41   68: }
                     69: 
1.14      harris41   70: # --------------------------------------- Make sure that LON-CAPA is configured
                     71: # I only test for one thing here (lonHostID).  This is just a safeguard.
                     72: if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
1.15      harris41   73:    print("Unconfigured machine.\n");
1.14      harris41   74:    $emailto=$perlvar{'lonSysEMail'};
                     75:    $hostname=`/bin/hostname`;
                     76:    chop $hostname;
                     77:    $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
                     78:    $subj="LON: Unconfigured machine $hostname";
                     79:    system("echo 'Unconfigured machine $hostname.' |\
                     80:  mailto $emailto -s '$subj' > /dev/null");
                     81:     exit 1;
                     82: }
                     83: 
1.13      harris41   84: # ----------------------------- Make sure this process is running from user=www
                     85: my $wwwid=getpwnam('www');
                     86: if ($wwwid!=$<) {
1.14      harris41   87:    print("User ID mismatch.  This program must be run as user 'www'\n");
1.13      harris41   88:    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
                     89:    $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
                     90:    system("echo 'User ID mismatch.  loncron must be run as user www.' |\
                     91:  mailto $emailto -s '$subj' > /dev/null");
1.14      harris41   92:    exit 1;
1.1       albertel   93: }
                     94: 
                     95: # ------------------------------------------------------------- Read hosts file
                     96: {
                     97:     my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
                     98: 
                     99:     while (my $configline=<$config>) {
                    100:        my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
                    101:        $hostname{$id}=$name;
                    102:        $hostdom{$id}=$domain;
                    103:        $hostrole{$id}=$role;
                    104:        $hostip{$id}=$ip;
                    105:        if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) {
                    106: 	   $libserv{$id}=$name;
                    107:        }
                    108:     }
                    109: }
                    110: 
                    111: # ------------------------------------------------------ Read spare server file
                    112: {
                    113:     my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");
                    114: 
                    115:     while (my $configline=<$config>) {
                    116:        chomp($configline);
                    117:        if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
                    118:           $spareid{$configline}=1;
                    119:        }
                    120:     }
                    121: }
                    122: 
                    123: # ---------------------------------------------------------------- Start report
                    124: 
                    125: $statusdir="/home/httpd/html/lon-status";
                    126: 
                    127: $errors=0;
                    128: $warnings=0;
                    129: $notices=0;
                    130: 
                    131: $now=time;
                    132: $date=localtime($now);
                    133: 
                    134: {
                    135: my $fh=IO::File->new(">$statusdir/newstatus.html");
                    136: 
                    137: print $fh (<<ENDHEADERS);
                    138: <html>
                    139: <head>
                    140: <title>LON Status Report $perlvar{'lonHostID'}</title>
                    141: </head>
1.3       www       142: <body bgcolor="#AAAAAA">
1.1       albertel  143: <a name="top">
                    144: <h1>LON Status Report $perlvar{'lonHostID'}</h1>
                    145: <h2>$date ($now)</h2>
                    146: <ol>
                    147: <li><a href="#configuration">Configuration</a>
                    148: <li><a href="#machine">Machine Information</a>
1.11      www       149: <li><a href="#tmp">Temporary Files</a>
                    150: <li><a href="#tokens">Session Tokens</a>
1.1       albertel  151: <li><a href="#httpd">httpd</a>
1.11      www       152: <li><a href="#lonsql">lonsql</a>
1.1       albertel  153: <li><a href="#lond">lond</a>
                    154: <li><a href="#lonc">lonc</a>
                    155: <li><a href="#lonnet">lonnet</a>
                    156: <li><a href="#connections">Connections</a>
                    157: <li><a href="#delayed">Delayed Messages</a>
                    158: <li><a href="#errcount">Error Count</a>
                    159: </ol>
                    160: <hr>
                    161: <a name="configuration">
                    162: <h2>Configuration</h2>
                    163: <h3>PerlVars</h3>
                    164: <table border=2>
                    165: ENDHEADERS
                    166: 
                    167: foreach $varname (keys %perlvar) {
                    168:     print $fh "<tr><td>$varname</td><td>$perlvar{$varname}</td></tr>\n";
                    169: }
                    170: print $fh "</table><h3>Hosts</h3><table border=2>";
                    171: foreach $id (keys %hostname) {
                    172: print $fh 
                    173:     "<tr><td>$id</td><td>$hostdom{$id}</td><td>$hostrole{$id}</td>";
                    174: print $fh "<td>$hostname{$id}</td><td>$hostip{$id}</td></tr>\n";
                    175: }
                    176: print $fh "</table><h3>Spare Hosts</h3><ol>";
                    177: foreach $id (keys %spareid) {
                    178:     print $fh "<li>$id\n";
                    179: }
                    180: 
                    181: print $fh "</ol>\n";
                    182: 
                    183: # --------------------------------------------------------------------- Machine
                    184: 
                    185: print $fh '<hr><a name="machine"><h2>Machine Information</h2>';
                    186: print $fh "<h3>loadavg</h3>";
                    187: 
                    188: open (LOADAVGH,"/proc/loadavg");
                    189: $loadavg=<LOADAVGH>;
                    190: close (LOADAVGH);
                    191: 
                    192: print $fh "<tt>$loadavg</tt>";
                    193: 
                    194: @parts=split(/\s+/,$loadavg);
1.4       www       195: if ($parts[1]>4.0) {
1.1       albertel  196:     $errors++;
                    197: } elsif ($parts[1]>2.0) {
                    198:     $warnings++;
                    199: } elsif ($parts[1]>1.0) {
                    200:     $notices++;
                    201: }
                    202: 
                    203: print $fh "<h3>df</h3>";
                    204: print $fh "<pre>";
                    205: 
                    206: open (DFH,"df|");
                    207: while ($line=<DFH>) { 
                    208:    print $fh "$line"; 
                    209:    @parts=split(/\s+/,$line);
                    210:    $usage=$parts[4];
                    211:    $usage=~s/\W//g;
                    212:    if ($usage>90) { 
1.4       www       213:       $warnings++; 
1.1       albertel  214:    } elsif ($usage>80) {
                    215:       $warnings++;
                    216:    } elsif ($usage>60) {
                    217:       $notices++;
                    218:    }
1.4       www       219:    if ($usage>95) { $warnings++; $warnings++ }
1.1       albertel  220: }
                    221: close (DFH);
                    222: print $fh "</pre>";
                    223: &errout($fh);
1.11      www       224: 
                    225: # --------------------------------------------------------------- clean out tmp
                    226: print $fh '<hr><a name="tmp"><h2>Temporary Files</h2>';
                    227: $cleaned=0;
                    228: while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) {
                    229:                           my ($dev,$ino,$mode,$nlink,
                    230:                               $uid,$gid,$rdev,$size,
                    231:                               $atime,$mtime,$ctime,
                    232:                               $blksize,$blocks)=stat($fname);
                    233:                           $now=time;
                    234:                           $since=$now-$mtime;
                    235:                           if ($since>$perlvar{'lonExpire'}) {
                    236:                               $cleaned++;
                    237:                               unlink("$fname");
                    238:                           }
                    239:     
                    240: }
                    241: print $fh "Cleaned up ".$cleaned." files.";
                    242: 
                    243: # ------------------------------------------------------------ clean out lonIDs
                    244: print $fh '<hr><a name="tokens"><h2>Session Tokens</h2>';
                    245: $cleaned=0;
                    246: $active=0;
                    247: while ($fname=<$perlvar{'lonIDsDir'}/*>) {
                    248:                           my ($dev,$ino,$mode,$nlink,
                    249:                               $uid,$gid,$rdev,$size,
                    250:                               $atime,$mtime,$ctime,
                    251:                               $blksize,$blocks)=stat($fname);
                    252:                           $now=time;
                    253:                           $since=$now-$mtime;
                    254:                           if ($since>$perlvar{'lonExpire'}) {
                    255:                               $cleaned++;
                    256:                               print $fh "Unlinking $fname<br>";
                    257:                               unlink("$fname");
                    258:                           } else {
                    259:                               $active++;
                    260:                           }
                    261:     
                    262: }
                    263: print $fh "<p>Cleaned up ".$cleaned." stale session token(s).";
                    264: print $fh "<h3>$active open session(s)</h3>";
                    265: 
1.1       albertel  266: # ----------------------------------------------------------------------- httpd
                    267: 
                    268: print $fh '<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>';
                    269: 
                    270: open (DFH,"tail -n40 /etc/httpd/logs/access_log|");
                    271: while ($line=<DFH>) { print $fh "$line" };
                    272: close (DFH);
                    273: 
                    274: print $fh "</pre><h3>Error Log</h3><pre>";
                    275: 
                    276: open (DFH,"tail -n50 /etc/httpd/logs/error_log|");
                    277: while ($line=<DFH>) { 
                    278:    print $fh "$line";
                    279:    if ($line=~/\[error\]/) { $notices++; } 
                    280: };
                    281: close (DFH);
                    282: print $fh "</pre>";
                    283: &errout($fh);
1.5       harris41  284: 
                    285: 
1.11      www       286: # ---------------------------------------------------------------------- lonsql
1.22    ! harris41  287: 
        !           288: my $restartflag=1;
1.18      harris41  289: if ($perlvar{'lonRole'} eq "library") {
1.5       harris41  290: 
1.11      www       291:     print $fh '<hr><a name="lonsql"><h2>lonsql</h2><h3>Log</h3><pre>';
1.5       harris41  292:     
                    293:     if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
                    294: 	open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
                    295: 	while ($line=<DFH>) { 
                    296: 	    print $fh "$line";
                    297: 	    if ($line=~/INFO/) { $notices++; }
                    298: 	    if ($line=~/WARNING/) { $notices++; }
                    299: 	    if ($line=~/CRITICAL/) { $warnings++; }
                    300: 	};
                    301: 	close (DFH);
                    302:     }
                    303:     print $fh "</pre>";
                    304:     
                    305:     my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
                    306:     
                    307:     if (-e $lonsqlfile) {
                    308: 	my $lfh=IO::File->new("$lonsqlfile");
                    309: 	my $lonsqlpid=<$lfh>;
                    310: 	chomp($lonsqlpid);
                    311: 	if (kill 0 => $lonsqlpid) {
                    312: 	    print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
1.22    ! harris41  313: 	    $restartflag=0;
1.5       harris41  314: 	} else {
                    315: 	    $errors++; $errors++;
                    316: 	    print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
1.22    ! harris41  317: 	    # Intelligently handle this.
        !           318: 	    # Possibility #1: there is no process
        !           319: 	    # Solution: remove .pid file and restart
        !           320: 	    if (getpgrp($lonsqlpid)==-1) {
        !           321: 		unlink($lonsqlfile);
        !           322: 		$restartflag=1;
        !           323: 	    }
        !           324: 	    else {
        !           325: 		# Possibility #2: there is a live process that is not
        !           326: 		# responding for an unknown reason
        !           327: 		# Solution: kill parent and children processes, remove .pid
        !           328: 		# and restart
        !           329: 		`killall -9 lonsql`;
        !           330: 		unlink($lonsqlfile);
        !           331: 		$restartflag=1;
        !           332: 	    }
1.5       harris41  333: 	}
1.22    ! harris41  334: 	print $fh 
        !           335: 	    "<h3>Deciding to clean up stale .pid file and restart lonsql</h3>";
        !           336:     }
        !           337:     if ($restartflag==1) {
1.5       harris41  338: 	$errors++;
                    339: 	print $fh "<h3>lonsql not running, trying to start</h3>";
1.16      harris41  340: 	system(
                    341:  "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
1.20      harris41  342: 	sleep 120 unless $qflag;
1.5       harris41  343: 	if (-e $lonsqlfile) {
                    344: 	    print $fh "Seems like it started ...<p>";
                    345: 	    my $lfh=IO::File->new("$lonsqlfile");
                    346: 	    my $lonsqlpid=<$lfh>;
                    347: 	    chomp($lonsqlpid);
1.20      harris41  348: 	    sleep 30 unless $qflag;
1.5       harris41  349: 	    if (kill 0 => $lonsqlpid) {
                    350: 		print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
                    351: 	    } else {
                    352: 		$errors++; $errors++;
                    353: 		print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
                    354: 		print $fh "Give it one more try ...<p>";
1.16      harris41  355: 		system(
                    356:  "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
1.20      harris41  357: 		sleep 120 unless $qflag;
1.5       harris41  358: 	    }
                    359: 	} else {
                    360: 	    print $fh "Seems like that did not work!<p>";
                    361: 	    $errors++;
                    362: 	}
                    363: 	if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
                    364: 	    print $fh "<p><pre>";
                    365: 	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
                    366: 	    while ($line=<DFH>) { 
                    367: 		print $fh "$line";
                    368: 		if ($line=~/WARNING/) { $notices++; }
                    369: 		if ($line=~/CRITICAL/) { $notices++; }
                    370: 	    };
                    371: 	    close (DFH);
                    372: 	    print $fh "</pre>";
                    373: 	}
                    374:     }
                    375: 
                    376:     $fname="$perlvar{'lonDaemons'}/logs/lonsql.log";
                    377: 
                    378:     my ($dev,$ino,$mode,$nlink,
                    379: 	$uid,$gid,$rdev,$size,
                    380: 	$atime,$mtime,$ctime,
                    381: 	$blksize,$blocks)=stat($fname);
                    382: 
                    383:     if ($size>40000) {
                    384: 	print $fh "Rotating logs ...<p>";
                    385: 	rename("$fname.2","$fname.3");
                    386: 	rename("$fname.1","$fname.2");
                    387: 	rename("$fname","$fname.1");
                    388:     }
                    389: 
                    390:     &errout($fh);
                    391: }
1.1       albertel  392: # ------------------------------------------------------------------------ lond
                    393: 
                    394: print $fh '<hr><a name="lond"><h2>lond</h2><h3>Log</h3><pre>';
                    395: 
                    396: if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
1.3       www       397: open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
1.1       albertel  398: while ($line=<DFH>) { 
                    399:    print $fh "$line";
1.3       www       400:    if ($line=~/INFO/) { $notices++; }
1.4       www       401:    if ($line=~/WARNING/) { $notices++; }
                    402:    if ($line=~/CRITICAL/) { $warnings++; }
1.1       albertel  403: };
                    404: close (DFH);
                    405: }
                    406: print $fh "</pre>";
                    407: 
                    408: my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid";
                    409: 
1.22    ! harris41  410: $restartflag=1;
1.7       harris41  411: if (-e $londfile) {    
1.1       albertel  412:    my $lfh=IO::File->new("$londfile");
                    413:    my $londpid=<$lfh>;
                    414:    chomp($londpid);
                    415:    if (kill 0 => $londpid) {
                    416:       print $fh "<h3>lond at pid $londpid responding</h3>";
1.7       harris41  417:       $restartflag=0;
1.1       albertel  418:    } else {
1.8       harris41  419:       $errors++;
1.1       albertel  420:       print $fh "<h3>lond at pid $londpid not responding</h3>";
1.7       harris41  421:       # Intelligently handle this.
                    422:       # Possibility #1: there is no process
                    423:       # Solution: remove .pid file and restart
                    424:       if (getpgrp($londpid)==-1) {
                    425: 	  unlink($londfile);
                    426: 	  $restartflag=1;
                    427:       }
1.8       harris41  428:       else {
1.10      harris41  429:       # Possibility #2: there is a live process that is not responding
1.7       harris41  430:       #                 for an unknown reason
1.10      harris41  431:       # Solution: kill parent and children processes, remove .pid and restart
1.9       harris41  432: 	  `killall -9 lond`;
1.8       harris41  433: 	  unlink($londfile);
                    434: 	  $restartflag=1;
                    435:       }
                    436:       print $fh 
                    437: 	  "<h3>Deciding to clean up stale .pid file and restart lond</h3>";
1.1       albertel  438:    }
1.7       harris41  439: } 
                    440: if ($restartflag==1) {
1.1       albertel  441:    $errors++;
                    442:    print $fh "<h3>lond not running, trying to start</h3>";
1.16      harris41  443:    system(
                    444:      "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
1.20      harris41  445:    sleep 120 unless $qflag;
1.1       albertel  446:    if (-e $londfile) {
                    447:        print $fh "Seems like it started ...<p>";
                    448:        my $lfh=IO::File->new("$londfile");
                    449:        my $londpid=<$lfh>;
                    450:        chomp($londpid);
1.20      harris41  451:        sleep 30 unless $qflag;
1.1       albertel  452:        if (kill 0 => $londpid) {
                    453:           print $fh "<h3>lond at pid $londpid responding</h3>";
                    454:        } else {
                    455:           $errors++; $errors++;
                    456:           print $fh "<h3>lond at pid $londpid not responding</h3>";
                    457:           print $fh "Give it one more try ...<p>";
1.16      harris41  458: 	  system(
                    459:  "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
1.20      harris41  460:           sleep 120 unless $qflag;
1.1       albertel  461:        }
                    462:    } else {
                    463:        print $fh "Seems like that did not work!<p>";
                    464:        $errors++;
                    465:    }
1.3       www       466:    if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
                    467:     print $fh "<p><pre>";
                    468:     open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
                    469:     while ($line=<DFH>) { 
                    470:       print $fh "$line";
1.4       www       471:       if ($line=~/WARNING/) { $notices++; }
                    472:       if ($line=~/CRITICAL/) { $notices++; }
1.3       www       473:     };
                    474:     close (DFH);
                    475:     print $fh "</pre>";
                    476:    }
1.1       albertel  477: }
                    478: 
                    479: $fname="$perlvar{'lonDaemons'}/logs/lond.log";
                    480: 
                    481:                           my ($dev,$ino,$mode,$nlink,
                    482:                               $uid,$gid,$rdev,$size,
                    483:                               $atime,$mtime,$ctime,
                    484:                               $blksize,$blocks)=stat($fname);
                    485: 
                    486: if ($size>40000) {
                    487:     print $fh "Rotating logs ...<p>";
                    488:     rename("$fname.2","$fname.3");
                    489:     rename("$fname.1","$fname.2");
                    490:     rename("$fname","$fname.1");
                    491: }
                    492: 
                    493: &errout($fh);
                    494: # ------------------------------------------------------------------------ lonc
                    495: 
                    496: print $fh '<hr><a name="lonc"><h2>lonc</h2><h3>Log</h3><pre>';
                    497: 
                    498: if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
1.3       www       499: open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
1.1       albertel  500: while ($line=<DFH>) { 
                    501:    print $fh "$line";
1.3       www       502:    if ($line=~/INFO/) { $notices++; }
1.4       www       503:    if ($line=~/WARNING/) { $notices++; }
                    504:    if ($line=~/CRITICAL/) { $warnings++; }
1.1       albertel  505: };
                    506: close (DFH);
                    507: }
                    508: print $fh "</pre>";
                    509: 
                    510: my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
                    511: 
1.7       harris41  512: $restartflag=1;
1.1       albertel  513: if (-e $loncfile) {
                    514:    my $lfh=IO::File->new("$loncfile");
                    515:    my $loncpid=<$lfh>;
                    516:    chomp($loncpid);
                    517:    if (kill 0 => $loncpid) {
                    518:       print $fh "<h3>lonc at pid $loncpid responding, sending USR1</h3>";
                    519:       kill USR1 => $loncpid;
1.7       harris41  520:       $restartflag=0;
1.1       albertel  521:    } else {
1.8       harris41  522:       $errors++;
1.1       albertel  523:       print $fh "<h3>lonc at pid $loncpid not responding</h3>";
1.7       harris41  524:       # Intelligently handle this.
                    525:       # Possibility #1: there is no process
                    526:       # Solution: remove .pid file and restart
                    527:       if (getpgrp($loncpid)==-1) {
                    528: 	  unlink($loncfile);
                    529: 	  $restartflag=1;
                    530:       }
1.8       harris41  531:       else {
1.10      harris41  532:       # Possibility #2: there is a live process that is not responding
1.7       harris41  533:       #                 for an unknown reason
1.10      harris41  534:       # Solution: kill parent and children processes, remove .pid and restart
1.9       harris41  535: 	  `killall -9 lonc`;
1.8       harris41  536: 	  unlink($loncfile);
                    537: 	  $restartflag=1;
                    538:       }
                    539:       print $fh 
                    540: 	  "<h3>Deciding to clean up stale .pid file and restart lonc</h3>";
1.1       albertel  541:    }
1.7       harris41  542: } 
                    543: if ($restartflag==1) {
1.1       albertel  544:    $errors++;
                    545:    print $fh "<h3>lonc not running, trying to start</h3>";
1.16      harris41  546: 	system(
1.17      harris41  547:  "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
1.20      harris41  548:    sleep 120 unless $qflag;
1.1       albertel  549:    if (-e $loncfile) {
                    550:        print $fh "Seems like it started ...<p>";
                    551:        my $lfh=IO::File->new("$loncfile");
                    552:        my $loncpid=<$lfh>;
                    553:        chomp($loncpid);
1.20      harris41  554:        sleep 30 unless $qflag;
1.1       albertel  555:        if (kill 0 => $loncpid) {
                    556:           print $fh "<h3>lonc at pid $loncpid responding</h3>";
                    557:        } else {
                    558:           $errors++; $errors++;
                    559:           print $fh "<h3>lonc at pid $loncpid not responding</h3>";
                    560:           print $fh "Give it one more try ...<p>";
1.16      harris41  561:  	  system(
1.17      harris41  562:  "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
1.20      harris41  563:           sleep 120 unless $qflag;
1.1       albertel  564:        }
                    565:    } else {
                    566:        print $fh "Seems like that did not work!<p>";
                    567:        $errors++;
                    568:    }
1.3       www       569:    if (-e "$perlvar{'lonDaemons'}/logs/lonc.log") {
                    570:     print $fh "<p><pre>";
                    571:     open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
                    572:     while ($line=<DFH>) { 
                    573:       print $fh "$line";
1.4       www       574:       if ($line=~/WARNING/) { $notices++; }
                    575:       if ($line=~/CRITICAL/) { $notices++; }
1.3       www       576:     };
                    577:     close (DFH);
                    578:     print $fh "</pre>";
                    579:    }
1.1       albertel  580: }
                    581: 
                    582: $fname="$perlvar{'lonDaemons'}/logs/lonc.log";
                    583: 
                         # Size-based rotation of lonc.log. stat() is called only for its
                         # $size field (bytes); the other fields are not used in this section.
                         # If stat() fails, $size is undef and no rotation takes place.
                    584:                           my ($dev,$ino,$mode,$nlink,
                    585:                               $uid,$gid,$rdev,$size,
                    586:                               $atime,$mtime,$ctime,
                    587:                               $blksize,$blocks)=stat($fname);
                    588: 
                         # Above 40000 bytes, shift the generations .2 -> .3, .1 -> .2 and the
                         # live log -> .1, so at most three old copies are kept. The rename()
                         # results are not checked, so a missing older generation is skipped.
                    589: if ($size>40000) {
                    590:     print $fh "Rotating logs ...<p>";
                    591:     rename("$fname.2","$fname.3");
                    592:     rename("$fname.1","$fname.2");
                    593:     rename("$fname","$fname.1");
                    594: }
                    595: 
                    596:    
                         # errout() is defined outside this excerpt; it receives the report
                         # handle - presumably to print the running counters (TODO confirm).
                    597: &errout($fh);
                    598: # ---------------------------------------------------------------------- lonnet
                    599: 
                         # Report section "lonnet": the last 50 lines of lonnet.log and the
                         # last 10 lines of lonnet.perm.log are copied into the report inside
                         # <pre> blocks, then lonnet.log is rotated if it has grown too large.
                    600: print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
                    601: if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
                         # Read the log through a `tail` pipe; the open() result is not checked.
                    602: open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
                    603: while ($line=<DFH>) { 
                    604:     print $fh "$line";
                    605: };
                    606: close (DFH);
                    607: }
1.11      www       608: print $fh "</pre><h3>Perm Log</h3><pre>";
1.1       albertel  609: 
                         # Same treatment for the permanent log; its absence is reported
                         # in the page instead of being skipped silently.
                    610: if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
                    611:     open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
                    612: while ($line=<DFH>) { 
                    613:    print $fh "$line";
                    614: };
                    615: close (DFH);
                    616: } else { print $fh "No perm log\n" }
                    617: 
                    618: $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";
                    619: 
                         # Only $size (bytes) is used from the stat() result. This list
                         # re-declares with `my` the same names as the lonc.log rotation code.
                    620:                           my ($dev,$ino,$mode,$nlink,
                    621:                               $uid,$gid,$rdev,$size,
                    622:                               $atime,$mtime,$ctime,
                    623:                               $blksize,$blocks)=stat($fname);
                    624: 
                         # Rotate lonnet.log above 40000 bytes, keeping generations .1 to .3.
                         # Only the temp log is rotated here; lonnet.perm.log is left alone.
                    625: if ($size>40000) {
                    626:     print $fh "Rotating logs ...<p>";
                    627:     rename("$fname.2","$fname.3");
                    628:     rename("$fname.1","$fname.2");
                    629:     rename("$fname","$fname.1");
                    630: }
                    631: 
                         # Closes the <pre> opened for the perm log above.
                    632: print $fh "</pre>";
                    633: &errout($fh);
                    634: # ----------------------------------------------------------------- Connections
                    635: 
                         # Report section "Connections": send "pong" to every host listed in
                         # %hostname (filled outside this excerpt) and tabulate the answers.
                    636: print $fh '<hr><a name="connections"><h2>Connections</h2>';
                    637: 
                    638: print $fh "<table border=2>";
                    639: foreach $tryserver (keys %hostname) {
                    640: 
                         # reply() (top of file) writes the command to the UNIX socket
                         # lonSockDir/<server> and returns the first answer line, or
                         # "con_lost" when it cannot connect or gets an empty answer.
                    641:     $answer=reply("pong",$tryserver);
                         # The answer accepted as healthy is "<server>:<our lonHostID>".
                    642:     if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
                    643: 	$result="<b>ok</b>";
                    644:     } else {
                         # Any other answer is shown verbatim and counts as one warning;
                         # a lost connection counts as two.
                    645:         $result=$answer;
                    646:         $warnings++;
                    647:         if ($answer eq 'con_lost') { $warnings++; }
                    648:     }
                    649:     print $fh "<tr><td>$tryserver</td><td>$result</td></tr>\n";
                    650: 
                    651: }
                    652: print $fh "</table>";
                    653: 
                    654: &errout($fh);
                    655: # ------------------------------------------------------------ Delayed messages
                    656: 
                         # Report section "Delayed Messages": scan the permanent lonnet log
                         # for failed and still-unsent messages, then list the delayed-message
                         # directory under lonSockDir.
                    657: print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
                    658: 
                    659: print $fh '<h3>Scanning Permanent Log</h3>';
                    660: 
                         # Each record is split on ':' into time, status, server and command.
                         # Status 'F' is reported as a failure and adds one warning; 'D' raises
                         # and 'S' lowers the unsent count (presumably Delayed / Sent - confirm
                         # against the code that writes this log).
                    661: $unsend=0;
                    662: {
                    663:     my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
                             # IO::File->new returns undef when the log cannot be opened, so
                             # test the handle before reading; defined() keeps the usual
                             # read-until-EOF semantics of a plain while(<...>) loop.
                    664:     while ($dfh && defined($line=<$dfh>)) {
                    665:         ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
                    666:         if ($sdf eq 'F') { 
                    667:             $local=localtime($time);
                                     # Write to the report handle (a bare print would go to
                                     # STDOUT, not the status page) and show the readable
                                     # timestamp computed on the previous line.
                    668:             print $fh "<b>Failed: $local, $dserv, $dcmd</b><br>";
                    669:             $warnings++;
                    670:         }
                    671:         if ($sdf eq 'S') { $unsend--; }
                    672:         if ($sdf eq 'D') { $unsend++; }
                    673:     }
                    674: }
                    675: print $fh "Total unsend messages: <b>$unsend</b><p>\n";
                         # Every unsent message weighs as five warnings.
                    676: $warnings=$warnings+5*$unsend;
                    677: 
                    678: print $fh "<h3>Outgoing Buffer</h3>";
                    679: 
                         # One `ls -lF` output line per row; the open() result is not checked.
                    680: open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
                    681: while ($line=<DFH>) { 
                    682:     print $fh "$line<br>";
                    683: };
                    684: close (DFH);
                    685: 
                    686: # ------------------------------------------------------------------------- End
                         # Close the report: compute the weighted total (a notice counts 1,
                         # a warning 4, an error 100), print it, and stamp the page with the
                         # local time and the epoch value.
                    687: print $fh "<a name=errcount>\n";
                    688: $totalcount=$notices+4*$warnings+100*$errors;
                    689: &errout($fh);
                    690: print $fh "<h1>Total Error Count: $totalcount</h1>";
                    691: $now=time;
                    692: $date=localtime($now);
                    693: print $fh "<hr>$date ($now)</body></html>\n";
                    694: 
                    695: }
                    696: 
                    697: rename ("$statusdir/newstatus.html","$statusdir/index.html");
                    698: 
                         # The rename above replaces index.html with newstatus.html (presumably
                         # the report just written - its open is outside this excerpt); the
                         # result of rename() is not checked.
                         #
                         # When the weighted total exceeds 200, mail the page as text/html to
                         # the addresses in lonAdmEMail and lonSysEMail via `metasend`. The
                         # mail is suppressed when $qflag is set, i.e. when the script was
                         # started with 'quick' as its first argument.
                         # NOTE(review): the command is run as one shell string, so a quote
                         # or space in the configured values would change how it is parsed.
                    699: if ($totalcount>200) {
                    700:    $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
                    701:    $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; 
                    702:    system(
1.21      harris41  703:  "metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html")
                    704:     unless $qflag;
1.1       albertel  705: }
                    706: 1;
                    707: 
                    708: 
                    709: 
                    710: 
                    711: 
                    712: 
                    713: 
                    714: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>