--- loncom/loncron 2007/04/03 00:47:26 1.71 +++ loncom/loncron 2013/02/02 14:42:01 1.97 @@ -2,7 +2,7 @@ # Housekeeping program, started by cron, loncontrol and loncron.pl # -# $Id: loncron,v 1.71 2007/04/03 00:47:26 albertel Exp $ +# $Id: loncron,v 1.97 2013/02/02 14:42:01 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -32,6 +32,10 @@ use strict; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; +use LONCAPA::Checksumming; +use LONCAPA; +use Apache::lonnet; +use Apache::loncommon; use IO::File; use IO::Socket; @@ -43,21 +47,6 @@ use vars qw (%perlvar %simplestatus $err my $statusdir="/home/httpd/html/lon-status"; -# -------------------------------------------------- Non-critical communication -sub reply { - my ($cmd,$server,$hostname)=@_; - my $peerfile="$perlvar{'lonSockDir'}/".$hostname->{$server}; - my $client=IO::Socket::UNIX->new(Peer =>"$peerfile", - Type => SOCK_STREAM, - Timeout => 10) - or return "con_lost"; - print $client "sethost:$server:$cmd\n"; - my $answer=<$client>; - chomp($answer); - if (!$answer) { $answer="con_lost"; } - return $answer; -} - # --------------------------------------------------------- Output error status sub log { @@ -76,6 +65,17 @@ sub errout { ENDERROUT } +sub rotate_logfile { + my ($file,$fh,$description) = @_; + my $size=(stat($file))[7]; + if ($size>40000) { + &log($fh,"

Rotating $description ...

"); + rename("$file.2","$file.3"); + rename("$file.1","$file.2"); + rename("$file","$file.1"); + } +} + sub start_daemon { my ($fh,$daemon,$pidfile,$args) = @_; my $progname=$daemon; @@ -83,12 +83,9 @@ sub start_daemon { $progname='loncnew'; } my $error_fname="$perlvar{'lonDaemons'}/logs/${daemon}_errors"; - my $size=(stat($error_fname))[7]; - if ($size>40000) { - &log($fh,"

Rotating error logs ...

"); - rename("$error_fname.2","$error_fname.3"); - rename("$error_fname.1","$error_fname.2"); - rename("$error_fname","$error_fname.1"); + &rotate_logfile($error_fname,$fh,'error logs'); + if ($daemon eq 'lonc') { + &clean_sockets($fh); } system("$perlvar{'lonDaemons'}/$progname 2>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); sleep 1; @@ -207,18 +204,7 @@ sub checkon_daemon { } my $fname="$perlvar{'lonDaemons'}/logs/$daemon.log"; - - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - - if ($size>$maxsize) { - &log($fh,"

Rotating logs ...

"); - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); - } + &rotate_logfile($fname,$fh,'logs'); &errout($fh); return $result; @@ -285,19 +271,13 @@ sub log_machine_info { &log($fh,"

distprobe

"); &log($fh,"
");
-    open(DSH,"$perlvar{'lonDaemons'}/distprobe |");
-    while (my $line=) { 
-	&log($fh,&encode_entities($line,'<>&"')); 
-	$psproc++;
-    }
-    close(DSH);
+    &get_distro($perlvar{'lonDaemons'},$fh);
     &log($fh,"
"); &errout($fh); } sub start_logging { - my ($hostdom,$hostrole,$hostname,$spareid)=@_; my $fh=IO::File->new(">$statusdir/newstatus.html"); my %simplestatus=(); my $now=time; @@ -322,7 +302,6 @@ sub start_logging {
  • lonsql
  • lond
  • lonc
  • -
  • lonhttpd
  • lonnet
  • Connections
  • Delayed Messages
  • @@ -340,17 +319,24 @@ ENDHEADERS &encode_entities($perlvar{$varname},'<>&"')."\n"); } &log($fh,"

    Hosts

    "); - foreach my $id (sort(keys(%{$hostname}))) { + my %hostname = &Apache::lonnet::all_hostnames(); + foreach my $id (sort(keys(%hostname))) { + my $role = (&Apache::lonnet::is_library($id) ? 'library' + : 'access'); &log($fh, - "\n"); - } - &log($fh,"
    $id".$hostdom->{$id}. - "".$hostrole->{$id}. - "".$hostname->{$id}."

    Spare Hosts

      "); - foreach my $id (sort(keys(%{$spareid}))) { - &log($fh,"
    1. $id\n
    2. "); + "$id".&Apache::lonnet::host_domain($id). + "".$role. + "".&Apache::lonnet::hostname($id)."\n"); + } + &log($fh,"

      Spare Hosts

    \n"); + &log($fh,"\n"); return $fh; } @@ -358,33 +344,127 @@ ENDHEADERS sub clean_tmp { my ($fh)=@_; &log($fh,'

    Temporary Files

    '); - my $cleaned=0; - my $old=0; - while (my $fname=<$perlvar{'lonDaemons'}/tmp/*>) { - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - my $now=time; - my $since=$now-$mtime; - if ($since>$perlvar{'lonExpire'}) { - my $line=''; - if (open(PROBE,$fname)) { - $line=; - close(PROBE); - } - unless ($line=~/^CHECKOUTTOKEN\&/) { - $cleaned++; - unlink("$fname"); - } else { - if ($since>365*$perlvar{'lonExpire'}) { - $cleaned++; - unlink("$fname"); - } else { $old++; } - } - } + my ($cleaned,$old,$removed) = (0,0,0); + my %errors = ( + dir => [], + file => [], + failopen => [], + ); + my %error_titles = ( + dir => 'failed to remove empty directory:', + file => 'failed to unlike stale file', + failopen => 'failed to open file or directory' + ); + ($cleaned,$old,$removed) = &recursive_clean_tmp('',$cleaned,$old,$removed,\%errors); + &log($fh,"Cleaned up: ".$cleaned." files; removed: $removed empty directories; (found: $old old checkout tokens)"); + foreach my $key (sort(keys(%errors))) { + if (ref($errors{$key}) eq 'ARRAY') { + if (@{$errors{$key}} > 0) { + &log($fh,"Error during cleanup ($error_titles{$key}):
    '); + } + } + } +} + +sub recursive_clean_tmp { + my ($subdir,$cleaned,$old,$removed,$errors) = @_; + my $base = "$perlvar{'lonDaemons'}/tmp"; + my $path = $base; + next if ($subdir =~ m{\.\./}); + next unless (ref($errors) eq 'HASH'); + unless ($subdir eq '') { + $path .= '/'.$subdir; + } + if (opendir(my $dh,"$path")) { + while (my $file = readdir($dh)) { + next if ($file =~ /^\.\.?$/); + my $fname = "$path/$file"; + if (-d $fname) { + my $innerdir; + if ($subdir eq '') { + $innerdir = $file; + } else { + $innerdir = $subdir.'/'.$file; + } + ($cleaned,$old,$removed) = + &recursive_clean_tmp($innerdir,$cleaned,$old,$removed,$errors); + my @doms = &Apache::lonnet::current_machine_domains(); + + if (open(my $dirhandle,$fname)) { + unless (($innerdir eq 'helprequests') || + (($innerdir =~ /^addcourse/) && ($innerdir !~ m{/\d+$}))) { + my @contents = grep {!/^\.\.?$/} readdir($dirhandle); + join('&&',@contents)."\n"; + if (scalar(grep {!/^\.\.?$/} readdir($dirhandle)) == 0) { + closedir($dirhandle); + if ($fname =~ m{^\Q$perlvar{'lonDaemons'}\E/tmp/}) { + if (rmdir($fname)) { + $removed ++; + } elsif (ref($errors->{dir}) eq 'ARRAY') { + push(@{$errors->{dir}},$fname); + } + } + } + } else { + closedir($dirhandle); + } + } + } else { + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + my $now=time; + my $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + if ($subdir eq '') { + my $line=''; + if ($fname =~ /\.db$/) { + if (unlink($fname)) { + $cleaned++; + } elsif (ref($errors->{file}) eq 'ARRAY') { + push(@{$errors->{file}},$fname); + } + } elsif (open(PROBE,$fname)) { + my $line=''; + $line=; + close(PROBE); + if ($line=~/^CHECKOUTTOKEN\&/) { + if ($since>365*$perlvar{'lonExpire'}) { + if (unlink($fname)) { + $cleaned++; + } elsif (ref($errors->{file}) eq 'ARRAY') { + push(@{$errors->{file}},$fname); + } + } else { + $old++; + } + } else { + if (unlink($fname)) { + $cleaned++; + } elsif (ref($errors->{file}) eq 'ARRAY') { + push(@{$errors->{file}},$fname); + } + } + } elsif (ref($errors->{failopen}) eq 'ARRAY') { + push(@{$errors->{failopen}},$fname); + } + } else { + if (unlink($fname)) { + $cleaned++; + } elsif (ref($errors->{file}) eq 'ARRAY') { + push(@{$errors->{file}},$fname); + } + } + } + } + } + closedir($dh); + } elsif (ref($errors->{failopen}) eq 'ARRAY') { + push(@{$errors->{failopen}},$path); } - &log($fh,"Cleaned up ".$cleaned." files (".$old." old checkout tokens)."); + return ($cleaned,$old,$removed); } # ------------------------------------------------------------ clean out lonIDs @@ -412,25 +492,32 @@ sub clean_lonIDs { &log($fh,"

    $active open session(s)

    "); } +# ----------------------------------------------------------- clean out sockets +sub clean_sockets { + my ($fh)=@_; + my $cleaned=0; + opendir(SOCKETS,$perlvar{'lonSockDir'}); + while (my $fname=readdir(SOCKETS)) { + next if (-d $fname + || $fname=~/(mysqlsock|maximasock|rsock|\Q$perlvar{'lonSockDir'}\E)/); + $cleaned++; + &log($fh,"Unlinking $fname
    "); + unlink("/home/httpd/sockets/$fname"); + } + &log($fh,"

    Cleaned up ".$cleaned." stale sockets.

    "); +} + # ----------------------------------------------------------------------- httpd sub check_httpd_logs { my ($fh)=@_; - &log($fh,'

    httpd

    Access Log

    ');
    -    
    -    open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
    -    while (my $line=) { &log($fh,&encode_entities($line,'<>&"')) };
    -    close (DFH);
    -	
    -    &log($fh,"

    Error Log

    ");
    -	
    -    open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
    -    while (my $line=) { 
    -	&log($fh,"$line");
    -	if ($line=~/\[error\]/) { $notices++; } 
    +    if (open(PIPE,"./lchttpdlogs|")) {
    +        while (my $line=) {
    +            &log($fh,$line);
    +            if ($line=~/\[error\]/) { $notices++; }
    +        }
    +        close(PIPE);
         }
    -    close (DFH);
    -    &log($fh,"
    "); &errout($fh); } @@ -458,34 +545,39 @@ sub rotate_lonnet_logs { } else { &log($fh,"No perm log\n") } my $fname="$perlvar{'lonDaemons'}/logs/lonnet.log"; - - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - - if ($size>40000) { - &log($fh,"

    Rotating logs ...

    "); - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); - } + &rotate_logfile($fname,$fh,'lonnet log'); &log($fh,""); &errout($fh); } +sub rotate_other_logs { + my ($fh) = @_; + my %logs = ( + autoenroll => 'Auto Enroll log', + autocreate => 'Create Course log', + searchcat => 'Search Cataloguing log', + autoupdate => 'Auto Update log', + refreshcourseids_db => 'Refresh CourseIDs db log', + ); + foreach my $item (keys(%logs)) { + my $fname=$perlvar{'lonDaemons'}.'/logs/'.$item.'.log'; + &rotate_logfile($fname,$fh,$logs{$item}); + } +} + # ----------------------------------------------------------------- Connections sub test_connections { - my ($fh,$hostname)=@_; + my ($fh)=@_; &log($fh,'

    Connections

    '); print "testing connections\n"; &log($fh,""); my ($good,$bad)=(0,0); - foreach my $tryserver (sort(keys(%{$hostname}))) { + my %hostname = &Apache::lonnet::all_hostnames(); + foreach my $tryserver (sort(keys(%hostname))) { print("."); my $result; - my $answer=&reply("ping",$tryserver,$hostname); + my $answer=&Apache::lonnet::reply("ping",$tryserver); if ($answer eq "$tryserver:$perlvar{'lonHostID'}") { $result="ok"; $good++; @@ -510,7 +602,7 @@ sub test_connections { # ------------------------------------------------------------ Delayed messages sub check_delayed_msg { - my ($fh,$hostname)=@_; + my ($fh)=@_; &log($fh,'

    Delayed Messages

    '); print "checking buffers\n"; @@ -531,7 +623,9 @@ sub check_delayed_msg { } &log($fh,"

    Total unsend messages: $unsend

    \n"); - $warnings=$warnings+5*$unsend; + if ($unsend > 0) { + $warnings=$warnings+5*$unsend; + } if ($unsend) { $simplestatus{'unsend'}=$unsend; } &log($fh,"

    Outgoing Buffer

    \n
    ");
    @@ -545,11 +639,28 @@ sub check_delayed_msg {
         }
         &log($fh,"
    \n"); close (DFH); + my %hostname = &Apache::lonnet::all_hostnames(); + my $numhosts = scalar(keys(%hostname)); # pong to all servers that have delayed messages # this will trigger a reverse connection, which should flush the buffers - foreach my $tryserver (keys %servers) { - my $answer=&reply("pong",$tryserver,$hostname); - &log($fh,"Pong to $tryserver: $answer
    "); + foreach my $tryserver (sort(keys(%servers))) { + if ($hostname{$tryserver} || !$numhosts) { + my $answer; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(20); + $answer = &Apache::lonnet::reply("pong",$tryserver); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + &log($fh,"Attempted pong to $tryserver timed out
    "); + print "time out while contacting: $tryserver for pong\n"; + } else { + &log($fh,"Pong to $tryserver: $answer
    "); + } + } else { + &log($fh,"$tryserver has delayed messages, but is not part of the cluster -- skipping 'Pong'.
    "); + } } } @@ -572,7 +683,7 @@ sub finish_logging { } sub log_simplestatus { - rename ("$statusdir/newstatus.html","$statusdir/index.html"); + rename("$statusdir/newstatus.html","$statusdir/index.html"); my $sfh=IO::File->new(">$statusdir/loncron_simple.txt"); foreach (keys %simplestatus) { @@ -582,9 +693,90 @@ sub log_simplestatus { $sfh->close(); } +sub write_loncaparevs { + print "Retrieving LON-CAPA version information\n"; + if (open(my $fh,">$perlvar{'lonTabDir'}/loncaparevs.tab")) { + my %hostname = &Apache::lonnet::all_hostnames(); + foreach my $id (sort(keys(%hostname))) { + if ($id ne '') { + my $loncaparev; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(10); + $loncaparev = + &Apache::lonnet::get_server_loncaparev('',$id,1,'loncron'); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + print "time out while contacting lonHost: $id for version\n"; + } + if ($loncaparev =~ /^[\w.\-]+$/) { + print $fh $id.':'.$loncaparev."\n"; + } + } + } + close($fh); + } + return; +} + +sub write_serverhomeIDs { + print "Retrieving LON-CAPA lonHostID information\n"; + if (open(my $fh,">$perlvar{'lonTabDir'}/serverhomeIDs.tab")) { + my %name_to_host = &Apache::lonnet::all_names(); + foreach my $name (sort(keys(%name_to_host))) { + if ($name ne '') { + if (ref($name_to_host{$name}) eq 'ARRAY') { + my $serverhomeID; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(10); + $serverhomeID = + &Apache::lonnet::get_server_homeID($name,1,'loncron'); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + print "Time out while contacting server: $name\n"; + } + if ($serverhomeID ne '') { + print $fh $name.':'.$serverhomeID."\n"; + } else { + print $fh $name.':'.$name_to_host{$name}->[0]."\n"; + } + } + } + } + close($fh); + } + return; +} + +sub write_checksums { + my $distro = &get_distro($perlvar{'lonDaemons'}); + if ($distro) { + print "Retrieving file version and checksumming.\n"; + my $numchksums = 0; + my ($chksumsref,$versionsref) = + &LONCAPA::Checksumming::get_checksums($distro,$perlvar{'lonDaemons'}, + $perlvar{'lonLib'}, + $perlvar{'lonIncludes'}, + $perlvar{'lonTabDir'}); + if (ref($chksumsref) eq 'HASH') { + $numchksums = scalar(keys(%{$chksumsref})); + } + print "File version retrieved and checksumming completed for $numchksums files.\n"; + } else { + print "File version retrieval and checksumming skipped - could not determine Linux distro.\n"; + } + return; +} + sub send_mail { print "sending mail\n"; - my $emailto="$perlvar{'lonAdmEMail'}"; + my $defdom = $perlvar{'lonDefDomain'}; + my $origmail = $perlvar{'lonAdmEMail'}; + my $emailto = &Apache::loncommon::build_recipient_list(undef, + 'lonstatusmail',$defdom,$origmail); if ($totalcount>2500) { $emailto.=",$perlvar{'lonSysEMail'}"; } @@ -596,6 +788,21 @@ sub send_mail { } } +sub get_distro { + my ($dir,$fh) = @_; + my $distro; + if (open(my $disth,"$dir/distprobe |")) { + while (my $line=<$disth>) { + if ($fh) { + &log($fh,&encode_entities($line,'<>&"')); + } + $distro .= $line; + } + close($disth); + } + return $distro; +} + sub usage { print(<new("$perlvar{'lonTabDir'}/hosts.tab"); - - my (%hostname,%hostdom,%hostrole,%spareid); - while (my $configline=<$config>) { - next if ($configline =~ /^(\#|\s*\$)/); - my ($id,$domain,$role,$name)=split(/:/,$configline); - if ($id && $domain && $role && $name) { - $name=~s/\s//g; - $hostname{$id}=$name; - $hostdom{$id}=$domain; - $hostrole{$id}=$role; - } +# -------------------------------------------- Force reload of host information + &Apache::lonnet::load_hosts_tab(1); + &Apache::lonnet::load_domain_tab(1); + &Apache::lonnet::get_iphost(1); + +# ----------------------------------------- Force firewall update for lond port + + if ((!$justcheckdaemons) && (!$justreload)) { + my $now = time; + my $tmpfile = $perlvar{'lonDaemons'}.'/tmp/lciptables_iphost_'. + $now.$$.int(rand(10000)); + if (open(my $fh,">$tmpfile")) { + my %iphosts = &Apache::lonnet::get_iphost(); + foreach my $key (keys(%iphosts)) { + print $fh "$key\n"; + } + close($fh); + if (&LONCAPA::try_to_lock('/tmp/lock_lciptables')) { + my $execpath = $perlvar{'lonDaemons'}.'/lciptables'; + system("$execpath $tmpfile"); + unlink('/tmp/lock_lciptables'); # Remove the lock file. + } + unlink($tmpfile); + } } - undef $config; - -# ------------------------------------------------------ Read spare server file - $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab"); - - while (my $configline=<$config>) { - chomp($configline); - if (($configline) && ($configline ne $perlvar{'lonHostID'})) { - $spareid{$configline}=1; - } - } - undef $config; # ---------------------------------------------------------------- Start report @@ -698,36 +904,39 @@ sub main () { my $fh; if (!$justcheckdaemons && !$justcheckconnections && !$justreload) { - $fh=&start_logging(\%hostdom,\%hostrole,\%hostname,\%spareid); + $fh=&start_logging(); &log_machine_info($fh); &clean_tmp($fh); &clean_lonIDs($fh); &check_httpd_logs($fh); &rotate_lonnet_logs($fh); + &rotate_other_logs($fh); } if (!$justcheckconnections && !$justreload) { + &checkon_daemon($fh,'lonmemcached',40000); &checkon_daemon($fh,'lonsql',200000); if ( &checkon_daemon($fh,'lond',40000,'USR1') eq 'running') { &checkon_daemon($fh,'lond',40000,'USR2'); } &checkon_daemon($fh,'lonc',40000,'USR1'); - &checkon_daemon($fh,'lonhttpd',40000); - &checkon_daemon($fh,'lonmemcached',40000); &checkon_daemon($fh,'lonmaxima',40000); + &checkon_daemon($fh,'lonr',40000); } if ($justreload) { &checkon_daemon($fh,'lond',40000,'USR2'); &checkon_daemon($fh,'lonc',40000,'USR2'); } if ($justcheckconnections) { - &test_connections($fh,\%hostname); + &test_connections($fh); } if (!$justcheckdaemons && !$justcheckconnections && !$justreload) { - &check_delayed_msg($fh,\%hostname); + &check_delayed_msg($fh); &finish_logging($fh); &log_simplestatus(); - + &write_loncaparevs(); + &write_serverhomeIDs(); + &write_checksums(); if ($totalcount>200 && !$noemail) { &send_mail(); } } } @@ -735,10 +944,3 @@ sub main () { &main(); 1; - - - - - - - 500 Internal Server Error

    Internal Server Error

    The server encountered an internal error or misconfiguration and was unable to complete your request.

    Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

    More information about this error may be available in the server error log.