--- loncom/loncron 2011/11/08 22:25:25 1.94 +++ loncom/loncron 2011/11/14 17:27:34 1.95 @@ -2,7 +2,7 @@ # Housekeeping program, started by cron, loncontrol and loncron.pl # -# $Id: loncron,v 1.94 2011/11/08 22:25:25 raeburn Exp $ +# $Id: loncron,v 1.95 2011/11/14 17:27:34 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -627,7 +627,9 @@ sub check_delayed_msg { } &log($fh,"

Total unsend messages: $unsend

\n"); - $warnings=$warnings+5*$unsend; + if ($unsend > 0) { + $warnings=$warnings+5*$unsend; + } if ($unsend) { $simplestatus{'unsend'}=$unsend; } &log($fh,"

Outgoing Buffer

\n
");
@@ -641,20 +643,27 @@ sub check_delayed_msg {
     }
     &log($fh,"
\n"); close (DFH); + my %hostname = &Apache::lonnet::all_hostnames(); + my $numhosts = scalar(keys(%hostname)); # pong to all servers that have delayed messages # this will trigger a reverse connection, which should flush the buffers - foreach my $tryserver (keys %servers) { - my $answer; - eval { - local $SIG{ ALRM } = sub { die "TIMEOUT" }; - alarm(20); - $answer = &Apache::lonnet::reply("pong",$tryserver); - alarm(0); - }; - if ($@ && $@ =~ m/TIMEOUT/) { - print "time out while contacting: $tryserver for pong\n"; + foreach my $tryserver (sort(keys(%servers))) { + if ($hostname{$tryserver} || !$numhosts) { + my $answer; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(20); + $answer = &Apache::lonnet::reply("pong",$tryserver); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + &log($fh,"Attempted pong to $tryserver timed out
"); + print "time out while contacting: $tryserver for pong\n"; + } else { + &log($fh,"Pong to $tryserver: $answer
"); + } } else { - &log($fh,"Pong to $tryserver: $answer
"); + &log($fh,"$tryserver has delayed messages, but is not part of the cluster -- skipping 'Pong'.
"); } } }