--- loncom/cgi/clusterstatus.pl 2003/07/31 19:18:16 1.11 +++ loncom/cgi/clusterstatus.pl 2005/04/13 18:30:46 1.24 @@ -1,12 +1,34 @@ #!/usr/bin/perl $|=1; -# The LearningOnline Network with CAPA -# Cluster Status +# Generates a html page showing various sataus reports about the cluster +# $Id: clusterstatus.pl,v 1.24 2005/04/13 18:30:46 albertel Exp $ +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ # -# $Id: clusterstatus.pl,v 1.11 2003/07/31 19:18:16 www Exp $ use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; +use strict; use LWP::UserAgent(); use HTTP::Headers; @@ -19,6 +41,32 @@ my %connectionstatus=(); my %perlvar=(); my $mode; +my $concount=0; +my $fromcache; + +my %domaindescription = (); +my %domain_auth_def = (); +my %domain_auth_arg_def = (); +my %domain_lang_def=(); +my %domain_city=(); +my %domain_longi=(); +my %domain_lati=(); + +my %hostname=(); +my %hostip=(); +my %hostdom=(); +my %hostrole=(); +my %libserv=(); + +my $maxusers=0; +my $maxload=0; +my $totalusers=0; + +my %FORM=(); + +my $stat_total=0; +my $stat_notyet=0; +my $stat_fromcache=0; sub select_form { my ($def,$name,%hash) = @_; @@ -47,13 +95,26 @@ sub hidden { sub request { my ($local,$url,$cachetime)=@_; + $cachetime*=(0.5+rand); my $key=&key($local,$url); my $reply=''; + $stat_total++; +# if fromcache flag is set, only return cached values + if ($fromcache) { + if ($FORM{$key.'_time'}) { + return $FORM{$key}; + $stat_fromcache++; + } else { + return 'not_yet'; + $stat_notyet++; + } + } +# normal mode, refresh when expired or not yet present if ($FORM{$key.'_time'}) { if ((time-$FORM{$key.'_time'})<$cachetime) { $reply=$FORM{$key}; &hidden($key.'_time',$FORM{$key.'_time'}); - &hidden($key.'_fromcache',1); + $stat_fromcache++; } } unless ($reply) { @@ -61,7 +122,7 @@ sub request { $reply='local_unknown'; } else { - my $ua=new LWP::UserAgent(timeout => 20); + my $ua=new LWP::UserAgent(timeout => 10); my $request=new HTTP::Request('GET', "http://".$hostname{$local}.$url); @@ -91,10 +152,17 @@ sub connected { unless ($hostname{$remote}) { return 'remote_unknown'; } my $url='/cgi-bin/ping.pl?'.$remote; # -# Slowly phase this in: if not cached, only do 10 percent of the cases +# Slowly phase this in: if not cached, only do 5 percent of the cases, +# but always do the first five. # unless ($FORM{&key($local,$url)}) { - unless (rand>0.9) { return 'not_yet'; } + unless (($concount<=5) || (rand>0.95)) { + $stat_total++; + $stat_notyet++; + return 'not_yet'; + } else { + $concount++; + } } # # Actually do the query @@ -169,6 +237,40 @@ sub server { print &otherwindow($local,'/server-status','Server Status'); } +sub announcement { + my $local=shift; + print &otherwindow($local,'/announcement.txt','Announcement'); +} + +sub takeonline { + my $local=shift; + print &otherwindow($local,'/cgi-bin/takeonline.pl','Take online'); +} + +sub takeoffline { + my $local=shift; + print &otherwindow($local,'/cgi-bin/takeoffline.pl','Take offline'); +} + +sub reroute { + my ($local,$remote)=@_; + print &otherwindow($local,'/cgi-bin/takeoffline.pl?'. + $hostname{$remote}.'&'.$hostdom{$local} + ,$remote)."\n"; +} + +sub allreroutes { + my $local=shift; + &takeoffline($local); + print ' Reroute to: '; + foreach my $remote (sort keys %hostname) { + unless ($local eq $remote) { + &reroute($local,$remote); + } + } + print ''; +} + # ========================================================= Produce a green bar sub bar { my $parm=shift; @@ -190,15 +292,26 @@ sub serverstatus { $local $hostdom{$local} ($hostname{$local}; $hostrole{$local})
$domaindescription{$hostdom{$local}} +$domain_city{$hostdom{$local}}
ENDHEADER &login($local);&server($local);&users($local);&versions($local); + &announcement($local); &loncron($local);&lond($local);&lonc($local);&runloncron($local); print ""; if ($trouble) { print ("$trouble"); } print ""; +# re-routing + if ($host{$local.'_reroute'}) { + print "
Reroute: ".$host{$local.'_reroute'}; + &takeonline($local); + } +# version + if ($host{$local.'_version'}) { + print "
Version: ".$host{$local.'_version'} + } # load if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) { print "
Load: ".$host{$local.'_load'} @@ -229,6 +342,8 @@ ENDHEADER if ($host{$local.'_errors'}) { print "
loncron errors: ".$host{$local.'_errors'}; } + print ""; + &allreroutes($local); print "
"; } @@ -249,125 +364,23 @@ sub doomedness { return sort { $alldoomed{$b} <=> $alldoomed{$a} } @allhosts; } -# ====================================================================== Status -sub statuslist { - my ($local,$what)=@_; - print -"\n"; -} - -# -# Main program -# -# ========================================================= Get form parameters -my $buffer; - -read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); -my @pairs=split(/&/,$buffer); -my $pair; my $name; my $value; -undef %FORM; -%FORM=(); -foreach $pair (@pairs) { - ($name,$value) = split(/=/,$pair); - $value =~ tr/+/ /; - $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; - $FORM{$name}=$value; -} - -$buffer=$ENV{'QUERY_STRING'}; -@pairs=split(/&/,$buffer); -foreach $pair (@pairs) { - ($name,$value) = split(/=/,$pair); - $value =~ tr/+/ /; - $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; - $FORM{$name}=$value; -} - -# ====================================================== Determine refresh rate - -my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:120); -if ($refresh<30) { $refresh=30; } -my $starttime=time; - -# ============================================================== Determine mode - -my %modes=('trouble' => 'Trouble', - 'users_doomed' => 'Doomed: Users', - 'loncron_doomed' => 'Doomed: General (loncron)', - 'mysql_doomed' => 'Doomed: Database (mysql)', - 'notconnected_doomed' => 'Doomed: Connections', - 'checkrpms_doomed' => 'Doomed: RPMs', - 'load_doomed' => 'Doomed: Load', - 'unresponsive_doomed' => 'Doomed: Status could not be determined', - 'users' => 'User Report', - 'load' => 'Load Report', - 'connections' => 'Connections Matrix'); - -$mode=$FORM{'mode'}; -unless ($modes{$mode}) { $mode='trouble'; } -# ================================================================ Send Headers -print "Content-type: text/html\n\n". - "\n"; -# -------------------- Read loncapa.conf (and by default, loncapa_apache.conf). -my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); -%perlvar=%{$perlvarref}; -undef $perlvarref; # remove since sensitive and not needed -delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed -delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed - -# ------------------------------------------------------------- Read hosts file -{ - my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); - - $total=0; - while (my $configline=<$config>) { - $configline=~s/#.*$//; - unless ($configline=~/\w/) { next; } - my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); - $hostname{$id}=$name; - $hostdom{$id}=$domain; - $hostrole{$id}=$role; - $hostip{$id}=$ip; - $total++; - if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) { - $libserv{$id}=$name; - } - } -} -# ------------------------------------------------------------ Read domain file -{ - my $fh=IO::File->new($perlvar{'lonTabDir'}.'/domain.tab'); - %domaindescription = (); - %domain_auth_def = (); - %domain_auth_arg_def = (); - if ($fh) { - while (<$fh>) { - next if (/^(\#|\s*$)/); - chomp; - my ($domain, $domain_description, $def_auth, $def_auth_arg) - = split(/:/,$_,4); - $domain_auth_def{$domain}=$def_auth; - $domain_auth_arg_def{$domain}=$def_auth_arg; - $domaindescription{$domain}=$domain_description; - } - } +sub resetvars { + $maxusers=0; + $maxload=0; + $totalusers=0; + $stat_total=0; + $stat_notyet=0; + $stat_fromcache=0; + $concount=0; + undef %host; + %host=(); } -print "

LON-CAPA Cluster Status ".localtime()."

"; -print "
\n". -"
". -"
\n";; -print "
\n"; -print 'Choose next report: '.&select_form($mode,'mode',%modes).'
'; -&hidden('refresh',$refresh); - +sub mainloop { + &resetvars(); # ==================================================== Main Loop over all Hosts -my $maxusers=0; -my $maxload=0; -my $totalusers=0; - -foreach $local (sort keys %hostname) { +foreach my $local (sort keys %hostname) { $host{$local.'_unresponsive_doomed'}=0; # -- Check general status &statuslist($local,'General'); @@ -386,6 +399,15 @@ foreach $local (sort keys %hostname) { $host{$local.'_errors'}=$loncron{'errors'}; } } +# -- Check version + &statuslist($local,'Version'); + my $version=&request($local,'/lon-status/version.txt',7200); + if ($version eq 'local_error') { + $host{$local.'_version'}='Could not determine.'; + $host{$local.'_unresponsive_doomed'}++; + } else { + $host{$local.'_version'}=$version; + } # -- Check user status &statuslist($local,'Users'); my %userstatus=&replyhash($local,'/cgi-bin/userstatus.pl?simple',600); @@ -407,6 +429,19 @@ foreach $local (sort keys %hostname) { } $host{$local.'_load'}=$userstatus{'loadavg'}; } +# -- Check reroute status + &statuslist($local,'Reroute'); + my %reroute=&replyhash($local,'/lon-status/reroute.txt',1800); + if ($reroute{'status'} eq 'rerouting') { + if ($reroute{'server'}) { + $host{$local.'_reroute'}= + 'Rerouting to '.$reroute{'server'}. + ', domain: '.$reroute{'domain'}. + ' (since '.localtime($reroute{'time'}).')'; + } else { + $host{$local.'_reroute'}='offline'; + } + } # -- Check mysql status &statuslist($local,'Database'); my %mysql=&replyhash($local,'/lon-status/mysql.txt',3600); @@ -452,7 +487,7 @@ foreach $local (sort keys %hostname) { &statuslist($local,'Connections'); $host{$local.'_notconnected'}=''; $host{$local.'_notconnected_doomed'}=0; - foreach $remote (sort keys %hostname) { + foreach my $remote (sort keys %hostname) { my $status=&connected($local,$remote); $connectionstatus{$local.'_TO_'.$remote}=$status; unless (($status eq 'ok') || ($status eq 'not_yet')) { @@ -460,9 +495,12 @@ foreach $local (sort keys %hostname) { $host{$local.'_notconnected_doomed'}++; } } -# =============================================================== End Mail Loop +# =============================================================== End Main Loop } -&statuslist('Done.'); + +} + +sub reports { # ====================================================================== Output if ($mode=~/\_doomed$/) { # Output by doomedness @@ -474,21 +512,35 @@ foreach $local (sort keys %hostname) { "". ""; foreach my $remote (sort keys %hostname) { - print ''; + print ''; } print "\n"; # connection matrix foreach my $local (sort keys %hostname) { - print ''; + print ''; foreach my $remote (sort keys %hostname) { if ($connectionstatus{$local.'_TO_'.$remote} eq 'not_yet') { - print ''; + my $cellcolor='#FFFFFF'; + if ($local eq $remote) { $cellcolor='#DDDDDD'; } + print ''; } elsif ($connectionstatus{$local.'_TO_'.$remote} eq 'ok') { + my $cellcolor='#BBDDBB'; + if ($local eq $remote) { $cellcolor='#99DD99'; } print -''; +''; } else { + my $cellcolor='#DDCCAA'; + if ($connectionstatus{$local.'_TO_'.$remote} eq 'local_error') { + if ($local eq $remote) { + $cellcolor='#DD88AA'; + } else { + $cellcolor='#DDAACC'; + } + } else { + if ($local eq $remote) { $cellcolor='#DDBB77'; } + } print - ''; @@ -504,12 +556,14 @@ foreach $local (sort keys %hostname) { print "

Total active user(s): $totalusers

". "
 '.$remote.''.$remote.'
'.$local.'
'.$local.' not yet testedokok'. + ''. $connectionstatus{$local.'_TO_'.$remote}.'
'; &lonc($local); &lond($remote); print '
"; - foreach $local (sort keys %hostname) { + foreach my $local (sort keys %hostname) { if (defined($host{$local.'_users'})) { print -'
'.$local. - ''; - &users(); +'
'.$local. + '
'. + $domaindescription{$hostdom{$local}}. + '
'; + &users($local); print ''. $host{$local.'_users'}.'"; - foreach $local (sort keys %hostname) { + foreach my $local (sort keys %hostname) { if (defined($host{$local.'_load_doomed'})) { print -'
'. +'
'. $local. - ''; - &server(); + '
'. + $domaindescription{$hostdom{$local}}. + '
'; + &server($local); print ''. $host{$local.'_load_doomed'}.'3) { + $trouble='Does not respond to several queries.
'; + } if ($host{$local.'_errors'}) { $trouble='Has loncron errors.
'; - } elsif ($host{$local.'_loncron_doomed'}>600) { + } elsif ($host{$local.'_loncron_doomed'}>2500) { $trouble='High loncron count.
'; } if ($host{$local.'_load_doomed'}>5) { @@ -566,17 +625,156 @@ foreach $local (sort keys %hostname) { if ($host{$local.'_checkrpms_doomed'}>100) { $trouble='RPMs outdated.
'; } + if ($host{$local.'_reroute'}) { + $trouble='Rerouting
'; + } if ($trouble) { $count++; &serverstatus($local,$trouble); } } unless ($count) { print "No mayor trouble."; } } +} + +# ====================================================================== Status +sub statuslist { + my ($local,$what)=@_; + print +"\n"; +} + +# ============================================================================= +# ============================================================================= +# Main program +# +# ========================================================= Get form parameters +my $buffer; + +read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); +my @pairs=split(/&/,$buffer); +my $pair; my $name; my $value; +undef %FORM; +%FORM=(); +foreach $pair (@pairs) { + ($name,$value) = split(/=/,$pair); + $value =~ tr/+/ /; + $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; + $FORM{$name}=$value; +} + +$buffer=$ENV{'QUERY_STRING'}; +@pairs=split(/&/,$buffer); +foreach $pair (@pairs) { + ($name,$value) = split(/=/,$pair); + $value =~ tr/+/ /; + $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; + $FORM{$name}=$value; +} + +# ====================================================== Determine refresh rate + +my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:30); +if ($refresh<30) { $refresh=30; } +my $starttime=time; + +# ============================================================== Determine mode + +my %modes=('trouble' => 'Trouble', + 'users_doomed' => 'Doomed: Users', + 'loncron_doomed' => 'Doomed: General (loncron)', + 'mysql_doomed' => 'Doomed: Database (mysql)', + 'notconnected_doomed' => 'Doomed: Connections', + 'checkrpms_doomed' => 'Doomed: RPMs', + 'load_doomed' => 'Doomed: Load', + 'unresponsive_doomed' => 'Doomed: Status could not be determined', + 'users' => 'User Report', + 'load' => 'Load Report', + 'connections' => 'Connections Matrix'); + +$mode=$FORM{'mode'}; +unless ($modes{$mode}) { $mode='trouble'; } +# ================================================================ Send Headers +print "Content-type: text/html\n\n". + "\n"; +# -------------------- Read loncapa.conf (and by default, loncapa_apache.conf). +my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); +%perlvar=%{$perlvarref}; +undef $perlvarref; # remove since sensitive and not needed +delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed +delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed + +# ------------------------------------------------------------- Read hosts file +{ + my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); + + while (my $configline=<$config>) { + $configline=~s/#.*$//; + unless ($configline=~/\w/) { next; } + my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); + $hostname{$id}=$name; + $hostdom{$id}=$domain; + $hostrole{$id}=$role; + $hostip{$id}=$ip; + if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) { + $libserv{$id}=$name; + } + } +} +# ------------------------------------------------------------ Read domain file +{ + my $fh=IO::File->new($perlvar{'lonTabDir'}.'/domain.tab'); + if ($fh) { + while (<$fh>) { + next if (/^(\#|\s*$)/); + chomp; + my ($domain, $domain_description, $def_auth, $def_auth_arg, + $def_lang, $city, $longi, $lati) = split(/:/,$_); + $domain_auth_def{$domain}=$def_auth; + $domain_auth_arg_def{$domain}=$def_auth_arg; + $domaindescription{$domain}=$domain_description; + $domain_lang_def{$domain}=$def_lang; + $domain_city{$domain}=$city; + $domain_longi{$domain}=$longi; + $domain_lati{$domain}=$lati; + } + } +} + +print "

LON-CAPA Cluster Status ".localtime()."

"; +print "\n". +"
". +"\n";; +print "
\n"; +print 'Choose next report: '.&select_form($mode,'mode',%modes).'
'; +&hidden('refresh',$refresh); + + if (!$FORM{'runonetime'}) { + print + "

Gathering initial cluster data

This may take some time ...
"; + $fromcache=0; + &mainloop(); + &statuslist('Done initial run.'); + &reports(); + } else { + $fromcache=1; + &mainloop(); + &statuslist('Done gathering cached data'); + &reports(); + $fromcache=0; + &mainloop(); + } + &hidden('runonetime',1); +print '
Total number of queries: '.$stat_total. + '
Percent complete: '. + int(($stat_total-$stat_notyet)/$stat_total*100.). + '
Percent from cache: '. + int($stat_fromcache/$stat_total*100.).'
'; + # ============================================================== Close, refresh print "