#!/usr/bin/perl $|=1; # Generates a html page showing various status reports about the cluster # $Id: clusterstatus.pl,v 1.23 2003/09/14 19:00:03 www Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; use strict; use LWP::UserAgent(); use HTTP::Headers; use IO::File; my %host=(); my $oneday=60*60*24; my %connectionstatus=(); my %perlvar=(); my $mode; my $concount=0; my $fromcache; my %domaindescription = (); my %domain_auth_def = (); my %domain_auth_arg_def = (); my %domain_lang_def=(); my %domain_city=(); my %domain_longi=(); my %domain_lati=(); my %hostname=(); my %hostip=(); my %hostdom=(); my %hostrole=(); my %libserv=(); my $maxusers=0; my $maxload=0; my $totalusers=0; my %FORM=(); my $stat_total=0; my $stat_notyet=0; my $stat_fromcache=0; sub select_form { my ($def,$name,%hash) = @_; my $selectform = ""; return $selectform; } sub key { my ($local,$url)=@_; my $key=$local.'_'.$url; $key=~s/\W/\_/gs; return $key; } sub hidden { my ($name,$value)=@_; print "\n"; } sub request { my ($local,$url,$cachetime)=@_; $cachetime*=(0.5+rand); my $key=&key($local,$url); my $reply=''; 
# ---------------------------------------------------------------- Review notes
# NOTE(review): this listing appears to have lost its original line breaks and
# the markup inside its string literals; the notes describe the logic as written.
# request($local,$url,$cachetime) returns the reply for one host/URL pair:
#  - $cachetime is first scaled by a random factor (0.5+rand);
#  - with $fromcache set, only the cached $FORM value is returned, or 'not_yet';
#  - otherwise a cached $FORM value younger than $cachetime is reused, else the
#    URL is fetched through LWP::UserAgent (timeout => 10) and the chomped body
#    is returned; 'local_unknown' means no %hostname entry, 'local_error' means
#    the response was not a success.
# NOTE(review): in the $fromcache branch below, $stat_fromcache++ and
# $stat_notyet++ are written after their return statements and can never run.
# NOTE(review): the basic-auth user and password are hard-coded in the call to
# authorization_basic below.
$stat_total++; # if fromcache flag is set, only return cached values if ($fromcache) { if ($FORM{$key.'_time'}) { return $FORM{$key}; $stat_fromcache++; } else { return 'not_yet'; $stat_notyet++; } } # normal mode, refresh when expired or not yet present if ($FORM{$key.'_time'}) { if ((time-$FORM{$key.'_time'})<$cachetime) { $reply=$FORM{$key}; &hidden($key.'_time',$FORM{$key.'_time'}); $stat_fromcache++; } } unless ($reply) { unless ($hostname{$local}) { $reply='local_unknown'; } else { my $ua=new LWP::UserAgent(timeout => 10); my $request=new HTTP::Request('GET', "http://".$hostname{$local}.$url); $request->authorization_basic('lonadm','litelite'); my $response=$ua->request($request); unless ($response->is_success) { $reply='local_error'; } else { $reply=$response->content; chomp($reply); } } &hidden($key.'_time',time); } &hidden($key,$reply); return $reply; } # ============================================= Are local and remote connected? sub connected { my ($local,$remote)=@_; $local=~s/\W//g; $remote=~s/\W//g; unless ($hostname{$remote}) { return 'remote_unknown'; } my $url='/cgi-bin/ping.pl?'.$remote; # # Slowly phase this in: if not cached, only do 5 percent of the cases, # but always do the first five. 
# NOTE(review): $concount starts at 0 and the guard is ($concount<=5), so up to
# six uncached probes are let through before the 5 percent sampling applies.
# unless ($FORM{&key($local,$url)}) { unless (($concount<=5) || (rand>0.95)) { $stat_total++; $stat_notyet++; return 'not_yet'; } else { $concount++; } } # # Actually do the query # &statuslist($local,'connecting '.$remote); my $reply=&request($local,$url,3600); $reply=(split("\n",$reply))[0]; $reply=~s/\W//g; if ($reply ne $remote) { return $reply; } return 'ok'; } # ============================================================ Get a reply hash sub replyhash { my %returnhash=(); foreach (split(/\&/,&request(@_))) { my ($name,$value)=split(/\=/,$_); if ($name) { unless ($value) { $value=''; } $returnhash{$name}=$value; } } return %returnhash; } # ================================================================ Link to host sub otherwindow { my ($local,$url,$label)=@_; return " $label "; } sub login { my $local=shift; print &otherwindow($local,'/adm/login?domain='.$perlvar{'lonDefDomain'}, 'Login'); } sub runloncron { my $local=shift; print &otherwindow($local,'/cgi-bin/loncron.pl','Run loncron'); } sub loncron { my $local=shift; print &otherwindow($local,'/lon-status','loncron'); } sub lonc { my $local=shift; print &otherwindow($local,'/lon-status/loncstatus.txt','lonc'); } sub lond { my $local=shift; print &otherwindow($local,'/lon-status/londstatus.txt','lond'); } sub users { my $local=shift; print &otherwindow($local,'/cgi-bin/userstatus.pl','Users'); } sub versions { my $local=shift; print &otherwindow($local,'/cgi-bin/lonversions.pl','Versions'); } sub server { my $local=shift; print &otherwindow($local,'/server-status','Server Status'); } sub announcement { my $local=shift; print &otherwindow($local,'/announcement.txt','Announcement'); } sub takeonline { my $local=shift; print &otherwindow($local,'/cgi-bin/takeonline.pl','Take online'); } sub takeoffline { my $local=shift; print &otherwindow($local,'/cgi-bin/takeoffline.pl','Take offline'); } sub reroute { my ($local,$remote)=@_; print &otherwindow($local,'/cgi-bin/takeoffline.pl?'. 
$hostname{$remote}.'&'.$hostdom{$local} ,$remote)."\n"; } sub allreroutes { my $local=shift; &takeoffline($local); print ' Reroute to: '; foreach my $remote (sort keys %hostname) { unless ($local eq $remote) { &reroute($local,$remote); } } print ''; } # ========================================================= Produce a green bar sub bar { my $parm=shift; my $number=int($parm+0.5); print "
"; for (my $i=0;$i<$number;$i++) { print "+"; } print "
"; } # ========================================================== Show server status sub serverstatus { my ($local,$trouble)=@_; print (< "; if ($trouble) { print (""); } print "
$local $hostdom{$local} ($hostname{$local}; $hostrole{$local})
$domaindescription{$hostdom{$local}} $domain_city{$hostdom{$local}}
ENDHEADER &login($local);&server($local);&users($local);&versions($local); &announcement($local); &loncron($local);&lond($local);&lonc($local);&runloncron($local); print "
$trouble
"; # re-routing if ($host{$local.'_reroute'}) { print "
Reroute: ".$host{$local.'_reroute'}; &takeonline($local); } # version if ($host{$local.'_version'}) { print "
Version: ".$host{$local.'_version'} } # load if (($host{$local.'_load_doomed'}>0.5) || ($mode eq 'load_doomed')) { print "
Load: ".$host{$local.'_load'} } # users if (($host{$local.'_users_doomed'}>10) || ($mode eq 'users_doomed')) { print "
Active Users: ".$host{$local.'_users'} } # checkrpms if ($host{$local.'_checkrpms'}) { print "
RPMs: ".$host{$local.'_checkrpms'} } # mysql if ($host{$local.'_mysql'}) { print "
MySQL Database: ".$host{$local.'_mysql'} } # connections if ($host{$local.'_notconnected'}) { print "
Not connected: "; foreach (split(/ /,$host{$local.'_notconnected'})) { if ($_) { print " $_"; } } } # errors if ($host{$local.'_errors'}) { print "
loncron errors: ".$host{$local.'_errors'}; } print "
"; &allreroutes($local); print "

"; } # =========================================================== Doomedness sorted sub doomedness { my $crit=shift; my %alldoomed=(); my @allhosts=(); foreach (keys %host) { if ($_=~/^(\w+)\_$crit$/) { if ($host{$_}) { push (@allhosts,$1); $alldoomed{$1}=$host{$_}; } } } return sort { $alldoomed{$b} <=> $alldoomed{$a} } @allhosts; } sub resetvars { $maxusers=0; $maxload=0; $totalusers=0; $stat_total=0; $stat_notyet=0; $stat_fromcache=0; $concount=0; undef %host; %host=(); } sub mainloop { &resetvars(); # ==================================================== Main Loop over all Hosts foreach my $local (sort keys %hostname) { $host{$local.'_unresponsive_doomed'}=0; # -- Check general status &statuslist($local,'General'); my %loncron=&replyhash($local,'/lon-status/loncron_simple.txt',1200); if (defined($loncron{'local_error'})) { $host{$local.'_loncron'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { if ((time-$loncron{'time'})>$oneday) { $host{$local.'_loncron'}='Stale.'; $host{$local.'_unresponsive_doomed'}++; } else { $host{$local.'_loncron_doomed'}=$loncron{'notices'} +4*$loncron{'warnings'} +100*$loncron{'errors'}; $host{$local.'_errors'}=$loncron{'errors'}; } } # -- Check version &statuslist($local,'Version'); my $version=&request($local,'/lon-status/version.txt',7200); if ($version eq 'local_error') { $host{$local.'_version'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { $host{$local.'_version'}=$version; } # -- Check user status &statuslist($local,'Users'); my %userstatus=&replyhash($local,'/cgi-bin/userstatus.pl?simple',600); if (defined($userstatus{'local_error'})) { $host{$local.'_userstatus'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { $host{$local.'_users_doomed'}=$userstatus{'Active'}; $host{$local.'_users'}=$userstatus{'Active'}; unless ($host{$local.'_users'}) { $host{$local.'_users'}=0; } if ($host{$local.'_users'}>$maxusers) { $maxusers=$host{$local.'_users'}; } 
$totalusers+=$host{$local.'_users'}; my ($sload,$mload,$lload)=split(/ /,$userstatus{'loadavg'}); $host{$local.'_load_doomed'}=$mload; if ($mload>$maxload) { $maxload=$mload; } $host{$local.'_load'}=$userstatus{'loadavg'}; } # -- Check reroute status &statuslist($local,'Reroute'); my %reroute=&replyhash($local,'/lon-status/reroute.txt',1800); if ($reroute{'status'} eq 'rerouting') { if ($reroute{'server'}) { $host{$local.'_reroute'}= 'Rerouting to '.$reroute{'server'}. ', domain: '.$reroute{'domain'}. ' (since '.localtime($reroute{'time'}).')'; } else { $host{$local.'_reroute'}='offline'; } } # -- Check mysql status &statuslist($local,'Database'); my %mysql=&replyhash($local,'/lon-status/mysql.txt',3600); if (defined($mysql{'local_error'})) { $host{$local.'_mysql'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { if ((time-$mysql{'time'})>(7*$oneday)) { if ($hostrole{$local} eq 'library') { $host{$local.'_mysql'}='Stale.'; $host{$local.'_mysql_doomed'}=1; } if ($mysql{'mysql'} eq 'defunct') { $host{$local.'_mysql'}='Defunct (maybe stale).'; $host{$local.'_mysql_doomed'}=2; } } elsif ($mysql{'mysql'} eq 'defunct') { $host{$local.'_mysql'}='Defunct.'; $host{$local.'_mysql_doomed'}=3; } } # -- Check rpm status &statuslist($local,'RPMs'); my %checkrpms=&replyhash($local,'/lon-status/checkrpms.txt',7200); if (defined($checkrpms{'local_error'})) { $host{$local.'_checkrpms'}='Could not determine.'; $host{$local.'_unresponsive_doomed'}++; } else { if ((time-$checkrpms{'time'})>(4*$oneday)) { $host{$local.'_checkrpms'}='Stale.'; $host{$local.'_checkrpms_doomed'}=50; $host{$local.'_unresponsive_doomed'}++; } elsif ($checkrpms{'status'} eq 'fail') { $host{$local.'_checkrpms'}='Could not checked RPMs.'; $host{$local.'_checkrpms_doomed'}=100; } elsif ($checkrpms{'rpmcount'}) { $host{$local.'_checkrpms'}='Outdated RPMs: '. 
$checkrpms{'rpmcount'}; $host{$local.'_checkrpms_doomed'}=$checkrpms{'rpmcount'}; } } # -- Check connections &statuslist($local,'Connections'); $host{$local.'_notconnected'}=''; $host{$local.'_notconnected_doomed'}=0; foreach my $remote (sort keys %hostname) { my $status=&connected($local,$remote); $connectionstatus{$local.'_TO_'.$remote}=$status; unless (($status eq 'ok') || ($status eq 'not_yet')) { $host{$local.'_notconnected'}.=' '.$remote; $host{$local.'_notconnected_doomed'}++; } } # =============================================================== End Main Loop } } sub reports { # ====================================================================== Output if ($mode=~/\_doomed$/) { # Output by doomedness foreach (&doomedness($mode)) { &serverstatus($_); } } elsif ($mode eq 'connections') { print "". ""; foreach my $remote (sort keys %hostname) { print ''; } print "\n"; # connection matrix foreach my $local (sort keys %hostname) { print ''; foreach my $remote (sort keys %hostname) { if ($connectionstatus{$local.'_TO_'.$remote} eq 'not_yet') { my $cellcolor='#FFFFFF'; if ($local eq $remote) { $cellcolor='#DDDDDD'; } print ''; } elsif ($connectionstatus{$local.'_TO_'.$remote} eq 'ok') { my $cellcolor='#BBDDBB'; if ($local eq $remote) { $cellcolor='#99DD99'; } print ''; } else { my $cellcolor='#DDCCAA'; if ($connectionstatus{$local.'_TO_'.$remote} eq 'local_error') { if ($local eq $remote) { $cellcolor='#DD88AA'; } else { $cellcolor='#DDAACC'; } } else { if ($local eq $remote) { $cellcolor='#DDBB77'; } } print ''; } } print "\n"; } print "
 '.$remote.'
'.$local.'not yet testedok'. $connectionstatus{$local.'_TO_'.$remote}.'
'; &lonc($local); &lond($remote); print '
"; } elsif ($mode eq 'users') { # Users if ($maxusers) { my $factor=50/$maxusers; print "

Total active user(s): $totalusers

". ""; foreach my $local (sort keys %hostname) { if (defined($host{$local.'_users'})) { print '\n"; } } print "
'.$local. '
'. $domaindescription{$hostdom{$local}}. '
'; &users($local); print ''. $host{$local.'_users'}.'
"; } else { print "No active users logged in."; } } elsif ($mode eq 'load') { # Load if ($maxload) { my $factor=50/$maxload; print ""; foreach my $local (sort keys %hostname) { if (defined($host{$local.'_load_doomed'})) { print '\n"; } } print "
'. $local. '
'. $domaindescription{$hostdom{$local}}. '
'; &server($local); print ''. $host{$local.'_load_doomed'}.'
"; } else { print "No workload."; } } elsif ($mode eq 'trouble') { my $count=0; foreach my $local (sort keys %hostname) { my $trouble=''; if ($host{$local.'_unresponsive_doomed'}>3) { $trouble='Does not respond to several queries.
'; } if ($host{$local.'_errors'}) { $trouble='Has loncron errors.
'; } elsif ($host{$local.'_loncron_doomed'}>600) { $trouble='High loncron count.
'; } if ($host{$local.'_load_doomed'}>5) { $trouble='High load.
'; } if ($host{$local.'_users_doomed'}>200) { $trouble='High user volume.
'; } if ($host{$local.'_mysql_doomed'}>1) { $trouble='MySQL database apparently offline.
'; } if ($host{$local.'_checkrpms_doomed'}>100) { $trouble='RPMs outdated.
'; } if ($host{$local.'_reroute'}) { $trouble='Rerouting
'; } if ($trouble) { $count++; &serverstatus($local,$trouble); } } unless ($count) { print "No mayor trouble."; } } } # ====================================================================== Status sub statuslist { my ($local,$what)=@_; print "\n"; } # ============================================================================= # ============================================================================= # Main program # # ========================================================= Get form parameters my $buffer; read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'}); my @pairs=split(/&/,$buffer); my $pair; my $name; my $value; undef %FORM; %FORM=(); foreach $pair (@pairs) { ($name,$value) = split(/=/,$pair); $value =~ tr/+/ /; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; $FORM{$name}=$value; } $buffer=$ENV{'QUERY_STRING'}; @pairs=split(/&/,$buffer); foreach $pair (@pairs) { ($name,$value) = split(/=/,$pair); $value =~ tr/+/ /; $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; $FORM{$name}=$value; } # ====================================================== Determine refresh rate my $refresh=(($FORM{'refresh'}=~/^\d+$/)?$FORM{'refresh'}:30); if ($refresh<30) { $refresh=30; } my $starttime=time; # ============================================================== Determine mode my %modes=('trouble' => 'Trouble', 'users_doomed' => 'Doomed: Users', 'loncron_doomed' => 'Doomed: General (loncron)', 'mysql_doomed' => 'Doomed: Database (mysql)', 'notconnected_doomed' => 'Doomed: Connections', 'checkrpms_doomed' => 'Doomed: RPMs', 'load_doomed' => 'Doomed: Load', 'unresponsive_doomed' => 'Doomed: Status could not be determined', 'users' => 'User Report', 'load' => 'Load Report', 'connections' => 'Connections Matrix'); $mode=$FORM{'mode'}; unless ($modes{$mode}) { $mode='trouble'; } # ================================================================ Send Headers print "Content-type: text/html\n\n". 
"\n"; # -------------------- Read loncapa.conf (and by default, loncapa_apache.conf). my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); %perlvar=%{$perlvarref}; undef $perlvarref; # remove since sensitive and not needed delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed # ------------------------------------------------------------- Read hosts file { my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); while (my $configline=<$config>) { $configline=~s/#.*$//; unless ($configline=~/\w/) { next; } my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); $hostname{$id}=$name; $hostdom{$id}=$domain; $hostrole{$id}=$role; $hostip{$id}=$ip; if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) { $libserv{$id}=$name; } } } # ------------------------------------------------------------ Read domain file { my $fh=IO::File->new($perlvar{'lonTabDir'}.'/domain.tab'); if ($fh) { while (<$fh>) { next if (/^(\#|\s*$)/); chomp; my ($domain, $domain_description, $def_auth, $def_auth_arg, $def_lang, $city, $longi, $lati) = split(/:/,$_); $domain_auth_def{$domain}=$def_auth; $domain_auth_arg_def{$domain}=$def_auth_arg; $domaindescription{$domain}=$domain_description; $domain_lang_def{$domain}=$def_lang; $domain_city{$domain}=$city; $domain_longi{$domain}=$longi; $domain_lati{$domain}=$lati; } } } print "

LON-CAPA Cluster Status ".localtime()."

"; print "
\n". "
". "
\n";; print "
\n"; print 'Choose next report: '.&select_form($mode,'mode',%modes).'
'; &hidden('refresh',$refresh); if (!$FORM{'runonetime'}) { print "

Gathering initial cluster data

This may take some time ...
"; $fromcache=0; &mainloop(); &statuslist('Done initial run.'); &reports(); } else { $fromcache=1; &mainloop(); &statuslist('Done gathering cached data'); &reports(); $fromcache=0; &mainloop(); } &hidden('runonetime',1); print '
Total number of queries: '.$stat_total. '
Percent complete: '. int(($stat_total-$stat_notyet)/$stat_total*100.). '
Percent from cache: '. int($stat_fromcache/$stat_total*100.).'
'; # ============================================================== Close, refresh print "
"; exit 0;
# ---------------------------------------------------------------- Review notes
# NOTE(review): in the 'trouble' report each condition assigns $trouble with
# "=", so a later match replaces the text set by an earlier one.
# NOTE(review): statuslist is declared as ($local,$what) but the main program
# calls it with a single string, which therefore arrives as $local.
# NOTE(review): the "Percent complete" and "Percent from cache" figures divide
# by $stat_total, which stays 0 when %hostname is empty.
# NOTE(review): the hosts.tab handle returned by IO::File->new is read without
# being checked, and its lines are split on ':' without a chomp, so the last
# field of the split keeps its line ending.