--- loncom/metadata_database/searchcat.pl 2003/06/19 20:24:57 1.34 +++ loncom/metadata_database/searchcat.pl 2003/12/24 19:58:37 1.45 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.34 2003/06/19 20:24:57 matthew Exp $ +# $Id: searchcat.pl,v 1.45 2003/12/24 19:58:37 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -65,6 +65,8 @@ and correct user experience. =cut +use strict; + use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; @@ -74,9 +76,40 @@ use DBI; use GDBM_File; use POSIX qw(strftime mktime); +require "find.pl"; + my @metalist; +my $simplestatus=''; +my %countext=(); +# ----------------------------------------------------- write out simple status +sub writesimple { + open(SMP,'>/home/httpd/html/lon-status/mysql.txt'); + print SMP $simplestatus."\n"; + close(SMP); +} + +sub writecount { + open(RSMP,'>/home/httpd/html/lon-status/rescount.txt'); + foreach (keys %countext) { + print RSMP $_.'='.$countext{$_}.'&'; + } + print RSMP 'time='.time."\n"; + close(RSMP); +} + +# -------------------------------------- counts files with different extensions +sub count { + my $file=shift; + $file=~/\.(\w+)$/; + my $ext=lc($1); + if (defined($countext{$ext})) { + $countext{$ext}++; + } else { + $countext{$ext}=1; + } +} # ----------------------------------------------------- Un-Escape Special Chars sub unescape { @@ -93,95 +126,91 @@ sub escape { return $str; } - # ------------------------------------------- Code to evaluate dynamic metadata sub dynamicmeta { - my $url=&declutter(shift); $url=~s/\.meta$//; my %returnhash=(); my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); my $prodir=&propath($adomain,$aauthor); - if ((tie(%evaldata,'GDBM_File', - $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) && - (tie(%newevaldata,'GDBM_File', - $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { - my %sum=(); - my %cnt=(); - my %listitems=('count' => 'add', - 'course' => 'add', - 'avetries' => 'avg', - 'stdno' => 'add', - 'difficulty' => 'avg', - 'clear' => 'avg', - 'technical' => 'avg', - 'helpful' => 'avg', - 'correct' => 'avg', - 'depth' => 'avg', - 'comments' => 'app', - 'usage' => 'cnt' - ); - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - study($regexp); - while (my ($key,$value) = each(%evaldata)) { - $key=&unescape($key); - next if ($key !~ /$regexp/); - my $ctype=$1; - if (defined($cnt{$ctype})) { - $cnt{$ctype}++; - } else { - $cnt{$ctype}=1; - } - unless ($listitems{$ctype} eq 'app') { - if (defined($sum{$ctype})) { - $sum{$ctype}+=$value; - } else { - $sum{$ctype}=$value; - } - } else { - if (defined($sum{$ctype})) { - if ($value) { - $sum{$ctype}.='
'.$value; - } - } else { - $sum{$ctype}=''.$value; - } - } - if ($ctype ne 'count') { - $newevaldata{$_}=$value; - } - } - while (my($key,$value) = each(%cnt)) { - if ($listitems{$key} eq 'avg') { - $returnhash{$key}=int(($sum{$key}/$value)*100.0+0.5)/100.0; - } elsif ($listitems{$key} eq 'cnt') { - $returnhash{$key}=$value; - } else { - $returnhash{$key}=$sum{$key}; - } - } - if ($returnhash{'count'}) { - my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey}=$returnhash{'count'}; - } - untie(%evaldata); - untie(%newevaldata); + +# Get metadata except counts + if (tie(my %evaldata,'GDBM_File', + $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { + my %sum=(); + my %cnt=(); + my %concat=(); + my %listitems=( + 'course' => 'add', + 'goto' => 'add', + 'comefrom' => 'add', + 'avetries' => 'avg', + 'stdno' => 'add', + 'difficulty' => 'avg', + 'clear' => 'avg', + 'technical' => 'avg', + 'helpful' => 'avg', + 'correct' => 'avg', + 'depth' => 'avg', + 'comments' => 'app', + 'usage' => 'cnt' + ); + + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; + while (my ($esckey,$value)=each %evaldata) { + my $key=&unescape($esckey); + if ($key=~/$regexp/) { + my ($item,$purl,$cat)=split(/___/,$key); + if (defined($cnt{$cat})) { $cnt{$cat}++; } else { $cnt{$cat}=1; } + unless ($listitems{$cat} eq 'app') { + if (defined($sum{$cat})) { + $sum{$cat}+=$evaldata{$esckey}; + $concat{$cat}.=','.$item; + } else { + $sum{$cat}=$evaldata{$esckey}; + $concat{$cat}=$item; + } + } else { + if (defined($sum{$cat})) { + if ($evaldata{$esckey}=~/\w/) { + $sum{$cat}.='
'.$evaldata{$esckey}; + } + } else { + $sum{$cat}=''.$evaldata{$esckey}; + } + } + } + } + untie(%evaldata); +# transfer gathered data to returnhash, calculate averages where applicable + while (my $cat=each(%cnt)) { + if ($listitems{$cat} eq 'avg') { + $returnhash{$cat}=int(($sum{$cat}/$cnt{$cat})*100.0+0.5)/100.0; + } elsif ($listitems{$cat} eq 'cnt') { + $returnhash{$cat}=$cnt{$cat}; + } else { + $returnhash{$cat}=$sum{$cat}; + } + $returnhash{$cat.'_list'}=$concat{$cat}; + } + } +# get count + if (tie(my %evaldata,'GDBM_File', + $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { + my $escurl=&escape($url); + if (! exists($evaldata{$escurl})) { + $returnhash{'count'}='Not Available'; + } else { + $returnhash{'count'}=$evaldata{$escurl}; + } + untie %evaldata; } return %returnhash; } - -# ----------------- Code to enable 'find' subroutine listing of the .meta files -require "find.pl"; -sub wanted { - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f _ && - /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,"$dir/$_"); -} - + # --------------- Read loncapa_apache.conf and loncapa.conf and get variables my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); my %perlvar=%{$perlvarref}; @@ -195,8 +224,8 @@ exit unless $perlvar{'lonRole'} eq 'libr my $wwwid=getpwnam('www'); if ($wwwid!=$<) { - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ mailto $emailto -s '$subj' > /dev/null"); exit 1; @@ -207,6 +236,7 @@ if ($wwwid!=$<) { open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); print LOG '==== Searchcat Run '.localtime()."====\n\n"; +$simplestatus='time='.time.'&'; my $dbh; # ------------------------------------- Make sure that database can be accessed { @@ -214,8 +244,11 @@ my $dbh; $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) ) { print LOG "Cannot connect to database!\n"; + $simplestatus.='mysql=defunct'; + &writesimple(); exit; } + my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (". "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". @@ -263,9 +296,8 @@ my $insert_sth = $dbh->prepare foreach my $user (@homeusers) { print LOG "\n=== User: ".$user."\n\n"; - # Remove left-over db-files from potentially crashed searchcat run + my $prodir=&propath($perlvar{'lonDefDomain'},$user); - unlink($prodir.'/nohist_new_resevaldata.db'); # Use find.pl undef @metalist; @metalist=(); @@ -278,7 +310,10 @@ foreach my $user (@homeusers) { my $ref=&metadata($m); my $m2='/res/'.&declutter($m); $m2=~s/\.meta$//; + if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; } + if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; } &dynamicmeta($m2); + &count($m2); $delete_sth->execute($m2); $insert_sth->execute($ref->{'title'}, $ref->{'author'}, @@ -304,14 +339,13 @@ foreach my $user (@homeusers) { # Need to, perhaps, remove stale SQL database records. # ... not yet implemented - # ------------------------------------------- Copy over the new db-files - system('mv '.$prodir.'/nohist_new_resevaldata.db '. - $prodir.'/nohist_resevaldata.db'); } # --------------------------------------------------- Close database connection $dbh->disconnect; print LOG "\n==== Searchcat completed ".localtime()." ====\n"; close(LOG); +&writesimple(); +&writecount(); exit 0; @@ -436,3 +470,13 @@ sub unsqltime { return $timestamp; } +# ----------------- Code to enable 'find' subroutine listing of the .meta files + +no strict "vars"; + +sub wanted { + (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && + -f _ && + /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && + push(@metalist,"$dir/$_"); +}