--- loncom/metadata_database/searchcat.pl 2003/08/08 12:38:00 1.37 +++ loncom/metadata_database/searchcat.pl 2003/12/24 19:58:37 1.45 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.37 2003/08/08 12:38:00 matthew Exp $ +# $Id: searchcat.pl,v 1.45 2003/12/24 19:58:37 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -65,6 +65,8 @@ and correct user experience. =cut +use strict; + use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; @@ -74,16 +76,40 @@ use DBI; use GDBM_File; use POSIX qw(strftime mktime); +require "find.pl"; + my @metalist; -$simplestatus=''; +my $simplestatus=''; +my %countext=(); +# ----------------------------------------------------- write out simple status sub writesimple { open(SMP,'>/home/httpd/html/lon-status/mysql.txt'); print SMP $simplestatus."\n"; close(SMP); } +sub writecount { + open(RSMP,'>/home/httpd/html/lon-status/rescount.txt'); + foreach (keys %countext) { + print RSMP $_.'='.$countext{$_}.'&'; + } + print RSMP 'time='.time."\n"; + close(RSMP); +} + +# -------------------------------------- counts files with different extensions +sub count { + my $file=shift; + $file=~/\.(\w+)$/; + my $ext=lc($1); + if (defined($countext{$ext})) { + $countext{$ext}++; + } else { + $countext{$ext}=1; + } +} # ----------------------------------------------------- Un-Escape Special Chars sub unescape { @@ -100,95 +126,91 @@ sub escape { return $str; } - # ------------------------------------------- Code to evaluate dynamic metadata sub dynamicmeta { - my $url=&declutter(shift); $url=~s/\.meta$//; my %returnhash=(); my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); my $prodir=&propath($adomain,$aauthor); - if ((tie(%evaldata,'GDBM_File', - $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) && - (tie(%newevaldata,'GDBM_File', - $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { - my %sum=(); - my %cnt=(); - my %listitems=('count' => 'add', - 'course' => 'add', - 'avetries' => 'avg', - 'stdno' => 'add', - 'difficulty' => 'avg', - 'clear' => 'avg', - 'technical' => 'avg', - 'helpful' => 'avg', - 'correct' => 'avg', - 'depth' => 'avg', - 'comments' => 'app', - 'usage' => 'cnt' - ); - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - foreach (keys %evaldata) { - my $key=&unescape($_); - if ($key=~/$regexp/) { - my $ctype=$1; - if (defined($cnt{$ctype})) { - $cnt{$ctype}++; - } else { - $cnt{$ctype}=1; - } - unless ($listitems{$ctype} eq 'app') { - if (defined($sum{$ctype})) { - $sum{$ctype}+=$evaldata{$_}; - } else { - $sum{$ctype}=$evaldata{$_}; - } - } else { - if (defined($sum{$ctype})) { - if ($evaldata{$_}) { - $sum{$ctype}.='
'.$evaldata{$_}; - } - } else { - $sum{$ctype}=''.$evaldata{$_}; - } + +# Get metadata except counts + if (tie(my %evaldata,'GDBM_File', + $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { + my %sum=(); + my %cnt=(); + my %concat=(); + my %listitems=( + 'course' => 'add', + 'goto' => 'add', + 'comefrom' => 'add', + 'avetries' => 'avg', + 'stdno' => 'add', + 'difficulty' => 'avg', + 'clear' => 'avg', + 'technical' => 'avg', + 'helpful' => 'avg', + 'correct' => 'avg', + 'depth' => 'avg', + 'comments' => 'app', + 'usage' => 'cnt' + ); + + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; + while (my ($esckey,$value)=each %evaldata) { + my $key=&unescape($esckey); + if ($key=~/$regexp/) { + my ($item,$purl,$cat)=split(/___/,$key); + if (defined($cnt{$cat})) { $cnt{$cat}++; } else { $cnt{$cat}=1; } + unless ($listitems{$cat} eq 'app') { + if (defined($sum{$cat})) { + $sum{$cat}+=$evaldata{$esckey}; + $concat{$cat}.=','.$item; + } else { + $sum{$cat}=$evaldata{$esckey}; + $concat{$cat}=$item; + } + } else { + if (defined($sum{$cat})) { + if ($evaldata{$esckey}=~/\w/) { + $sum{$cat}.='
'.$evaldata{$esckey}; + } + } else { + $sum{$cat}=''.$evaldata{$esckey}; + } + } + } + } + untie(%evaldata); +# transfer gathered data to returnhash, calculate averages where applicable + while (my $cat=each(%cnt)) { + if ($listitems{$cat} eq 'avg') { + $returnhash{$cat}=int(($sum{$cat}/$cnt{$cat})*100.0+0.5)/100.0; + } elsif ($listitems{$cat} eq 'cnt') { + $returnhash{$cat}=$cnt{$cat}; + } else { + $returnhash{$cat}=$sum{$cat}; } - if ($ctype ne 'count') { - $newevaldata{$_}=$evaldata{$_}; - } - } - } - foreach (keys %cnt) { - if ($listitems{$_} eq 'avg') { - $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; - } elsif ($listitems{$_} eq 'cnt') { - $returnhash{$_}=$cnt{$_}; - } else { - $returnhash{$_}=$sum{$_}; - } - } - if ($returnhash{'count'}) { - my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey}=$returnhash{'count'}; - } - untie(%evaldata); - untie(%newevaldata); - } - return %returnhash; + $returnhash{$cat.'_list'}=$concat{$cat}; + } + } +# get count + if (tie(my %evaldata,'GDBM_File', + $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { + my $escurl=&escape($url); + if (! exists($evaldata{$escurl})) { + $returnhash{'count'}='Not Available'; + } else { + $returnhash{'count'}=$evaldata{$escurl}; + } + untie %evaldata; + } + return %returnhash; } -# ----------------- Code to enable 'find' subroutine listing of the .meta files -require "find.pl"; -sub wanted { - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f _ && - /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,"$dir/$_"); -} - # --------------- Read loncapa_apache.conf and loncapa.conf and get variables my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); my %perlvar=%{$perlvarref}; @@ -202,8 +224,8 @@ exit unless $perlvar{'lonRole'} eq 'libr my $wwwid=getpwnam('www'); if ($wwwid!=$<) { - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ mailto $emailto -s '$subj' > /dev/null"); exit 1; @@ -274,9 +296,8 @@ my $insert_sth = $dbh->prepare foreach my $user (@homeusers) { print LOG "\n=== User: ".$user."\n\n"; - # Remove left-over db-files from potentially crashed searchcat run + my $prodir=&propath($perlvar{'lonDefDomain'},$user); - unlink($prodir.'/nohist_new_resevaldata.db'); # Use find.pl undef @metalist; @metalist=(); @@ -289,7 +310,10 @@ foreach my $user (@homeusers) { my $ref=&metadata($m); my $m2='/res/'.&declutter($m); $m2=~s/\.meta$//; + if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; } + if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; } &dynamicmeta($m2); + &count($m2); $delete_sth->execute($m2); $insert_sth->execute($ref->{'title'}, $ref->{'author'}, @@ -315,34 +339,13 @@ foreach my $user (@homeusers) { # Need to, perhaps, remove stale SQL database records. # ... not yet implemented - # ------------------------------------------- Copy over the new db-files - # - # Check the size of nohist_new_resevaldata.db compared to - # nohist_resevaldata.db - my @stat_result = stat($prodir.'/nohist_new_resevaldata.db'); - my $new_size = $stat_result[7]; - @stat_result = stat($prodir.'/nohist_resevaldata.db'); - my $old_size = $stat_result[7]; - if ($new_size/$old_size > 0.15 ) { - system('mv '.$prodir.'/nohist_new_resevaldata.db '. - $prodir.'/nohist_resevaldata.db'); - } else { - print LOG "Size of '$user' old nohist_reseval: $old_size ". - "Size of new: $new_size. Not overwriting.\n"; - my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - my $subj="LON: $perlvar{'lonHostID'} searchcat.pl $user reseval ". - "modification error."; - system("echo ". - "'See /home/httpd/perl/logs/searchcat.txt for information.' ". - "| mailto $emailto -s '$subj' > /dev/null"); - } - } # --------------------------------------------------- Close database connection $dbh->disconnect; print LOG "\n==== Searchcat completed ".localtime()." ====\n"; close(LOG); &writesimple(); +&writecount(); exit 0; @@ -467,3 +470,13 @@ sub unsqltime { return $timestamp; } +# ----------------- Code to enable 'find' subroutine listing of the .meta files + +no strict "vars"; + +sub wanted { + (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && + -f _ && + /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && + push(@metalist,"$dir/$_"); +}