--- loncom/metadata_database/searchcat.pl 2003/09/26 00:23:09 1.40 +++ loncom/metadata_database/searchcat.pl 2003/12/25 04:06:52 1.47 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.40 2003/09/26 00:23:09 www Exp $ +# $Id: searchcat.pl,v 1.47 2003/12/25 04:06:52 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -65,6 +65,8 @@ and correct user experience. =cut +use strict; + use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; @@ -74,11 +76,14 @@ use DBI; use GDBM_File; use POSIX qw(strftime mktime); +require "find.pl"; + my @metalist; -$simplestatus=''; +my $simplestatus=''; my %countext=(); +# ----------------------------------------------------- write out simple status sub writesimple { open(SMP,'>/home/httpd/html/lon-status/mysql.txt'); print SMP $simplestatus."\n"; @@ -94,6 +99,7 @@ sub writecount { close(RSMP); } +# -------------------------------------- counts files with different extensions sub count { my $file=shift; $file=~/\.(\w+)$/; @@ -120,95 +126,91 @@ sub escape { return $str; } - # ------------------------------------------- Code to evaluate dynamic metadata sub dynamicmeta { - my $url=&declutter(shift); $url=~s/\.meta$//; my %returnhash=(); my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); my $prodir=&propath($adomain,$aauthor); - if ((tie(%evaldata,'GDBM_File', - $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) && - (tie(%newevaldata,'GDBM_File', - $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { - my %sum=(); - my %cnt=(); - my %listitems=('count' => 'add', - 'course' => 'add', - 'avetries' => 'avg', - 'stdno' => 'add', - 'difficulty' => 'avg', - 'clear' => 'avg', - 'technical' => 'avg', - 'helpful' => 'avg', - 'correct' => 'avg', - 'depth' => 'avg', - 'comments' => 'app', - 'usage' => 'cnt' - ); - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - foreach (keys %evaldata) { - my $key=&unescape($_); - if ($key=~/$regexp/) { - my $ctype=$1; - if (defined($cnt{$ctype})) { - $cnt{$ctype}++; - } else { - $cnt{$ctype}=1; - } - unless ($listitems{$ctype} eq 'app') { - if (defined($sum{$ctype})) { - $sum{$ctype}+=$evaldata{$_}; - } else { - $sum{$ctype}=$evaldata{$_}; - } - } else { - if (defined($sum{$ctype})) { - if ($evaldata{$_}) { - $sum{$ctype}.='
'.$evaldata{$_}; - } - } else { - $sum{$ctype}=''.$evaldata{$_}; - } + +# Get metadata except counts + if (tie(my %evaldata,'GDBM_File', + $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { + my %sum=(); + my %cnt=(); + my %concat=(); + my %listitems=( + 'course' => 'add', + 'goto' => 'add', + 'comefrom' => 'add', + 'avetries' => 'avg', + 'stdno' => 'add', + 'difficulty' => 'avg', + 'clear' => 'avg', + 'technical' => 'avg', + 'helpful' => 'avg', + 'correct' => 'avg', + 'depth' => 'avg', + 'comments' => 'app', + 'usage' => 'cnt' + ); + + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; + while (my ($esckey,$value)=each %evaldata) { + my $key=&unescape($esckey); + if ($key=~/$regexp/) { + my ($item,$purl,$cat)=split(/___/,$key); + if (defined($cnt{$cat})) { $cnt{$cat}++; } else { $cnt{$cat}=1; } + unless ($listitems{$cat} eq 'app') { + if (defined($sum{$cat})) { + $sum{$cat}+=$evaldata{$esckey}; + $concat{$cat}.=','.$item; + } else { + $sum{$cat}=$evaldata{$esckey}; + $concat{$cat}=$item; + } + } else { + if (defined($sum{$cat})) { + if ($evaldata{$esckey}=~/\w/) { + $sum{$cat}.='
'.$evaldata{$esckey}; + } + } else { + $sum{$cat}=''.$evaldata{$esckey}; + } + } + } + } + untie(%evaldata); +# transfer gathered data to returnhash, calculate averages where applicable + while (my $cat=each(%cnt)) { + if ($listitems{$cat} eq 'avg') { + $returnhash{$cat}=int(($sum{$cat}/$cnt{$cat})*100.0+0.5)/100.0; + } elsif ($listitems{$cat} eq 'cnt') { + $returnhash{$cat}=$cnt{$cat}; + } else { + $returnhash{$cat}=$sum{$cat}; } - if ($ctype ne 'count') { - $newevaldata{$_}=$evaldata{$_}; - } - } - } - foreach (keys %cnt) { - if ($listitems{$_} eq 'avg') { - $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; - } elsif ($listitems{$_} eq 'cnt') { - $returnhash{$_}=$cnt{$_}; - } else { - $returnhash{$_}=$sum{$_}; - } - } - if ($returnhash{'count'}) { - my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey}=$returnhash{'count'}; - } - untie(%evaldata); - untie(%newevaldata); - } - return %returnhash; + $returnhash{$cat.'_list'}=$concat{$cat}; + } + } +# get count + if (tie(my %evaldata,'GDBM_File', + $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { + my $escurl=&escape($url); + if (! exists($evaldata{$escurl})) { + $returnhash{'count'}=0; + } else { + $returnhash{'count'}=$evaldata{$escurl}; + } + untie %evaldata; + } + return %returnhash; } -# ----------------- Code to enable 'find' subroutine listing of the .meta files -require "find.pl"; -sub wanted { - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f _ && - /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,"$dir/$_"); -} - # --------------- Read loncapa_apache.conf and loncapa.conf and get variables my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); my %perlvar=%{$perlvarref}; @@ -222,8 +224,8 @@ exit unless $perlvar{'lonRole'} eq 'libr my $wwwid=getpwnam('www'); if ($wwwid!=$<) { - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ mailto $emailto -s '$subj' > /dev/null"); exit 1; @@ -247,19 +249,34 @@ my $dbh; exit; } - my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (". +# Make temporary table + $dbh->do("DROP TABLE IF EXISTS newmetadata"); + my $make_metadata_table = "CREATE TABLE IF NOT EXISTS newmetadata (". "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ". - "copyright TEXT, FULLTEXT idx_title (title), ". + "copyright TEXT, ". + "count INTEGER UNSIGNED, ". + "course INTEGER UNSIGNED, course_list TEXT, ". + "goto INTEGER UNSIGNED, goto_list TEXT, ". + "comefrom INTEGER UNSIGNED, comefrom_list TEXT, ". + "fusage INTEGER UNSIGNED, fusage_list TEXT, ". + "stdno INTEGER UNSIGNED, stdno_list TEXT, ". + "avetries FLOAT, avetries_list TEXT, ". + "difficulty FLOAT, difficulty_list TEXT, ". + "FULLTEXT idx_title (title), ". "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ". "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ". "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ". "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ". "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ". - "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM"; + "FULLTEXT idx_copyright (copyright)) ". + "TYPE=MyISAM"; # It would sure be nice to have some logging mechanism. - $dbh->do($make_metadata_table); + unless ($dbh->do($make_metadata_table)) { + print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; + die $dbh->errstr; + } } # ------------------------------------------------------------- get .meta files @@ -271,15 +288,13 @@ closedir RESOURCES; # # Create the statement handlers we need -my $delete_sth = $dbh->prepare - ("DELETE FROM metadata WHERE url LIKE BINARY ?"); my $insert_sth = $dbh->prepare - ("INSERT INTO metadata VALUES (". + ("INSERT INTO newmetadata VALUES (". "?,". # title "?,". # author "?,". # subject - "?,". # m2??? + "?,". # declutter url "?,". # version "?,". # current "?,". # notes @@ -289,14 +304,28 @@ my $insert_sth = $dbh->prepare "?,". # creationdate "?,". # revisiondate "?,". # owner - "?)" # copyright + "?,". # copyright + "?,". # count + "?,". # course + "?,". # course_list + "?,". # goto + "?,". # goto_list + "?,". # comefrom + "?,". # comefrom_list + "?,". # usage + "?,". # usage_list + "?,". # stdno + "?,". # stdno_list + "?,". # avetries + "?,". # avetries_list + "?,". # difficulty + "?," # difficulty_list ); foreach my $user (@homeusers) { print LOG "\n=== User: ".$user."\n\n"; - # Remove left-over db-files from potentially crashed searchcat run + my $prodir=&propath($perlvar{'lonDefDomain'},$user); - unlink($prodir.'/nohist_new_resevaldata.db'); # Use find.pl undef @metalist; @metalist=(); @@ -307,14 +336,14 @@ foreach my $user (@homeusers) { foreach my $m (@metalist) { print LOG "- ".$m."\n"; my $ref=&metadata($m); - if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; } - if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; } my $m2='/res/'.&declutter($m); $m2=~s/\.meta$//; -# &dynamicmeta($m2); + if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; } + if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; } + my %dyn=&dynamicmeta($m2); &count($m2); - $delete_sth->execute($m2); - $insert_sth->execute($ref->{'title'}, + unless ($insert_sth->execute( + $ref->{'title'}, $ref->{'author'}, $ref->{'subject'}, $m2, @@ -327,43 +356,39 @@ foreach my $user (@homeusers) { sqltime($ref->{'creationdate'}), sqltime($ref->{'lastrevisiondate'}), $ref->{'owner'}, - $ref->{'copyright'}); -# if ($dbh->err()) { -# print STDERR "Error:".$dbh->errstr()."\n"; -# } + $ref->{'copyright'}, + $dyn{'count'}, + $dyn{'course'}, + $dyn{'course_list'}, + $dyn{'goto'}, + $dyn{'goto_list'}, + $dyn{'comefrom'}, + $dyn{'comefrom_list'}, + $dyn{'usage'}, + $dyn{'usage_list'}, + $dyn{'stdno'}, + $dyn{'stdno_list'}, + $dyn{'avetries'}, + $dyn{'avetries_list'}, + $dyn{'difficulty'}, + $dyn{'difficulty_list'} + )) { + print LOG "\nMySQL Error Insert: ".$dbh->errstr."\n"; + die $dbh->errstr; + } $ref = undef; } - - # --------------------------------------------------- Clean up database - # Need to, perhaps, remove stale SQL database records. - # ... not yet implemented - - # ------------------------------------------- Copy over the new db-files - # - # Check the size of nohist_new_resevaldata.db compared to - # nohist_resevaldata.db -# my @stat_result = stat($prodir.'/nohist_new_resevaldata.db'); -# my $new_size = $stat_result[7]; -# @stat_result = stat($prodir.'/nohist_resevaldata.db'); -# my $old_size = $stat_result[7]; -# if ($old_size) { -# if ($new_size/$old_size > 0.15 ) { -# system('mv '.$prodir.'/nohist_new_resevaldata.db '. -# $prodir.'/nohist_resevaldata.db'); -# } else { -# print LOG "Size of '$user' old nohist_reseval: $old_size ". -# "Size of new: $new_size. Not overwriting.\n"; -# my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; -# my $subj="LON: $perlvar{'lonHostID'} searchcat.pl $user reseval ". -# "modification error."; -# system("echo ". -# "'See /home/httpd/perl/logs/searchcat.txt for information.' ". -# "| mailto $emailto -s '$subj' > /dev/null"); -# } -# } } # --------------------------------------------------- Close database connection -$dbh->disconnect; +$dbh->do("DROP TABLE IF EXISTS metadata"); +unless ($dbh->do("RENAME TABLE newmetadata TO metadata")) { + print LOG "\nMySQL Error Rename: ".$dbh->errstr."\n"; + die $dbh->errstr; +} +unless ($dbh->disconnect) { + print LOG "\nMySQL Error Disconnect: ".$dbh->errstr."\n"; + die $dbh->errstr; +} print LOG "\n==== Searchcat completed ".localtime()." ====\n"; close(LOG); &writesimple(); @@ -378,7 +403,7 @@ exit 0; # significantly altered from subroutine present in lonnet sub metadata { my ($uri,$what)=@_; - my %metacache; + my %metacache=(); $uri=&declutter($uri); my $filename=$uri; $uri=~s/\.meta$//; @@ -492,3 +517,13 @@ sub unsqltime { return $timestamp; } +# ----------------- Code to enable 'find' subroutine listing of the .meta files + +no strict "vars"; + +sub wanted { + (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && + -f _ && + /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && + push(@metalist,"$dir/$_"); +}