--- loncom/metadata_database/searchcat.pl 2003/01/04 19:23:31 1.27 +++ loncom/metadata_database/searchcat.pl 2003/06/19 20:24:57 1.34 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.27 2003/01/04 19:23:31 www Exp $ +# $Id: searchcat.pl,v 1.34 2003/06/19 20:24:57 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -26,17 +26,44 @@ # # http://www.lon-capa.org/ # -# YEAR=2001 -# 04/14/2001, 04/16/2001 Scott Harrison -# -# YEAR=2002 -# 05/11/2002 Scott Harrison -# ### -# This script goes through a LON-CAPA resource -# directory and gathers metadata. -# The metadata is entered into a SQL database. +=pod + +=head1 NAME + +B<searchcat.pl> - put authoritative filesystem data into sql database. + +=head1 SYNOPSIS + +Ordinarily this script is to be called from a loncapa cron job +(CVS source location: F<loncom/metadata_database/searchcat.pl>; typical +filesystem installation location: F</home/httpd/perl/searchcat.pl>). + +Here is the cron job entry. + +C<# Repopulate and refresh the metadata database used for the search catalog.> +C<10 1 * * 7 www /home/httpd/perl/searchcat.pl> + +This script only allows itself to be run as the user C<www>. + +=head1 DESCRIPTION + +This script goes through a loncapa resource directory and gathers metadata. +The metadata is entered into a SQL database. + +This script also does general database maintenance such as reformatting +the C<metadata> table if it is deprecated. + +This script evaluates dynamic metadata from the authors' +F<nohist_resevaldata.db> database file in order to store it in MySQL, as +well as to compress the filesize (add up all "count"-type metadata). + +This script is playing an increasingly important role for a loncapa +library server. The proper operation of this script is critical for a smooth +and correct user experience. 
+ +=cut use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; @@ -77,82 +104,82 @@ sub dynamicmeta { my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); my $prodir=&propath($adomain,$aauthor); if ((tie(%evaldata,'GDBM_File', - $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) && + $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) && (tie(%newevaldata,'GDBM_File', - $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { - my %sum=(); - my %cnt=(); - my %listitems=('count' => 'add', - 'course' => 'add', - 'avetries' => 'avg', - 'stdno' => 'add', - 'difficulty' => 'avg', - 'clear' => 'avg', - 'technical' => 'avg', - 'helpful' => 'avg', - 'correct' => 'avg', - 'depth' => 'avg', - 'comments' => 'app', - 'usage' => 'cnt' - ); - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - foreach (keys %evaldata) { - my $key=&unescape($_); - if ($key=~/$regexp/) { - my $ctype=$1; + $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { + my %sum=(); + my %cnt=(); + my %listitems=('count' => 'add', + 'course' => 'add', + 'avetries' => 'avg', + 'stdno' => 'add', + 'difficulty' => 'avg', + 'clear' => 'avg', + 'technical' => 'avg', + 'helpful' => 'avg', + 'correct' => 'avg', + 'depth' => 'avg', + 'comments' => 'app', + 'usage' => 'cnt' + ); + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; + study($regexp); + while (my ($key,$value) = each(%evaldata)) { + $key=&unescape($key); + next if ($key !~ /$regexp/); + my $ctype=$1; if (defined($cnt{$ctype})) { - $cnt{$ctype}++; + $cnt{$ctype}++; } else { - $cnt{$ctype}=1; + $cnt{$ctype}=1; } unless ($listitems{$ctype} eq 'app') { - if (defined($sum{$ctype})) { - $sum{$ctype}+=$evaldata{$_}; - } else { - $sum{$ctype}=$evaldata{$_}; - } + if (defined($sum{$ctype})) { + $sum{$ctype}+=$value; + } else { + $sum{$ctype}=$value; + } } else { - if (defined($sum{$ctype})) { - if ($evaldata{$_}) { - $sum{$ctype}.='
'.$evaldata{$_}; - } - } else { - $sum{$ctype}=''.$evaldata{$_}; - } - } - if ($ctype ne 'count') { - $newevaldata{$_}=$evaldata{$_}; - } - } - } - foreach (keys %cnt) { - if ($listitems{$_} eq 'avg') { - $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; - } elsif ($listitems{$_} eq 'cnt') { - $returnhash{$_}=$cnt{$_}; - } else { - $returnhash{$_}=$sum{$_}; - } - } - if ($returnhash{'count'}) { - my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey}=$returnhash{'count'}; - } - untie(%evaldata); - untie(%newevaldata); - } - return %returnhash; + if (defined($sum{$ctype})) { + if ($value) { + $sum{$ctype}.='
'.$value; + } + } else { + $sum{$ctype}=''.$value; + } + } + if ($ctype ne 'count') { + $newevaldata{$_}=$value; + } + } + while (my($key,$value) = each(%cnt)) { + if ($listitems{$key} eq 'avg') { + $returnhash{$key}=int(($sum{$key}/$value)*100.0+0.5)/100.0; + } elsif ($listitems{$key} eq 'cnt') { + $returnhash{$key}=$value; + } else { + $returnhash{$key}=$sum{$key}; + } + } + if ($returnhash{'count'}) { + my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; + $newevaldata{$newkey}=$returnhash{'count'}; + } + untie(%evaldata); + untie(%newevaldata); + } + return %returnhash; } - + # ----------------- Code to enable 'find' subroutine listing of the .meta files require "find.pl"; sub wanted { (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f _ && - /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,"$dir/$_"); + -f _ && + /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && + push(@metalist,"$dir/$_"); } # --------------- Read loncapa_apache.conf and loncapa.conf and get variables @@ -168,11 +195,11 @@ exit unless $perlvar{'lonRole'} eq 'libr my $wwwid=getpwnam('www'); if ($wwwid!=$<) { - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; - system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ + $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. searchcat.pl must be run as user www.' 
|\ mailto $emailto -s '$subj' > /dev/null"); - exit 1; + exit 1; } @@ -206,69 +233,89 @@ my $dbh; # ------------------------------------------------------------- get .meta files opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); -my @homeusers=grep - {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")} - grep {!/^\.\.?$/} readdir(RESOURCES); +my @homeusers = grep { + &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_") + } grep {!/^\.\.?$/} readdir(RESOURCES); closedir RESOURCES; + +# +# Create the statement handlers we need +my $delete_sth = $dbh->prepare + ("DELETE FROM metadata WHERE url LIKE BINARY ?"); + +my $insert_sth = $dbh->prepare + ("INSERT INTO metadata VALUES (". + "?,". # title + "?,". # author + "?,". # subject + "?,". # m2??? + "?,". # version + "?,". # current + "?,". # notes + "?,". # abstract + "?,". # mime + "?,". # language + "?,". # creationdate + "?,". # revisiondate + "?,". # owner + "?)" # copyright + ); + foreach my $user (@homeusers) { print LOG "\n=== User: ".$user."\n\n"; -# Remove left-over db-files from potentially crashed searchcat run + # Remove left-over db-files from potentially crashed searchcat run my $prodir=&propath($perlvar{'lonDefDomain'},$user); unlink($prodir.'/nohist_new_resevaldata.db'); -# Use find.pl + # Use find.pl undef @metalist; @metalist=(); &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); - -# -- process each file to get metadata and put into search catalog SQL database -# Also, check to see if already there. -# I could just delete (without searching first), but this works for now. 
-foreach my $m (@metalist) { - print LOG "- ".$m."\n"; - my $ref=&metadata($m); - my $m2='/res/'.&declutter($m); - $m2=~s/\.meta$//; - &dynamicmeta($m2); - my $q2="select * from metadata where url like binary '$m2'"; - my $sth = $dbh->prepare($q2); - $sth->execute(); - my $r1=$sth->fetchall_arrayref; - if (@$r1) { - $sth=$dbh->prepare("delete from metadata where url like binary '$m2'"); - $sth->execute(); + # -- process each file to get metadata and put into search catalog SQL + # database. Also, check to see if already there. + # I could just delete (without searching first), but this works for now. + foreach my $m (@metalist) { + print LOG "- ".$m."\n"; + my $ref=&metadata($m); + my $m2='/res/'.&declutter($m); + $m2=~s/\.meta$//; + &dynamicmeta($m2); + $delete_sth->execute($m2); + $insert_sth->execute($ref->{'title'}, + $ref->{'author'}, + $ref->{'subject'}, + $m2, + $ref->{'keywords'}, + 'current', + $ref->{'notes'}, + $ref->{'abstract'}, + $ref->{'mime'}, + $ref->{'language'}, + sqltime($ref->{'creationdate'}), + sqltime($ref->{'lastrevisiondate'}), + $ref->{'owner'}, + $ref->{'copyright'}); +# if ($dbh->err()) { +# print STDERR "Error:".$dbh->errstr()."\n"; +# } + $ref = undef; } - $sth=$dbh->prepare('insert into metadata values ('. - '"'.delete($ref->{'title'}).'"'.','. - '"'.delete($ref->{'author'}).'"'.','. - '"'.delete($ref->{'subject'}).'"'.','. - '"'.$m2.'"'.','. - '"'.delete($ref->{'keywords'}).'"'.','. - '"'.'current'.'"'.','. - '"'.delete($ref->{'notes'}).'"'.','. - '"'.delete($ref->{'abstract'}).'"'.','. - '"'.delete($ref->{'mime'}).'"'.','. - '"'.delete($ref->{'language'}).'"'.','. - '"'.sqltime(delete($ref->{'creationdate'})).'"'.','. - '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','. - '"'.delete($ref->{'owner'}).'"'.','. - '"'.delete($ref->{'copyright'}).'"'.')'); - $sth->execute(); -} - -# ----------------------------------------------------------- Clean up database -# Need to, perhaps, remove stale SQL database records. -# ... 
not yet implemented - - -# -------------------------------------------------- Copy over the new db-files + + # --------------------------------------------------- Clean up database + # Need to, perhaps, remove stale SQL database records. + # ... not yet implemented + + # ------------------------------------------- Copy over the new db-files system('mv '.$prodir.'/nohist_new_resevaldata.db '. - $prodir.'/nohist_resevaldata.db'); + $prodir.'/nohist_resevaldata.db'); } # --------------------------------------------------- Close database connection $dbh->disconnect; print LOG "\n==== Searchcat completed ".localtime()." ====\n"; close(LOG); exit 0; + + + # ============================================================================= # ---------------------------------------------------------------- Get metadata @@ -286,30 +333,30 @@ sub metadata { my $parser=HTML::TokeParser->new(\$metastring); my $token; while ($token=$parser->get_token) { - if ($token->[0] eq 'S') { - my $entry=$token->[1]; - my $unikey=$entry; - if (defined($token->[2]->{'part'})) { - $unikey.='_'.$token->[2]->{'part'}; - } - if (defined($token->[2]->{'name'})) { - $unikey.='_'.$token->[2]->{'name'}; - } - if ($metacache{$uri.'keys'}) { - $metacache{$uri.'keys'}.=','.$unikey; - } else { - $metacache{$uri.'keys'}=$unikey; - } - map { - $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; - } @{$token->[3]}; - unless ( - $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry) - ) { $metacache{$uri.''.$unikey}= - $metacache{$uri.''.$unikey.'.default'}; - } - } - } + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + my $unikey=$entry; + if (defined($token->[2]->{'part'})) { + $unikey.='_'.$token->[2]->{'part'}; + } + if (defined($token->[2]->{'name'})) { + $unikey.='_'.$token->[2]->{'name'}; + } + if ($metacache{$uri.'keys'}) { + $metacache{$uri.'keys'}.=','.$unikey; + } else { + $metacache{$uri.'keys'}=$unikey; + } + map { + $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; + } @{$token->[3]}; 
+ unless ( + $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry) + ) { $metacache{$uri.''.$unikey}= + $metacache{$uri.''.$unikey.'.default'}; + } + } + } } return \%metacache; } @@ -317,12 +364,12 @@ sub metadata { # ------------------------------------------------------------ Serves up a file # returns either the contents of the file or a -1 sub getfile { - my $file=shift; - if (! -e $file ) { return -1; }; - my $fh=IO::File->new($file); - my $a=''; - while (<$fh>) { $a .=$_; } - return $a + my $file=shift; + if (! -e $file ) { return -1; }; + my $fh=IO::File->new($file); + my $a=''; + while (<$fh>) { $a .=$_; } + return $a; } # ------------------------------------------------------------- Declutters URLs @@ -370,9 +417,9 @@ sub sqltime { sub maketime { my %th=@_; - return POSIX::mktime( - ($th{'seconds'},$th{'minutes'},$th{'hours'}, - $th{'day'},$th{'month'}-1,$th{'year'}-1900,0,0,$th{'dlsav'})); + return POSIX::mktime(($th{'seconds'},$th{'minutes'},$th{'hours'}, + $th{'day'},$th{'month'}-1, + $th{'year'}-1900,0,0,$th{'dlsav'})); } @@ -383,9 +430,8 @@ sub maketime { sub unsqltime { my $timestamp=shift; if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) { - $timestamp=&maketime( - 'year'=>$1,'month'=>$2,'day'=>$3, - 'hours'=>$4,'minutes'=>$5,'seconds'=>$6); + $timestamp=&maketime('year'=>$1,'month'=>$2,'day'=>$3, + 'hours'=>$4,'minutes'=>$5,'seconds'=>$6); } return $timestamp; }