--- loncom/metadata_database/searchcat.pl 2001/04/14 18:24:54 1.1 +++ loncom/metadata_database/searchcat.pl 2002/10/08 15:09:36 1.21 @@ -1,74 +1,218 @@ #!/usr/bin/perl # The LearningOnline Network # searchcat.pl "Search Catalog" batch script - -# 04/14/2001 Scott Harrison +# +# $Id: searchcat.pl,v 1.21 2002/10/08 15:09:36 www Exp $ +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ +# +# YEAR=2001 +# 04/14/2001, 04/16/2001 Scott Harrison +# +# YEAR=2002 +# 05/11/2002 Scott Harrison +# +### # This script goes through a LON-CAPA resource # directory and gathers metadata. # The metadata is entered into a SQL database. -use strict; +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration; use IO::File; use HTML::TokeParser; +use DBI; +use GDBM_File; my @metalist; + + +# ----------------------------------------------------- Un-Escape Special Chars + +sub unescape { + my $str=shift; + $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; + return $str; +} + + +# ------------------------------------------- Code to evaluate dynamic metadata + +sub dynamicmeta { +# +# +# Do nothing for now ... +# +# + return; +# +# ..., but stuff below already works +# + my $url=&declutter(shift); + $url=~s/\.meta$//; + my %returnhash=(); + my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); + my $prodir=&propath($adomain,$aauthor); + if (tie(%evaldata,'GDBM_File', + $prodir.'/nohist_resevaldata.db',&GDBM_READER,0640)) { + my %sum=(); + my %cnt=(); + my %listitems=('count' => 'add', + 'course' => 'add', + 'avetries' => 'avg', + 'stdno' => 'add', + 'difficulty' => 'avg', + 'clear' => 'avg', + 'technical' => 'avg', + 'helpful' => 'avg', + 'correct' => 'avg', + 'depth' => 'avg', + 'comments' => 'app', + 'usage' => 'cnt' + ); + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; + foreach (keys %evaldata) { + my $key=&unescape($_); + if ($key=~/$regexp/) { + if (defined($cnt{$1})) { $cnt{$1}++; } else { $cnt{$1}=1; } + unless ($listitems{$1} eq 'app') { + if (defined($sum{$1})) { + $sum{$1}+=$evaldata{$_}; + } else { + $sum{$1}=$evaldata{$_}; + } + } else { + if (defined($sum{$1})) { + if ($evaldata{$_}) { + $sum{$1}.='
'.$evaldata{$_}; + } + } else { + $sum{$1}=''.$evaldata{$_}; + } + } + } + foreach (keys %cnt) { + if ($listitems{$_} eq 'avg') { + $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; + } elsif ($listitems{$_} eq 'cnt') { + $returnhash{$_}=$cnt{$_}; + } else { + $returnhash{$_}=$sum{$_}; + } + } + } + untie(%evaldata); + } + return %returnhash; +} + # ----------------- Code to enable 'find' subroutine listing of the .meta files require "find.pl"; sub wanted { (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && -f _ && - /^.*\.meta$/ && + /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && push(@metalist,"$dir/$_"); } -# ------------------------------------ Read httpd access.conf and get variables -open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; +# --------------- Read loncapa_apache.conf and loncapa.conf and get variables +my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); +my %perlvar=%{$perlvarref}; +undef $perlvarref; # remove since sensitive and not needed +delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed -while ($configline=) { - if ($configline =~ /PerlSetVar/) { - my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); - chomp($varvalue); - $perlvar{$varname}=$varvalue; - } -} -close(CONFIG); +# ------------------------------------- Only run if machine is a library server +exit unless $perlvar{'lonRole'} eq 'library'; +my $dbh; # ------------------------------------- Make sure that database can be accessed { - my $dbh; unless ( $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) ) { print "Cannot connect to database!\n"; exit; } + my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (". + "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". + "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". + "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ". + "copyright TEXT, FULLTEXT idx_title (title), ". + "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ". + "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ". + "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ". + "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ". + "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ". + "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM"; + # It would sure be nice to have some logging mechanism. + $dbh->do($make_metadata_table); } # ------------------------------------------------------------- get .meta files -# need to actually loop over existing users here.. will fix soon -&find("$perlvar{'lonDocRoot'}/res"); +opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); +my @homeusers=grep + {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")} + grep {!/^\.\.?$/} readdir(RESOURCES); +closedir RESOURCES; +foreach my $user (@homeusers) { + &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); +} # -- process each file to get metadata and put into search catalog SQL database +# Also, check to see if already there. +# I could just delete (without searching first), but this works for now. foreach my $m (@metalist) { my $ref=&metadata($m); - my $sth=$dbh->prepare('insert into metadata values ('. - delete($ref->{'title'}), - delete($ref->{'author'}).','. - delete($ref->{'subject'}).','. - delete($ref->{'url'}).','. - delete($ref->{'keywords'}).','. - delete($ref->{'version'}).','. - delete($ref->{'notes'}).','. - delete($ref->{'abstract'}).','. - delete($ref->{'mime'}).','. - delete($ref->{'language'}).','. - delete($ref->{'creationdate'}).','. - delete($ref->{'lastrevisiondate'}).','. - delete($ref->{'owner'}).','. - delete($ref->{'copyright'}). - ')'; + my $m2='/res/'.&declutter($m); + $m2=~s/\.meta$//; + &dynamicmeta($m2); + my $q2="select * from metadata where url like binary '$m2'"; + my $sth = $dbh->prepare($q2); + $sth->execute(); + my $r1=$sth->fetchall_arrayref; + if (@$r1) { + $sth=$dbh->prepare("delete from metadata where url like binary '$m2'"); + $sth->execute(); + } + $sth=$dbh->prepare('insert into metadata values ('. + '"'.delete($ref->{'title'}).'"'.','. + '"'.delete($ref->{'author'}).'"'.','. + '"'.delete($ref->{'subject'}).'"'.','. + '"'.$m2.'"'.','. + '"'.delete($ref->{'keywords'}).'"'.','. + '"'.'current'.'"'.','. + '"'.delete($ref->{'notes'}).'"'.','. + '"'.delete($ref->{'abstract'}).'"'.','. + '"'.delete($ref->{'mime'}).'"'.','. + '"'.delete($ref->{'language'}).'"'.','. + '"'.sqltime(delete($ref->{'creationdate'})).'"'.','. + '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','. + '"'.delete($ref->{'owner'}).'"'.','. + '"'.delete($ref->{'copyright'}).'"'.')'); $sth->execute(); } @@ -141,3 +285,37 @@ sub declutter { $thisfn=~s/^res\///; return $thisfn; } + +# --------------------------------------- Is this the home server of an author? +# (copied from lond, modification of the return value) +sub ishome { + my $author=shift; + $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; + my ($udom,$uname)=split(/\//,$author); + my $proname=propath($udom,$uname); + if (-e $proname) { + return 1; + } else { + return 0; + } +} + +# -------------------------------------------- Return path to profile directory +# (copied from lond) +sub propath { + my ($udom,$uname)=@_; + $udom=~s/\W//g; + $uname=~s/\W//g; + my $subdir=$uname.'__'; + $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; + my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; + return $proname; +} + +# ---------------------------- convert 'time' format into a datetime sql format +sub sqltime { + my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = + localtime(@_[0]); + $mon++; $year+=1900; + return "$year-$mon-$mday $hour:$min:$sec"; +}