#!/usr/bin/perl # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # # $Id: searchcat.pl,v 1.17 2002/05/11 21:28:20 harris41 Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # # YEAR=2001 # 04/14/2001, 04/16/2001 Scott Harrison # # YEAR=2002 # 05/11/2002 Scott Harrison # ### # This script goes through a LON-CAPA resource # directory and gathers metadata. # The metadata is entered into a SQL database. use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; use IO::File; use HTML::TokeParser; use DBI; my @metalist; # ----------------- Code to enable 'find' subroutine listing of the .meta files require "find.pl"; sub wanted { (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && -f _ && /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && push(@metalist,"$dir/$_"); } # ------------------------- Read access.conf and loncapa.conf and get variables my $perlvarref=LONCAPA::Configuration::read_conf('access.conf','loncapa.conf'); my %perlvar=%{$perlvarref}; undef $perlvarref; # remove since sensitive and not needed delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed # ------------------------------------- Only run if machine is a library server exit unless $perlvar{'lonRole'} eq 'library'; my $dbh; # ------------------------------------- Make sure that database can be accessed { unless ( $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) ) { print "Cannot connect to database!\n"; exit; } } # ------------------------------------------------------------- get .meta files opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); my @homeusers=grep {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")} grep {!/^\.\.?$/} readdir(RESOURCES); closedir RESOURCES; foreach my $user (@homeusers) { &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); } # -- process each file to get metadata and put into search catalog SQL database # Also, check to see if already there. # I could just delete (without searching first), but this works for now. foreach my $m (@metalist) { my $ref=&metadata($m); my $m2='/res/'.&declutter($m); $m2=~s/\.meta$//; my $q2="select * from metadata where url like binary '$m2'"; my $sth = $dbh->prepare($q2); $sth->execute(); my $r1=$sth->fetchall_arrayref; if (@$r1) { $sth=$dbh->prepare("delete from metadata where url like binary '$m2'"); $sth->execute(); } $sth=$dbh->prepare('insert into metadata values ('. '"'.delete($ref->{'title'}).'"'.','. '"'.delete($ref->{'author'}).'"'.','. '"'.delete($ref->{'subject'}).'"'.','. '"'.$m2.'"'.','. '"'.delete($ref->{'keywords'}).'"'.','. '"'.'current'.'"'.','. '"'.delete($ref->{'notes'}).'"'.','. '"'.delete($ref->{'abstract'}).'"'.','. '"'.delete($ref->{'mime'}).'"'.','. '"'.delete($ref->{'language'}).'"'.','. '"'.sqltime(delete($ref->{'creationdate'})).'"'.','. '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','. '"'.delete($ref->{'owner'}).'"'.','. '"'.delete($ref->{'copyright'}).'"'.')'); $sth->execute(); } # ----------------------------------------------------------- Clean up database # Need to, perhaps, remove stale SQL database records. # ... not yet implemented # --------------------------------------------------- Close database connection $dbh->disconnect; # ---------------------------------------------------------------- Get metadata # significantly altered from subroutine present in lonnet sub metadata { my ($uri,$what)=@_; my %metacache; $uri=&declutter($uri); my $filename=$uri; $uri=~s/\.meta$//; $uri=''; unless ($metacache{$uri.'keys'}) { unless ($filename=~/\.meta$/) { $filename.='.meta'; } my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); my $parser=HTML::TokeParser->new(\$metastring); my $token; while ($token=$parser->get_token) { if ($token->[0] eq 'S') { my $entry=$token->[1]; my $unikey=$entry; if (defined($token->[2]->{'part'})) { $unikey.='_'.$token->[2]->{'part'}; } if (defined($token->[2]->{'name'})) { $unikey.='_'.$token->[2]->{'name'}; } if ($metacache{$uri.'keys'}) { $metacache{$uri.'keys'}.=','.$unikey; } else { $metacache{$uri.'keys'}=$unikey; } map { $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; } @{$token->[3]}; unless ( $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry) ) { $metacache{$uri.''.$unikey}= $metacache{$uri.''.$unikey.'.default'}; } } } } return \%metacache; } # ------------------------------------------------------------ Serves up a file # returns either the contents of the file or a -1 sub getfile { my $file=shift; if (! -e $file ) { return -1; }; my $fh=IO::File->new($file); my $a=''; while (<$fh>) { $a .=$_; } return $a } # ------------------------------------------------------------- Declutters URLs sub declutter { my $thisfn=shift; $thisfn=~s/^$perlvar{'lonDocRoot'}//; $thisfn=~s/^\///; $thisfn=~s/^res\///; return $thisfn; } # --------------------------------------- Is this the home server of an author? # (copied from lond, modification of the return value) sub ishome { my $author=shift; $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; my ($udom,$uname)=split(/\//,$author); my $proname=propath($udom,$uname); if (-e $proname) { return 1; } else { return 0; } } # -------------------------------------------- Return path to profile directory # (copied from lond) sub propath { my ($udom,$uname)=@_; $udom=~s/\W//g; $uname=~s/\W//g; my $subdir=$uname.'__'; $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; return $proname; } # ---------------------------- convert 'time' format into a datetime sql format sub sqltime { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(@_[0]); $mon++; $year+=1900; return "$year-$mon-$mday $hour:$min:$sec"; }