#!/usr/bin/perl # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # 04/14/2001 Scott Harrison # This script goes through a LON-CAPA resource # directory and gathers metadata. # The metadata is entered into a SQL database. use strict; use IO::File; use HTML::TokeParser; my @metalist; # ----------------- Code to enable 'find' subroutine listing of the .meta files require "find.pl"; sub wanted { (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && -f _ && /^.*\.meta$/ && push(@metalist,"$dir/$_"); } # ------------------------------------ Read httpd access.conf and get variables open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; while ($configline=) { if ($configline =~ /PerlSetVar/) { my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); chomp($varvalue); $perlvar{$varname}=$varvalue; } } close(CONFIG); # ------------------------------------- Make sure that database can be accessed { my $dbh; unless ( $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) ) { print "Cannot connect to database!\n"; exit; } } # ------------------------------------------------------------- get .meta files # need to actually loop over existing users here.. will fix soon &find("$perlvar{'lonDocRoot'}/res"); # -- process each file to get metadata and put into search catalog SQL database foreach my $m (@metalist) { my $ref=&metadata($m); my $sth=$dbh->prepare('insert into metadata values ('. delete($ref->{'title'}), delete($ref->{'author'}).','. delete($ref->{'subject'}).','. delete($ref->{'url'}).','. delete($ref->{'keywords'}).','. delete($ref->{'version'}).','. delete($ref->{'notes'}).','. delete($ref->{'abstract'}).','. delete($ref->{'mime'}).','. delete($ref->{'language'}).','. delete($ref->{'creationdate'}).','. delete($ref->{'lastrevisiondate'}).','. delete($ref->{'owner'}).','. delete($ref->{'copyright'}). ')'; $sth->execute(); } # ----------------------------------------------------------- Clean up database # Need to, perhaps, remove stale SQL database records. # ... not yet implemented # --------------------------------------------------- Close database connection $dbh->disconnect; # ---------------------------------------------------------------- Get metadata # significantly altered from subroutine present in lonnet sub metadata { my ($uri,$what)=@_; my %metacache; $uri=&declutter($uri); my $filename=$uri; $uri=~s/\.meta$//; $uri=''; unless ($metacache{$uri.'keys'}) { unless ($filename=~/\.meta$/) { $filename.='.meta'; } my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); my $parser=HTML::TokeParser->new(\$metastring); my $token; while ($token=$parser->get_token) { if ($token->[0] eq 'S') { my $entry=$token->[1]; my $unikey=$entry; if (defined($token->[2]->{'part'})) { $unikey.='_'.$token->[2]->{'part'}; } if (defined($token->[2]->{'name'})) { $unikey.='_'.$token->[2]->{'name'}; } if ($metacache{$uri.'keys'}) { $metacache{$uri.'keys'}.=','.$unikey; } else { $metacache{$uri.'keys'}=$unikey; } map { $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; } @{$token->[3]}; unless ( $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry) ) { $metacache{$uri.''.$unikey}= $metacache{$uri.''.$unikey.'.default'}; } } } } return \%metacache; } # ------------------------------------------------------------ Serves up a file # returns either the contents of the file or a -1 sub getfile { my $file=shift; if (! -e $file ) { return -1; }; my $fh=IO::File->new($file); my $a=''; while (<$fh>) { $a .=$_; } return $a } # ------------------------------------------------------------- Declutters URLs sub declutter { my $thisfn=shift; $thisfn=~s/^$perlvar{'lonDocRoot'}//; $thisfn=~s/^\///; $thisfn=~s/^res\///; return $thisfn; }