#!/usr/bin/perl # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # 04/14/2001 Scott Harrison # This script goes through a LON-CAPA resource # directory and gathers metadata. # The metadata is entered into a SQL database. use IO::File; use HTML::TokeParser; use DBI; my @metalist; # ----------------- Code to enable 'find' subroutine listing of the .meta files require "find.pl"; sub wanted { (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && -f _ && /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && push(@metalist,"$dir/$_"); } # ------------------------------------ Read httpd access.conf and get variables open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; while ($configline=) { if ($configline =~ /PerlSetVar/) { my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); chomp($varvalue); $perlvar{$varname}=$varvalue; } } close(CONFIG); my $dbh; # ------------------------------------- Make sure that database can be accessed { unless ( $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) ) { print "Cannot connect to database!\n"; exit; } } # ------------------------------------------------------------- get .meta files opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); my @homeusers=grep {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")} grep {!/^\.\.?$/} readdir(RESOURCES); closedir RESOURCES; foreach my $user (@homeusers) { &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); } # -- process each file to get metadata and put into search catalog SQL database # Also, check to see if already there. # I could not search first and just delete, but this works for now. foreach my $m (@metalist) { my $ref=&metadata($m); my $m2=&declutter($m); my $q2="select * from metadata where url like '$m2'"; my $sth = $dbh->prepare($q2); $sth->execute(); my $r1=$sth->fetchall_arrayref; if (@$r1) { $sth=$dbh->prepare("delete from metadata where url like '$m2'"); $sth->execute(); } $sth=$dbh->prepare('insert into metadata values ('. '"'.delete($ref->{'title'}).'"'.','. '"'.delete($ref->{'author'}).'"'.','. '"'.delete($ref->{'subject'}).'"'.','. '"'.$m2.'"'.','. '"'.delete($ref->{'keywords'}).'"'.','. '"'.'current'.'"'.','. '"'.delete($ref->{'notes'}).'"'.','. '"'.delete($ref->{'abstract'}).'"'.','. '"'.delete($ref->{'mime'}).'"'.','. '"'.delete($ref->{'language'}).'"'.','. '"'.delete($ref->{'creationdate'}).'"'.','. '"'.delete($ref->{'lastrevisiondate'}).'"'.','. '"'.delete($ref->{'owner'}).'"'.','. '"'.delete($ref->{'copyright'}).'"'.')'); $sth->execute(); } # ----------------------------------------------------------- Clean up database # Need to, perhaps, remove stale SQL database records. # ... not yet implemented # --------------------------------------------------- Close database connection $dbh->disconnect; # ---------------------------------------------------------------- Get metadata # significantly altered from subroutine present in lonnet sub metadata { my ($uri,$what)=@_; my %metacache; $uri=&declutter($uri); my $filename=$uri; $uri=~s/\.meta$//; $uri=''; unless ($metacache{$uri.'keys'}) { unless ($filename=~/\.meta$/) { $filename.='.meta'; } my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); my $parser=HTML::TokeParser->new(\$metastring); my $token; while ($token=$parser->get_token) { if ($token->[0] eq 'S') { my $entry=$token->[1]; my $unikey=$entry; if (defined($token->[2]->{'part'})) { $unikey.='_'.$token->[2]->{'part'}; } if (defined($token->[2]->{'name'})) { $unikey.='_'.$token->[2]->{'name'}; } if ($metacache{$uri.'keys'}) { $metacache{$uri.'keys'}.=','.$unikey; } else { $metacache{$uri.'keys'}=$unikey; } map { $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; } @{$token->[3]}; unless ( $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry) ) { $metacache{$uri.''.$unikey}= $metacache{$uri.''.$unikey.'.default'}; } } } } return \%metacache; } # ------------------------------------------------------------ Serves up a file # returns either the contents of the file or a -1 sub getfile { my $file=shift; if (! -e $file ) { return -1; }; my $fh=IO::File->new($file); my $a=''; while (<$fh>) { $a .=$_; } return $a } # ------------------------------------------------------------- Declutters URLs sub declutter { my $thisfn=shift; $thisfn=~s/^$perlvar{'lonDocRoot'}//; $thisfn=~s/^\///; $thisfn=~s/^res\///; return $thisfn; } # --------------------------------------- Is this the home server of an author? # (copied from lond, modification of the return value) sub ishome { my $author=shift; $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; my ($udom,$uname)=split(/\//,$author); my $proname=propath($udom,$uname); if (-e $proname) { return 1; } else { return 0; } } # -------------------------------------------- Return path to profile directory # (copied from lond) sub propath { my ($udom,$uname)=@_; $udom=~s/\W//g; $uname=~s/\W//g; my $subdir=$uname.'__'; $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; return $proname; }