File:  [LON-CAPA] / loncom / metadata_database / searchcat.pl
Revision 1.15: download - view: text, annotated - select for diffs
Tue Apr 17 13:36:07 2001 UTC (23 years ago) by harris41
Branches: MAIN
CVS tags: stable_2002_spring, stable_2001_fall, HEAD
updating change information at beginning as well as
only running if machine is a library server -Scott

#!/usr/bin/perl
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script

# 04/14/2001, 04/16/2001 Scott Harrison

# This script goes through a LON-CAPA resource
# directory and gathers metadata.
# The metadata is entered into a SQL database.

use IO::File;
use HTML::TokeParser;
use DBI;

my @metalist;
# ----------------- Code to enable 'find' subroutine listing of the .meta files
require "find.pl";
sub wanted {
    (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
    -f _ &&
    /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
    push(@metalist,"$dir/$_");
}

# ------------------------------------ Read httpd access.conf and get variables
open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";

while ($configline=<CONFIG>) {
    if ($configline =~ /PerlSetVar/) {
	my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
        chomp($varvalue);
        $perlvar{$varname}=$varvalue;
    }
}
close(CONFIG);

# ------------------------------------- Only run if machine is a library server
exit unless $perlvar{'lonRole'} eq 'library';

my $dbh;
# ------------------------------------- Make sure that database can be accessed
{
    unless (
	    $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
	    ) { 
	print "Cannot connect to database!\n";
	exit;
    }
}

# ------------------------------------------------------------- get .meta files
opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
my @homeusers=grep
          {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}
          grep {!/^\.\.?$/} readdir(RESOURCES);
closedir RESOURCES;
foreach my $user (@homeusers) {
    &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
}

# -- process each file to get metadata and put into search catalog SQL database
# Also, check to see if already there.
# I could just delete (without searching first), but this works for now.
foreach my $m (@metalist) {
    my $ref=&metadata($m);
    my $m2='/res/'.&declutter($m);
    $m2=~s/\.meta$//;
    my $q2="select * from metadata where url like binary '$m2'";
    my $sth = $dbh->prepare($q2);
    $sth->execute();
    my $r1=$sth->fetchall_arrayref;
    if (@$r1) {
	$sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
        $sth->execute();
    }
    $sth=$dbh->prepare('insert into metadata values ('.
			  '"'.delete($ref->{'title'}).'"'.','.
			  '"'.delete($ref->{'author'}).'"'.','.
			  '"'.delete($ref->{'subject'}).'"'.','.
			  '"'.$m2.'"'.','.
			  '"'.delete($ref->{'keywords'}).'"'.','.
			  '"'.'current'.'"'.','.
			  '"'.delete($ref->{'notes'}).'"'.','.
			  '"'.delete($ref->{'abstract'}).'"'.','.
			  '"'.delete($ref->{'mime'}).'"'.','.
			  '"'.delete($ref->{'language'}).'"'.','.
			  '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
			  '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
			  '"'.delete($ref->{'owner'}).'"'.','.
			  '"'.delete($ref->{'copyright'}).'"'.')');
    $sth->execute();
}

# ----------------------------------------------------------- Clean up database
# Need to, perhaps, remove stale SQL database records.
# ... not yet implemented

# --------------------------------------------------- Close database connection
$dbh->disconnect;

# ---------------------------------------------------------------- Get metadata
# significantly altered from subroutine present in lonnet
sub metadata {
    my ($uri,$what)=@_;
    my %metacache;
    $uri=&declutter($uri);
    my $filename=$uri;
    $uri=~s/\.meta$//;
    $uri='';
    unless ($metacache{$uri.'keys'}) {
        unless ($filename=~/\.meta$/) { $filename.='.meta'; }
	my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
        my $parser=HTML::TokeParser->new(\$metastring);
        my $token;
        while ($token=$parser->get_token) {
           if ($token->[0] eq 'S') {
	      my $entry=$token->[1];
              my $unikey=$entry;
              if (defined($token->[2]->{'part'})) { 
                 $unikey.='_'.$token->[2]->{'part'}; 
	      }
              if (defined($token->[2]->{'name'})) { 
                 $unikey.='_'.$token->[2]->{'name'}; 
	      }
              if ($metacache{$uri.'keys'}) {
                 $metacache{$uri.'keys'}.=','.$unikey;
              } else {
                 $metacache{$uri.'keys'}=$unikey;
	      }
              map {
		  $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
              } @{$token->[3]};
              unless (
                 $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
		      ) { $metacache{$uri.''.$unikey}=
			      $metacache{$uri.''.$unikey.'.default'};
		      }
          }
       }
    }
    return \%metacache;
}

# ------------------------------------------------------------ Serves up a file
# returns either the contents of the file or a -1
sub getfile {
  my $file=shift;
  if (! -e $file ) { return -1; };
  my $fh=IO::File->new($file);
  my $a='';
  while (<$fh>) { $a .=$_; }
  return $a
}

# ------------------------------------------------------------- Declutters URLs
sub declutter {
    my $thisfn=shift;
    $thisfn=~s/^$perlvar{'lonDocRoot'}//;
    $thisfn=~s/^\///;
    $thisfn=~s/^res\///;
    return $thisfn;
}

# --------------------------------------- Is this the home server of an author?
# (copied from lond, modification of the return value)
sub ishome {
    my $author=shift;
    $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
    my ($udom,$uname)=split(/\//,$author);
    my $proname=propath($udom,$uname);
    if (-e $proname) {
	return 1;
    } else {
        return 0;
    }
}

# -------------------------------------------- Return path to profile directory
# (copied from lond)
sub propath {
    my ($udom,$uname)=@_;
    $udom=~s/\W//g;
    $uname=~s/\W//g;
    my $subdir=$uname.'__';
    $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
    my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
    return $proname;
} 

# ---------------------------- convert 'time' format into a datetime sql format
sub sqltime {
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
	localtime(@_[0]);
    $mon++; $year+=1900;
    return "$year-$mon-$mday $hour:$min:$sec";
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>