File:  [LON-CAPA] / loncom / metadata_database / searchcat.pl
Revision 1.15: download - view: text, annotated - select for diffs
Tue Apr 17 13:36:07 2001 UTC (23 years, 1 month ago) by harris41
Branches: MAIN
CVS tags: stable_2002_spring, stable_2001_fall, HEAD
updating change information at beginning as well as
only running if machine is a library server -Scott

    1: #!/usr/bin/perl
    2: # The LearningOnline Network
    3: # searchcat.pl "Search Catalog" batch script
    4: 
    5: # 04/14/2001, 04/16/2001 Scott Harrison
    6: 
    7: # This script goes through a LON-CAPA resource
    8: # directory and gathers metadata.
    9: # The metadata is entered into a SQL database.
   10: 
   11: use IO::File;
   12: use HTML::TokeParser;
   13: use DBI;
   14: 
   15: my @metalist;
   16: # ----------------- Code to enable 'find' subroutine listing of the .meta files
   17: require "find.pl";
   18: sub wanted {
   19:     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
   20:     -f _ &&
   21:     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
   22:     push(@metalist,"$dir/$_");
   23: }
   24: 
   25: # ------------------------------------ Read httpd access.conf and get variables
   26: open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";
   27: 
   28: while ($configline=<CONFIG>) {
   29:     if ($configline =~ /PerlSetVar/) {
   30: 	my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
   31:         chomp($varvalue);
   32:         $perlvar{$varname}=$varvalue;
   33:     }
   34: }
   35: close(CONFIG);
   36: 
   37: # ------------------------------------- Only run if machine is a library server
   38: exit unless $perlvar{'lonRole'} eq 'library';
   39: 
   40: my $dbh;
   41: # ------------------------------------- Make sure that database can be accessed
   42: {
   43:     unless (
   44: 	    $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
   45: 	    ) { 
   46: 	print "Cannot connect to database!\n";
   47: 	exit;
   48:     }
   49: }
   50: 
   51: # ------------------------------------------------------------- get .meta files
   52: opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
   53: my @homeusers=grep
   54:           {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}
   55:           grep {!/^\.\.?$/} readdir(RESOURCES);
   56: closedir RESOURCES;
   57: foreach my $user (@homeusers) {
   58:     &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
   59: }
   60: 
   61: # -- process each file to get metadata and put into search catalog SQL database
   62: # Also, check to see if already there.
   63: # I could just delete (without searching first), but this works for now.
   64: foreach my $m (@metalist) {
   65:     my $ref=&metadata($m);
   66:     my $m2='/res/'.&declutter($m);
   67:     $m2=~s/\.meta$//;
   68:     my $q2="select * from metadata where url like binary '$m2'";
   69:     my $sth = $dbh->prepare($q2);
   70:     $sth->execute();
   71:     my $r1=$sth->fetchall_arrayref;
   72:     if (@$r1) {
   73: 	$sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
   74:         $sth->execute();
   75:     }
   76:     $sth=$dbh->prepare('insert into metadata values ('.
   77: 			  '"'.delete($ref->{'title'}).'"'.','.
   78: 			  '"'.delete($ref->{'author'}).'"'.','.
   79: 			  '"'.delete($ref->{'subject'}).'"'.','.
   80: 			  '"'.$m2.'"'.','.
   81: 			  '"'.delete($ref->{'keywords'}).'"'.','.
   82: 			  '"'.'current'.'"'.','.
   83: 			  '"'.delete($ref->{'notes'}).'"'.','.
   84: 			  '"'.delete($ref->{'abstract'}).'"'.','.
   85: 			  '"'.delete($ref->{'mime'}).'"'.','.
   86: 			  '"'.delete($ref->{'language'}).'"'.','.
   87: 			  '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
   88: 			  '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
   89: 			  '"'.delete($ref->{'owner'}).'"'.','.
   90: 			  '"'.delete($ref->{'copyright'}).'"'.')');
   91:     $sth->execute();
   92: }
   93: 
   94: # ----------------------------------------------------------- Clean up database
   95: # Need to, perhaps, remove stale SQL database records.
   96: # ... not yet implemented
   97: 
   98: # --------------------------------------------------- Close database connection
   99: $dbh->disconnect;
  100: 
  101: # ---------------------------------------------------------------- Get metadata
  102: # significantly altered from subroutine present in lonnet
  103: sub metadata {
  104:     my ($uri,$what)=@_;
  105:     my %metacache;
  106:     $uri=&declutter($uri);
  107:     my $filename=$uri;
  108:     $uri=~s/\.meta$//;
  109:     $uri='';
  110:     unless ($metacache{$uri.'keys'}) {
  111:         unless ($filename=~/\.meta$/) { $filename.='.meta'; }
  112: 	my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
  113:         my $parser=HTML::TokeParser->new(\$metastring);
  114:         my $token;
  115:         while ($token=$parser->get_token) {
  116:            if ($token->[0] eq 'S') {
  117: 	      my $entry=$token->[1];
  118:               my $unikey=$entry;
  119:               if (defined($token->[2]->{'part'})) { 
  120:                  $unikey.='_'.$token->[2]->{'part'}; 
  121: 	      }
  122:               if (defined($token->[2]->{'name'})) { 
  123:                  $unikey.='_'.$token->[2]->{'name'}; 
  124: 	      }
  125:               if ($metacache{$uri.'keys'}) {
  126:                  $metacache{$uri.'keys'}.=','.$unikey;
  127:               } else {
  128:                  $metacache{$uri.'keys'}=$unikey;
  129: 	      }
  130:               map {
  131: 		  $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
  132:               } @{$token->[3]};
  133:               unless (
  134:                  $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
  135: 		      ) { $metacache{$uri.''.$unikey}=
  136: 			      $metacache{$uri.''.$unikey.'.default'};
  137: 		      }
  138:           }
  139:        }
  140:     }
  141:     return \%metacache;
  142: }
  143: 
  144: # ------------------------------------------------------------ Serves up a file
  145: # returns either the contents of the file or a -1
  146: sub getfile {
  147:   my $file=shift;
  148:   if (! -e $file ) { return -1; };
  149:   my $fh=IO::File->new($file);
  150:   my $a='';
  151:   while (<$fh>) { $a .=$_; }
  152:   return $a
  153: }
  154: 
  155: # ------------------------------------------------------------- Declutters URLs
  156: sub declutter {
  157:     my $thisfn=shift;
  158:     $thisfn=~s/^$perlvar{'lonDocRoot'}//;
  159:     $thisfn=~s/^\///;
  160:     $thisfn=~s/^res\///;
  161:     return $thisfn;
  162: }
  163: 
  164: # --------------------------------------- Is this the home server of an author?
  165: # (copied from lond, modification of the return value)
  166: sub ishome {
  167:     my $author=shift;
  168:     $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
  169:     my ($udom,$uname)=split(/\//,$author);
  170:     my $proname=propath($udom,$uname);
  171:     if (-e $proname) {
  172: 	return 1;
  173:     } else {
  174:         return 0;
  175:     }
  176: }
  177: 
  178: # -------------------------------------------- Return path to profile directory
  179: # (copied from lond)
  180: sub propath {
  181:     my ($udom,$uname)=@_;
  182:     $udom=~s/\W//g;
  183:     $uname=~s/\W//g;
  184:     my $subdir=$uname.'__';
  185:     $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
  186:     my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
  187:     return $proname;
  188: } 
  189: 
  190: # ---------------------------- convert 'time' format into a datetime sql format
  191: sub sqltime {
  192:     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
  193: 	localtime(@_[0]);
  194:     $mon++; $year+=1900;
  195:     return "$year-$mon-$mday $hour:$min:$sec";
  196: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>