loncom/metadata_database/searchcat.pl - diff

Return to searchcat.pl CVS log

Up to [LON-CAPA] / loncom / metadata_database

Diff for /loncom/metadata_database/searchcat.pl between versions 1.19 and 1.29

-version 1.19, 2002/07/01 18:23:00
+version 1.29, 2003/02/03 13:42:16
  Line 32
  # YEAR=2002
  # 05/11/2002 Scott Harrison
  #
+ # YEAR=2003
+ # Scott Harrison
+ #
  ###
- # This script goes through a LON-CAPA resource
+ =pod
- # directory and gathers metadata.
- # The metadata is entered into a SQL database.
+ =head1 NAME
+ B<searchcat.pl> - put authoritative filesystem data into sql database.
+ =head1 SYNOPSIS
+ Ordinarily this script is to be called from a loncapa cron job
+ (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
+ filesystem installation location: F</etc/cron.d/loncapa>).
+ Here is the cron job entry.
+ C<# Repopulate and refresh the metadata database used for the search catalog.>
+ C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
+ This script only allows itself to be run as the user C<www>.
+ =head1 DESCRIPTION
+ This script goes through a loncapa resource directory and gathers metadata.
+ The metadata is entered into a SQL database.
+ This script also does general database maintenance such as reformatting
+ the C<loncapa:metadata> table if it is deprecated.
+ This script also builds dynamic temporal metadata and stores this inside
+ a F<nohist_resevaldata.db> database file.
+ This script is playing an increasingly important role for a loncapa
+ library server.  The proper operation of this script is critical for a smooth
+ and correct user experience.
+ =cut
+ # ========================================================== Setting things up.
+ # ------------------------------------------------------  Use external modules.
  use lib '/home/httpd/lib/perl/';
  use LONCAPA::Configuration;
- Line 44  use LONCAPA::Configuration;
+ Line 84  use LONCAPA::Configuration;
  use IO::File;
  use HTML::TokeParser;
  use DBI;
+ use GDBM_File;
+ use POSIX qw(strftime mktime);
- my @metalist;
  # ----------------- Code to enable 'find' subroutine listing of the .meta files
- require "find.pl";
+ use File::Find;
- sub wanted {
-     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
-     -f _ &&
-     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
-     push(@metalist,"$dir/$_");
- }
- # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables
+ # List of .meta files (used on a per-user basis).
- my $perlvarref=LONCAPA::Configuration::read_conf('loncapa_apache.conf',
+ my @metalist;
-                                                  'loncapa.conf');
- my %perlvar=%{$perlvarref};
- undef $perlvarref; # remove since sensitive and not needed
- delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
- # ------------------------------------- Only run if machine is a library server
+ # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
- exit unless $perlvar{'lonRole'} eq 'library';
+ my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
+ my %perlvar = %{$perlvarref};
+ undef($perlvarref); # Remove since sensitive and not needed.
+ delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.
- my $dbh;
+ # ------------------------------------- Only run if machine is a library server
- # ------------------------------------- Make sure that database can be accessed
+ if ($perlvar{'lonRole'} ne 'library')
- {
+   {
-     unless (
+     exit(0);
- 	    $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
+   }
- 	    ) {
- 	print "Cannot connect to database!\n";
+ # ------------------------------ Make sure this process is running as user=www.
- 	exit;
+ my $wwwid = getpwnam('www');
-     }
+ if ($wwwid != $<)
-     my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
+   {
-         "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
+     $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
-         "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
+     $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
-         "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ".
+     system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
-         "copyright TEXT, FULLTEXT idx_title (title), ".
+ 	   "mailto $emailto -s '$subj' > /dev/null");
-         "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ".
+     exit(1);
-         "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ".
+   }
-         "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ".
-         "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ".
+ # ------------------------------------------------------ Initialize log output.
-         "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ".
+ open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
-         "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";
+ print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");
-     # It would sure be nice to have some logging mechanism.
+ my $dbh; # Database object reference handle.
+ # ----------------------------- Verify connection to loncapa:metadata database.
+ unless (
+ 	$dbh = DBI->connect('DBI:mysql:loncapa','www',
+ 			    $perlvar{'lonSqlAccess'},
+ 			    { RaiseError => 0,PrintError => 0})
+ 	)
+   {
+     print(LOG '**** ERROR **** Cannot connect to database!'."\n");
+     exit(0);
+   }
+ # ------------------------------ Create loncapa:metadata table if non-existent.
+ my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
+     'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
+     'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
+     'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
+     'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
+     'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
+     'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
+     'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
+     'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
+     'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
+     'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';
+ $dbh->do($make_metadata_table); # Generate the table.
+ # ----------------------------- Verify format of the loncapa:metadata database.
+ #                               (delete and recreate database if necessary).
+ # Make a positive control for verifying table structure.
+ my $make_metadata_table_CONTROL = $make_metadata_table;
+ $make_metadata_table_CONTROL =~
+     s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
+ $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
+ $dbh->do($make_metadata_table_CONTROL);
+ my $table_description; # selectall reference to the table description.
+ my $CONTROL_table_string; # What the table description should look like.
+ my $table_string; # What the table description does look like.
+ # Calculate the CONTROL table description (what it should be).
+ $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
+ foreach my $table_row (@{$table_description})
+   {
+     $CONTROL_table_string .= join(',',@{$table_row})."\n";
+   }
+ # Calculate the current table description (what it currently looks like).
+ $table_description = $dbh->selectall_arrayref('describe metadata');
+ foreach my $table_row (@{$table_description})
+   {
+     $table_string .= join(',',@{$table_row})."\n";
+   }
+ if ($table_string ne $CONTROL_table_string)
+   {
+     # Log this incident.
+     print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
+ 	  '.'."\n");
+     # Delete the table.
+     $dbh->do('DROP TABLE IF EXISTS metadata');
+     # Generate the table.
      $dbh->do($make_metadata_table);
- }
+   }
+ $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
+ # ----------------------------------------------- Set utilitysemaphore to zero.
+ $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
+ # ========================================================= Main functionality.
- # ------------------------------------------------------------- get .meta files
+ # - Determine home authors on this server based on resources dir and user tree.
- opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
- my @homeusers=grep
+ # RESOURCES: the resources directory (subdirs correspond to author usernames).
-           {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}
+ opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
-           grep {!/^\.\.?$/} readdir(RESOURCES);
+     (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
- closedir RESOURCES;
+      and exit(0));
- foreach my $user (@homeusers) {
-     &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
+ # query_home_server_status will look for user home directories on this machine.
- }
+ my @homeusers =
+     grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
- # -- process each file to get metadata and put into search catalog SQL database
+ 				    $perlvar{'lonDefDomain'}.'/'.$_)
- # Also, check to see if already there.
+ 	  } grep {!/^\.\.?$/} readdir(RESOURCES);
- # I could just delete (without searching first), but this works for now.
+ closedir(RESOURCES);
- foreach my $m (@metalist) {
-     my $ref=&metadata($m);
+ unless (@homeusers)
-     my $m2='/res/'.&declutter($m);
+   {
-     $m2=~s/\.meta$//;
+     print(LOG '=== No home users found on this server.'."\n");
-     my $q2="select * from metadata where url like binary '$m2'";
+   }
-     my $sth = $dbh->prepare($q2);
-     $sth->execute();
+ # Consider each author individually.
-     my $r1=$sth->fetchall_arrayref;
+ foreach my $user (@homeusers)
-     if (@$r1) {
+   {
- 	$sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
+     # Make a log entry.
-         $sth->execute();
+     print(LOG "\n".'=== User: '.$user."\n\n");
-     }
-     $sth=$dbh->prepare('insert into metadata values ('.
+     # Get filesystem path to this user's directory.
- 			  '"'.delete($ref->{'title'}).'"'.','.
+     my $user_directory =
- 			  '"'.delete($ref->{'author'}).'"'.','.
+ 	&construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
- 			  '"'.delete($ref->{'subject'}).'"'.','.
- 			  '"'.$m2.'"'.','.
+     # Remove left-over db-files from a potentially crashed searchcat run.
- 			  '"'.delete($ref->{'keywords'}).'"'.','.
+     unlink($user_directory.'/nohist_new_resevaldata.db');
- 			  '"'.'current'.'"'.','.
- 			  '"'.delete($ref->{'notes'}).'"'.','.
+     # Cleanup the metalist array.
- 			  '"'.delete($ref->{'abstract'}).'"'.','.
+     undef(@metalist);
- 			  '"'.delete($ref->{'mime'}).'"'.','.
+     @metalist = ();
- 			  '"'.delete($ref->{'language'}).'"'.','.
- 			  '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
+     # This will add entries to the @metalist array.
- 			  '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
+     &File::Find::find(\&wanted,
- 			  '"'.delete($ref->{'owner'}).'"'.','.
+ 		      $perlvar{'lonDocRoot'}.'/res/'.
- 			  '"'.delete($ref->{'copyright'}).'"'.')');
+ 		      $perlvar{'lonDefDomain'}.'/'.$user);
-     $sth->execute();
- }
+     # -- process file to get metadata and put into search catalog SQL database
+     # Also, build and store dynamic metadata.
- # ----------------------------------------------------------- Clean up database
+     # Also, delete record entries before refreshing.
- # Need to, perhaps, remove stale SQL database records.
+     foreach my $m (@metalist)
- # ... not yet implemented
+       {
+ 	# Log this action.
+ 	print(LOG "- ".$m."\n");
+ 	# Get metadata from the file.
+ 	my $ref = get_metadata_from_file($m);
+ 	# Make a datarecord identifier for this resource.
+ 	my $m2 = '/res/'.declutter($m);
+ 	$m2 =~ s/\.meta$//;
+ 	# Build and store dynamic metadata inside nohist_resevaldata.db.
+ 	build_on_the_fly_dynamic_metadata($m2);
+ 	# Delete record if it already exists.
+ 	my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
+ 	my $sth = $dbh->prepare($q2);
+ 	$sth->execute();
+ 	my $r1 = $sth->fetchall_arrayref;
+ 	if (@$r1)
+ 	  {
+ 	    $sth =
+ 		$dbh->prepare('delete from metadata where url like binary '.
+ 			      "'".$m2."'");
+ 	    $sth->execute();
+ 	  }
+ 	# Add new/replacement record into the loncapa:metadata table.
+ 	$sth = $dbh->prepare('insert into metadata values ('.
+ 			     '"'.delete($ref->{'title'}).'"'.','.
+ 			     '"'.delete($ref->{'author'}).'"'.','.
+ 			     '"'.delete($ref->{'subject'}).'"'.','.
+ 			     '"'.$m2.'"'.','.
+ 			     '"'.delete($ref->{'keywords'}).'"'.','.
+ 			     '"'.'current'.'"'.','.
+ 			     '"'.delete($ref->{'notes'}).'"'.','.
+ 			     '"'.delete($ref->{'abstract'}).'"'.','.
+ 			     '"'.delete($ref->{'mime'}).'"'.','.
+ 			     '"'.delete($ref->{'language'}).'"'.','.
+ 			     '"'.sql_formatted_time(
+ 				       delete($ref->{'creationdate'})).'"'.','.
+ 			     '"'.sql_formatted_time(
+ 				   delete($ref->{'lastrevisiondate'})).'"'.','.
+ 			     '"'.delete($ref->{'owner'}).'"'.','.
+ 			     '"'.delete($ref->{'copyright'}).'"'.','.
+ 			     '1'.')');
+ 	$sth->execute();
+       }
+ # ----------------------- Clean up database, remove stale SQL database records.
+     $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
+ # -------------------------------------------------- Copy over the new db-files
+     system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
+ 	         $user_directory.'/nohist_resevaldata.db');
+   }
  # --------------------------------------------------- Close database connection
  $dbh->disconnect;
+ print LOG "\n==== Searchcat completed ".localtime()." ====\n";
+ close(LOG);
+ exit(0);
+ # ================================================================ Subroutines.
+ =pod
+ =head1 SUBROUTINES
+ =cut
+ =pod
+ B<unescape> - translate to unstrange escaped syntax to strange characters.
+ =over 4
+ Parameters:
+ =item I<$str> - string with unweird characters.
+ =back
+ =over 4
+ Returns:
+ =item C<string> - string with potentially weird characters.
+ =back
+ =cut
+ sub unescape ($)
+   {
+     my $str = shift(@_);
+     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
+     return($str);
+   }
+ =pod
+ B<escape> - translate strange characters to unstrange escaped syntax.
+ =over 4
+ Parameters:
+ =item I<$str> - string with potentially weird characters to unweird-ify.
+ =back
+ =over 4
+ Returns:
+ =item C<string> - unweird-ified string.
+ =back
+ =cut
+ sub escape ($)
+   {
+     my $str = shift(@_);
+     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
+     return($str);
+   }
+ =pod
+ B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
+ Dynamic metadata is stored in a nohist_resevaldata GDBM database.
+ The only thing that this subroutine really makes happen is adjusting
+ a 'count' value inside the F<nohist_new_resevaldata.db> as well
+ as updating F<nohist_new_resevaldata.db> with information from
+ F<nohist_resevaldata.db>.
- # ---------------------------------------------------------------- Get metadata
+ It may need optmization, but since it gets called once a week. . .
- # significantly altered from subroutine present in lonnet
+ =over 4
- sub metadata {
-     my ($uri,$what)=@_;
+ Parameters:
-     my %metacache;
-     $uri=&declutter($uri);
+ =item I<$url> - the filesystem path (url may be a misnomer...)
-     my $filename=$uri;
-     $uri=~s/\.meta$//;
+ =back
-     $uri='';
-     unless ($metacache{$uri.'keys'}) {
+ =over 4
-         unless ($filename=~/\.meta$/) { $filename.='.meta'; }
- 	my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
+ Returns:
-         my $parser=HTML::TokeParser->new(\$metastring);
-         my $token;
+ =item C<hash> - key-value table of dynamically evaluated metadata.
-         while ($token=$parser->get_token) {
-            if ($token->[0] eq 'S') {
+ =back
- 	      my $entry=$token->[1];
-               my $unikey=$entry;
+ =cut
-               if (defined($token->[2]->{'part'})) {
-                  $unikey.='_'.$token->[2]->{'part'};
+ sub build_on_the_fly_dynamic_metadata ($)
- 	      }
+   {
-               if (defined($token->[2]->{'name'})) {
+     # some elements in here maybe non-obvious
-                  $unikey.='_'.$token->[2]->{'name'};
- 	      }
+     # Need to compute the user's directory.
-               if ($metacache{$uri.'keys'}) {
+     my $url = &declutter(shift(@_));
-                  $metacache{$uri.'keys'}.=','.$unikey;
+     $url =~ s/\.meta$//;
-               } else {
+     my %returnhash = ();
-                  $metacache{$uri.'keys'}=$unikey;
+     my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
- 	      }
+     my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
-               map {
- 		  $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
+     # Attempt a GDBM database instantiation inside users directory and proceed.
-               } @{$token->[3]};
+     if ((tie(%evaldata,'GDBM_File',
-               unless (
+             $user_directory.
-                  $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
+ 	     '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
- 		      ) { $metacache{$uri.''.$unikey}=
+         (tie(%newevaldata,'GDBM_File',
- 			      $metacache{$uri.''.$unikey.'.default'};
+ 	     $user_directory.
+ 	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
+       {
+ 	# For different variables, track the running sum and counts.
+ 	my %sum = ();
+ 	my %cnt = ();
+ 	# Define computed items as a sum (add) or an average (avg) or a raw
+ 	# count (cnt) or 'app'?
+ 	my %listitems=('count'        => 'add',
+ 		       'course'       => 'add',
+ 		       'avetries'     => 'avg',
+ 		       'stdno'        => 'add',
+ 		       'difficulty'   => 'avg',
+ 		       'clear'        => 'avg',
+ 		       'technical'    => 'avg',
+ 		       'helpful'      => 'avg',
+ 		       'correct'      => 'avg',
+ 		       'depth'        => 'avg',
+ 		       'comments'     => 'app',
+ 		       'usage'        => 'cnt'
+ 		       );
+ 	# Untaint the url and use as part of a regular expression.
+ 	my $regexp = $url;
+ 	$regexp =~ s/(\W)/\\$1/g;
+ 	$regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs
+ 	# Check existing nohist database for this url.
+         # this is modfying the 'count' entries
+         # and copying all othe entries over
+ 	foreach (keys %evaldata)
+ 	  {
+ 	    my $key = &unescape($_);
+ 	    if ($key =~ /$regexp/) # If url-based entry exists.
+ 	      {
+ 		my $ctype = $1; # Set to specific category type.
+ 		# Do an increment for this category type.
+ 		if (defined($cnt{$ctype}))
+ 		  {
+ 		    $cnt{$ctype}++;
+ 		  }
+ 		else
+ 		  {
+ 		    $cnt{$ctype} = 1;
+ 		  }
+                 unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
+ 		  {
+ 		    # Increment the sum based on the evaluated data in the db.
+ 		    if (defined($sum{$ctype}))
+ 		      {
+ 			$sum{$ctype} += $evaldata{$_};
  		      }
-           }
+ 		    else
-        }
+ 		      {
-     }
+ 			$sum{$ctype} = $evaldata{$_};
-     return \%metacache;
+ 		      }
- }
+  		  }
+ 		else # 'app' mode, means to use '<hr />' as a separator
- # ------------------------------------------------------------ Serves up a file
+ 		  {
- # returns either the contents of the file or a -1
+ 		    if (defined($sum{$ctype}))
- sub getfile {
+ 		      {
-   my $file=shift;
+ 			if ($evaldata{$_})
-   if (! -e $file ) { return -1; };
+ 			  {
-   my $fh=IO::File->new($file);
+ 			    $sum{$ctype} .= '<hr />'.$evaldata{$_};
-   my $a='';
+ 			  }
-   while (<$fh>) { $a .=$_; }
+ 		      }
-   return $a
+ 		    else
- }
+ 		      {
+ 			$sum{$ctype} = ''.$evaldata{$_};
- # ------------------------------------------------------------- Declutters URLs
+ 		      }
- sub declutter {
+ 		  }
-     my $thisfn=shift;
+ 		if ($ctype ne 'count')
-     $thisfn=~s/^$perlvar{'lonDocRoot'}//;
+ 		  {
-     $thisfn=~s/^\///;
+                     # this is copying all data except 'count' attributes
-     $thisfn=~s/^res\///;
+ 		    $newevaldata{$_} = $evaldata{$_};
-     return $thisfn;
+ 		  }
- }
+ 	      }
+ 	  }
- # --------------------------------------- Is this the home server of an author?
- # (copied from lond, modification of the return value)
+         # the only other time this loop is useful is for the 'count' hash
- sub ishome {
+         # element
-     my $author=shift;
+ 	foreach (keys %cnt)
-     $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
+ 	  {
-     my ($udom,$uname)=split(/\//,$author);
+ 	    if ($listitems{$_} eq 'avg')
-     my $proname=propath($udom,$uname);
+ 	      {
-     if (-e $proname) {
+ 		$returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
- 	return 1;
+ 	      }
-     } else {
+ 	    elsif ($listitems{$_} eq 'cnt')
-         return 0;
+ 	      {
-     }
+ 		$returnhash{$_} = $cnt{$_};
- }
+ 	      }
+ 	    else
- # -------------------------------------------- Return path to profile directory
+ 	      {
- # (copied from lond)
+ 		$returnhash{$_} = $sum{$_};
- sub propath {
+ 	      }
-     my ($udom,$uname)=@_;
+ 	  }
-     $udom=~s/\W//g;
-     $uname=~s/\W//g;
+         # seems to be doing something useful
-     my $subdir=$uname.'__';
+ 	if ($returnhash{'count'})
-     $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
+ 	  {
-     my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
+ 	    my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
-     return $proname;
+ 	    $newevaldata{$newkey} = $returnhash{'count'};
- }
+ 	  }
+ 	untie(%evaldata); # Close/release the original nohist database.
+ 	untie(%newevaldata); # Close/release the new nohist database.
+       }
+     return(%returnhash);
+   }
+ =pod
+ B<wanted> - used by B<File::Find::find> subroutine.
+ This evaluates whether a file is wanted, and pushes it onto the
+ I<@metalist> array.  This subroutine was, for the most part, auto-generated
+ by the B<find2perl> command.
+ =over 4
+ Parameters:
+ =item I<$file> - a path to the file.
+ =back
+ =over 4
+ Returns:
+ =item C<boolean> - true or false based on logical statement.
+ =back
+ =cut
+ sub wanted ($)
+   {
+     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
+     -f $_ &&
+     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
+     push(@metalist,$File::Find::dir.'/'.$_);
+   }
+ =pod
+ B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
+ I<Note that this is significantly altered from a subroutine present in lonnet.>
+ =over 4
+ Parameters:
+ =item I<$file> - a path.to the file.
+ =back
+ =over 4
+ Returns:
+ =item C<hash reference> - a hash array (keys and values).
+ =back
+ =cut
+ sub get_metadata_from_file ($)
+   {
+     my ($filename) = @_;
+     my %metatable; # Used to store return value of hash-tabled metadata.
+     $filename = &declutter($filename); # Remove non-identifying filesystem info
+     my $uri = ''; # The URI is not relevant in this scenario.
+     unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
+       {
+ 	$filename .= '.meta'; # Append a .meta suffix.
+       }
+     # Get the file contents.
+     my $metadata_string =
+ 	&get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
+     # Parse the file based on its XML tags.
+     my $parser = HTML::TokeParser->new(\$metadata_string);
+     my $token;
+     while ($token = $parser->get_token) # Loop through tokens.
+       {
+ 	if ($token->[0] eq 'S') # If it is a start token.
+ 	  {
+ 	    my $entry = $token->[1];
+ 	    my $unikey = $entry; # A unique identifier for this xml tag key.
+ 	    if (defined($token->[2]->{'part'}))
+ 	      {
+ 		$unikey .= '_'.$token->[2]->{'part'};
+ 	      }
+ 	    if (defined($token->[2]->{'name'}))
+ 	      {
+ 		$unikey .= '_'.$token->[2]->{'name'};
+ 	      }
+ 	    # Append $unikey to metatable's keys entry.
+ 	    if ($metatable{$uri.'keys'})
+ 	      {
+ 		$metatable{$uri.'keys'} .= ','.$unikey;
+ 	      }
+ 	    else
+ 	      {
+ 		$metatable{$uri.'keys'} = $unikey;
+ 	      }
+ 	    # Insert contents into metatable entry for the unikey.
+ 	    foreach my $t3 (@{$token->[3]})
+ 	      {
+ 		$metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};
+ 	      }
+ 	    # If there was no text contained inside the tags, set = default.
+ 	    unless
+ 	      (
+ 	        $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
+ 	      )
+ 	      {
+ 		$metatable{$uri.''.$unikey} =
+ 		    $metatable{$uri.''.$unikey.'.default'};
+ 	      }
+ 	  }
+       }
+     # Return with a key-value table of XML tags and their tag contents.
+     return(\%metatable);
+   }
+ =pod
+ B<get_file_contents> - returns either the contents of the file or a -1.
+ =over 4
+ Parameters:
+ =item I<$file> - a complete filesystem path.to the file.
+ =back
+ =over 4
+ Returns:
+ =item C<string> - file contents or a -1.
+ =back
+ =cut
+ sub get_file_contents ($)
+   {
+     my $file = shift(@_);
+     # If file does not exist, then return a -1 value.
+     unless (-e $file)
+       {
+ 	return(-1);
+       }
+     # Read in file contents.
+     my $file_handle = IO::File->new($file);
+     my $file_contents = '';
+     while (<$file_handle>)
+       {
+ 	$file_contents .= $_;
+       }
+     # Return file contents.
+     return($file_contents);
+   }
+ =pod
+ B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
+ =over 4
+ Parameters:
+ =item I<$filesystem_path> - a complete filesystem path.
+ =back
+ =over 4
+ Returns:
+ =item C<string> - remnants of the filesystem path (beginning portion removed).
+ =back
+ =cut
+ sub declutter
+   {
+     my $filesystem_path = shift(@_);
+     # Remove beginning portions of the filesystem path.
+     $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
+     $filesystem_path =~ s!^/!!;
+     $filesystem_path =~ s!^res/!!;
+     # Return what is remaining for the filesystem path.
+     return($filesystem_path);
+   }
+ =pod
+ B<query_home_server_status> - Is this the home server of an author's directory?
+ =over 4
+ Parameters:
+ =item I<$author_filesystem_path> - directory path for a user.
+ =back
+ =over 4
+ Returns:
+ =item C<boolean> - 1 if true; 0 if false.
+ =back
+ =cut
+ sub query_home_server_status ($)
+   {
+     my $author_filesystem_path = shift(@_);
+     # Remove beginning portion of this filesystem path.
+     $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
+     # Construct path to the author's ordinary user directory.
+     my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
+     my $user_directory_path = construct_path_to_user_directory($user_domain,
+ 							       $username);
+     # Return status of whether the user directory path is defined.
+     if (-e $user_directory_path)
+       {
+ 	return(1); # True.
+       }
+     else
+       {
+         return(0); # False.
+       }
+   }
+ =pod
+ B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
+ =over 4
+ Parameters:
+ =item I<$user_domain> - the loncapa domain of the user.
+ =item I<$username> - the unique username (user id) of the user.
+ =back
+ =over 4
+ Returns:
+ =item C<string> - representing the path on the filesystem.
- # ---------------------------- convert 'time' format into a datetime sql format
+ =back
- sub sqltime {
+ =cut
+ sub construct_path_to_user_directory ($$)
+   {
+     my ($user_domain,$username) = @_;
+     # Untaint.
+     $user_domain =~ s/\W//g;
+     $username =~ s/\W//g;
+     # Create three levels of sub-directoried filesystem path
+     # based on the first three characters of the username.
+     my $sub_filesystem_path = $username.'__';
+     $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
+     # Use the sub-directoried levels and other variables to generate
+     # the complete filesystem path.
+     my $complete_filesystem_path =
+ 	join('/',($perlvar{'lonUsersDir'},
+ 		  $user_domain,
+ 		  $sub_filesystem_path,
+ 		  $username));
+     # Return the complete filesystem path.
+     return($complete_filesystem_path);
+   }
+ =pod
+ B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
+ =over 4
+ Parameters:
+ =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
+ =back
+ =over 4
+ Returns:
+ =item C<string> - datetime sql formatted string.
+ =back
+ =cut
+ sub sql_formatted_time ($)
+   {
+     # Sanitize the time argument and convert to localtime array.
      my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
- 	localtime(@_[0]);
+ 	localtime(&sanitize_time(shift(@_)));
-     $mon++; $year+=1900;
-     return "$year-$mon-$mday $hour:$min:$sec";
+     # Convert month from (0..11) to (1..12).
- }
+     $mon += 1;
+     # Make the year compatible with A.D. specification.
+     $year += 1900;
+     # Return a date which is compatible with MySQL's "DATETIME" format.
+     return(join('-',($year,$mon,$mday)).
+ 	   ' '.
+ 	   join(':',($hour,$min,$sec))
+ 	   );
+   }
+ # ==================================== The following two subroutines are needed
+ #                 for accommodating incorrect time formats inside the metadata.
+ =pod
+ B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
+ =over 4
+ Parameters:
+ =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
+ =back
+ =over 4
+ Returns:
+ =item C<integer> - seconds since epoch.
+ =back
+ =cut
+ sub make_seconds_since_epoch (@)
+   {
+     # Keytable of time metadata.
+     my %time_metadata = @_;
+     # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
+     return(POSIX::mktime(
+ 			 ($time_metadata{'seconds'},
+ 			  $time_metadata{'minutes'},
+ 			  $time_metadata{'hours'},
+ 			  $time_metadata{'day'},
+ 			  $time_metadata{'month'}-1,
+ 			  $time_metadata{'year'}-1900,
+,
+,
+ 			  $time_metadata{'dlsav'})
+ 			 )
+ 	   );
+   }
+ =pod
+ B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
+ Somebody described this subroutine as
+ "retro-fixing of un-backward-compatible time format".
+ What this means, is that a part of this code expects to get UTC seconds
+ since the epoch (beginning of 1970).  Yet, some of the .meta files have
+ sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
+ integers (e.g. 1044147435).  These time strings do not encode the timezone
+ and, in this sense, can be considered "un-backwards-compatible".
+ =over 4
+ Parameters:
+ =item I<$potentially_badformat_string> - string to "retro-fix".
+ =back
+ =over 4
+ Returns:
+ =item C<integer> - seconds since epoch.
+ =back
+ =cut
+ sub sanitize_time ($)
+   {
+     my $timestamp = shift(@_);
+     # If timestamp is in this unexpected format....
+     if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
+       {
+ 	# then convert into seconds since epoch (the expected format).
+ 	$timestamp = &make_seconds_since_epoch(
+ 					       'year' => $1,
+ 					       'month' => $2,
+ 					       'day' => $3,
+ 					       'hours' => $4,
+ 					       'minutes' => $5,
+ 					       'seconds' => $6
+ 					       );
+       }
+     # Otherwise we assume timestamp to be as expected.
+     return($timestamp);
+   }
+ =pod
+ =head1 AUTHOR
+ Written to help the loncapa project.
+ Scott Harrison, sharrison@users.sourceforge.net
+ This is distributed under the same terms as loncapa (i.e. "freeware").
+ =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.19
changed lines
	Added in v.1.29