Diff for /loncom/metadata_database/searchcat.pl between versions 1.1 and 1.28

version 1.1, 2001/04/14 18:24:54 version 1.28, 2003/02/03 05:39:37
 #!/usr/bin/perl
 # The LearningOnline Network
 # searchcat.pl "Search Catalog" batch script
   #
   # $Id$
   #
   # Copyright Michigan State University Board of Trustees
   #
   # This file is part of the LearningOnline Network with a
   # Computer assisted personalized approach (loncapa).
   #
   # Loncapa is free software; you can redistribute it and/or modify
   # it under the terms of the GNU General Public License as published by
   # the Free Software Foundation; either version 2 of the License, or
   # (at your option) any later version.
   #
   # Loncapa is distributed in the hope that it will be useful,
   # but WITHOUT ANY WARRANTY; without even the implied warranty of
   # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   # GNU General Public License for more details.
   #
   # You should have received a copy of the GNU General Public License
   # along with loncapa; if not, write to the Free Software
   # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   #
   # /home/httpd/html/adm/gpl.txt
   #
   # http://www.loncapa.org/
   #
   # YEAR=2001
   # 04/14/2001, 04/16/2001 Scott Harrison
   #
   # YEAR=2002
   # 05/11/2002 Scott Harrison
   #
   # YEAR=2003
   # Scott Harrison
   #
   ###
   
   =pod
   
   =head1 NAME
   
  B<searchcat.pl> - put authoritative filesystem data into the SQL database.
   
   =head1 SYNOPSIS
   
   Ordinarily this script is to be called from a loncapa cron job
   (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
   filesystem installation location: F</etc/cron.d/loncapa>).
   
   Here is the cron job entry.
   
   C<# Repopulate and refresh the metadata database used for the search catalog.>
   
   C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
   
   This script only allows itself to be run as the user C<www>.
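
  For a one-time manual run, the script can also be started directly, provided
  it runs as the C<www> user; assuming the installation path shown in the cron
  entry above, something like the following should work:

  C<su www -c /home/httpd/perl/searchcat.pl>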
   
   =head1 DESCRIPTION
   
   This script goes through a loncapa resource directory and gathers metadata.
   The metadata is entered into a SQL database.
   
  This script also does general database maintenance, such as dropping and
  recreating the C<loncapa:metadata> table if its structure is outdated.
   
   This script also builds dynamic temporal metadata and stores this inside
   a F<nohist_resevaldata.db> database file.
   
   This script is playing an increasingly important role for a loncapa
   library server.  The proper operation of this script is critical for a smooth
   and correct user experience.
   
Removed from v.1.1:

 # 04/14/2001 Scott Harrison

 # This script goes through a LON-CAPA resource
 # directory and gathers metadata.
 # The metadata is entered into a SQL database.

 use strict;

 use IO::File;
 use HTML::TokeParser;

 my @metalist;
 # ----------------- Code to enable 'find' subroutine listing of the .meta files
 require "find.pl";
 sub wanted {
     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
     -f _ &&
     /^.*\.meta$/ &&
     push(@metalist,"$dir/$_");
 }

 # ------------------------------------ Read httpd access.conf and get variables
 open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf";

 while ($configline=<CONFIG>) {
     if ($configline =~ /PerlSetVar/) {
         my ($dummy,$varname,$varvalue)=split(/\s+/,$configline);
         chomp($varvalue);
         $perlvar{$varname}=$varvalue;
     }
 }
 close(CONFIG);

 # ------------------------------------- Make sure that database can be accessed
 {
     my $dbh;
     unless (
         $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
     ) {
         print "Cannot connect to database!\n";
         exit;
     }
 }

 # ------------------------------------------------------------- get .meta files
 # need to actually loop over existing users here.. will fix soon
 &find("$perlvar{'lonDocRoot'}/res");

 # -- process each file to get metadata and put into search catalog SQL database
 foreach my $m (@metalist) {
     my $ref=&metadata($m);
     my $sth=$dbh->prepare('insert into metadata values ('.
         delete($ref->{'title'}),
         delete($ref->{'author'}).','.
         delete($ref->{'subject'}).','.
         delete($ref->{'url'}).','.
         delete($ref->{'keywords'}).','.
         delete($ref->{'version'}).','.
         delete($ref->{'notes'}).','.
         delete($ref->{'abstract'}).','.
         delete($ref->{'mime'}).','.
         delete($ref->{'language'}).','.
         delete($ref->{'creationdate'}).','.
         delete($ref->{'lastrevisiondate'}).','.
         delete($ref->{'owner'}).','.
         delete($ref->{'copyright'}).
         ')';
     $sth->execute();
 }

 # ----------------------------------------------------------- Clean up database
 # Need to, perhaps, remove stale SQL database records.
 # ... not yet implemented

Added in v.1.28:

 =cut

 # ========================================================== Setting things up.

 # ------------------------------------------------------  Use external modules.

 use lib '/home/httpd/lib/perl/';
 use LONCAPA::Configuration;

 use IO::File;
 use HTML::TokeParser;
 use DBI;
 use GDBM_File;
 use POSIX qw(strftime mktime);

 # ----------------- Code to enable 'find' subroutine listing of the .meta files
 use File::Find;

 # List of .meta files (used on a per-user basis).
 my @metalist;

 # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
 my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
 my %perlvar = %{$perlvarref};
 undef($perlvarref); # Remove since sensitive and not needed.
 delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.

 # ------------------------------------- Only run if machine is a library server
 if ($perlvar{'lonRole'} ne 'library')
   {
     exit(0);
   }

 # ------------------------------ Make sure this process is running as user=www.
 my $wwwid = getpwnam('www');
 if ($wwwid != $<)
   {
     $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
     $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
     system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
            "mailto $emailto -s '$subj' > /dev/null");
     exit(1);
   }

 # ------------------------------------------------------ Initialize log output.
 open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
 print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");

 my $dbh; # Database object reference handle.

 # ----------------------------- Verify connection to loncapa:metadata database.
 unless (
         $dbh = DBI->connect('DBI:mysql:loncapa','www',
                             $perlvar{'lonSqlAccess'},
                             { RaiseError => 0,PrintError => 0})
        )
   {
     print(LOG '**** ERROR **** Cannot connect to database!'."\n");
     exit(0);
   }

 # ------------------------------ Create loncapa:metadata table if non-existent.
 my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
     'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
     'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
     'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
     'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
     'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
     'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
     'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
     'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
     'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
     'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';

 $dbh->do($make_metadata_table); # Generate the table.

   # ----------------------------- Verify format of the loncapa:metadata database.
   #                               (delete and recreate database if necessary).
   
   # Make a positive control for verifying table structure.
   my $make_metadata_table_CONTROL = $make_metadata_table;
   $make_metadata_table_CONTROL =~
       s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
   
   $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
   $dbh->do($make_metadata_table_CONTROL);
   
   my $table_description; # selectall reference to the table description.
   
   my $CONTROL_table_string; # What the table description should look like.
   my $table_string; # What the table description does look like.
   
   # Calculate the CONTROL table description (what it should be).
   $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
   foreach my $table_row (@{$table_description})
     {
       $CONTROL_table_string .= join(',',@{$table_row})."\n";
     }
   
   # Calculate the current table description (what it currently looks like).
   $table_description = $dbh->selectall_arrayref('describe metadata');
   foreach my $table_row (@{$table_description})
     {
       $table_string .= join(',',@{$table_row})."\n";
     }
   
   if ($table_string ne $CONTROL_table_string)
     {
       # Log this incident.
       print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
     '.'."\n");
       # Delete the table.
       $dbh->do('DROP TABLE IF EXISTS metadata');
       # Generate the table.
       $dbh->do($make_metadata_table);
     }
   
   $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
   
   # ----------------------------------------------- Set utilitysemaphore to zero.
   $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
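  # (The utilitysemaphore column implements a mark-and-sweep refresh: every row
  #  is first marked 0 here, each record re-inserted below is marked 1, and any
  #  row still at 0 is later deleted as stale.)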
   
   # ========================================================= Main functionality.
   
   # - Determine home authors on this server based on resources dir and user tree.
   
   # RESOURCES: the resources directory (subdirs correspond to author usernames).
   opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
       (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
        and exit(0));
   
   # query_home_server_status will look for user home directories on this machine.
   my @homeusers =
       grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
       $perlvar{'lonDefDomain'}.'/'.$_)
     } grep {!/^\.\.?$/} readdir(RESOURCES);
   closedir(RESOURCES);
   
   unless (@homeusers)
     {
       print(LOG '=== No home users found on this server.'."\n");
     }
   
   # Consider each author individually.
   foreach my $user (@homeusers)
     {
       # Make a log entry.
       print(LOG "\n".'=== User: '.$user."\n\n");
   
       # Get filesystem path to this user's directory.
       my $user_directory =
    &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
   
       # Remove left-over db-files from a potentially crashed searchcat run.
       unlink($user_directory.'/nohist_new_resevaldata.db');
   
       # Cleanup the metalist array.
       undef(@metalist);
       @metalist = ();
   
       # This will add entries to the @metalist array.
       &File::Find::find(\&wanted,
         $perlvar{'lonDocRoot'}.'/res/'.
         $perlvar{'lonDefDomain'}.'/'.$user);
   
       # -- process file to get metadata and put into search catalog SQL database
       # Also, build and store dynamic metadata.
       # Also, delete record entries before refreshing.
       foreach my $m (@metalist)
         {
    # Log this action.
    print(LOG "- ".$m."\n");
   
    # Get metadata from the file.
    my $ref = get_metadata_from_file($m);
   
    # Make a datarecord identifier for this resource.
    my $m2 = '/res/'.declutter($m);
    $m2 =~ s/\.meta$//;
   
    # Build and store dynamic metadata inside nohist_resevaldata.db.
    build_on_the_fly_dynamic_metadata($m2);
   
    # Delete record if it already exists.
    my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
    my $sth = $dbh->prepare($q2);
    $sth->execute();
    my $r1 = $sth->fetchall_arrayref;
    if (@$r1)
     {
       $sth = 
    $dbh->prepare('delete from metadata where url like binary '.
         "'".$m2."'");
       $sth->execute();
     }
   
    # Add new/replacement record into the loncapa:metadata table.
    $sth = $dbh->prepare('insert into metadata values ('.
        '"'.delete($ref->{'title'}).'"'.','.
        '"'.delete($ref->{'author'}).'"'.','.
        '"'.delete($ref->{'subject'}).'"'.','.
        '"'.$m2.'"'.','.
        '"'.delete($ref->{'keywords'}).'"'.','.
        '"'.'current'.'"'.','.
        '"'.delete($ref->{'notes'}).'"'.','.
        '"'.delete($ref->{'abstract'}).'"'.','.
        '"'.delete($ref->{'mime'}).'"'.','.
        '"'.delete($ref->{'language'}).'"'.','.
        '"'.sql_formatted_time(
          delete($ref->{'creationdate'})).'"'.','.
        '"'.sql_formatted_time(
      delete($ref->{'lastrevisiondate'})).'"'.','.
        '"'.delete($ref->{'owner'}).'"'.','.
        '"'.delete($ref->{'copyright'}).'"'.','.
        '1'.')');
    $sth->execute();
         }
   
   # ----------------------- Clean up database, remove stale SQL database records.
       $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
   
   # -------------------------------------------------- Copy over the new db-files
       system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
            $user_directory.'/nohist_resevaldata.db');
     }
   
 # --------------------------------------------------- Close database connection
 $dbh->disconnect;
   print LOG "\n==== Searchcat completed ".localtime()." ====\n";
   close(LOG);
   exit(0);
   
   # ================================================================ Subroutines.
   
   =pod
   
   =head1 SUBROUTINES
   
   =cut
   
   =pod
   
  B<unescape> - translate unstrange escaped syntax back to strange characters.
   
   =over 4
   
   Parameters:
   
   =item I<$str> - string with unweird characters.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - string with potentially weird characters.
   
   =back
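
  Example (illustrative only): C<unescape('mech%201%2emeta')> returns
  C<'mech 1.meta'>; C<escape()> below performs the inverse mapping.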
   
   =cut
   
   sub unescape ($)
     {
       my $str = shift(@_);
       $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
       return($str);
     }
   
Removed from v.1.1:

 # ---------------------------------------------------------------- Get metadata
 # significantly altered from subroutine present in lonnet
 sub metadata {
     my ($uri,$what)=@_;
     my %metacache;
     $uri=&declutter($uri);
     my $filename=$uri;
     $uri=~s/\.meta$//;
     $uri='';
     unless ($metacache{$uri.'keys'}) {
         unless ($filename=~/\.meta$/) { $filename.='.meta'; }
         my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
         my $parser=HTML::TokeParser->new(\$metastring);
         my $token;
         while ($token=$parser->get_token) {
            if ($token->[0] eq 'S') {
               my $entry=$token->[1];
               my $unikey=$entry;
               if (defined($token->[2]->{'part'})) {
                  $unikey.='_'.$token->[2]->{'part'};
               }
               if (defined($token->[2]->{'name'})) {
                  $unikey.='_'.$token->[2]->{'name'};
               }
               if ($metacache{$uri.'keys'}) {
                  $metacache{$uri.'keys'}.=','.$unikey;
               } else {
                  $metacache{$uri.'keys'}=$unikey;
               }
               map {
                  $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
               } @{$token->[3]};
               unless (
                  $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
               ) { $metacache{$uri.''.$unikey}=
                   $metacache{$uri.''.$unikey.'.default'};
               }
            }
         }
     }
     return \%metacache;
 }

 # ------------------------------------------------------------ Serves up a file
 # returns either the contents of the file or a -1
 sub getfile {
   my $file=shift;
   if (! -e $file ) { return -1; };
   my $fh=IO::File->new($file);
   my $a='';
   while (<$fh>) { $a .=$_; }
   return $a
 }

 # ------------------------------------------------------------- Declutters URLs
 sub declutter {
     my $thisfn=shift;
     $thisfn=~s/^$perlvar{'lonDocRoot'}//;
     $thisfn=~s/^\///;
     $thisfn=~s/^res\///;
     return $thisfn;
 }

Added in v.1.28:

 =pod

 B<escape> - translate strange characters to unstrange escaped syntax.

 =over 4

 Parameters:

 =item I<$str> - string with potentially weird characters to unweird-ify.

 =back

 =over 4

 Returns:

 =item C<string> - unweird-ified string.

 =back

 =cut

 sub escape ($)
   {
     my $str = shift(@_);
     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
     return($str);
   }

 =pod

 B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.

 Dynamic metadata is stored in a nohist_resevaldata GDBM database.
 Most of the calculations in this subroutine are totally pointless
 and not useful for anything that this subroutine does.
 (THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.)
 The only thing that this subroutine really makes happen is adjusting
 a 'count' value inside the F<nohist_new_resevaldata.db> as well
 as updating F<nohist_new_resevaldata.db> with information from
 F<nohist_resevaldata.db>.

 =over 4

   Parameters:
   
   =item I<$url> - the filesystem path (url may be a misnomer...)
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<hash> - key-value table of dynamically evaluated metadata.
   
   =back
   
   =cut
   
   sub build_on_the_fly_dynamic_metadata ($)
     {
       # BEWARE ALL WHO TRY TO UNDERSTAND THIS ABSURDLY HORRIBLE SUBROUTINE.
         
       # Do all sorts of mumbo-jumbo to compute the user's directory.
       my $url = &declutter(shift(@_));
       $url =~ s/\.meta$//;
       my %returnhash = ();
       my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
       my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
   
       # Attempt a GDBM database instantiation inside users directory and proceed.
       if ((tie(%evaldata,'GDBM_File',
               $user_directory.
        '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
           (tie(%newevaldata,'GDBM_File',
        $user_directory.
        '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
         {
    # For different variables, track the running sum and counts.
    my %sum = ();
    my %cnt = ();
   
    # Define computed items as a sum (add) or an average (avg) or a raw
    # count (cnt) or 'app'?
    my %listitems=('count'        => 'add',
          'course'       => 'add',
          'avetries'     => 'avg',
          'stdno'        => 'add',
          'difficulty'   => 'avg',
          'clear'        => 'avg',
          'technical'    => 'avg',
          'helpful'      => 'avg',
          'correct'      => 'avg',
          'depth'        => 'avg',
          'comments'     => 'app',
          'usage'        => 'cnt'
          );
   
    # Untaint the url and use as part of a regular expression.
    my $regexp = $url;
    $regexp =~ s/(\W)/\\$1/g;
    $regexp = '___'.$regexp.'___([a-z]+)$';
   
    # Check existing nohist database for this url.
    # THE ONLY TIME THIS IS IMPORTANT FOR THIS AWFUL SUBROUTINE
    # IS FOR 'count' ENTRIES
    # AND FOR REFRESHING non-'count' ENTRIES INSIDE nohist_new DATABASE.
    foreach (keys %evaldata)
     {
       my $key = &unescape($_);
       if ($key =~ /$regexp/) # If url-based entry exists.
         {
    my $ctype = $1; # Set to specific category type.
   
    # Do an increment for this category type.
    if (defined($cnt{$ctype}))
     {
       $cnt{$ctype}++; 
     }
    else
     {
       $cnt{$ctype} = 1; 
     }
                   unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
     {
       # Increment the sum based on the evaluated data in the db.
       if (defined($sum{$ctype}))
         {
    $sum{$ctype} += $evaldata{$_};
         }
       else
         {
    $sum{$ctype} = $evaldata{$_};
         }
      }
    else # 'app' mode, means to use '<hr />' as a separator
     {
       if (defined($sum{$ctype}))
         {
    if ($evaldata{$_})
     {
       $sum{$ctype} .= '<hr />'.$evaldata{$_};
     }
          }
        else
          {
            $sum{$ctype} = ''.$evaldata{$_};
          }
      }
    if ($ctype ne 'count')
      {
        # ALERT!  THIS HORRIBLE LOOP IS ACTUALLY DOING SOMETHING
        # USEFUL!
        $newevaldata{$_} = $evaldata{$_};
      }
      }
    }

 # THE ONLY OTHER TIME THIS LOOP IS USEFUL IS FOR THE 'count' HASH
 # ELEMENT.
 foreach (keys %cnt)
   {
     if ($listitems{$_} eq 'avg')
       {
         $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
       }
     elsif ($listitems{$_} eq 'cnt')
       {
         $returnhash{$_} = $cnt{$_};
       }
     else
       {
         $returnhash{$_} = $sum{$_};
       }
   }
   
    # A RARE MOMENT OF DOING ANYTHING USEFUL INSIDE THIS
    # BLEEPING SUBROUTINE.
    if ($returnhash{'count'})
     {
       my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
       $newevaldata{$newkey} = $returnhash{'count'};
     }
   
    untie(%evaldata); # Close/release the original nohist database.
    untie(%newevaldata); # Close/release the new nohist database.
         }
       return(%returnhash);
       # Celebrate!  We have now accomplished some simple calculations using
       # 1000% bloated functionality in our subroutine.  Go wash your eyeballs
       # out now.
     }
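
  # Illustrative example (hypothetical url 'msu/smith/prob01.problem'): a 'count'
  # entry written by the block above is keyed like
  #   4242_1044147435_searchcat___msu%2fsmith%2fprob01%2eproblem___count
  # (pid_epochtime_searchcat___<escaped url>___count); on a later run, the
  # unescaped key matches $regexp and $1 captures the category type ('count').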
   
   =pod
   
   B<wanted> - used by B<File::Find::find> subroutine.
   
   This evaluates whether a file is wanted, and pushes it onto the
   I<@metalist> array.  This subroutine was, for the most part, auto-generated
   by the B<find2perl> command.
   
   =over 4
   
   Parameters:
   
   =item I<$file> - a path to the file.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<boolean> - true or false based on logical statement.
   
   =back
   
   =cut
   
   sub wanted ($)
     {
       (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
       -f $_ &&
       /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
       push(@metalist,$File::Find::dir.'/'.$_);
     }
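
  # (For example, 'prob01.problem.meta' is collected, while a versioned copy
  #  such as 'prob01.2.problem.meta' is skipped by the second pattern above.)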
   
   =pod
   
   B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
   
   I<Note that this is significantly altered from a subroutine present in lonnet.>
   
   =over 4
   
   Parameters:
   
  =item I<$file> - a path to the file.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<hash reference> - a hash array (keys and values).
   
   =back
   
   =cut
   
   sub get_metadata_from_file ($)
     {
       my ($filename) = @_;
       my %metatable; # Used to store return value of hash-tabled metadata.
       $filename = &declutter($filename); # Remove non-identifying filesystem info
       my $uri = ''; # The URI is not relevant in this scenario.
       unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
         {
    $filename .= '.meta'; # Append a .meta suffix.
         }
       # Get the file contents.
       my $metadata_string =
    &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
   
       # Parse the file based on its XML tags.
       my $parser = HTML::TokeParser->new(\$metadata_string);
       my $token;
       while ($token = $parser->get_token) # Loop through tokens.
         {
    if ($token->[0] eq 'S') # If it is a start token.
     {
       my $entry = $token->[1];
       my $unikey = $entry; # A unique identifier for this xml tag key.
       if (defined($token->[2]->{'part'}))
         { 
    $unikey .= '_'.$token->[2]->{'part'}; 
         }
       if (defined($token->[2]->{'name'}))
         { 
    $unikey .= '_'.$token->[2]->{'name'}; 
         }
       # Append $unikey to metatable's keys entry.
       if ($metatable{$uri.'keys'})
         {
    $metatable{$uri.'keys'} .= ','.$unikey;
         }
       else
         {
    $metatable{$uri.'keys'} = $unikey;
         }
       # Insert contents into metatable entry for the unikey.
       foreach my $t3 (@{$token->[3]})
         {
 $metatable{$uri.''.$unikey.'.'.$t3} = $token->[2]->{$t3};
         }
       # If there was no text contained inside the tags, set = default.
       unless
         (
           $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
         )
         {
    $metatable{$uri.''.$unikey} =
       $metatable{$uri.''.$unikey.'.default'};
         }
     }
         }
       # Return with a key-value table of XML tags and their tag contents.
       return(\%metatable);
     }
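
  # Illustrative example (hypothetical .meta content): for a file containing
  #   <title>Projectile motion</title><author>Smith</author>
  # the returned hash reference holds 'title' => 'Projectile motion',
  # 'author' => 'Smith', plus a 'keys' entry of 'title,author'.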
   
   =pod
   
   B<get_file_contents> - returns either the contents of the file or a -1.
   
   =over 4
   
   Parameters:
   
  =item I<$file> - a complete filesystem path to the file.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - file contents or a -1.
   
   =back
   
   =cut
   
   sub get_file_contents ($)
     {
       my $file = shift(@_);
   
       # If file does not exist, then return a -1 value.
       unless (-e $file)
         {
    return(-1);
         }
   
       # Read in file contents.
       my $file_handle = IO::File->new($file);
       my $file_contents = '';
       while (<$file_handle>)
         {
    $file_contents .= $_;
         }
   
       # Return file contents.
       return($file_contents);
     }
   
   =pod
   
   B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
   
   =over 4
   
   Parameters:
   
   =item I<$filesystem_path> - a complete filesystem path.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - remnants of the filesystem path (beginning portion removed).
   
   =back
   
   =cut
   
   sub declutter
     {
       my $filesystem_path = shift(@_);
   
       # Remove beginning portions of the filesystem path.
       $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
       $filesystem_path =~ s!^/!!;
       $filesystem_path =~ s!^res/!!;
   
       # Return what is remaining for the filesystem path.
       return($filesystem_path);
     }
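
  # Example (illustrative): with $perlvar{'lonDocRoot'} set to /home/httpd/html,
  # declutter('/home/httpd/html/res/msu/smith/prob01.problem.meta') returns
  # 'msu/smith/prob01.problem.meta'.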
   
   =pod
   
   B<query_home_server_status> - Is this the home server of an author's directory?
   
   =over 4
   
   Parameters:
   
   =item I<$author_filesystem_path> - directory path for a user.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<boolean> - 1 if true; 0 if false.
   
   =back
   
   =cut
   
   sub query_home_server_status ($)
     {
       my $author_filesystem_path = shift(@_);
   
       # Remove beginning portion of this filesystem path.
       $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
   
       # Construct path to the author's ordinary user directory.
       my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
       my $user_directory_path = construct_path_to_user_directory($user_domain,
          $username);
   
       # Return status of whether the user directory path is defined.
       if (-e $user_directory_path)
         {
    return(1); # True.
         }
       else
         {
           return(0); # False.
         }
     }
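
  # Example (illustrative): '/home/httpd/html/res/msu/smith' reduces to
  # 'msu/smith', and author 'smith' counts as a home user exactly when the
  # corresponding directory under $perlvar{'lonUsersDir'} exists on this machine.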
   
   =pod
   
   B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
   
   =over 4
   
   Parameters:
   
   =item I<$user_domain> - the loncapa domain of the user.
   
   =item I<$username> - the unique username (user id) of the user.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - representing the path on the filesystem.
   
   =back
   
   =cut
   
   sub construct_path_to_user_directory ($$)
     {
       my ($user_domain,$username) = @_;
   
       # Untaint.
       $user_domain =~ s/\W//g;
       $username =~ s/\W//g;
   
       # Create three levels of sub-directoried filesystem path
       # based on the first three characters of the username.
       my $sub_filesystem_path = $username.'__';
       $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
   
       # Use the sub-directoried levels and other variables to generate
       # the complete filesystem path.
       my $complete_filesystem_path =
    join('/',($perlvar{'lonUsersDir'},
     $user_domain,
     $sub_filesystem_path,
     $username));
   
       # Return the complete filesystem path.
       return($complete_filesystem_path);
     }
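
  # Example (illustrative, hypothetical user): for domain 'msu' and username
  # 'sharrison', the three sub-directory levels are 's/h/a/', giving
  #   $perlvar{'lonUsersDir'}/msu/s/h/a//sharrison
  # (the doubled slash from the trailing '/' is harmless on the filesystem).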
   
   =pod
   
  B<sql_formatted_time> ($) - turns seconds since epoch into SQL DATETIME format.
   
   =over 4
   
   Parameters:
   
   =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - datetime sql formatted string.
   
   =back
   
   =cut
   
   sub sql_formatted_time ($)
     {
       # Sanitize the time argument and convert to localtime array.
       my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
    localtime(&sanitize_time(shift(@_)));
   
       # Convert month from (0..11) to (1..12).
       $mon += 1;
   
       # Make the year compatible with A.D. specification.
       $year += 1900;
   
       # Return a date which is compatible with MySQL's "DATETIME" format.
       return(join('-',($year,$mon,$mday)).
      ' '.
      join(':',($hour,$min,$sec))
      );
     }
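
  # Example (illustrative): on a UTC-configured server,
  # sql_formatted_time(1044147435) returns '2003-2-2 0:57:15' (note the absence
  # of zero-padding), which MySQL accepts for DATETIME columns.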
   
   
   # ==================================== The following two subroutines are needed
   #                 for accommodating incorrect time formats inside the metadata.
   
   =pod
   
   B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
   
   =over 4
   
   Parameters:
   
   =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<integer> - seconds since epoch.
   
   =back
   
   =cut
   
   sub make_seconds_since_epoch (@)
     {
       # Keytable of time metadata.
       my %time_metadata = @_;
   
       # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
       return(POSIX::mktime(
    ($time_metadata{'seconds'},
     $time_metadata{'minutes'},
     $time_metadata{'hours'},
     $time_metadata{'day'},
     $time_metadata{'month'}-1,
     $time_metadata{'year'}-1900,
     0,
     0,
     $time_metadata{'dlsav'})
    )
      );
     }
   
   =pod
   
   B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
   
   Somebody described this subroutine as
   "retro-fixing of un-backward-compatible time format".
   
  What this means is that a part of this code expects to get UTC seconds
   since the epoch (beginning of 1970).  Yet, some of the .meta files have
   sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
   integers (e.g. 1044147435).  These time strings do not encode the timezone
   and, in this sense, can be considered "un-backwards-compatible".
   
   =over 4
   
   Parameters:
   
   =item I<$potentially_badformat_string> - string to "retro-fix".
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<integer> - seconds since epoch.
   
   =back
   
   =cut
   
   sub sanitize_time ($)
     {
       my $timestamp = shift(@_);
       # If timestamp is in this unexpected format....
       if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
         {
    # then convert into seconds since epoch (the expected format).
    $timestamp = &make_seconds_since_epoch(
          'year' => $1,
          'month' => $2,
          'day' => $3,
          'hours' => $4,
          'minutes' => $5,
          'seconds' => $6
          );
         }
       # Otherwise we assume timestamp to be as expected.
       return($timestamp);
     }
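
  # Example (illustrative): sanitize_time('2001-04-01 12:00:00') is routed
  # through make_seconds_since_epoch(), while sanitize_time(986126400) is
  # returned unchanged because it is already a seconds-since-epoch integer.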
   
   =pod
   
   =head1 AUTHOR
   
   Written to help the loncapa project.
   
   Scott Harrison, sharrison@users.sourceforge.net
   
   This is distributed under the same terms as loncapa (i.e. "freeware").
   
   =cut
