File:  [LON-CAPA] / loncom / metadata_database / searchcat.pl
Revision 1.28: download - view: text, annotated - select for diffs
Mon Feb 3 05:39:37 2003 UTC (21 years, 3 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
beautified; BUG 1215 FIXED documentation that was horrible is now non-horrible
and
it now exists in every place that it should be; BUG 1216 FIXED stale records that
are not accounted for on the system are deleted; BUG 1217 FIXED, the
loncapa:metadata table can now change structure, and this script will
verify and alter the table structure if necessary; BUG 1218 PARTIALLY FIXED
(a small solid step toward The Great Metadata Overhaul); also using File::Find
now instead of find.pl

    1: #!/usr/bin/perl
    2: # The LearningOnline Network
    3: # searchcat.pl "Search Catalog" batch script
    4: #
    5: # $Id: searchcat.pl,v 1.28 2003/02/03 05:39:37 harris41 Exp $
    6: #
    7: # Copyright Michigan State University Board of Trustees
    8: #
    9: # This file is part of the LearningOnline Network with a
   10: # Computer assisted personalized approach (loncapa).
   11: #
   12: # Loncapa is free software; you can redistribute it and/or modify
   13: # it under the terms of the GNU General Public License as published by
   14: # the Free Software Foundation; either version 2 of the License, or
   15: # (at your option) any later version.
   16: #
   17: # Loncapa is distributed in the hope that it will be useful,
   18: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   19: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   20: # GNU General Public License for more details.
   21: #
   22: # You should have received a copy of the GNU General Public License
   23: # along with loncapa; if not, write to the Free Software
   24: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   25: #
   26: # /home/httpd/html/adm/gpl.txt
   27: #
   28: # http://www.loncapa.org/
   29: #
   30: # YEAR=2001
   31: # 04/14/2001, 04/16/2001 Scott Harrison
   32: #
   33: # YEAR=2002
   34: # 05/11/2002 Scott Harrison
   35: #
   36: # YEAR=2003
   37: # Scott Harrison
   38: #
   39: ###
   40: 
   41: =pod
   42: 
   43: =head1 NAME
   44: 
   45: B<searchcat.pl> - put authoritative filesystem data into sql database.
   46: 
   47: =head1 SYNOPSIS
   48: 
   49: Ordinarily this script is to be called from a loncapa cron job
   50: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
   51: filesystem installation location: F</etc/cron.d/loncapa>).
   52: 
   53: Here is the cron job entry.
   54: 
   55: C<# Repopulate and refresh the metadata database used for the search catalog.>
   56: 
   57: C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
   58: 
   59: This script only allows itself to be run as the user C<www>.
   60: 
   61: =head1 DESCRIPTION
   62: 
   63: This script goes through a loncapa resource directory and gathers metadata.
   64: The metadata is entered into a SQL database.
   65: 
   66: This script also does general database maintenance such as reformatting
   67: the C<loncapa:metadata> table if it is deprecated.
   68: 
   69: This script also builds dynamic temporal metadata and stores this inside
   70: a F<nohist_resevaldata.db> database file.
   71: 
   72: This script is playing an increasingly important role for a loncapa
   73: library server.  The proper operation of this script is critical for a smooth
   74: and correct user experience.
   75: 
   76: =cut
   77: 
   78: # ========================================================== Setting things up.
   79: 
   80: # ------------------------------------------------------  Use external modules.
   81: 
   82: use lib '/home/httpd/lib/perl/';
   83: use LONCAPA::Configuration;
   84: 
   85: use IO::File;
   86: use HTML::TokeParser;
   87: use DBI;
   88: use GDBM_File;
   89: use POSIX qw(strftime mktime);
   90: 
   91: # ----------------- Code to enable 'find' subroutine listing of the .meta files
   92: use File::Find;
   93: 
   94: # List of .meta files (used on a per-user basis).
   95: my @metalist;
   96: 
   97: # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
   98: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
   99: my %perlvar = %{$perlvarref};
  100: undef($perlvarref); # Remove since sensitive and not needed.
  101: delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.
  102: 
  103: # ------------------------------------- Only run if machine is a library server
  104: if ($perlvar{'lonRole'} ne 'library')
  105:   {
  106:     exit(0);
  107:   }
  108: 
  109: # ------------------------------ Make sure this process is running as user=www.
  110: my $wwwid = getpwnam('www');
  111: if ($wwwid != $<)
  112:   {
  113:     $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
  114:     $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
  115:     system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
  116: 	   "mailto $emailto -s '$subj' > /dev/null");
  117:     exit(1);
  118:   }
  119: 
  120: # ------------------------------------------------------ Initialize log output.
  121: open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
  122: print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");
  123: 
  124: my $dbh; # Database object reference handle.
  125: 
  126: # ----------------------------- Verify connection to loncapa:metadata database.
  127: unless (
  128: 	$dbh = DBI->connect('DBI:mysql:loncapa','www',
  129: 			    $perlvar{'lonSqlAccess'},
  130: 			    { RaiseError => 0,PrintError => 0})
  131: 	)
  132:   { 
  133:     print(LOG '**** ERROR **** Cannot connect to database!'."\n");
  134:     exit(0);
  135:   }
  136: 
  137: # ------------------------------ Create loncapa:metadata table if non-existent.
  138: my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
  139:     'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
  140:     'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
  141:     'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
  142:     'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
  143:     'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
  144:     'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
  145:     'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
  146:     'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
  147:     'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
  148:     'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';
  149: 
  150: $dbh->do($make_metadata_table); # Generate the table.
  151: 
  152: # ----------------------------- Verify format of the loncapa:metadata database.
  153: #                               (delete and recreate database if necessary).
  154: 
  155: # Make a positive control for verifying table structure.
  156: my $make_metadata_table_CONTROL = $make_metadata_table;
  157: $make_metadata_table_CONTROL =~
  158:     s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
  159: 
  160: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
  161: $dbh->do($make_metadata_table_CONTROL);
  162: 
  163: my $table_description; # selectall reference to the table description.
  164: 
  165: my $CONTROL_table_string; # What the table description should look like.
  166: my $table_string; # What the table description does look like.
  167: 
  168: # Calculate the CONTROL table description (what it should be).
  169: $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
  170: foreach my $table_row (@{$table_description})
  171:   {
  172:     $CONTROL_table_string .= join(',',@{$table_row})."\n";
  173:   }
  174: 
  175: # Calculate the current table description (what it currently looks like).
  176: $table_description = $dbh->selectall_arrayref('describe metadata');
  177: foreach my $table_row (@{$table_description})
  178:   {
  179:     $table_string .= join(',',@{$table_row})."\n";
  180:   }
  181: 
  182: if ($table_string ne $CONTROL_table_string)
  183:   {
  184:     # Log this incident.
  185:     print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
  186: 	  '.'."\n");
  187:     # Delete the table.
  188:     $dbh->do('DROP TABLE IF EXISTS metadata');
  189:     # Generate the table.
  190:     $dbh->do($make_metadata_table);
  191:   }
  192: 
  193: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
  194: 
  195: # ----------------------------------------------- Set utilitysemaphore to zero.
  196: $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
  197: 
  198: # ========================================================= Main functionality.
  199: 
  200: # - Determine home authors on this server based on resources dir and user tree.
  201: 
  202: # RESOURCES: the resources directory (subdirs correspond to author usernames).
  203: opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
  204:     (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
  205:      and exit(0));
  206: 
  207: # query_home_server_status will look for user home directories on this machine.
  208: my @homeusers =
  209:     grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
  210: 				    $perlvar{'lonDefDomain'}.'/'.$_)
  211: 	  } grep {!/^\.\.?$/} readdir(RESOURCES);
  212: closedir(RESOURCES);
  213: 
  214: unless (@homeusers)
  215:   {
  216:     print(LOG '=== No home users found on this server.'."\n");
  217:   }
  218: 
  219: # Consider each author individually.
  220: foreach my $user (@homeusers)
  221:   {
  222:     # Make a log entry.
  223:     print(LOG "\n".'=== User: '.$user."\n\n");
  224: 
  225:     # Get filesystem path to this user's directory.
  226:     my $user_directory =
  227: 	&construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
  228: 
  229:     # Remove left-over db-files from a potentially crashed searchcat run.
  230:     unlink($user_directory.'/nohist_new_resevaldata.db');
  231: 
  232:     # Cleanup the metalist array.
  233:     undef(@metalist);
  234:     @metalist = ();
  235: 
  236:     # This will add entries to the @metalist array.
  237:     &File::Find::find(\&wanted,
  238: 		      $perlvar{'lonDocRoot'}.'/res/'.
  239: 		      $perlvar{'lonDefDomain'}.'/'.$user);
  240: 
  241:     # -- process file to get metadata and put into search catalog SQL database
  242:     # Also, build and store dynamic metadata.
  243:     # Also, delete record entries before refreshing.
  244:     foreach my $m (@metalist)
  245:       {
  246: 	# Log this action.
  247: 	print(LOG "- ".$m."\n");
  248: 
  249: 	# Get metadata from the file.
  250: 	my $ref = get_metadata_from_file($m);
  251: 
  252: 	# Make a datarecord identifier for this resource.
  253: 	my $m2 = '/res/'.declutter($m);
  254: 	$m2 =~ s/\.meta$//;
  255: 
  256: 	# Build and store dynamic metadata inside nohist_resevaldata.db.
  257: 	build_on_the_fly_dynamic_metadata($m2);
  258: 
  259: 	# Delete record if it already exists.
  260: 	my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
  261: 	my $sth = $dbh->prepare($q2);
  262: 	$sth->execute();
  263: 	my $r1 = $sth->fetchall_arrayref;
  264: 	if (@$r1)
  265: 	  {
  266: 	    $sth = 
  267: 		$dbh->prepare('delete from metadata where url like binary '.
  268: 			      "'".$m2."'");
  269: 	    $sth->execute();
  270: 	  }
  271: 
  272: 	# Add new/replacement record into the loncapa:metadata table.
  273: 	$sth = $dbh->prepare('insert into metadata values ('.
  274: 			     '"'.delete($ref->{'title'}).'"'.','.
  275: 			     '"'.delete($ref->{'author'}).'"'.','.
  276: 			     '"'.delete($ref->{'subject'}).'"'.','.
  277: 			     '"'.$m2.'"'.','.
  278: 			     '"'.delete($ref->{'keywords'}).'"'.','.
  279: 			     '"'.'current'.'"'.','.
  280: 			     '"'.delete($ref->{'notes'}).'"'.','.
  281: 			     '"'.delete($ref->{'abstract'}).'"'.','.
  282: 			     '"'.delete($ref->{'mime'}).'"'.','.
  283: 			     '"'.delete($ref->{'language'}).'"'.','.
  284: 			     '"'.sql_formatted_time(
  285: 				       delete($ref->{'creationdate'})).'"'.','.
  286: 			     '"'.sql_formatted_time(
  287: 				   delete($ref->{'lastrevisiondate'})).'"'.','.
  288: 			     '"'.delete($ref->{'owner'}).'"'.','.
  289: 			     '"'.delete($ref->{'copyright'}).'"'.','.
  290: 			     '1'.')');
  291: 	$sth->execute();
  292:       }
  293: 
  294: # ----------------------- Clean up database, remove stale SQL database records.
  295:     $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
  296: 
  297: # -------------------------------------------------- Copy over the new db-files
  298:     system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
  299: 	         $user_directory.'/nohist_resevaldata.db');
  300:   }
  301: 
  302: # --------------------------------------------------- Close database connection
  303: $dbh->disconnect;
  304: print LOG "\n==== Searchcat completed ".localtime()." ====\n";
  305: close(LOG);
  306: exit(0);
  307: 
  308: # ================================================================ Subroutines.
  309: 
  310: =pod
  311: 
  312: =head1 SUBROUTINES
  313: 
  314: =cut
  315: 
  316: =pod
  317: 
  318: B<unescape> - translate to unstrange escaped syntax to strange characters.
  319: 
  320: =over 4
  321: 
  322: Parameters:
  323: 
  324: =item I<$str> - string with unweird characters.
  325: 
  326: =back
  327: 
  328: =over 4
  329: 
  330: Returns:
  331: 
  332: =item C<string> - string with potentially weird characters.
  333: 
  334: =back
  335: 
  336: =cut
  337: 
  338: sub unescape ($)
  339:   {
  340:     my $str = shift(@_);
  341:     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
  342:     return($str);
  343:   }
  344: 
  345: =pod
  346: 
  347: B<escape> - translate strange characters to unstrange escaped syntax.
  348: 
  349: =over 4
  350: 
  351: Parameters:
  352: 
  353: =item I<$str> - string with potentially weird characters to unweird-ify.
  354: 
  355: =back
  356: 
  357: =over 4
  358: 
  359: Returns:
  360: 
  361: =item C<string> - unweird-ified string.
  362: 
  363: =back
  364: 
  365: =cut
  366: 
  367: sub escape ($)
  368:   {
  369:     my $str = shift(@_);
  370:     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
  371:     return($str);
  372:   }
  373: 
  374: =pod
  375: 
  376: B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
  377: 
  378: Dynamic metadata is stored in a nohist_resevaldata GDBM database.
  379: Most of the calculations in this subroutine are totally pointless
  380: and not useful for anything that this subroutine does.
  381: (THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.)
  382: The only thing that this subroutine really makes happen is adjusting
  383: a 'count' value inside the F<nohist_new_resevaldata.db> as well
  384: as updating F<nohist_new_resevaldata.db> with information from
  385: F<nohist_resevaldata.db>.
  386: 
  387: =over 4
  388: 
  389: Parameters:
  390: 
  391: =item I<$url> - the filesystem path (url may be a misnomer...)
  392: 
  393: =back
  394: 
  395: =over 4
  396: 
  397: Returns:
  398: 
  399: =item C<hash> - key-value table of dynamically evaluated metadata.
  400: 
  401: =back
  402: 
  403: =cut
  404: 
  405: sub build_on_the_fly_dynamic_metadata ($)
  406:   {
  407:     # BEWARE ALL WHO TRY TO UNDERSTAND THIS ABSURDLY HORRIBLE SUBROUTINE.
  408:       
  409:     # Do all sorts of mumbo-jumbo to compute the user's directory.
  410:     my $url = &declutter(shift(@_));
  411:     $url =~ s/\.meta$//;
  412:     my %returnhash = ();
  413:     my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
  414:     my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
  415: 
  416:     # Attempt a GDBM database instantiation inside users directory and proceed.
  417:     if ((tie(%evaldata,'GDBM_File',
  418:             $user_directory.
  419: 	     '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
  420:         (tie(%newevaldata,'GDBM_File',
  421: 	     $user_directory.
  422: 	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
  423:       {
  424: 	# For different variables, track the running sum and counts.
  425: 	my %sum = ();
  426: 	my %cnt = ();
  427: 
  428: 	# Define computed items as a sum (add) or an average (avg) or a raw
  429: 	# count (cnt) or 'app'?
  430: 	my %listitems=('count'        => 'add',
  431: 		       'course'       => 'add',
  432: 		       'avetries'     => 'avg',
  433: 		       'stdno'        => 'add',
  434: 		       'difficulty'   => 'avg',
  435: 		       'clear'        => 'avg',
  436: 		       'technical'    => 'avg',
  437: 		       'helpful'      => 'avg',
  438: 		       'correct'      => 'avg',
  439: 		       'depth'        => 'avg',
  440: 		       'comments'     => 'app',
  441: 		       'usage'        => 'cnt'
  442: 		       );
  443: 	
  444: 	# Untaint the url and use as part of a regular expression.
  445: 	my $regexp = $url;
  446: 	$regexp =~ s/(\W)/\\$1/g;
  447: 	$regexp = '___'.$regexp.'___([a-z]+)$';
  448: 
  449: 	# Check existing nohist database for this url.
  450: 	# THE ONLY TIME THIS IS IMPORTANT FOR THIS AWFUL SUBROUTINE
  451: 	# IS FOR 'count' ENTRIES
  452: 	# AND FOR REFRESHING non-'count' ENTRIES INSIDE nohist_new DATABASE.
  453: 	foreach (keys %evaldata)
  454: 	  {
  455: 	    my $key = &unescape($_);
  456: 	    if ($key =~ /$regexp/) # If url-based entry exists.
  457: 	      {
  458: 		my $ctype = $1; # Set to specific category type.
  459: 
  460: 		# Do an increment for this category type.
  461: 		if (defined($cnt{$ctype}))
  462: 		  {
  463: 		    $cnt{$ctype}++; 
  464: 		  }
  465: 		else
  466: 		  {
  467: 		    $cnt{$ctype} = 1; 
  468: 		  }
  469:                 unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
  470: 		  {
  471: 		    # Increment the sum based on the evaluated data in the db.
  472: 		    if (defined($sum{$ctype}))
  473: 		      {
  474: 			$sum{$ctype} += $evaldata{$_};
  475: 		      }
  476: 		    else
  477: 		      {
  478: 			$sum{$ctype} = $evaldata{$_};
  479: 		      }
  480:  		  }
  481: 		else # 'app' mode, means to use '<hr />' as a separator
  482: 		  {
  483: 		    if (defined($sum{$ctype}))
  484: 		      {
  485: 			if ($evaldata{$_})
  486: 			  {
  487: 			    $sum{$ctype} .= '<hr />'.$evaldata{$_};
  488: 			  }
  489: 		      }
  490: 		    else
  491: 		      {
  492: 			$sum{$ctype} = ''.$evaldata{$_};
  493: 		      }
  494: 		  }
  495: 		if ($ctype ne 'count')
  496: 		  {
  497: 		    # ALERT!  THIS HORRIBLE LOOP IS ACTUALLY DOING SOMETHING
  498: 		    # USEFUL!
  499: 		    $newevaldata{$_} = $evaldata{$_};
  500: 		  }
  501: 	      }
  502: 	  }
  503: 
  504: 	# THE ONLY OTHER TIME THIS LOOP IS USEFUL IS FOR THE 'count' HASH
  505: 	# ELEMENT.
  506: 	foreach (keys %cnt)
  507: 	  {
  508: 	    if ($listitems{$_} eq 'avg')
  509: 	      {
  510: 		$returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
  511: 	      }
  512: 	    elsif ($listitems{$_} eq 'cnt')
  513: 	      {
  514: 		$returnhash{$_} = $cnt{$_};
  515: 	      }
  516: 	    else
  517: 	      {
  518: 		$returnhash{$_} = $sum{$_};
  519: 	      }
  520: 	  }
  521: 
  522: 	# A RARE MOMENT OF DOING ANYTHING USEFUL INSIDE THIS
  523: 	# BLEEPING SUBROUTINE.
  524: 	if ($returnhash{'count'})
  525: 	  {
  526: 	    my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
  527: 	    $newevaldata{$newkey} = $returnhash{'count'};
  528: 	  }
  529: 
  530: 	untie(%evaldata); # Close/release the original nohist database.
  531: 	untie(%newevaldata); # Close/release the new nohist database.
  532:       }
  533:     return(%returnhash);
  534:     # Celebrate!  We have now accomplished some simple calculations using
  535:     # 1000% bloated functionality in our subroutine.  Go wash your eyeballs
  536:     # out now.
  537:   }
  538: 
  539: =pod
  540: 
  541: B<wanted> - used by B<File::Find::find> subroutine.
  542: 
  543: This evaluates whether a file is wanted, and pushes it onto the
  544: I<@metalist> array.  This subroutine was, for the most part, auto-generated
  545: by the B<find2perl> command.
  546: 
  547: =over 4
  548: 
  549: Parameters:
  550: 
  551: =item I<$file> - a path to the file.
  552: 
  553: =back
  554: 
  555: =over 4
  556: 
  557: Returns:
  558: 
  559: =item C<boolean> - true or false based on logical statement.
  560: 
  561: =back
  562: 
  563: =cut
  564: 
  565: sub wanted ($)
  566:   {
  567:     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
  568:     -f $_ &&
  569:     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
  570:     push(@metalist,$File::Find::dir.'/'.$_);
  571:   }
  572: 
  573: =pod
  574: 
  575: B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
  576: 
  577: I<Note that this is significantly altered from a subroutine present in lonnet.>
  578: 
  579: =over 4
  580: 
  581: Parameters:
  582: 
  583: =item I<$file> - a path.to the file.
  584: 
  585: =back
  586: 
  587: =over 4
  588: 
  589: Returns:
  590: 
  591: =item C<hash reference> - a hash array (keys and values).
  592: 
  593: =back
  594: 
  595: =cut
  596: 
  597: sub get_metadata_from_file ($)
  598:   {
  599:     my ($filename) = @_;
  600:     my %metatable; # Used to store return value of hash-tabled metadata.
  601:     $filename = &declutter($filename); # Remove non-identifying filesystem info
  602:     my $uri = ''; # The URI is not relevant in this scenario.
  603:     unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
  604:       {
  605: 	$filename .= '.meta'; # Append a .meta suffix.
  606:       }
  607:     # Get the file contents.
  608:     my $metadata_string =
  609: 	&get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
  610: 
  611:     # Parse the file based on its XML tags.
  612:     my $parser = HTML::TokeParser->new(\$metadata_string);
  613:     my $token;
  614:     while ($token = $parser->get_token) # Loop through tokens.
  615:       {
  616: 	if ($token->[0] eq 'S') # If it is a start token.
  617: 	  {
  618: 	    my $entry = $token->[1];
  619: 	    my $unikey = $entry; # A unique identifier for this xml tag key.
  620: 	    if (defined($token->[2]->{'part'}))
  621: 	      { 
  622: 		$unikey .= '_'.$token->[2]->{'part'}; 
  623: 	      }
  624: 	    if (defined($token->[2]->{'name'}))
  625: 	      { 
  626: 		$unikey .= '_'.$token->[2]->{'name'}; 
  627: 	      }
  628: 	    # Append $unikey to metatable's keys entry.
  629: 	    if ($metatable{$uri.'keys'})
  630: 	      {
  631: 		$metatable{$uri.'keys'} .= ','.$unikey;
  632: 	      }
  633: 	    else
  634: 	      {
  635: 		$metatable{$uri.'keys'} = $unikey;
  636: 	      }
  637: 	    # Insert contents into metatable entry for the unikey.
  638: 	    foreach my $t3 (@{$token->[3]})
  639: 	      {
  640: 		$metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};
  641: 	      }
  642: 	    # If there was no text contained inside the tags, set = default.
  643: 	    unless
  644: 	      (
  645: 	        $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
  646: 	      )
  647: 	      {
  648: 		$metatable{$uri.''.$unikey} =
  649: 		    $metatable{$uri.''.$unikey.'.default'};
  650: 	      }
  651: 	  }
  652:       }
  653:     # Return with a key-value table of XML tags and their tag contents.
  654:     return(\%metatable);
  655:   }
  656: 
  657: =pod
  658: 
  659: B<get_file_contents> - returns either the contents of the file or a -1.
  660: 
  661: =over 4
  662: 
  663: Parameters:
  664: 
  665: =item I<$file> - a complete filesystem path.to the file.
  666: 
  667: =back
  668: 
  669: =over 4
  670: 
  671: Returns:
  672: 
  673: =item C<string> - file contents or a -1.
  674: 
  675: =back
  676: 
  677: =cut
  678: 
  679: sub get_file_contents ($)
  680:   {
  681:     my $file = shift(@_);
  682: 
  683:     # If file does not exist, then return a -1 value.
  684:     unless (-e $file)
  685:       {
  686: 	return(-1);
  687:       }
  688: 
  689:     # Read in file contents.
  690:     my $file_handle = IO::File->new($file);
  691:     my $file_contents = '';
  692:     while (<$file_handle>)
  693:       {
  694: 	$file_contents .= $_;
  695:       }
  696: 
  697:     # Return file contents.
  698:     return($file_contents);
  699:   }
  700: 
  701: =pod
  702: 
  703: B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
  704: 
  705: =over 4
  706: 
  707: Parameters:
  708: 
  709: =item I<$filesystem_path> - a complete filesystem path.
  710: 
  711: =back
  712: 
  713: =over 4
  714: 
  715: Returns:
  716: 
  717: =item C<string> - remnants of the filesystem path (beginning portion removed).
  718: 
  719: =back
  720: 
  721: =cut
  722: 
  723: sub declutter
  724:   {
  725:     my $filesystem_path = shift(@_);
  726: 
  727:     # Remove beginning portions of the filesystem path.
  728:     $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
  729:     $filesystem_path =~ s!^/!!;
  730:     $filesystem_path =~ s!^res/!!;
  731: 
  732:     # Return what is remaining for the filesystem path.
  733:     return($filesystem_path);
  734:   }
  735: 
  736: =pod
  737: 
  738: B<query_home_server_status> - Is this the home server of an author's directory?
  739: 
  740: =over 4
  741: 
  742: Parameters:
  743: 
  744: =item I<$author_filesystem_path> - directory path for a user.
  745: 
  746: =back
  747: 
  748: =over 4
  749: 
  750: Returns:
  751: 
  752: =item C<boolean> - 1 if true; 0 if false.
  753: 
  754: =back
  755: 
  756: =cut
  757: 
  758: sub query_home_server_status ($)
  759:   {
  760:     my $author_filesystem_path = shift(@_);
  761: 
  762:     # Remove beginning portion of this filesystem path.
  763:     $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
  764: 
  765:     # Construct path to the author's ordinary user directory.
  766:     my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
  767:     my $user_directory_path = construct_path_to_user_directory($user_domain,
  768: 							       $username);
  769: 
  770:     # Return status of whether the user directory path is defined.
  771:     if (-e $user_directory_path)
  772:       {
  773: 	return(1); # True.
  774:       }
  775:     else
  776:       {
  777:         return(0); # False.
  778:       }
  779:   }
  780: 
  781: =pod
  782: 
  783: B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
  784: 
  785: =over 4
  786: 
  787: Parameters:
  788: 
  789: =item I<$user_domain> - the loncapa domain of the user.
  790: 
  791: =item I<$username> - the unique username (user id) of the user.
  792: 
  793: =back
  794: 
  795: =over 4
  796: 
  797: Returns:
  798: 
  799: =item C<string> - representing the path on the filesystem.
  800: 
  801: =back
  802: 
  803: =cut
  804: 
  805: sub construct_path_to_user_directory ($$)
  806:   {
  807:     my ($user_domain,$username) = @_;
  808: 
  809:     # Untaint.
  810:     $user_domain =~ s/\W//g;
  811:     $username =~ s/\W//g;
  812: 
  813:     # Create three levels of sub-directoried filesystem path
  814:     # based on the first three characters of the username.
  815:     my $sub_filesystem_path = $username.'__';
  816:     $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
  817: 
  818:     # Use the sub-directoried levels and other variables to generate
  819:     # the complete filesystem path.
  820:     my $complete_filesystem_path =
  821: 	join('/',($perlvar{'lonUsersDir'},
  822: 		  $user_domain,
  823: 		  $sub_filesystem_path,
  824: 		  $username));
  825: 
  826:     # Return the complete filesystem path.
  827:     return($complete_filesystem_path);
  828:   }
  829: 
  830: =pod
  831: 
  832: B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
  833: 
  834: =over 4
  835: 
  836: Parameters:
  837: 
  838: =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
  839: 
  840: =back
  841: 
  842: =over 4
  843: 
  844: Returns:
  845: 
  846: =item C<string> - datetime sql formatted string.
  847: 
  848: =back
  849: 
  850: =cut
  851: 
  852: sub sql_formatted_time ($)
  853:   {
  854:     # Sanitize the time argument and convert to localtime array.
  855:     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
  856: 	localtime(&sanitize_time(shift(@_)));
  857: 
  858:     # Convert month from (0..11) to (1..12).
  859:     $mon += 1;
  860: 
  861:     # Make the year compatible with A.D. specification.
  862:     $year += 1900;
  863: 
  864:     # Return a date which is compatible with MySQL's "DATETIME" format.
  865:     return(join('-',($year,$mon,$mday)).
  866: 	   ' '.
  867: 	   join(':',($hour,$min,$sec))
  868: 	   );
  869:   }
  870: 
  871: 
  872: # ==================================== The following two subroutines are needed
  873: #                 for accommodating incorrect time formats inside the metadata.
  874: 
  875: =pod
  876: 
  877: B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
  878: 
  879: =over 4
  880: 
  881: Parameters:
  882: 
  883: =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
  884: 
  885: =back
  886: 
  887: =over 4
  888: 
  889: Returns:
  890: 
  891: =item C<integer> - seconds since epoch.
  892: 
  893: =back
  894: 
  895: =cut
  896: 
  897: sub make_seconds_since_epoch (@)
  898:   {
  899:     # Keytable of time metadata.
  900:     my %time_metadata = @_;
  901: 
  902:     # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
  903:     return(POSIX::mktime(
  904: 			 ($time_metadata{'seconds'},
  905: 			  $time_metadata{'minutes'},
  906: 			  $time_metadata{'hours'},
  907: 			  $time_metadata{'day'},
  908: 			  $time_metadata{'month'}-1,
  909: 			  $time_metadata{'year'}-1900,
  910: 			  0,
  911: 			  0,
  912: 			  $time_metadata{'dlsav'})
  913: 			 )
  914: 	   );
  915:   }
  916: 
  917: =pod
  918: 
  919: B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
  920: 
  921: Somebody described this subroutine as
  922: "retro-fixing of un-backward-compatible time format".
  923: 
  924: What this means, is that a part of this code expects to get UTC seconds
  925: since the epoch (beginning of 1970).  Yet, some of the .meta files have
  926: sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
  927: integers (e.g. 1044147435).  These time strings do not encode the timezone
  928: and, in this sense, can be considered "un-backwards-compatible".
  929: 
  930: =over 4
  931: 
  932: Parameters:
  933: 
  934: =item I<$potentially_badformat_string> - string to "retro-fix".
  935: 
  936: =back
  937: 
  938: =over 4
  939: 
  940: Returns:
  941: 
  942: =item C<integer> - seconds since epoch.
  943: 
  944: =back
  945: 
  946: =cut
  947: 
  948: sub sanitize_time ($)
  949:   {
  950:     my $timestamp = shift(@_);
  951:     # If timestamp is in this unexpected format....
  952:     if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
  953:       {
  954: 	# then convert into seconds since epoch (the expected format).
  955: 	$timestamp = &make_seconds_since_epoch(
  956: 					       'year' => $1,
  957: 					       'month' => $2,
  958: 					       'day' => $3,
  959: 					       'hours' => $4,
  960: 					       'minutes' => $5,
  961: 					       'seconds' => $6
  962: 					       );
  963:       }
  964:     # Otherwise we assume timestamp to be as expected.
  965:     return($timestamp);
  966:   }
  967: 
  968: =pod
  969: 
  970: =head1 AUTHOR
  971: 
  972: Written to help the loncapa project.
  973: 
  974: Scott Harrison, sharrison@users.sourceforge.net
  975: 
  976: This is distributed under the same terms as loncapa (i.e. "freeware").
  977: 
  978: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>