File:  [LON-CAPA] / loncom / metadata_database / searchcat.pl
Revision 1.29: download - view: text, annotated - select for diffs
Mon Feb 3 13:42:16 2003 UTC (21 years, 3 months ago) by albertel
Branches: MAIN
CVS tags: HEAD
- modifying header to be correct
- cleaning out unneccesary comments

    1: #!/usr/bin/perl
    2: # The LearningOnline Network
    3: # searchcat.pl "Search Catalog" batch script
    4: #
    5: # $Id: searchcat.pl,v 1.29 2003/02/03 13:42:16 albertel Exp $
    6: #
    7: # Copyright Michigan State University Board of Trustees
    8: #
    9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   10: #
   11: # LON-CAPA is free software; you can redistribute it and/or modify
   12: # it under the terms of the GNU General Public License as published by
   13: # the Free Software Foundation; either version 2 of the License, or
   14: # (at your option) any later version.
   15: #
   16: # LON-CAPA is distributed in the hope that it will be useful,
   17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   19: # GNU General Public License for more details.
   20: #
   21: # You should have received a copy of the GNU General Public License
   22: # along with LON-CAPA; if not, write to the Free Software
   23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   24: #
   25: # /home/httpd/html/adm/gpl.txt
   26: #
   27: # http://www.lon-capa.org/
   28: #
   29: # YEAR=2001
   30: # 04/14/2001, 04/16/2001 Scott Harrison
   31: #
   32: # YEAR=2002
   33: # 05/11/2002 Scott Harrison
   34: #
   35: # YEAR=2003
   36: # Scott Harrison
   37: #
   38: ###
   39: 
   40: =pod
   41: 
   42: =head1 NAME
   43: 
   44: B<searchcat.pl> - put authoritative filesystem data into sql database.
   45: 
   46: =head1 SYNOPSIS
   47: 
   48: Ordinarily this script is to be called from a loncapa cron job
   49: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
   50: filesystem installation location: F</etc/cron.d/loncapa>).
   51: 
   52: Here is the cron job entry.
   53: 
   54: C<# Repopulate and refresh the metadata database used for the search catalog.>
   55: 
   56: C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
   57: 
   58: This script only allows itself to be run as the user C<www>.
   59: 
   60: =head1 DESCRIPTION
   61: 
   62: This script goes through a loncapa resource directory and gathers metadata.
   63: The metadata is entered into a SQL database.
   64: 
   65: This script also does general database maintenance such as reformatting
   66: the C<loncapa:metadata> table if it is deprecated.
   67: 
   68: This script also builds dynamic temporal metadata and stores this inside
   69: a F<nohist_resevaldata.db> database file.
   70: 
   71: This script is playing an increasingly important role for a loncapa
   72: library server.  The proper operation of this script is critical for a smooth
   73: and correct user experience.
   74: 
   75: =cut
   76: 
   77: # ========================================================== Setting things up.
   78: 
   79: # ------------------------------------------------------  Use external modules.
   80: 
   81: use lib '/home/httpd/lib/perl/';
   82: use LONCAPA::Configuration;
   83: 
   84: use IO::File;
   85: use HTML::TokeParser;
   86: use DBI;
   87: use GDBM_File;
   88: use POSIX qw(strftime mktime);
   89: 
   90: # ----------------- Code to enable 'find' subroutine listing of the .meta files
   91: use File::Find;
   92: 
   93: # List of .meta files (used on a per-user basis).
   94: my @metalist;
   95: 
   96: # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
   97: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
   98: my %perlvar = %{$perlvarref};
   99: undef($perlvarref); # Remove since sensitive and not needed.
  100: delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.
  101: 
  102: # ------------------------------------- Only run if machine is a library server
  103: if ($perlvar{'lonRole'} ne 'library')
  104:   {
  105:     exit(0);
  106:   }
  107: 
  108: # ------------------------------ Make sure this process is running as user=www.
  109: my $wwwid = getpwnam('www');
  110: if ($wwwid != $<)
  111:   {
  112:     $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
  113:     $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
  114:     system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
  115: 	   "mailto $emailto -s '$subj' > /dev/null");
  116:     exit(1);
  117:   }
  118: 
  119: # ------------------------------------------------------ Initialize log output.
  120: open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
  121: print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");
  122: 
  123: my $dbh; # Database object reference handle.
  124: 
  125: # ----------------------------- Verify connection to loncapa:metadata database.
  126: unless (
  127: 	$dbh = DBI->connect('DBI:mysql:loncapa','www',
  128: 			    $perlvar{'lonSqlAccess'},
  129: 			    { RaiseError => 0,PrintError => 0})
  130: 	)
  131:   { 
  132:     print(LOG '**** ERROR **** Cannot connect to database!'."\n");
  133:     exit(0);
  134:   }
  135: 
  136: # ------------------------------ Create loncapa:metadata table if non-existent.
  137: my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
  138:     'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
  139:     'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
  140:     'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
  141:     'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
  142:     'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
  143:     'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
  144:     'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
  145:     'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
  146:     'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
  147:     'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';
  148: 
  149: $dbh->do($make_metadata_table); # Generate the table.
  150: 
  151: # ----------------------------- Verify format of the loncapa:metadata database.
  152: #                               (delete and recreate database if necessary).
  153: 
  154: # Make a positive control for verifying table structure.
  155: my $make_metadata_table_CONTROL = $make_metadata_table;
  156: $make_metadata_table_CONTROL =~
  157:     s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
  158: 
  159: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
  160: $dbh->do($make_metadata_table_CONTROL);
  161: 
  162: my $table_description; # selectall reference to the table description.
  163: 
  164: my $CONTROL_table_string; # What the table description should look like.
  165: my $table_string; # What the table description does look like.
  166: 
  167: # Calculate the CONTROL table description (what it should be).
  168: $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
  169: foreach my $table_row (@{$table_description})
  170:   {
  171:     $CONTROL_table_string .= join(',',@{$table_row})."\n";
  172:   }
  173: 
  174: # Calculate the current table description (what it currently looks like).
  175: $table_description = $dbh->selectall_arrayref('describe metadata');
  176: foreach my $table_row (@{$table_description})
  177:   {
  178:     $table_string .= join(',',@{$table_row})."\n";
  179:   }
  180: 
  181: if ($table_string ne $CONTROL_table_string)
  182:   {
  183:     # Log this incident.
  184:     print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
  185: 	  '.'."\n");
  186:     # Delete the table.
  187:     $dbh->do('DROP TABLE IF EXISTS metadata');
  188:     # Generate the table.
  189:     $dbh->do($make_metadata_table);
  190:   }
  191: 
  192: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
  193: 
  194: # ----------------------------------------------- Set utilitysemaphore to zero.
  195: $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
  196: 
  197: # ========================================================= Main functionality.
  198: 
  199: # - Determine home authors on this server based on resources dir and user tree.
  200: 
  201: # RESOURCES: the resources directory (subdirs correspond to author usernames).
  202: opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
  203:     (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
  204:      and exit(0));
  205: 
  206: # query_home_server_status will look for user home directories on this machine.
  207: my @homeusers =
  208:     grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
  209: 				    $perlvar{'lonDefDomain'}.'/'.$_)
  210: 	  } grep {!/^\.\.?$/} readdir(RESOURCES);
  211: closedir(RESOURCES);
  212: 
  213: unless (@homeusers)
  214:   {
  215:     print(LOG '=== No home users found on this server.'."\n");
  216:   }
  217: 
  218: # Consider each author individually.
  219: foreach my $user (@homeusers)
  220:   {
  221:     # Make a log entry.
  222:     print(LOG "\n".'=== User: '.$user."\n\n");
  223: 
  224:     # Get filesystem path to this user's directory.
  225:     my $user_directory =
  226: 	&construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
  227: 
  228:     # Remove left-over db-files from a potentially crashed searchcat run.
  229:     unlink($user_directory.'/nohist_new_resevaldata.db');
  230: 
  231:     # Cleanup the metalist array.
  232:     undef(@metalist);
  233:     @metalist = ();
  234: 
  235:     # This will add entries to the @metalist array.
  236:     &File::Find::find(\&wanted,
  237: 		      $perlvar{'lonDocRoot'}.'/res/'.
  238: 		      $perlvar{'lonDefDomain'}.'/'.$user);
  239: 
  240:     # -- process file to get metadata and put into search catalog SQL database
  241:     # Also, build and store dynamic metadata.
  242:     # Also, delete record entries before refreshing.
  243:     foreach my $m (@metalist)
  244:       {
  245: 	# Log this action.
  246: 	print(LOG "- ".$m."\n");
  247: 
  248: 	# Get metadata from the file.
  249: 	my $ref = get_metadata_from_file($m);
  250: 
  251: 	# Make a datarecord identifier for this resource.
  252: 	my $m2 = '/res/'.declutter($m);
  253: 	$m2 =~ s/\.meta$//;
  254: 
  255: 	# Build and store dynamic metadata inside nohist_resevaldata.db.
  256: 	build_on_the_fly_dynamic_metadata($m2);
  257: 
  258: 	# Delete record if it already exists.
  259: 	my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
  260: 	my $sth = $dbh->prepare($q2);
  261: 	$sth->execute();
  262: 	my $r1 = $sth->fetchall_arrayref;
  263: 	if (@$r1)
  264: 	  {
  265: 	    $sth = 
  266: 		$dbh->prepare('delete from metadata where url like binary '.
  267: 			      "'".$m2."'");
  268: 	    $sth->execute();
  269: 	  }
  270: 
  271: 	# Add new/replacement record into the loncapa:metadata table.
  272: 	$sth = $dbh->prepare('insert into metadata values ('.
  273: 			     '"'.delete($ref->{'title'}).'"'.','.
  274: 			     '"'.delete($ref->{'author'}).'"'.','.
  275: 			     '"'.delete($ref->{'subject'}).'"'.','.
  276: 			     '"'.$m2.'"'.','.
  277: 			     '"'.delete($ref->{'keywords'}).'"'.','.
  278: 			     '"'.'current'.'"'.','.
  279: 			     '"'.delete($ref->{'notes'}).'"'.','.
  280: 			     '"'.delete($ref->{'abstract'}).'"'.','.
  281: 			     '"'.delete($ref->{'mime'}).'"'.','.
  282: 			     '"'.delete($ref->{'language'}).'"'.','.
  283: 			     '"'.sql_formatted_time(
  284: 				       delete($ref->{'creationdate'})).'"'.','.
  285: 			     '"'.sql_formatted_time(
  286: 				   delete($ref->{'lastrevisiondate'})).'"'.','.
  287: 			     '"'.delete($ref->{'owner'}).'"'.','.
  288: 			     '"'.delete($ref->{'copyright'}).'"'.','.
  289: 			     '1'.')');
  290: 	$sth->execute();
  291:       }
  292: 
  293: # ----------------------- Clean up database, remove stale SQL database records.
  294:     $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
  295: 
  296: # -------------------------------------------------- Copy over the new db-files
  297:     system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
  298: 	         $user_directory.'/nohist_resevaldata.db');
  299:   }
  300: 
  301: # --------------------------------------------------- Close database connection
  302: $dbh->disconnect;
  303: print LOG "\n==== Searchcat completed ".localtime()." ====\n";
  304: close(LOG);
  305: exit(0);
  306: 
  307: # ================================================================ Subroutines.
  308: 
  309: =pod
  310: 
  311: =head1 SUBROUTINES
  312: 
  313: =cut
  314: 
  315: =pod
  316: 
  317: B<unescape> - translate to unstrange escaped syntax to strange characters.
  318: 
  319: =over 4
  320: 
  321: Parameters:
  322: 
  323: =item I<$str> - string with unweird characters.
  324: 
  325: =back
  326: 
  327: =over 4
  328: 
  329: Returns:
  330: 
  331: =item C<string> - string with potentially weird characters.
  332: 
  333: =back
  334: 
  335: =cut
  336: 
  337: sub unescape ($)
  338:   {
  339:     my $str = shift(@_);
  340:     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
  341:     return($str);
  342:   }
  343: 
  344: =pod
  345: 
  346: B<escape> - translate strange characters to unstrange escaped syntax.
  347: 
  348: =over 4
  349: 
  350: Parameters:
  351: 
  352: =item I<$str> - string with potentially weird characters to unweird-ify.
  353: 
  354: =back
  355: 
  356: =over 4
  357: 
  358: Returns:
  359: 
  360: =item C<string> - unweird-ified string.
  361: 
  362: =back
  363: 
  364: =cut
  365: 
  366: sub escape ($)
  367:   {
  368:     my $str = shift(@_);
  369:     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
  370:     return($str);
  371:   }
  372: 
  373: =pod
  374: 
  375: B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
  376: 
  377: Dynamic metadata is stored in a nohist_resevaldata GDBM database.
  378: The only thing that this subroutine really makes happen is adjusting
  379: a 'count' value inside the F<nohist_new_resevaldata.db> as well
  380: as updating F<nohist_new_resevaldata.db> with information from
  381: F<nohist_resevaldata.db>.
  382: 
  383: It may need optmization, but since it gets called once a week. . .
  384: =over 4
  385: 
  386: Parameters:
  387: 
  388: =item I<$url> - the filesystem path (url may be a misnomer...)
  389: 
  390: =back
  391: 
  392: =over 4
  393: 
  394: Returns:
  395: 
  396: =item C<hash> - key-value table of dynamically evaluated metadata.
  397: 
  398: =back
  399: 
  400: =cut
  401: 
  402: sub build_on_the_fly_dynamic_metadata ($)
  403:   {
  404:     # some elements in here maybe non-obvious
  405: 
  406:     # Need to compute the user's directory.
  407:     my $url = &declutter(shift(@_));
  408:     $url =~ s/\.meta$//;
  409:     my %returnhash = ();
  410:     my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
  411:     my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
  412: 
  413:     # Attempt a GDBM database instantiation inside users directory and proceed.
  414:     if ((tie(%evaldata,'GDBM_File',
  415:             $user_directory.
  416: 	     '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
  417:         (tie(%newevaldata,'GDBM_File',
  418: 	     $user_directory.
  419: 	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
  420:       {
  421: 	# For different variables, track the running sum and counts.
  422: 	my %sum = ();
  423: 	my %cnt = ();
  424: 
  425: 	# Define computed items as a sum (add) or an average (avg) or a raw
  426: 	# count (cnt) or 'app'?
  427: 	my %listitems=('count'        => 'add',
  428: 		       'course'       => 'add',
  429: 		       'avetries'     => 'avg',
  430: 		       'stdno'        => 'add',
  431: 		       'difficulty'   => 'avg',
  432: 		       'clear'        => 'avg',
  433: 		       'technical'    => 'avg',
  434: 		       'helpful'      => 'avg',
  435: 		       'correct'      => 'avg',
  436: 		       'depth'        => 'avg',
  437: 		       'comments'     => 'app',
  438: 		       'usage'        => 'cnt'
  439: 		       );
  440: 	
  441: 	# Untaint the url and use as part of a regular expression.
  442: 	my $regexp = $url;
  443: 	$regexp =~ s/(\W)/\\$1/g;
  444: 	$regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs
  445: 
  446: 	# Check existing nohist database for this url.
  447:         # this is modfying the 'count' entries
  448:         # and copying all othe entries over
  449: 	foreach (keys %evaldata)
  450: 	  {
  451: 	    my $key = &unescape($_);
  452: 	    if ($key =~ /$regexp/) # If url-based entry exists.
  453: 	      {
  454: 		my $ctype = $1; # Set to specific category type.
  455: 
  456: 		# Do an increment for this category type.
  457: 		if (defined($cnt{$ctype}))
  458: 		  {
  459: 		    $cnt{$ctype}++; 
  460: 		  }
  461: 		else
  462: 		  {
  463: 		    $cnt{$ctype} = 1; 
  464: 		  }
  465:                 unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
  466: 		  {
  467: 		    # Increment the sum based on the evaluated data in the db.
  468: 		    if (defined($sum{$ctype}))
  469: 		      {
  470: 			$sum{$ctype} += $evaldata{$_};
  471: 		      }
  472: 		    else
  473: 		      {
  474: 			$sum{$ctype} = $evaldata{$_};
  475: 		      }
  476:  		  }
  477: 		else # 'app' mode, means to use '<hr />' as a separator
  478: 		  {
  479: 		    if (defined($sum{$ctype}))
  480: 		      {
  481: 			if ($evaldata{$_})
  482: 			  {
  483: 			    $sum{$ctype} .= '<hr />'.$evaldata{$_};
  484: 			  }
  485: 		      }
  486: 		    else
  487: 		      {
  488: 			$sum{$ctype} = ''.$evaldata{$_};
  489: 		      }
  490: 		  }
  491: 		if ($ctype ne 'count')
  492: 		  {
  493:                     # this is copying all data except 'count' attributes
  494: 		    $newevaldata{$_} = $evaldata{$_};
  495: 		  }
  496: 	      }
  497: 	  }
  498: 
  499:         # the only other time this loop is useful is for the 'count' hash
  500:         # element
  501: 	foreach (keys %cnt)
  502: 	  {
  503: 	    if ($listitems{$_} eq 'avg')
  504: 	      {
  505: 		$returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
  506: 	      }
  507: 	    elsif ($listitems{$_} eq 'cnt')
  508: 	      {
  509: 		$returnhash{$_} = $cnt{$_};
  510: 	      }
  511: 	    else
  512: 	      {
  513: 		$returnhash{$_} = $sum{$_};
  514: 	      }
  515: 	  }
  516: 
  517:         # seems to be doing something useful
  518: 	if ($returnhash{'count'})
  519: 	  {
  520: 	    my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
  521: 	    $newevaldata{$newkey} = $returnhash{'count'};
  522: 	  }
  523: 
  524: 	untie(%evaldata); # Close/release the original nohist database.
  525: 	untie(%newevaldata); # Close/release the new nohist database.
  526:       }
  527:     return(%returnhash);
  528:   }
  529: 
  530: =pod
  531: 
  532: B<wanted> - used by B<File::Find::find> subroutine.
  533: 
  534: This evaluates whether a file is wanted, and pushes it onto the
  535: I<@metalist> array.  This subroutine was, for the most part, auto-generated
  536: by the B<find2perl> command.
  537: 
  538: =over 4
  539: 
  540: Parameters:
  541: 
  542: =item I<$file> - a path to the file.
  543: 
  544: =back
  545: 
  546: =over 4
  547: 
  548: Returns:
  549: 
  550: =item C<boolean> - true or false based on logical statement.
  551: 
  552: =back
  553: 
  554: =cut
  555: 
  556: sub wanted ($)
  557:   {
  558:     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
  559:     -f $_ &&
  560:     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
  561:     push(@metalist,$File::Find::dir.'/'.$_);
  562:   }
  563: 
  564: =pod
  565: 
  566: B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
  567: 
  568: I<Note that this is significantly altered from a subroutine present in lonnet.>
  569: 
  570: =over 4
  571: 
  572: Parameters:
  573: 
  574: =item I<$file> - a path.to the file.
  575: 
  576: =back
  577: 
  578: =over 4
  579: 
  580: Returns:
  581: 
  582: =item C<hash reference> - a hash array (keys and values).
  583: 
  584: =back
  585: 
  586: =cut
  587: 
  588: sub get_metadata_from_file ($)
  589:   {
  590:     my ($filename) = @_;
  591:     my %metatable; # Used to store return value of hash-tabled metadata.
  592:     $filename = &declutter($filename); # Remove non-identifying filesystem info
  593:     my $uri = ''; # The URI is not relevant in this scenario.
  594:     unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
  595:       {
  596: 	$filename .= '.meta'; # Append a .meta suffix.
  597:       }
  598:     # Get the file contents.
  599:     my $metadata_string =
  600: 	&get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
  601: 
  602:     # Parse the file based on its XML tags.
  603:     my $parser = HTML::TokeParser->new(\$metadata_string);
  604:     my $token;
  605:     while ($token = $parser->get_token) # Loop through tokens.
  606:       {
  607: 	if ($token->[0] eq 'S') # If it is a start token.
  608: 	  {
  609: 	    my $entry = $token->[1];
  610: 	    my $unikey = $entry; # A unique identifier for this xml tag key.
  611: 	    if (defined($token->[2]->{'part'}))
  612: 	      { 
  613: 		$unikey .= '_'.$token->[2]->{'part'}; 
  614: 	      }
  615: 	    if (defined($token->[2]->{'name'}))
  616: 	      { 
  617: 		$unikey .= '_'.$token->[2]->{'name'}; 
  618: 	      }
  619: 	    # Append $unikey to metatable's keys entry.
  620: 	    if ($metatable{$uri.'keys'})
  621: 	      {
  622: 		$metatable{$uri.'keys'} .= ','.$unikey;
  623: 	      }
  624: 	    else
  625: 	      {
  626: 		$metatable{$uri.'keys'} = $unikey;
  627: 	      }
  628: 	    # Insert contents into metatable entry for the unikey.
  629: 	    foreach my $t3 (@{$token->[3]})
  630: 	      {
  631: 		$metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};
  632: 	      }
  633: 	    # If there was no text contained inside the tags, set = default.
  634: 	    unless
  635: 	      (
  636: 	        $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
  637: 	      )
  638: 	      {
  639: 		$metatable{$uri.''.$unikey} =
  640: 		    $metatable{$uri.''.$unikey.'.default'};
  641: 	      }
  642: 	  }
  643:       }
  644:     # Return with a key-value table of XML tags and their tag contents.
  645:     return(\%metatable);
  646:   }
  647: 
  648: =pod
  649: 
  650: B<get_file_contents> - returns either the contents of the file or a -1.
  651: 
  652: =over 4
  653: 
  654: Parameters:
  655: 
  656: =item I<$file> - a complete filesystem path.to the file.
  657: 
  658: =back
  659: 
  660: =over 4
  661: 
  662: Returns:
  663: 
  664: =item C<string> - file contents or a -1.
  665: 
  666: =back
  667: 
  668: =cut
  669: 
  670: sub get_file_contents ($)
  671:   {
  672:     my $file = shift(@_);
  673: 
  674:     # If file does not exist, then return a -1 value.
  675:     unless (-e $file)
  676:       {
  677: 	return(-1);
  678:       }
  679: 
  680:     # Read in file contents.
  681:     my $file_handle = IO::File->new($file);
  682:     my $file_contents = '';
  683:     while (<$file_handle>)
  684:       {
  685: 	$file_contents .= $_;
  686:       }
  687: 
  688:     # Return file contents.
  689:     return($file_contents);
  690:   }
  691: 
  692: =pod
  693: 
  694: B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
  695: 
  696: =over 4
  697: 
  698: Parameters:
  699: 
  700: =item I<$filesystem_path> - a complete filesystem path.
  701: 
  702: =back
  703: 
  704: =over 4
  705: 
  706: Returns:
  707: 
  708: =item C<string> - remnants of the filesystem path (beginning portion removed).
  709: 
  710: =back
  711: 
  712: =cut
  713: 
  714: sub declutter
  715:   {
  716:     my $filesystem_path = shift(@_);
  717: 
  718:     # Remove beginning portions of the filesystem path.
  719:     $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
  720:     $filesystem_path =~ s!^/!!;
  721:     $filesystem_path =~ s!^res/!!;
  722: 
  723:     # Return what is remaining for the filesystem path.
  724:     return($filesystem_path);
  725:   }
  726: 
  727: =pod
  728: 
  729: B<query_home_server_status> - Is this the home server of an author's directory?
  730: 
  731: =over 4
  732: 
  733: Parameters:
  734: 
  735: =item I<$author_filesystem_path> - directory path for a user.
  736: 
  737: =back
  738: 
  739: =over 4
  740: 
  741: Returns:
  742: 
  743: =item C<boolean> - 1 if true; 0 if false.
  744: 
  745: =back
  746: 
  747: =cut
  748: 
  749: sub query_home_server_status ($)
  750:   {
  751:     my $author_filesystem_path = shift(@_);
  752: 
  753:     # Remove beginning portion of this filesystem path.
  754:     $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
  755: 
  756:     # Construct path to the author's ordinary user directory.
  757:     my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
  758:     my $user_directory_path = construct_path_to_user_directory($user_domain,
  759: 							       $username);
  760: 
  761:     # Return status of whether the user directory path is defined.
  762:     if (-e $user_directory_path)
  763:       {
  764: 	return(1); # True.
  765:       }
  766:     else
  767:       {
  768:         return(0); # False.
  769:       }
  770:   }
  771: 
  772: =pod
  773: 
  774: B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
  775: 
  776: =over 4
  777: 
  778: Parameters:
  779: 
  780: =item I<$user_domain> - the loncapa domain of the user.
  781: 
  782: =item I<$username> - the unique username (user id) of the user.
  783: 
  784: =back
  785: 
  786: =over 4
  787: 
  788: Returns:
  789: 
  790: =item C<string> - representing the path on the filesystem.
  791: 
  792: =back
  793: 
  794: =cut
  795: 
  796: sub construct_path_to_user_directory ($$)
  797:   {
  798:     my ($user_domain,$username) = @_;
  799: 
  800:     # Untaint.
  801:     $user_domain =~ s/\W//g;
  802:     $username =~ s/\W//g;
  803: 
  804:     # Create three levels of sub-directoried filesystem path
  805:     # based on the first three characters of the username.
  806:     my $sub_filesystem_path = $username.'__';
  807:     $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
  808: 
  809:     # Use the sub-directoried levels and other variables to generate
  810:     # the complete filesystem path.
  811:     my $complete_filesystem_path =
  812: 	join('/',($perlvar{'lonUsersDir'},
  813: 		  $user_domain,
  814: 		  $sub_filesystem_path,
  815: 		  $username));
  816: 
  817:     # Return the complete filesystem path.
  818:     return($complete_filesystem_path);
  819:   }
  820: 
  821: =pod
  822: 
  823: B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
  824: 
  825: =over 4
  826: 
  827: Parameters:
  828: 
  829: =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
  830: 
  831: =back
  832: 
  833: =over 4
  834: 
  835: Returns:
  836: 
  837: =item C<string> - datetime sql formatted string.
  838: 
  839: =back
  840: 
  841: =cut
  842: 
  843: sub sql_formatted_time ($)
  844:   {
  845:     # Sanitize the time argument and convert to localtime array.
  846:     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
  847: 	localtime(&sanitize_time(shift(@_)));
  848: 
  849:     # Convert month from (0..11) to (1..12).
  850:     $mon += 1;
  851: 
  852:     # Make the year compatible with A.D. specification.
  853:     $year += 1900;
  854: 
  855:     # Return a date which is compatible with MySQL's "DATETIME" format.
  856:     return(join('-',($year,$mon,$mday)).
  857: 	   ' '.
  858: 	   join(':',($hour,$min,$sec))
  859: 	   );
  860:   }
  861: 
  862: 
  863: # ==================================== The following two subroutines are needed
  864: #                 for accommodating incorrect time formats inside the metadata.
  865: 
  866: =pod
  867: 
  868: B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
  869: 
  870: =over 4
  871: 
  872: Parameters:
  873: 
  874: =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
  875: 
  876: =back
  877: 
  878: =over 4
  879: 
  880: Returns:
  881: 
  882: =item C<integer> - seconds since epoch.
  883: 
  884: =back
  885: 
  886: =cut
  887: 
  888: sub make_seconds_since_epoch (@)
  889:   {
  890:     # Keytable of time metadata.
  891:     my %time_metadata = @_;
  892: 
  893:     # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
  894:     return(POSIX::mktime(
  895: 			 ($time_metadata{'seconds'},
  896: 			  $time_metadata{'minutes'},
  897: 			  $time_metadata{'hours'},
  898: 			  $time_metadata{'day'},
  899: 			  $time_metadata{'month'}-1,
  900: 			  $time_metadata{'year'}-1900,
  901: 			  0,
  902: 			  0,
  903: 			  $time_metadata{'dlsav'})
  904: 			 )
  905: 	   );
  906:   }
  907: 
  908: =pod
  909: 
  910: B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
  911: 
  912: Somebody described this subroutine as
  913: "retro-fixing of un-backward-compatible time format".
  914: 
  915: What this means, is that a part of this code expects to get UTC seconds
  916: since the epoch (beginning of 1970).  Yet, some of the .meta files have
  917: sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
  918: integers (e.g. 1044147435).  These time strings do not encode the timezone
  919: and, in this sense, can be considered "un-backwards-compatible".
  920: 
  921: =over 4
  922: 
  923: Parameters:
  924: 
  925: =item I<$potentially_badformat_string> - string to "retro-fix".
  926: 
  927: =back
  928: 
  929: =over 4
  930: 
  931: Returns:
  932: 
  933: =item C<integer> - seconds since epoch.
  934: 
  935: =back
  936: 
  937: =cut
  938: 
  939: sub sanitize_time ($)
  940:   {
  941:     my $timestamp = shift(@_);
  942:     # If timestamp is in this unexpected format....
  943:     if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
  944:       {
  945: 	# then convert into seconds since epoch (the expected format).
  946: 	$timestamp = &make_seconds_since_epoch(
  947: 					       'year' => $1,
  948: 					       'month' => $2,
  949: 					       'day' => $3,
  950: 					       'hours' => $4,
  951: 					       'minutes' => $5,
  952: 					       'seconds' => $6
  953: 					       );
  954:       }
  955:     # Otherwise we assume timestamp to be as expected.
  956:     return($timestamp);
  957:   }
  958: 
  959: =pod
  960: 
  961: =head1 AUTHOR
  962: 
  963: Written to help the loncapa project.
  964: 
  965: Scott Harrison, sharrison@users.sourceforge.net
  966: 
  967: This is distributed under the same terms as loncapa (i.e. "freeware").
  968: 
  969: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>