File:  [LON-CAPA] / loncom / metadata_database / searchcat.pl
Revision 1.30: download - view: text, annotated - select for diffs
Mon Feb 3 17:01:55 2003 UTC (21 years, 4 months ago) by www
Branches: MAIN
CVS tags: HEAD
Trying to make sense of the diffs with previous versions, i.e., 1.27 (sniff)

    1: #!/usr/bin/perl
    2: # The LearningOnline Network
    3: # searchcat.pl "Search Catalog" batch script
    4: #
    5: # $Id: searchcat.pl,v 1.30 2003/02/03 17:01:55 www Exp $
    6: #
    7: # Copyright Michigan State University Board of Trustees
    8: #
    9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   10: #
   11: # LON-CAPA is free software; you can redistribute it and/or modify
   12: # it under the terms of the GNU General Public License as published by
   13: # the Free Software Foundation; either version 2 of the License, or
   14: # (at your option) any later version.
   15: #
   16: # LON-CAPA is distributed in the hope that it will be useful,
   17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   19: # GNU General Public License for more details.
   20: #
   21: # You should have received a copy of the GNU General Public License
   22: # along with LON-CAPA; if not, write to the Free Software
   23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   24: #
   25: # /home/httpd/html/adm/gpl.txt
   26: #
   27: # http://www.lon-capa.org/
   28: #
   29: # YEAR=2001
   30: # 04/14/2001, 04/16/2001 Scott Harrison
   31: #
   32: # YEAR=2002
   33: # 05/11/2002 Scott Harrison
   34: #
   35: # YEAR=2003
   36: # Scott Harrison
   37: #
   38: ###
   39: 
   40: =pod
   41: 
   42: =head1 NAME
   43: 
   44: B<searchcat.pl> - put authoritative filesystem data into sql database.
   45: 
   46: =head1 SYNOPSIS
   47: 
   48: Ordinarily this script is to be called from a loncapa cron job
   49: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
   50: filesystem installation location: F</etc/cron.d/loncapa>).
   51: 
   52: Here is the cron job entry.
   53: 
   54: C<# Repopulate and refresh the metadata database used for the search catalog.>
   55: 
   56: C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
   57: 
   58: This script only allows itself to be run as the user C<www>.
   59: 
   60: =head1 DESCRIPTION
   61: 
   62: This script goes through a loncapa resource directory and gathers metadata.
   63: The metadata is entered into a SQL database.
   64: 
   65: This script also does general database maintenance such as reformatting
   66: the C<loncapa:metadata> table if it is deprecated.
   67: 
   68: This script also builds dynamic temporal metadata and stores this inside
   69: a F<nohist_resevaldata.db> database file.
   70: 
   71: This script is playing an increasingly important role for a loncapa
   72: library server.  The proper operation of this script is critical for a smooth
   73: and correct user experience.
   74: 
   75: =cut
   76: 
   77: # ========================================================== Setting things up.
   78: 
   79: # ------------------------------------------------------  Use external modules.
   80: 
   81: use lib '/home/httpd/lib/perl/';
   82: use LONCAPA::Configuration;
   83: 
   84: use IO::File;
   85: use HTML::TokeParser;
   86: use DBI;
   87: use GDBM_File;
   88: use POSIX qw(strftime mktime);
   89: 
   90: # ----------------- Code to enable 'find' subroutine listing of the .meta files
   91: use File::Find;
   92: 
   93: # List of .meta files (used on a per-user basis).
   94: my @metalist;
   95: 
   96: # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
   97: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
   98: my %perlvar = %{$perlvarref};
   99: undef($perlvarref); # Remove since sensitive and not needed.
  100: delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.
  101: 
  102: # ------------------------------------- Only run if machine is a library server
  103: if ($perlvar{'lonRole'} ne 'library')
  104:   {
  105:     exit(0);
  106:   }
  107: 
  108: # ------------------------------ Make sure this process is running as user=www.
  109: my $wwwid = getpwnam('www');
  110: if ($wwwid != $<)
  111:   {
  112:     $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
  113:     $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
  114:     system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
  115: 	   "mailto $emailto -s '$subj' > /dev/null");
  116:     exit(1);
  117:   }
  118: 
  119: # ------------------------------------------------------ Initialize log output.
  120: open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
  121: print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");
  122: 
  123: my $dbh; # Database object reference handle.
  124: 
  125: # ----------------------------- Verify connection to loncapa:metadata database.
  126: unless (
  127: 	$dbh = DBI->connect('DBI:mysql:loncapa','www',
  128: 			    $perlvar{'lonSqlAccess'},
  129: 			    { RaiseError => 0,PrintError => 0})
  130: 	)
  131:   { 
  132:     print(LOG '**** ERROR **** Cannot connect to database!'."\n");
  133:     exit(0);
  134:   }
  135: 
  136: # ------------------------------ Create loncapa:metadata table if non-existent.
  137: my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
  138:     'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
  139:     'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
  140:     'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
  141:     'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
  142:     'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
  143:     'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
  144:     'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
  145:     'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
  146:     'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
  147:     'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';
  148: 
  149: $dbh->do($make_metadata_table); # Generate the table.
  150: 
  151: # ----------------------------- Verify format of the loncapa:metadata database.
  152: #                               (delete and recreate database if necessary).
  153: 
  154: # Make a positive control for verifying table structure.
  155: my $make_metadata_table_CONTROL = $make_metadata_table;
  156: $make_metadata_table_CONTROL =~
  157:     s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
  158: 
  159: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
  160: $dbh->do($make_metadata_table_CONTROL);
  161: 
  162: my $table_description; # selectall reference to the table description.
  163: 
  164: my $CONTROL_table_string; # What the table description should look like.
  165: my $table_string; # What the table description does look like.
  166: 
  167: # Calculate the CONTROL table description (what it should be).
  168: $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
  169: foreach my $table_row (@{$table_description})
  170:   {
  171:     $CONTROL_table_string .= join(',',@{$table_row})."\n";
  172:   }
  173: 
  174: # Calculate the current table description (what it currently looks like).
  175: $table_description = $dbh->selectall_arrayref('describe metadata');
  176: foreach my $table_row (@{$table_description})
  177:   {
  178:     $table_string .= join(',',@{$table_row})."\n";
  179:   }
  180: 
  181: if ($table_string ne $CONTROL_table_string)
  182:   {
  183:     # Log this incident.
  184:     print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
  185: 	  '.'."\n");
  186:     # Delete the table.
  187:     $dbh->do('DROP TABLE IF EXISTS metadata');
  188:     # Generate the table.
  189:     $dbh->do($make_metadata_table);
  190:   }
  191: 
  192: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
  193: 
  194: # ----------------------------------------------- Set utilitysemaphore to zero.
  195: $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
  196: 
  197: # ========================================================= Main functionality.
  198: 
  199: # - Determine home authors on this server based on resources dir and user tree.
  200: 
  201: # RESOURCES: the resources directory (subdirs correspond to author usernames).
  202: opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
  203:     (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
  204:      and exit(0));
  205: 
  206: # query_home_server_status will look for user home directories on this machine.
  207: my @homeusers =
  208:     grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
  209: 				    $perlvar{'lonDefDomain'}.'/'.$_)
  210: 	  } grep {!/^\.\.?$/} readdir(RESOURCES);
  211: closedir(RESOURCES);
  212: 
  213: unless (@homeusers)
  214:   {
  215:     print(LOG '=== No home users found on this server.'."\n");
  216:   }
  217: 
  218: # Consider each author individually.
  219: foreach my $user (@homeusers)
  220:   {
  221:     # Make a log entry.
  222:     print(LOG "\n".'=== User: '.$user."\n\n");
  223: 
  224:     # Get filesystem path to this user's directory.
  225:     my $user_directory =
  226: 	&construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
  227: 
  228:     # Remove left-over db-files from a potentially crashed searchcat run.
  229:     unlink($user_directory.'/nohist_new_resevaldata.db');
  230: 
  231:     # Cleanup the metalist array.
  232:     undef(@metalist);
  233:     @metalist = ();
  234: 
  235:     # This will add entries to the @metalist array.
  236:     &File::Find::find(\&wanted,
  237: 		      $perlvar{'lonDocRoot'}.'/res/'.
  238: 		      $perlvar{'lonDefDomain'}.'/'.$user);
  239: 
  240:     # -- process file to get metadata and put into search catalog SQL database
  241:     # Also, build and store dynamic metadata.
  242:     # Also, delete record entries before refreshing.
  243:     foreach my $m (@metalist)
  244:       {
  245: 	# Log this action.
  246: 	print(LOG "- ".$m."\n");
  247: 
  248: 	# Get metadata from the file.
  249: 	my $ref = get_metadata_from_file($m);
  250: 
  251: 	# Make a datarecord identifier for this resource.
  252: 	my $m2 = '/res/'.declutter($m);
  253: 	$m2 =~ s/\.meta$//;
  254: 
  255: 	# Build and store dynamic metadata inside nohist_resevaldata.db.
  256: 	build_on_the_fly_dynamic_metadata($m2);
  257: 
  258: 	# Delete record if it already exists.
  259: 	my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
  260: 	my $sth = $dbh->prepare($q2);
  261: 	$sth->execute();
  262: 	my $r1 = $sth->fetchall_arrayref;
  263: 	if (@$r1)
  264: 	  {
  265: 	    $sth = 
  266: 		$dbh->prepare('delete from metadata where url like binary '.
  267: 			      "'".$m2."'");
  268: 	    $sth->execute();
  269: 	  }
  270: 
  271: 	# Add new/replacement record into the loncapa:metadata table.
  272: 	$sth = $dbh->prepare('insert into metadata values ('.
  273: 			     '"'.delete($ref->{'title'}).'"'.','.
  274: 			     '"'.delete($ref->{'author'}).'"'.','.
  275: 			     '"'.delete($ref->{'subject'}).'"'.','.
  276: 			     '"'.$m2.'"'.','.
  277: 			     '"'.delete($ref->{'keywords'}).'"'.','.
  278: 			     '"'.'current'.'"'.','.
  279: 			     '"'.delete($ref->{'notes'}).'"'.','.
  280: 			     '"'.delete($ref->{'abstract'}).'"'.','.
  281: 			     '"'.delete($ref->{'mime'}).'"'.','.
  282: 			     '"'.delete($ref->{'language'}).'"'.','.
  283: 			     '"'.sql_formatted_time(
  284: 				       delete($ref->{'creationdate'})).'"'.','.
  285: 			     '"'.sql_formatted_time(
  286: 				   delete($ref->{'lastrevisiondate'})).'"'.','.
  287: 			     '"'.delete($ref->{'owner'}).'"'.','.
  288: 			     '"'.delete($ref->{'copyright'}).'"'.','.
  289: 			     '1'.')');
  290: 	$sth->execute();
  291:       }
  292: 
  293: # ----------------------- Clean up database, remove stale SQL database records.
  294:     $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
  295: 
  296: # -------------------------------------------------- Copy over the new db-files
  297:     system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
  298: 	         $user_directory.'/nohist_resevaldata.db');
  299:   }
  300: 
  301: # --------------------------------------------------- Close database connection
  302: $dbh->disconnect;
  303: print LOG "\n==== Searchcat completed ".localtime()." ====\n";
  304: close(LOG);
  305: exit(0);
  306: 
  307: # ================================================================ Subroutines.
  308: 
  309: =pod
  310: 
  311: =head1 SUBROUTINES
  312: 
  313: =cut
  314: 
  315: =pod
  316: 
  317: B<unescape> - translate to unstrange escaped syntax to strange characters.
  318: 
  319: =over 4
  320: 
  321: Parameters:
  322: 
  323: =item I<$str> - string with unweird characters.
  324: 
  325: =back
  326: 
  327: =over 4
  328: 
  329: Returns:
  330: 
  331: =item C<string> - string with potentially weird characters.
  332: 
  333: =back
  334: 
  335: =cut
  336: 
  337: sub unescape ($)
  338:   {
  339:     my $str = shift(@_);
  340:     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
  341:     return($str);
  342:   }
  343: 
  344: =pod
  345: 
  346: B<escape> - translate strange characters to unstrange escaped syntax.
  347: 
  348: =over 4
  349: 
  350: Parameters:
  351: 
  352: =item I<$str> - string with potentially weird characters to unweird-ify.
  353: 
  354: =back
  355: 
  356: =over 4
  357: 
  358: Returns:
  359: 
  360: =item C<string> - unweird-ified string.
  361: 
  362: =back
  363: 
  364: =cut
  365: 
  366: sub escape ($)
  367:   {
  368:     my $str = shift(@_);
  369:     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
  370:     return($str);
  371:   }
  372: 
  373: =pod
  374: 
  375: B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
  376: 
  377: Returns the dynamic metadata for an author, which will later be added to the
  378: MySQL database (not yet implemented).
  379: 
  380: The vast majority of entries in F<nohist_resevaldata.db>, which contains
  381: the dynamic metadata for an author's resources, are "count", which make
  382: the file really large and evaluation really slow.
  383: 
  384: While computing the current value of all dynamic metadata
  385: for later insertion into the MySQL metadata cache (not yet implemented),
  386: this routine also simply adds up all "count" type fields and replaces them by
  387: one new field with the to-date count.
  388: 
  389: Only after successful completion of working with one author, copy new file to
  390: original file. Copy to tmp-"new"-db-file was necessary since db-file size 
  391: would not shrink after "delete" of key.
  392: 
  393: =over 4
  394: 
  395: Parameters:
  396: 
  397: =item I<$url> - the filesystem path (url may be a misnomer...)
  398: 
  399: =back
  400: 
  401: =over 4
  402: 
  403: Returns:
  404: 
  405: =item C<hash> - key-value table of dynamically evaluated metadata.
  406: 
  407: =back
  408: 
  409: =cut
  410: 
  411: sub build_on_the_fly_dynamic_metadata {
  412: 
  413:     # Need to compute the user's directory.
  414:     my $url=&declutter(shift);
  415:     $url=~s/\.meta$//;
  416:     my %returnhash=();
  417:     my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
  418:     my $user_directory=&construct_path_to_user_directory($adomain,$aauthor);
  419: 
  420:     # Attempt a GDBM database instantiation inside users directory and proceed.
  421:     if ((tie(%evaldata,'GDBM_File',
  422:             $user_directory.
  423: 	     '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
  424:         (tie(%newevaldata,'GDBM_File',
  425: 	     $user_directory.
  426: 	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
  427: 	# For different variables, track the running sum and counts.
  428: 	my %sum=();
  429: 	my %cnt=();
  430: 
  431: 	# Define computed items as a sum (add) or an average (avg) or a raw
  432: 	# count (cnt) or append (app)?
  433: 	my %listitems=('count'        => 'add',
  434: 		       'course'       => 'add',
  435: 		       'avetries'     => 'avg',
  436: 		       'stdno'        => 'add',
  437: 		       'difficulty'   => 'avg',
  438: 		       'clear'        => 'avg',
  439: 		       'technical'    => 'avg',
  440: 		       'helpful'      => 'avg',
  441: 		       'correct'      => 'avg',
  442: 		       'depth'        => 'avg',
  443: 		       'comments'     => 'app',
  444: 		       'usage'        => 'cnt'
  445: 		       );
  446: 	
  447: 	# Untaint the url and use as part of a regular expression.
  448: 	my $regexp=$url;
  449: 	$regexp=~s/(\W)/\\$1/g;
  450: 	$regexp='___'.$regexp.'___([a-z]+)$'; #' emacs
  451: 
  452: 	# Check existing database for this author.
  453:         # this is modifying the 'count' entries
  454:         # and copying all other entries over
  455: 
  456: 	foreach (keys %evaldata) {
  457: 	    my $key=&unescape($_);
  458: 	    if ($key=~/$regexp/) { # If url-based entry exists.
  459: 		my $ctype=$1; # Set to specific category type.
  460: 
  461: 		# Do an increment for this category type.
  462: 		if (defined($cnt{$ctype})) {
  463: 		    $cnt{$ctype}++; 
  464: 		} else {
  465: 		    $cnt{$ctype}=1; 
  466: 		}
  467:                 unless ($listitems{$ctype} eq 'app') { # append comments
  468: 		    # Increment the sum based on the evaluated data in the db.
  469: 		    if (defined($sum{$ctype})) {
  470: 			$sum{$ctype}+=$evaldata{$_};
  471: 		    } else {
  472: 			$sum{$ctype}=$evaldata{$_};
  473: 		    }
  474:  		} else { # 'app' mode, means to use '<hr />' as a separator
  475: 		    if (defined($sum{$ctype})) {
  476: 			if ($evaldata{$_}) {
  477: 			    $sum{$ctype}.='<hr />'.$evaldata{$_};
  478: 			}
  479: 		    } else {
  480: 			$sum{$ctype}=''.$evaldata{$_};
  481: 		    }
  482: 	        }
  483: 		if ($ctype ne 'count') {
  484:                     # this is copying all data except 'count' attributes
  485: 		    $newevaldata{$_}=$evaldata{$_};
  486: 	        }
  487: 	    }
  488: 	}
  489: 
  490:         # these values will be returned (currently still unused)
  491: 	foreach (keys %cnt) {
  492: 	    if ($listitems{$_} eq 'avg') {
  493: 		$returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
  494: 	    } elsif ($listitems{$_} eq 'cnt') {
  495: 		$returnhash{$_}=$cnt{$_};
  496: 	    } else {
  497: 		$returnhash{$_}=$sum{$_};
  498: 	    }
  499: 	}
  500: 
  501:         # generate new count key in resevaldata, insert sum
  502: 	if ($returnhash{'count'}) {
  503: 	    my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
  504: 	    $newevaldata{$newkey}=$returnhash{'count'};
  505: 	}
  506: 
  507: 	untie(%evaldata); # Close/release the original nohist database.
  508: 	untie(%newevaldata); # Close/release the new nohist database.
  509:     }
  510:     return %returnhash;
  511: }
  512: 
  513: =pod
  514: 
  515: B<wanted> - used by B<File::Find::find> subroutine.
  516: 
  517: This evaluates whether a file is wanted, and pushes it onto the
  518: I<@metalist> array.  This subroutine was, for the most part, auto-generated
  519: by the B<find2perl> command.
  520: 
  521: =over 4
  522: 
  523: Parameters:
  524: 
  525: =item I<$file> - a path to the file.
  526: 
  527: =back
  528: 
  529: =over 4
  530: 
  531: Returns:
  532: 
  533: =item C<boolean> - true or false based on logical statement.
  534: 
  535: =back
  536: 
  537: =cut
  538: 
  539: sub wanted ($)
  540:   {
  541:     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
  542:     -f $_ &&
  543:     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
  544:     push(@metalist,$File::Find::dir.'/'.$_);
  545:   }
  546: 
  547: =pod
  548: 
  549: B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
  550: 
  551: I<Note that this is significantly altered from a subroutine present in lonnet.>
  552: 
  553: =over 4
  554: 
  555: Parameters:
  556: 
  557: =item I<$file> - a path.to the file.
  558: 
  559: =back
  560: 
  561: =over 4
  562: 
  563: Returns:
  564: 
  565: =item C<hash reference> - a hash array (keys and values).
  566: 
  567: =back
  568: 
  569: =cut
  570: 
  571: sub get_metadata_from_file ($)
  572:   {
  573:     my ($filename) = @_;
  574:     my %metatable; # Used to store return value of hash-tabled metadata.
  575:     $filename = &declutter($filename); # Remove non-identifying filesystem info
  576:     my $uri = ''; # The URI is not relevant in this scenario.
  577:     unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
  578:       {
  579: 	$filename .= '.meta'; # Append a .meta suffix.
  580:       }
  581:     # Get the file contents.
  582:     my $metadata_string =
  583: 	&get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
  584: 
  585:     # Parse the file based on its XML tags.
  586:     my $parser = HTML::TokeParser->new(\$metadata_string);
  587:     my $token;
  588:     while ($token = $parser->get_token) # Loop through tokens.
  589:       {
  590: 	if ($token->[0] eq 'S') # If it is a start token.
  591: 	  {
  592: 	    my $entry = $token->[1];
  593: 	    my $unikey = $entry; # A unique identifier for this xml tag key.
  594: 	    if (defined($token->[2]->{'part'}))
  595: 	      { 
  596: 		$unikey .= '_'.$token->[2]->{'part'}; 
  597: 	      }
  598: 	    if (defined($token->[2]->{'name'}))
  599: 	      { 
  600: 		$unikey .= '_'.$token->[2]->{'name'}; 
  601: 	      }
  602: 	    # Append $unikey to metatable's keys entry.
  603: 	    if ($metatable{$uri.'keys'})
  604: 	      {
  605: 		$metatable{$uri.'keys'} .= ','.$unikey;
  606: 	      }
  607: 	    else
  608: 	      {
  609: 		$metatable{$uri.'keys'} = $unikey;
  610: 	      }
  611: 	    # Insert contents into metatable entry for the unikey.
  612: 	    foreach my $t3 (@{$token->[3]})
  613: 	      {
  614: 		$metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};
  615: 	      }
  616: 	    # If there was no text contained inside the tags, set = default.
  617: 	    unless
  618: 	      (
  619: 	        $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
  620: 	      )
  621: 	      {
  622: 		$metatable{$uri.''.$unikey} =
  623: 		    $metatable{$uri.''.$unikey.'.default'};
  624: 	      }
  625: 	  }
  626:       }
  627:     # Return with a key-value table of XML tags and their tag contents.
  628:     return(\%metatable);
  629:   }
  630: 
  631: =pod
  632: 
  633: B<get_file_contents> - returns either the contents of the file or a -1.
  634: 
  635: =over 4
  636: 
  637: Parameters:
  638: 
  639: =item I<$file> - a complete filesystem path.to the file.
  640: 
  641: =back
  642: 
  643: =over 4
  644: 
  645: Returns:
  646: 
  647: =item C<string> - file contents or a -1.
  648: 
  649: =back
  650: 
  651: =cut
  652: 
  653: sub get_file_contents ($)
  654:   {
  655:     my $file = shift(@_);
  656: 
  657:     # If file does not exist, then return a -1 value.
  658:     unless (-e $file)
  659:       {
  660: 	return(-1);
  661:       }
  662: 
  663:     # Read in file contents.
  664:     my $file_handle = IO::File->new($file);
  665:     my $file_contents = '';
  666:     while (<$file_handle>)
  667:       {
  668: 	$file_contents .= $_;
  669:       }
  670: 
  671:     # Return file contents.
  672:     return($file_contents);
  673:   }
  674: 
  675: =pod
  676: 
  677: B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
  678: 
  679: =over 4
  680: 
  681: Parameters:
  682: 
  683: =item I<$filesystem_path> - a complete filesystem path.
  684: 
  685: =back
  686: 
  687: =over 4
  688: 
  689: Returns:
  690: 
  691: =item C<string> - remnants of the filesystem path (beginning portion removed).
  692: 
  693: =back
  694: 
  695: =cut
  696: 
  697: sub declutter
  698:   {
  699:     my $filesystem_path = shift(@_);
  700: 
  701:     # Remove beginning portions of the filesystem path.
  702:     $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
  703:     $filesystem_path =~ s!^/!!;
  704:     $filesystem_path =~ s!^res/!!;
  705: 
  706:     # Return what is remaining for the filesystem path.
  707:     return($filesystem_path);
  708:   }
  709: 
  710: =pod
  711: 
  712: B<query_home_server_status> - Is this the home server of an author's directory?
  713: 
  714: =over 4
  715: 
  716: Parameters:
  717: 
  718: =item I<$author_filesystem_path> - directory path for a user.
  719: 
  720: =back
  721: 
  722: =over 4
  723: 
  724: Returns:
  725: 
  726: =item C<boolean> - 1 if true; 0 if false.
  727: 
  728: =back
  729: 
  730: =cut
  731: 
  732: sub query_home_server_status ($)
  733:   {
  734:     my $author_filesystem_path = shift(@_);
  735: 
  736:     # Remove beginning portion of this filesystem path.
  737:     $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
  738: 
  739:     # Construct path to the author's ordinary user directory.
  740:     my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
  741:     my $user_directory_path = construct_path_to_user_directory($user_domain,
  742: 							       $username);
  743: 
  744:     # Return status of whether the user directory path is defined.
  745:     if (-e $user_directory_path)
  746:       {
  747: 	return(1); # True.
  748:       }
  749:     else
  750:       {
  751:         return(0); # False.
  752:       }
  753:   }
  754: 
  755: =pod
  756: 
  757: B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
  758: 
  759: =over 4
  760: 
  761: Parameters:
  762: 
  763: =item I<$user_domain> - the loncapa domain of the user.
  764: 
  765: =item I<$username> - the unique username (user id) of the user.
  766: 
  767: =back
  768: 
  769: =over 4
  770: 
  771: Returns:
  772: 
  773: =item C<string> - representing the path on the filesystem.
  774: 
  775: =back
  776: 
  777: =cut
  778: 
  779: sub construct_path_to_user_directory ($$)
  780:   {
  781:     my ($user_domain,$username) = @_;
  782: 
  783:     # Untaint.
  784:     $user_domain =~ s/\W//g;
  785:     $username =~ s/\W//g;
  786: 
  787:     # Create three levels of sub-directoried filesystem path
  788:     # based on the first three characters of the username.
  789:     my $sub_filesystem_path = $username.'__';
  790:     $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
  791: 
  792:     # Use the sub-directoried levels and other variables to generate
  793:     # the complete filesystem path.
  794:     my $complete_filesystem_path =
  795: 	join('/',($perlvar{'lonUsersDir'},
  796: 		  $user_domain,
  797: 		  $sub_filesystem_path,
  798: 		  $username));
  799: 
  800:     # Return the complete filesystem path.
  801:     return($complete_filesystem_path);
  802:   }
  803: 
  804: =pod
  805: 
  806: B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
  807: 
  808: =over 4
  809: 
  810: Parameters:
  811: 
  812: =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
  813: 
  814: =back
  815: 
  816: =over 4
  817: 
  818: Returns:
  819: 
  820: =item C<string> - datetime sql formatted string.
  821: 
  822: =back
  823: 
  824: =cut
  825: 
  826: sub sql_formatted_time ($)
  827:   {
  828:     # Sanitize the time argument and convert to localtime array.
  829:     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
  830: 	localtime(&sanitize_time(shift(@_)));
  831: 
  832:     # Convert month from (0..11) to (1..12).
  833:     $mon += 1;
  834: 
  835:     # Make the year compatible with A.D. specification.
  836:     $year += 1900;
  837: 
  838:     # Return a date which is compatible with MySQL's "DATETIME" format.
  839:     return(join('-',($year,$mon,$mday)).
  840: 	   ' '.
  841: 	   join(':',($hour,$min,$sec))
  842: 	   );
  843:   }
  844: 
  845: 
  846: # ==================================== The following two subroutines are needed
  847: #                 for accommodating incorrect time formats inside the metadata.
  848: 
  849: =pod
  850: 
  851: B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
  852: 
  853: =over 4
  854: 
  855: Parameters:
  856: 
  857: =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
  858: 
  859: =back
  860: 
  861: =over 4
  862: 
  863: Returns:
  864: 
  865: =item C<integer> - seconds since epoch.
  866: 
  867: =back
  868: 
  869: =cut
  870: 
  871: sub make_seconds_since_epoch (@)
  872:   {
  873:     # Keytable of time metadata.
  874:     my %time_metadata = @_;
  875: 
  876:     # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
  877:     return(POSIX::mktime(
  878: 			 ($time_metadata{'seconds'},
  879: 			  $time_metadata{'minutes'},
  880: 			  $time_metadata{'hours'},
  881: 			  $time_metadata{'day'},
  882: 			  $time_metadata{'month'}-1,
  883: 			  $time_metadata{'year'}-1900,
  884: 			  0,
  885: 			  0,
  886: 			  $time_metadata{'dlsav'})
  887: 			 )
  888: 	   );
  889:   }
  890: 
  891: =pod
  892: 
  893: B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
  894: 
  895: Somebody described this subroutine as
  896: "retro-fixing of un-backward-compatible time format".
  897: 
  898: What this means, is that a part of this code expects to get UTC seconds
  899: since the epoch (beginning of 1970).  Yet, some of the .meta files have
  900: sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
  901: integers (e.g. 1044147435).  These time strings do not encode the timezone
  902: and, in this sense, can be considered "un-backwards-compatible".
  903: 
  904: =over 4
  905: 
  906: Parameters:
  907: 
  908: =item I<$potentially_badformat_string> - string to "retro-fix".
  909: 
  910: =back
  911: 
  912: =over 4
  913: 
  914: Returns:
  915: 
  916: =item C<integer> - seconds since epoch.
  917: 
  918: =back
  919: 
  920: =cut
  921: 
  922: sub sanitize_time ($)
  923:   {
  924:     my $timestamp = shift(@_);
  925:     # If timestamp is in this unexpected format....
  926:     if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
  927:       {
  928: 	# then convert into seconds since epoch (the expected format).
  929: 	$timestamp = &make_seconds_since_epoch(
  930: 					       'year' => $1,
  931: 					       'month' => $2,
  932: 					       'day' => $3,
  933: 					       'hours' => $4,
  934: 					       'minutes' => $5,
  935: 					       'seconds' => $6
  936: 					       );
  937:       }
  938:     # Otherwise we assume timestamp to be as expected.
  939:     return($timestamp);
  940:   }
  941: 
  942: =pod
  943: 
  944: =head1 AUTHOR
  945: 
  946: Written to help the loncapa project.
  947: 
  948: Scott Harrison, sharrison@users.sourceforge.net
  949: 
  950: This is distributed under the same terms as loncapa (i.e. "freeware").
  951: 
  952: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>