Annotation of loncom/metadata_database/searchcat.pl, revision 1.28

1.1       harris41    1: #!/usr/bin/perl
                      2: # The LearningOnline Network
                      3: # searchcat.pl "Search Catalog" batch script
1.16      harris41    4: #
1.28    ! harris41    5: # $Id: searchcat.pl,v 1.27 2003/01/04 19:23:31 www Exp $
1.16      harris41    6: #
                      7: # Copyright Michigan State University Board of Trustees
                      8: #
1.28    ! harris41    9: # This file is part of the LearningOnline Network with a
        !            10: # Computer assisted personalized approach (loncapa).
1.16      harris41   11: #
1.28    ! harris41   12: # Loncapa is free software; you can redistribute it and/or modify
1.16      harris41   13: # it under the terms of the GNU General Public License as published by
                     14: # the Free Software Foundation; either version 2 of the License, or
                     15: # (at your option) any later version.
                     16: #
1.28    ! harris41   17: # Loncapa is distributed in the hope that it will be useful,
1.16      harris41   18: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     19: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     20: # GNU General Public License for more details.
                     21: #
                     22: # You should have received a copy of the GNU General Public License
1.28    ! harris41   23: # along with loncapa; if not, write to the Free Software
1.16      harris41   24: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     25: #
                     26: # /home/httpd/html/adm/gpl.txt
                     27: #
1.28    ! harris41   28: # http://www.loncapa.org/
1.16      harris41   29: #
                     30: # YEAR=2001
1.17      harris41   31: # 04/14/2001, 04/16/2001 Scott Harrison
1.16      harris41   32: #
1.17      harris41   33: # YEAR=2002
                     34: # 05/11/2002 Scott Harrison
1.16      harris41   35: #
1.28    ! harris41   36: # YEAR=2003
        !            37: # Scott Harrison
        !            38: #
1.16      harris41   39: ###
1.1       harris41   40: 
1.28    ! harris41   41: =pod
        !            42: 
        !            43: =head1 NAME
        !            44: 
        !            45: B<searchcat.pl> - put authoritative filesystem data into sql database.
        !            46: 
        !            47: =head1 SYNOPSIS
        !            48: 
        !            49: Ordinarily this script is to be called from a loncapa cron job
        !            50: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
        !            51: filesystem installation location: F</etc/cron.d/loncapa>).
        !            52: 
        !            53: Here is the cron job entry.
        !            54: 
        !            55: C<# Repopulate and refresh the metadata database used for the search catalog.>
        !            56: 
        !            57: C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
        !            58: 
        !            59: This script only allows itself to be run as the user C<www>.
        !            60: 
        !            61: =head1 DESCRIPTION
        !            62: 
        !            63: This script goes through a loncapa resource directory and gathers metadata.
        !            64: The metadata is entered into a SQL database.
        !            65: 
        !            66: This script also does general database maintenance such as reformatting
        !            67: the C<loncapa:metadata> table if it is deprecated.
        !            68: 
        !            69: This script also builds dynamic temporal metadata and stores this inside
        !            70: a F<nohist_resevaldata.db> database file.
        !            71: 
        !            72: This script is playing an increasingly important role for a loncapa
        !            73: library server.  The proper operation of this script is critical for a smooth
        !            74: and correct user experience.
        !            75: 
        !            76: =cut
        !            77: 
        !            78: # ========================================================== Setting things up.
        !            79: 
        !            80: # ------------------------------------------------------  Use external modules.
1.1       harris41   81: 
1.17      harris41   82: use lib '/home/httpd/lib/perl/';
                     83: use LONCAPA::Configuration;
                     84: 
1.1       harris41   85: use IO::File;
                     86: use HTML::TokeParser;
1.6       harris41   87: use DBI;
1.21      www        88: use GDBM_File;
1.24      www        89: use POSIX qw(strftime mktime);
1.1       harris41   90: 
1.28    ! harris41   91: # ----------------- Code to enable 'find' subroutine listing of the .meta files
        !            92: use File::Find;
        !            93: 
        !            94: # List of .meta files (used on a per-user basis).
1.1       harris41   95: my @metalist;
1.21      www        96: 
1.28    ! harris41   97: # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
        !            98: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
        !            99: my %perlvar = %{$perlvarref};
        !           100: undef($perlvarref); # Remove since sensitive and not needed.
        !           101: delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.
        !           102: 
        !           103: # ------------------------------------- Only run if machine is a library server
        !           104: if ($perlvar{'lonRole'} ne 'library')
        !           105:   {
        !           106:     exit(0);
        !           107:   }
        !           108: 
        !           109: # ------------------------------ Make sure this process is running as user=www.
        !           110: my $wwwid = getpwnam('www');
        !           111: if ($wwwid != $<)
        !           112:   {
        !           113:     $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
        !           114:     $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
        !           115:     system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
        !           116: 	   "mailto $emailto -s '$subj' > /dev/null");
        !           117:     exit(1);
        !           118:   }
        !           119: 
        !           120: # ------------------------------------------------------ Initialize log output.
        !           121: open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
        !           122: print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");
        !           123: 
        !           124: my $dbh; # Database object reference handle.
        !           125: 
        !           126: # ----------------------------- Verify connection to loncapa:metadata database.
        !           127: unless (
        !           128: 	$dbh = DBI->connect('DBI:mysql:loncapa','www',
        !           129: 			    $perlvar{'lonSqlAccess'},
        !           130: 			    { RaiseError => 0,PrintError => 0})
        !           131: 	)
        !           132:   { 
        !           133:     print(LOG '**** ERROR **** Cannot connect to database!'."\n");
        !           134:     exit(0);
        !           135:   }
        !           136: 
        !           137: # ------------------------------ Create loncapa:metadata table if non-existent.
        !           138: my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
        !           139:     'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
        !           140:     'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
        !           141:     'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
        !           142:     'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
        !           143:     'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
        !           144:     'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
        !           145:     'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
        !           146:     'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
        !           147:     'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
        !           148:     'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';
        !           149: 
        !           150: $dbh->do($make_metadata_table); # Generate the table.
        !           151: 
        !           152: # ----------------------------- Verify format of the loncapa:metadata database.
        !           153: #                               (delete and recreate database if necessary).
        !           154: 
        !           155: # Make a positive control for verifying table structure.
        !           156: my $make_metadata_table_CONTROL = $make_metadata_table;
        !           157: $make_metadata_table_CONTROL =~
        !           158:     s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
        !           159: 
        !           160: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
        !           161: $dbh->do($make_metadata_table_CONTROL);
        !           162: 
        !           163: my $table_description; # selectall reference to the table description.
        !           164: 
        !           165: my $CONTROL_table_string; # What the table description should look like.
        !           166: my $table_string; # What the table description does look like.
        !           167: 
        !           168: # Calculate the CONTROL table description (what it should be).
        !           169: $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
        !           170: foreach my $table_row (@{$table_description})
        !           171:   {
        !           172:     $CONTROL_table_string .= join(',',@{$table_row})."\n";
        !           173:   }
        !           174: 
        !           175: # Calculate the current table description (what it currently looks like).
        !           176: $table_description = $dbh->selectall_arrayref('describe metadata');
        !           177: foreach my $table_row (@{$table_description})
        !           178:   {
        !           179:     $table_string .= join(',',@{$table_row})."\n";
        !           180:   }
        !           181: 
        !           182: if ($table_string ne $CONTROL_table_string)
        !           183:   {
        !           184:     # Log this incident.
        !           185:     print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
        !           186: 	  '.'."\n");
        !           187:     # Delete the table.
        !           188:     $dbh->do('DROP TABLE IF EXISTS metadata');
        !           189:     # Generate the table.
        !           190:     $dbh->do($make_metadata_table);
        !           191:   }
1.21      www       192: 
1.28    ! harris41  193: $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
1.21      www       194: 
1.28    ! harris41  195: # ----------------------------------------------- Set utilitysemaphore to zero.
        !           196: $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
        !           197: 
        !           198: # ========================================================= Main functionality.
        !           199: 
        !           200: # - Determine home authors on this server based on resources dir and user tree.
        !           201: 
        !           202: # RESOURCES: the resources directory (subdirs correspond to author usernames).
        !           203: opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
        !           204:     (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
        !           205:      and exit(0));
        !           206: 
        !           207: # query_home_server_status will look for user home directories on this machine.
        !           208: my @homeusers =
        !           209:     grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
        !           210: 				    $perlvar{'lonDefDomain'}.'/'.$_)
        !           211: 	  } grep {!/^\.\.?$/} readdir(RESOURCES);
        !           212: closedir(RESOURCES);
        !           213: 
        !           214: unless (@homeusers)
        !           215:   {
        !           216:     print(LOG '=== No home users found on this server.'."\n");
        !           217:   }
        !           218: 
        !           219: # Consider each author individually.
        !           220: foreach my $user (@homeusers)
        !           221:   {
        !           222:     # Make a log entry.
        !           223:     print(LOG "\n".'=== User: '.$user."\n\n");
        !           224: 
        !           225:     # Get filesystem path to this user's directory.
        !           226:     my $user_directory =
        !           227: 	&construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
        !           228: 
        !           229:     # Remove left-over db-files from a potentially crashed searchcat run.
        !           230:     unlink($user_directory.'/nohist_new_resevaldata.db');
        !           231: 
        !           232:     # Cleanup the metalist array.
        !           233:     undef(@metalist);
        !           234:     @metalist = ();
        !           235: 
        !           236:     # This will add entries to the @metalist array.
        !           237:     &File::Find::find(\&wanted,
        !           238: 		      $perlvar{'lonDocRoot'}.'/res/'.
        !           239: 		      $perlvar{'lonDefDomain'}.'/'.$user);
        !           240: 
        !           241:     # -- process file to get metadata and put into search catalog SQL database
        !           242:     # Also, build and store dynamic metadata.
        !           243:     # Also, delete record entries before refreshing.
        !           244:     foreach my $m (@metalist)
        !           245:       {
        !           246: 	# Log this action.
        !           247: 	print(LOG "- ".$m."\n");
        !           248: 
        !           249: 	# Get metadata from the file.
        !           250: 	my $ref = get_metadata_from_file($m);
        !           251: 
        !           252: 	# Make a datarecord identifier for this resource.
        !           253: 	my $m2 = '/res/'.declutter($m);
        !           254: 	$m2 =~ s/\.meta$//;
        !           255: 
        !           256: 	# Build and store dynamic metadata inside nohist_resevaldata.db.
        !           257: 	build_on_the_fly_dynamic_metadata($m2);
        !           258: 
        !           259: 	# Delete record if it already exists.
        !           260: 	my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
        !           261: 	my $sth = $dbh->prepare($q2);
        !           262: 	$sth->execute();
        !           263: 	my $r1 = $sth->fetchall_arrayref;
        !           264: 	if (@$r1)
        !           265: 	  {
        !           266: 	    $sth = 
        !           267: 		$dbh->prepare('delete from metadata where url like binary '.
        !           268: 			      "'".$m2."'");
        !           269: 	    $sth->execute();
        !           270: 	  }
        !           271: 
        !           272: 	# Add new/replacement record into the loncapa:metadata table.
        !           273: 	$sth = $dbh->prepare('insert into metadata values ('.
        !           274: 			     '"'.delete($ref->{'title'}).'"'.','.
        !           275: 			     '"'.delete($ref->{'author'}).'"'.','.
        !           276: 			     '"'.delete($ref->{'subject'}).'"'.','.
        !           277: 			     '"'.$m2.'"'.','.
        !           278: 			     '"'.delete($ref->{'keywords'}).'"'.','.
        !           279: 			     '"'.'current'.'"'.','.
        !           280: 			     '"'.delete($ref->{'notes'}).'"'.','.
        !           281: 			     '"'.delete($ref->{'abstract'}).'"'.','.
        !           282: 			     '"'.delete($ref->{'mime'}).'"'.','.
        !           283: 			     '"'.delete($ref->{'language'}).'"'.','.
        !           284: 			     '"'.sql_formatted_time(
        !           285: 				       delete($ref->{'creationdate'})).'"'.','.
        !           286: 			     '"'.sql_formatted_time(
        !           287: 				   delete($ref->{'lastrevisiondate'})).'"'.','.
        !           288: 			     '"'.delete($ref->{'owner'}).'"'.','.
        !           289: 			     '"'.delete($ref->{'copyright'}).'"'.','.
        !           290: 			     '1'.')');
        !           291: 	$sth->execute();
        !           292:       }
        !           293: 
        !           294: # ----------------------- Clean up database, remove stale SQL database records.
        !           295:     $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
        !           296: 
        !           297: # -------------------------------------------------- Copy over the new db-files
        !           298:     system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
        !           299: 	         $user_directory.'/nohist_resevaldata.db');
        !           300:   }
        !           301: 
        !           302: # --------------------------------------------------- Close database connection
        !           303: $dbh->disconnect;
        !           304: print LOG "\n==== Searchcat completed ".localtime()." ====\n";
        !           305: close(LOG);
        !           306: exit(0);
        !           307: 
        !           308: # ================================================================ Subroutines.
        !           309: 
        !           310: =pod
        !           311: 
        !           312: =head1 SUBROUTINES
        !           313: 
        !           314: =cut
        !           315: 
        !           316: =pod
        !           317: 
        !           318: B<unescape> - translate to unstrange escaped syntax to strange characters.
        !           319: 
        !           320: =over 4
        !           321: 
        !           322: Parameters:
        !           323: 
        !           324: =item I<$str> - string with unweird characters.
        !           325: 
        !           326: =back
        !           327: 
        !           328: =over 4
        !           329: 
        !           330: Returns:
        !           331: 
        !           332: =item C<string> - string with potentially weird characters.
        !           333: 
        !           334: =back
        !           335: 
        !           336: =cut
        !           337: 
        !           338: sub unescape ($)
        !           339:   {
        !           340:     my $str = shift(@_);
1.21      www       341:     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
1.28    ! harris41  342:     return($str);
        !           343:   }
        !           344: 
        !           345: =pod
        !           346: 
        !           347: B<escape> - translate strange characters to unstrange escaped syntax.
        !           348: 
        !           349: =over 4
        !           350: 
        !           351: Parameters:
1.21      www       352: 
1.28    ! harris41  353: =item I<$str> - string with potentially weird characters to unweird-ify.
1.22      www       354: 
1.28    ! harris41  355: =back
        !           356: 
        !           357: =over 4
        !           358: 
        !           359: Returns:
        !           360: 
        !           361: =item C<string> - unweird-ified string.
        !           362: 
        !           363: =back
        !           364: 
        !           365: =cut
        !           366: 
        !           367: sub escape ($)
        !           368:   {
        !           369:     my $str = shift(@_);
1.22      www       370:     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
1.28    ! harris41  371:     return($str);
        !           372:   }
        !           373: 
        !           374: =pod
        !           375: 
        !           376: B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
        !           377: 
        !           378: Dynamic metadata is stored in a nohist_resevaldata GDBM database.
        !           379: Most of the calculations in this subroutine are totally pointless
        !           380: and not useful for anything that this subroutine does.
        !           381: (THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.)
        !           382: The only thing that this subroutine really makes happen is adjusting
        !           383: a 'count' value inside the F<nohist_new_resevaldata.db> as well
        !           384: as updating F<nohist_new_resevaldata.db> with information from
        !           385: F<nohist_resevaldata.db>.
        !           386: 
        !           387: =over 4
        !           388: 
        !           389: Parameters:
        !           390: 
        !           391: =item I<$url> - the filesystem path (url may be a misnomer...)
        !           392: 
        !           393: =back
        !           394: 
        !           395: =over 4
        !           396: 
        !           397: Returns:
1.22      www       398: 
1.28    ! harris41  399: =item C<hash> - key-value table of dynamically evaluated metadata.
1.21      www       400: 
1.28    ! harris41  401: =back
1.21      www       402: 
1.28    ! harris41  403: =cut
1.25      www       404: 
1.28    ! harris41  405: sub build_on_the_fly_dynamic_metadata ($)
        !           406:   {
        !           407:     # BEWARE ALL WHO TRY TO UNDERSTAND THIS ABSURDLY HORRIBLE SUBROUTINE.
        !           408:       
        !           409:     # Do all sorts of mumbo-jumbo to compute the user's directory.
        !           410:     my $url = &declutter(shift(@_));
        !           411:     $url =~ s/\.meta$//;
        !           412:     my %returnhash = ();
        !           413:     my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
        !           414:     my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
        !           415: 
        !           416:     # Attempt a GDBM database instantiation inside users directory and proceed.
1.25      www       417:     if ((tie(%evaldata,'GDBM_File',
1.28    ! harris41  418:             $user_directory.
        !           419: 	     '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
1.25      www       420:         (tie(%newevaldata,'GDBM_File',
1.28    ! harris41  421: 	     $user_directory.
        !           422: 	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
        !           423:       {
        !           424: 	# For different variables, track the running sum and counts.
        !           425: 	my %sum = ();
        !           426: 	my %cnt = ();
        !           427: 
        !           428: 	# Define computed items as a sum (add) or an average (avg) or a raw
        !           429: 	# count (cnt) or 'app'?
        !           430: 	my %listitems=('count'        => 'add',
        !           431: 		       'course'       => 'add',
        !           432: 		       'avetries'     => 'avg',
        !           433: 		       'stdno'        => 'add',
        !           434: 		       'difficulty'   => 'avg',
        !           435: 		       'clear'        => 'avg',
        !           436: 		       'technical'    => 'avg',
        !           437: 		       'helpful'      => 'avg',
        !           438: 		       'correct'      => 'avg',
        !           439: 		       'depth'        => 'avg',
        !           440: 		       'comments'     => 'app',
        !           441: 		       'usage'        => 'cnt'
        !           442: 		       );
        !           443: 	
        !           444: 	# Untaint the url and use as part of a regular expression.
        !           445: 	my $regexp = $url;
        !           446: 	$regexp =~ s/(\W)/\\$1/g;
        !           447: 	$regexp = '___'.$regexp.'___([a-z]+)$';
        !           448: 
        !           449: 	# Check existing nohist database for this url.
        !           450: 	# THE ONLY TIME THIS IS IMPORTANT FOR THIS AWFUL SUBROUTINE
        !           451: 	# IS FOR 'count' ENTRIES
        !           452: 	# AND FOR REFRESHING non-'count' ENTRIES INSIDE nohist_new DATABASE.
        !           453: 	foreach (keys %evaldata)
        !           454: 	  {
        !           455: 	    my $key = &unescape($_);
        !           456: 	    if ($key =~ /$regexp/) # If url-based entry exists.
        !           457: 	      {
        !           458: 		my $ctype = $1; # Set to specific category type.
        !           459: 
        !           460: 		# Do an increment for this category type.
        !           461: 		if (defined($cnt{$ctype}))
        !           462: 		  {
        !           463: 		    $cnt{$ctype}++; 
        !           464: 		  }
        !           465: 		else
        !           466: 		  {
        !           467: 		    $cnt{$ctype} = 1; 
        !           468: 		  }
        !           469:                 unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
        !           470: 		  {
        !           471: 		    # Increment the sum based on the evaluated data in the db.
        !           472: 		    if (defined($sum{$ctype}))
        !           473: 		      {
        !           474: 			$sum{$ctype} += $evaldata{$_};
        !           475: 		      }
        !           476: 		    else
        !           477: 		      {
        !           478: 			$sum{$ctype} = $evaldata{$_};
        !           479: 		      }
        !           480:  		  }
        !           481: 		else # 'app' mode, means to use '<hr />' as a separator
        !           482: 		  {
        !           483: 		    if (defined($sum{$ctype}))
        !           484: 		      {
        !           485: 			if ($evaldata{$_})
        !           486: 			  {
        !           487: 			    $sum{$ctype} .= '<hr />'.$evaldata{$_};
        !           488: 			  }
        !           489: 		      }
        !           490: 		    else
        !           491: 		      {
        !           492: 			$sum{$ctype} = ''.$evaldata{$_};
        !           493: 		      }
        !           494: 		  }
        !           495: 		if ($ctype ne 'count')
        !           496: 		  {
        !           497: 		    # ALERT!  THIS HORRIBLE LOOP IS ACTUALLY DOING SOMETHING
        !           498: 		    # USEFUL!
        !           499: 		    $newevaldata{$_} = $evaldata{$_};
        !           500: 		  }
        !           501: 	      }
        !           502: 	  }
        !           503: 
        !           504: 	# THE ONLY OTHER TIME THIS LOOP IS USEFUL IS FOR THE 'count' HASH
        !           505: 	# ELEMENT.
        !           506: 	foreach (keys %cnt)
        !           507: 	  {
        !           508: 	    if ($listitems{$_} eq 'avg')
        !           509: 	      {
        !           510: 		$returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
        !           511: 	      }
        !           512: 	    elsif ($listitems{$_} eq 'cnt')
        !           513: 	      {
        !           514: 		$returnhash{$_} = $cnt{$_};
        !           515: 	      }
        !           516: 	    else
        !           517: 	      {
        !           518: 		$returnhash{$_} = $sum{$_};
        !           519: 	      }
        !           520: 	  }
        !           521: 
        !           522: 	# A RARE MOMENT OF DOING ANYTHING USEFUL INSIDE THIS
        !           523: 	# BLEEPING SUBROUTINE.
        !           524: 	if ($returnhash{'count'})
        !           525: 	  {
        !           526: 	    my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
        !           527: 	    $newevaldata{$newkey} = $returnhash{'count'};
        !           528: 	  }
        !           529: 
        !           530: 	untie(%evaldata); # Close/release the original nohist database.
        !           531: 	untie(%newevaldata); # Close/release the new nohist database.
1.22      www       532:       }
1.28    ! harris41  533:     return(%returnhash);
        !           534:     # Celebrate!  We have now accomplished some simple calculations using
        !           535:     # 1000% bloated functionality in our subroutine.  Go wash your eyeballs
        !           536:     # out now.
        !           537:   }
        !           538: 
        !           539: =pod
        !           540: 
        !           541: B<wanted> - used by B<File::Find::find> subroutine.
        !           542: 
        !           543: This evaluates whether a file is wanted, and pushes it onto the
        !           544: I<@metalist> array.  This subroutine was, for the most part, auto-generated
        !           545: by the B<find2perl> command.
        !           546: 
        !           547: =over 4
        !           548: 
        !           549: Parameters:
        !           550: 
        !           551: =item I<$file> - a path to the file.
        !           552: 
        !           553: =back
        !           554: 
        !           555: =over 4
        !           556: 
        !           557: Returns:
        !           558: 
        !           559: =item C<boolean> - true or false based on logical statement.
        !           560: 
        !           561: =back
        !           562: 
        !           563: =cut
        !           564: 
        !           565: sub wanted ($)
        !           566:   {
1.1       harris41  567:     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
1.28    ! harris41  568:     -f $_ &&
1.10      harris41  569:     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
1.28    ! harris41  570:     push(@metalist,$File::Find::dir.'/'.$_);
        !           571:   }
        !           572: 
        !           573: =pod
        !           574: 
        !           575: B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
1.1       harris41  576: 
1.28    ! harris41  577: I<Note that this is significantly altered from a subroutine present in lonnet.>
1.15      harris41  578: 
1.28    ! harris41  579: =over 4
1.1       harris41  580: 
1.28    ! harris41  581: Parameters:
1.27      www       582: 
1.28    ! harris41  583: =item I<$file> - a path.to the file.
1.27      www       584: 
1.28    ! harris41  585: =back
1.27      www       586: 
1.28    ! harris41  587: =over 4
1.25      www       588: 
1.28    ! harris41  589: Returns:
1.1       harris41  590: 
1.28    ! harris41  591: =item C<hash reference> - a hash array (keys and values).
1.1       harris41  592: 
1.28    ! harris41  593: =back
1.25      www       594: 
1.28    ! harris41  595: =cut
1.1       harris41  596: 
1.28    ! harris41  597: sub get_metadata_from_file ($)
        !           598:   {
        !           599:     my ($filename) = @_;
        !           600:     my %metatable; # Used to store return value of hash-tabled metadata.
        !           601:     $filename = &declutter($filename); # Remove non-identifying filesystem info
        !           602:     my $uri = ''; # The URI is not relevant in this scenario.
        !           603:     unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
        !           604:       {
        !           605: 	$filename .= '.meta'; # Append a .meta suffix.
        !           606:       }
        !           607:     # Get the file contents.
        !           608:     my $metadata_string =
        !           609: 	&get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
        !           610: 
        !           611:     # Parse the file based on its XML tags.
        !           612:     my $parser = HTML::TokeParser->new(\$metadata_string);
        !           613:     my $token;
        !           614:     while ($token = $parser->get_token) # Loop through tokens.
        !           615:       {
        !           616: 	if ($token->[0] eq 'S') # If it is a start token.
        !           617: 	  {
        !           618: 	    my $entry = $token->[1];
        !           619: 	    my $unikey = $entry; # A unique identifier for this xml tag key.
        !           620: 	    if (defined($token->[2]->{'part'}))
        !           621: 	      { 
        !           622: 		$unikey .= '_'.$token->[2]->{'part'}; 
        !           623: 	      }
        !           624: 	    if (defined($token->[2]->{'name'}))
        !           625: 	      { 
        !           626: 		$unikey .= '_'.$token->[2]->{'name'}; 
        !           627: 	      }
        !           628: 	    # Append $unikey to metatable's keys entry.
        !           629: 	    if ($metatable{$uri.'keys'})
        !           630: 	      {
        !           631: 		$metatable{$uri.'keys'} .= ','.$unikey;
1.1       harris41  632: 	      }
1.28    ! harris41  633: 	    else
        !           634: 	      {
        !           635: 		$metatable{$uri.'keys'} = $unikey;
1.1       harris41  636: 	      }
1.28    ! harris41  637: 	    # Insert contents into metatable entry for the unikey.
        !           638: 	    foreach my $t3 (@{$token->[3]})
        !           639: 	      {
        !           640: 		$metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};
1.1       harris41  641: 	      }
1.28    ! harris41  642: 	    # If there was no text contained inside the tags, set = default.
        !           643: 	    unless
        !           644: 	      (
        !           645: 	        $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
        !           646: 	      )
        !           647: 	      {
        !           648: 		$metatable{$uri.''.$unikey} =
        !           649: 		    $metatable{$uri.''.$unikey.'.default'};
        !           650: 	      }
        !           651: 	  }
        !           652:       }
        !           653:     # Return with a key-value table of XML tags and their tag contents.
        !           654:     return(\%metatable);
        !           655:   }
        !           656: 
        !           657: =pod
        !           658: 
        !           659: B<get_file_contents> - returns either the contents of the file or a -1.
        !           660: 
        !           661: =over 4
        !           662: 
        !           663: Parameters:
        !           664: 
        !           665: =item I<$file> - a complete filesystem path.to the file.
        !           666: 
        !           667: =back
        !           668: 
        !           669: =over 4
        !           670: 
        !           671: Returns:
        !           672: 
        !           673: =item C<string> - file contents or a -1.
        !           674: 
        !           675: =back
        !           676: 
        !           677: =cut
        !           678: 
        !           679: sub get_file_contents ($)
        !           680:   {
        !           681:     my $file = shift(@_);
        !           682: 
        !           683:     # If file does not exist, then return a -1 value.
        !           684:     unless (-e $file)
        !           685:       {
        !           686: 	return(-1);
        !           687:       }
        !           688: 
        !           689:     # Read in file contents.
        !           690:     my $file_handle = IO::File->new($file);
        !           691:     my $file_contents = '';
        !           692:     while (<$file_handle>)
        !           693:       {
        !           694: 	$file_contents .= $_;
        !           695:       }
        !           696: 
        !           697:     # Return file contents.
        !           698:     return($file_contents);
        !           699:   }
        !           700: 
        !           701: =pod
        !           702: 
        !           703: B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
        !           704: 
        !           705: =over 4
        !           706: 
        !           707: Parameters:
        !           708: 
        !           709: =item I<$filesystem_path> - a complete filesystem path.
        !           710: 
        !           711: =back
        !           712: 
        !           713: =over 4
        !           714: 
        !           715: Returns:
        !           716: 
        !           717: =item C<string> - remnants of the filesystem path (beginning portion removed).
        !           718: 
        !           719: =back
        !           720: 
        !           721: =cut
        !           722: 
        !           723: sub declutter
        !           724:   {
        !           725:     my $filesystem_path = shift(@_);
        !           726: 
        !           727:     # Remove beginning portions of the filesystem path.
        !           728:     $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
        !           729:     $filesystem_path =~ s!^/!!;
        !           730:     $filesystem_path =~ s!^res/!!;
        !           731: 
        !           732:     # Return what is remaining for the filesystem path.
        !           733:     return($filesystem_path);
        !           734:   }
        !           735: 
        !           736: =pod
        !           737: 
        !           738: B<query_home_server_status> - Is this the home server of an author's directory?
        !           739: 
        !           740: =over 4
        !           741: 
        !           742: Parameters:
        !           743: 
        !           744: =item I<$author_filesystem_path> - directory path for a user.
        !           745: 
        !           746: =back
        !           747: 
        !           748: =over 4
        !           749: 
        !           750: Returns:
        !           751: 
        !           752: =item C<boolean> - 1 if true; 0 if false.
        !           753: 
        !           754: =back
        !           755: 
        !           756: =cut
        !           757: 
        !           758: sub query_home_server_status ($)
        !           759:   {
        !           760:     my $author_filesystem_path = shift(@_);
        !           761: 
        !           762:     # Remove beginning portion of this filesystem path.
        !           763:     $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
        !           764: 
        !           765:     # Construct path to the author's ordinary user directory.
        !           766:     my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
        !           767:     my $user_directory_path = construct_path_to_user_directory($user_domain,
        !           768: 							       $username);
        !           769: 
        !           770:     # Return status of whether the user directory path is defined.
        !           771:     if (-e $user_directory_path)
        !           772:       {
        !           773: 	return(1); # True.
        !           774:       }
        !           775:     else
        !           776:       {
        !           777:         return(0); # False.
        !           778:       }
        !           779:   }
        !           780: 
        !           781: =pod
        !           782: 
        !           783: B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
        !           784: 
        !           785: =over 4
        !           786: 
        !           787: Parameters:
        !           788: 
        !           789: =item I<$user_domain> - the loncapa domain of the user.
        !           790: 
        !           791: =item I<$username> - the unique username (user id) of the user.
        !           792: 
        !           793: =back
        !           794: 
        !           795: =over 4
        !           796: 
        !           797: Returns:
        !           798: 
        !           799: =item C<string> - representing the path on the filesystem.
        !           800: 
        !           801: =back
        !           802: 
        !           803: =cut
        !           804: 
        !           805: sub construct_path_to_user_directory ($$)
        !           806:   {
        !           807:     my ($user_domain,$username) = @_;
        !           808: 
        !           809:     # Untaint.
        !           810:     $user_domain =~ s/\W//g;
        !           811:     $username =~ s/\W//g;
        !           812: 
        !           813:     # Create three levels of sub-directoried filesystem path
        !           814:     # based on the first three characters of the username.
        !           815:     my $sub_filesystem_path = $username.'__';
        !           816:     $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
        !           817: 
        !           818:     # Use the sub-directoried levels and other variables to generate
        !           819:     # the complete filesystem path.
        !           820:     my $complete_filesystem_path =
        !           821: 	join('/',($perlvar{'lonUsersDir'},
        !           822: 		  $user_domain,
        !           823: 		  $sub_filesystem_path,
        !           824: 		  $username));
        !           825: 
        !           826:     # Return the complete filesystem path.
        !           827:     return($complete_filesystem_path);
        !           828:   }
        !           829: 
        !           830: =pod
        !           831: 
        !           832: B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
        !           833: 
        !           834: =over 4
        !           835: 
        !           836: Parameters:
        !           837: 
        !           838: =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
        !           839: 
        !           840: =back
        !           841: 
        !           842: =over 4
        !           843: 
        !           844: Returns:
        !           845: 
        !           846: =item C<string> - datetime sql formatted string.
        !           847: 
        !           848: =back
        !           849: 
        !           850: =cut
1.13      harris41  851: 
1.28    ! harris41  852: sub sql_formatted_time ($)
        !           853:   {
        !           854:     # Sanitize the time argument and convert to localtime array.
1.13      harris41  855:     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
1.28    ! harris41  856: 	localtime(&sanitize_time(shift(@_)));
        !           857: 
        !           858:     # Convert month from (0..11) to (1..12).
        !           859:     $mon += 1;
        !           860: 
        !           861:     # Make the year compatible with A.D. specification.
        !           862:     $year += 1900;
        !           863: 
        !           864:     # Return a date which is compatible with MySQL's "DATETIME" format.
        !           865:     return(join('-',($year,$mon,$mday)).
        !           866: 	   ' '.
        !           867: 	   join(':',($hour,$min,$sec))
        !           868: 	   );
        !           869:   }
        !           870: 
        !           871: 
        !           872: # ==================================== The following two subroutines are needed
        !           873: #                 for accommodating incorrect time formats inside the metadata.
        !           874: 
        !           875: =pod
        !           876: 
        !           877: B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
        !           878: 
        !           879: =over 4
        !           880: 
        !           881: Parameters:
        !           882: 
        !           883: =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
        !           884: 
        !           885: =back
        !           886: 
        !           887: =over 4
        !           888: 
        !           889: Returns:
        !           890: 
        !           891: =item C<integer> - seconds since epoch.
        !           892: 
        !           893: =back
        !           894: 
        !           895: =cut
        !           896: 
        !           897: sub make_seconds_since_epoch (@)
        !           898:   {
        !           899:     # Keytable of time metadata.
        !           900:     my %time_metadata = @_;
        !           901: 
        !           902:     # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
        !           903:     return(POSIX::mktime(
        !           904: 			 ($time_metadata{'seconds'},
        !           905: 			  $time_metadata{'minutes'},
        !           906: 			  $time_metadata{'hours'},
        !           907: 			  $time_metadata{'day'},
        !           908: 			  $time_metadata{'month'}-1,
        !           909: 			  $time_metadata{'year'}-1900,
        !           910: 			  0,
        !           911: 			  0,
        !           912: 			  $time_metadata{'dlsav'})
        !           913: 			 )
        !           914: 	   );
        !           915:   }
        !           916: 
        !           917: =pod
        !           918: 
        !           919: B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
        !           920: 
        !           921: Somebody described this subroutine as
        !           922: "retro-fixing of un-backward-compatible time format".
        !           923: 
        !           924: What this means, is that a part of this code expects to get UTC seconds
        !           925: since the epoch (beginning of 1970).  Yet, some of the .meta files have
        !           926: sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
        !           927: integers (e.g. 1044147435).  These time strings do not encode the timezone
        !           928: and, in this sense, can be considered "un-backwards-compatible".
        !           929: 
        !           930: =over 4
        !           931: 
        !           932: Parameters:
        !           933: 
        !           934: =item I<$potentially_badformat_string> - string to "retro-fix".
        !           935: 
        !           936: =back
        !           937: 
        !           938: =over 4
        !           939: 
        !           940: Returns:
        !           941: 
        !           942: =item C<integer> - seconds since epoch.
        !           943: 
        !           944: =back
        !           945: 
        !           946: =cut
        !           947: 
        !           948: sub sanitize_time ($)
        !           949:   {
        !           950:     my $timestamp = shift(@_);
        !           951:     # If timestamp is in this unexpected format....
        !           952:     if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
        !           953:       {
        !           954: 	# then convert into seconds since epoch (the expected format).
        !           955: 	$timestamp = &make_seconds_since_epoch(
        !           956: 					       'year' => $1,
        !           957: 					       'month' => $2,
        !           958: 					       'day' => $3,
        !           959: 					       'hours' => $4,
        !           960: 					       'minutes' => $5,
        !           961: 					       'seconds' => $6
        !           962: 					       );
        !           963:       }
        !           964:     # Otherwise we assume timestamp to be as expected.
        !           965:     return($timestamp);
        !           966:   }
        !           967: 
        !           968: =pod
        !           969: 
        !           970: =head1 AUTHOR
        !           971: 
        !           972: Written to help the loncapa project.
        !           973: 
        !           974: Scott Harrison, sharrison@users.sourceforge.net
        !           975: 
        !           976: This is distributed under the same terms as loncapa (i.e. "freeware").
1.24      www       977: 
1.28    ! harris41  978: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>