--- loncom/metadata_database/searchcat.pl 2001/04/16 13:45:20 1.11 +++ loncom/metadata_database/searchcat.pl 2003/02/03 17:01:55 1.30 @@ -1,184 +1,952 @@ #!/usr/bin/perl # The LearningOnline Network # searchcat.pl "Search Catalog" batch script +# +# $Id: searchcat.pl,v 1.30 2003/02/03 17:01:55 www Exp $ +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ +# +# YEAR=2001 +# 04/14/2001, 04/16/2001 Scott Harrison +# +# YEAR=2002 +# 05/11/2002 Scott Harrison +# +# YEAR=2003 +# Scott Harrison +# +### + +=pod + +=head1 NAME + +B - put authoritative filesystem data into sql database. + +=head1 SYNOPSIS + +Ordinarily this script is to be called from a loncapa cron job +(CVS source location: F; typical +filesystem installation location: F). + +Here is the cron job entry. + +C<# Repopulate and refresh the metadata database used for the search catalog.> + +C<10 1 * * 7 www /home/httpd/perl/searchcat.pl> + +This script only allows itself to be run as the user C. + +=head1 DESCRIPTION + +This script goes through a loncapa resource directory and gathers metadata. +The metadata is entered into a SQL database. + +This script also does general database maintenance such as reformatting +the C table if it is deprecated. + +This script also builds dynamic temporal metadata and stores this inside +a F database file. + +This script is playing an increasingly important role for a loncapa +library server. The proper operation of this script is critical for a smooth +and correct user experience. -# 04/14/2001 Scott Harrison +=cut -# This script goes through a LON-CAPA resource -# directory and gathers metadata. -# The metadata is entered into a SQL database. +# ========================================================== Setting things up. + +# ------------------------------------------------------ Use external modules. + +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration; use IO::File; use HTML::TokeParser; use DBI; +use GDBM_File; +use POSIX qw(strftime mktime); -my @metalist; # ----------------- Code to enable 'find' subroutine listing of the .meta files -require "find.pl"; -sub wanted { - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f _ && - /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,"$dir/$_"); -} +use File::Find; -# ------------------------------------ Read httpd access.conf and get variables -open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; +# List of .meta files (used on a per-user basis). 
+my @metalist; -while ($configline=) { - if ($configline =~ /PerlSetVar/) { - my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); - chomp($varvalue); - $perlvar{$varname}=$varvalue; - } -} -close(CONFIG); +# --------------- Read loncapa_apache.conf and loncapa.conf and get variables. +my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf'); +my %perlvar = %{$perlvarref}; +undef($perlvarref); # Remove since sensitive and not needed. +delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed. + +# ------------------------------------- Only run if machine is a library server +if ($perlvar{'lonRole'} ne 'library') + { + exit(0); + } + +# ------------------------------ Make sure this process is running as user=www. +my $wwwid = getpwnam('www'); +if ($wwwid != $<) + { + $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + $subj = "LON: $perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ". + "mailto $emailto -s '$subj' > /dev/null"); + exit(1); + } + +# ------------------------------------------------------ Initialize log output. +open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); +print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n"); + +my $dbh; # Database object reference handle. + +# ----------------------------- Verify connection to loncapa:metadata database. +unless ( + $dbh = DBI->connect('DBI:mysql:loncapa','www', + $perlvar{'lonSqlAccess'}, + { RaiseError => 0,PrintError => 0}) + ) + { + print(LOG '**** ERROR **** Cannot connect to database!'."\n"); + exit(0); + } + +# ------------------------------ Create loncapa:metadata table if non-existent. +my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('. + 'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '. + 'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '. + 'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '. + 'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '. + 'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '. + 'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '. + 'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '. + 'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '. + 'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '. + 'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM'; + +$dbh->do($make_metadata_table); # Generate the table. + +# ----------------------------- Verify format of the loncapa:metadata database. +# (delete and recreate database if necessary). + +# Make a positive control for verifying table structure. +my $make_metadata_table_CONTROL = $make_metadata_table; +$make_metadata_table_CONTROL =~ + s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/; + +$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); +$dbh->do($make_metadata_table_CONTROL); + +my $table_description; # selectall reference to the table description. + +my $CONTROL_table_string; # What the table description should look like. +my $table_string; # What the table description does look like. + +# Calculate the CONTROL table description (what it should be). +$table_description = $dbh->selectall_arrayref('describe CONTROL_metadata'); +foreach my $table_row (@{$table_description}) + { + $CONTROL_table_string .= join(',',@{$table_row})."\n"; + } + +# Calculate the current table description (what it currently looks like). 
+$table_description = $dbh->selectall_arrayref('describe metadata'); +foreach my $table_row (@{$table_description}) + { + $table_string .= join(',',@{$table_row})."\n"; + } + +if ($table_string ne $CONTROL_table_string) + { + # Log this incident. + print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'. + '.'."\n"); + # Delete the table. + $dbh->do('DROP TABLE IF EXISTS metadata'); + # Generate the table. + $dbh->do($make_metadata_table); + } + +$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay. Done with control. + +# ----------------------------------------------- Set utilitysemaphore to zero. +$dbh->do('UPDATE metadata SET utilitysemaphore = 0'); + +# ========================================================= Main functionality. + +# - Determine home authors on this server based on resources dir and user tree. + +# RESOURCES: the resources directory (subdirs correspond to author usernames). +opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or + (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n") + and exit(0)); + +# query_home_server_status will look for user home directories on this machine. +my @homeusers = + grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'. + $perlvar{'lonDefDomain'}.'/'.$_) + } grep {!/^\.\.?$/} readdir(RESOURCES); +closedir(RESOURCES); + +unless (@homeusers) + { + print(LOG '=== No home users found on this server.'."\n"); + } + +# Consider each author individually. +foreach my $user (@homeusers) + { + # Make a log entry. + print(LOG "\n".'=== User: '.$user."\n\n"); + + # Get filesystem path to this user's directory. + my $user_directory = + &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user); + + # Remove left-over db-files from a potentially crashed searchcat run. + unlink($user_directory.'/nohist_new_resevaldata.db'); + + # Cleanup the metalist array. + undef(@metalist); + @metalist = (); + + # This will add entries to the @metalist array. + &File::Find::find(\&wanted, + $perlvar{'lonDocRoot'}.'/res/'. + $perlvar{'lonDefDomain'}.'/'.$user); + + # -- process file to get metadata and put into search catalog SQL database + # Also, build and store dynamic metadata. + # Also, delete record entries before refreshing. + foreach my $m (@metalist) + { + # Log this action. + print(LOG "- ".$m."\n"); + + # Get metadata from the file. + my $ref = get_metadata_from_file($m); + + # Make a datarecord identifier for this resource. + my $m2 = '/res/'.declutter($m); + $m2 =~ s/\.meta$//; + + # Build and store dynamic metadata inside nohist_resevaldata.db. + build_on_the_fly_dynamic_metadata($m2); + + # Delete record if it already exists. + my $q2 = 'select * from metadata where url like binary '."'".$m2."'"; + my $sth = $dbh->prepare($q2); + $sth->execute(); + my $r1 = $sth->fetchall_arrayref; + if (@$r1) + { + $sth = + $dbh->prepare('delete from metadata where url like binary '. + "'".$m2."'"); + $sth->execute(); + } + + # Add new/replacement record into the loncapa:metadata table. + $sth = $dbh->prepare('insert into metadata values ('. + '"'.delete($ref->{'title'}).'"'.','. + '"'.delete($ref->{'author'}).'"'.','. + '"'.delete($ref->{'subject'}).'"'.','. + '"'.$m2.'"'.','. + '"'.delete($ref->{'keywords'}).'"'.','. + '"'.'current'.'"'.','. + '"'.delete($ref->{'notes'}).'"'.','. + '"'.delete($ref->{'abstract'}).'"'.','. + '"'.delete($ref->{'mime'}).'"'.','. + '"'.delete($ref->{'language'}).'"'.','. + '"'.sql_formatted_time( + delete($ref->{'creationdate'})).'"'.','. 
+ '"'.sql_formatted_time( + delete($ref->{'lastrevisiondate'})).'"'.','. + '"'.delete($ref->{'owner'}).'"'.','. + '"'.delete($ref->{'copyright'}).'"'.','. + '1'.')'); + $sth->execute(); + } + +# ----------------------- Clean up database, remove stale SQL database records. + $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0'); + +# -------------------------------------------------- Copy over the new db-files + system('mv '.$user_directory.'/nohist_new_resevaldata.db '. + $user_directory.'/nohist_resevaldata.db'); + } -my $dbh; -# ------------------------------------- Make sure that database can be accessed -{ - unless ( - $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) - ) { - print "Cannot connect to database!\n"; - exit; - } -} +# --------------------------------------------------- Close database connection +$dbh->disconnect; +print LOG "\n==== Searchcat completed ".localtime()." ====\n"; +close(LOG); +exit(0); -# ------------------------------------------------------------- get .meta files -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); -my @homeusers=grep - {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")} - grep {!/^\.\.?$/} readdir(RESOURCES); -closedir RESOURCES; -foreach my $user (@homeusers) { - &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); -} +# ================================================================ Subroutines. -# -- process each file to get metadata and put into search catalog SQL database -# Also, check to see if already there. -# I could just delete (without searching first), but this works for now. -foreach my $m (@metalist) { - my $ref=&metadata($m); - my $m2='/res/'.&declutter($m); - my $q2="select * from metadata where url like binary '/res/$m2'"; - my $sth = $dbh->prepare($q2); - $sth->execute(); - my $r1=$sth->fetchall_arrayref; - if (@$r1) { - $sth=$dbh->prepare("delete from metadata where url like binary '/res/$m2'"); - $sth->execute(); - } - $sth=$dbh->prepare('insert into metadata values ('. - '"'.delete($ref->{'title'}).'"'.','. - '"'.delete($ref->{'author'}).'"'.','. - '"'.delete($ref->{'subject'}).'"'.','. - '"/res/'.$m2.'"'.','. - '"'.delete($ref->{'keywords'}).'"'.','. - '"'.'current'.'"'.','. - '"'.delete($ref->{'notes'}).'"'.','. - '"'.delete($ref->{'abstract'}).'"'.','. - '"'.delete($ref->{'mime'}).'"'.','. - '"'.delete($ref->{'language'}).'"'.','. - '"'.delete($ref->{'creationdate'}).'"'.','. - '"'.delete($ref->{'lastrevisiondate'}).'"'.','. - '"'.delete($ref->{'owner'}).'"'.','. - '"'.delete($ref->{'copyright'}).'"'.')'); - $sth->execute(); -} +=pod -# ----------------------------------------------------------- Clean up database -# Need to, perhaps, remove stale SQL database records. -# ... 
not yet implemented +=head1 SUBROUTINES -# --------------------------------------------------- Close database connection -$dbh->disconnect; +=cut -# ---------------------------------------------------------------- Get metadata -# significantly altered from subroutine present in lonnet -sub metadata { - my ($uri,$what)=@_; - my %metacache; - $uri=&declutter($uri); - my $filename=$uri; - $uri=~s/\.meta$//; - $uri=''; - unless ($metacache{$uri.'keys'}) { - unless ($filename=~/\.meta$/) { $filename.='.meta'; } - my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); - my $parser=HTML::TokeParser->new(\$metastring); - my $token; - while ($token=$parser->get_token) { - if ($token->[0] eq 'S') { - my $entry=$token->[1]; - my $unikey=$entry; - if (defined($token->[2]->{'part'})) { - $unikey.='_'.$token->[2]->{'part'}; - } - if (defined($token->[2]->{'name'})) { - $unikey.='_'.$token->[2]->{'name'}; - } - if ($metacache{$uri.'keys'}) { - $metacache{$uri.'keys'}.=','.$unikey; - } else { - $metacache{$uri.'keys'}=$unikey; - } - map { - $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; - } @{$token->[3]}; - unless ( - $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry) - ) { $metacache{$uri.''.$unikey}= - $metacache{$uri.''.$unikey.'.default'}; - } - } - } - } - return \%metacache; -} +=pod -# ------------------------------------------------------------ Serves up a file -# returns either the contents of the file or a -1 -sub getfile { - my $file=shift; - if (! -e $file ) { return -1; }; - my $fh=IO::File->new($file); - my $a=''; - while (<$fh>) { $a .=$_; } - return $a -} +B - translate to unstrange escaped syntax to strange characters. -# ------------------------------------------------------------- Declutters URLs -sub declutter { - my $thisfn=shift; - $thisfn=~s/^$perlvar{'lonDocRoot'}//; - $thisfn=~s/^\///; - $thisfn=~s/^res\///; - return $thisfn; -} +=over 4 + +Parameters: + +=item I<$str> - string with unweird characters. + +=back + +=over 4 + +Returns: + +=item C - string with potentially weird characters. + +=back + +=cut + +sub unescape ($) + { + my $str = shift(@_); + $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; + return($str); + } + +=pod + +B - translate strange characters to unstrange escaped syntax. + +=over 4 + +Parameters: + +=item I<$str> - string with potentially weird characters to unweird-ify. + +=back + +=over 4 + +Returns: + +=item C - unweird-ified string. + +=back + +=cut + +sub escape ($) + { + my $str = shift(@_); + $str =~ s/(\W)/"%".unpack('H2',$1)/eg; + return($str); + } + +=pod + +B - evaluate and store dynamic metadata. + +Returns the dynamic metadata for an author, which will later be added to the +MySQL database (not yet implemented). + +The vast majority of entries in F, which contains +the dynamic metadata for an author's resources, are "count", which make +the file really large and evaluation really slow. + +While computing the current value of all dynamic metadata +for later insertion into the MySQL metadata cache (not yet implemented), +this routine also simply adds up all "count" type fields and replaces them by +one new field with the to-date count. + +Only after successful completion of working with one author, copy new file to +original file. Copy to tmp-"new"-db-file was necessary since db-file size +would not shrink after "delete" of key. + +=over 4 + +Parameters: + +=item I<$url> - the filesystem path (url may be a misnomer...) + +=back -# --------------------------------------- Is this the home server of an author? 
-# (copied from lond, modification of the return value) -sub ishome { - my $author=shift; - $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; - my ($udom,$uname)=split(/\//,$author); - my $proname=propath($udom,$uname); - if (-e $proname) { - return 1; - } else { - return 0; +=over 4 + +Returns: + +=item C - key-value table of dynamically evaluated metadata. + +=back + +=cut + +sub build_on_the_fly_dynamic_metadata { + + # Need to compute the user's directory. + my $url=&declutter(shift); + $url=~s/\.meta$//; + my %returnhash=(); + my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); + my $user_directory=&construct_path_to_user_directory($adomain,$aauthor); + + # Attempt a GDBM database instantiation inside users directory and proceed. + if ((tie(%evaldata,'GDBM_File', + $user_directory. + '/nohist_resevaldata.db',&GDBM_READER(),0640)) && + (tie(%newevaldata,'GDBM_File', + $user_directory. + '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { + # For different variables, track the running sum and counts. + my %sum=(); + my %cnt=(); + + # Define computed items as a sum (add) or an average (avg) or a raw + # count (cnt) or append (app)? + my %listitems=('count' => 'add', + 'course' => 'add', + 'avetries' => 'avg', + 'stdno' => 'add', + 'difficulty' => 'avg', + 'clear' => 'avg', + 'technical' => 'avg', + 'helpful' => 'avg', + 'correct' => 'avg', + 'depth' => 'avg', + 'comments' => 'app', + 'usage' => 'cnt' + ); + + # Untaint the url and use as part of a regular expression. + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; #' emacs + + # Check existing database for this author. + # this is modifying the 'count' entries + # and copying all other entries over + + foreach (keys %evaldata) { + my $key=&unescape($_); + if ($key=~/$regexp/) { # If url-based entry exists. + my $ctype=$1; # Set to specific category type. + + # Do an increment for this category type. + if (defined($cnt{$ctype})) { + $cnt{$ctype}++; + } else { + $cnt{$ctype}=1; + } + unless ($listitems{$ctype} eq 'app') { # append comments + # Increment the sum based on the evaluated data in the db. + if (defined($sum{$ctype})) { + $sum{$ctype}+=$evaldata{$_}; + } else { + $sum{$ctype}=$evaldata{$_}; + } + } else { # 'app' mode, means to use '
<hr />' as a separator
+                if (defined($sum{$ctype})) {
+                    if ($evaldata{$_}) {
+                        $sum{$ctype}.='<hr />
'.$evaldata{$_}; + } + } else { + $sum{$ctype}=''.$evaldata{$_}; + } + } + if ($ctype ne 'count') { + # this is copying all data except 'count' attributes + $newevaldata{$_}=$evaldata{$_}; + } + } + } + + # these values will be returned (currently still unused) + foreach (keys %cnt) { + if ($listitems{$_} eq 'avg') { + $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; + } elsif ($listitems{$_} eq 'cnt') { + $returnhash{$_}=$cnt{$_}; + } else { + $returnhash{$_}=$sum{$_}; + } + } + + # generate new count key in resevaldata, insert sum + if ($returnhash{'count'}) { + my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; + $newevaldata{$newkey}=$returnhash{'count'}; + } + + untie(%evaldata); # Close/release the original nohist database. + untie(%newevaldata); # Close/release the new nohist database. } + return %returnhash; } -# -------------------------------------------- Return path to profile directory -# (copied from lond) -sub propath { - my ($udom,$uname)=@_; - $udom=~s/\W//g; - $uname=~s/\W//g; - my $subdir=$uname.'__'; - $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; - my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; - return $proname; -} +=pod + +B - used by B subroutine. + +This evaluates whether a file is wanted, and pushes it onto the +I<@metalist> array. This subroutine was, for the most part, auto-generated +by the B command. + +=over 4 + +Parameters: + +=item I<$file> - a path to the file. + +=back + +=over 4 + +Returns: + +=item C - true or false based on logical statement. + +=back + +=cut + +sub wanted ($) + { + (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && + -f $_ && + /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && + push(@metalist,$File::Find::dir.'/'.$_); + } + +=pod + +B - read xml-tagged file and return parsed metadata. + +I + +=over 4 + +Parameters: + +=item I<$file> - a path.to the file. + +=back + +=over 4 + +Returns: + +=item C - a hash array (keys and values). + +=back + +=cut + +sub get_metadata_from_file ($) + { + my ($filename) = @_; + my %metatable; # Used to store return value of hash-tabled metadata. + $filename = &declutter($filename); # Remove non-identifying filesystem info + my $uri = ''; # The URI is not relevant in this scenario. + unless ($filename =~ m/\.meta$/) # Unless ending with .meta. + { + $filename .= '.meta'; # Append a .meta suffix. + } + # Get the file contents. + my $metadata_string = + &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename); + + # Parse the file based on its XML tags. + my $parser = HTML::TokeParser->new(\$metadata_string); + my $token; + while ($token = $parser->get_token) # Loop through tokens. + { + if ($token->[0] eq 'S') # If it is a start token. + { + my $entry = $token->[1]; + my $unikey = $entry; # A unique identifier for this xml tag key. + if (defined($token->[2]->{'part'})) + { + $unikey .= '_'.$token->[2]->{'part'}; + } + if (defined($token->[2]->{'name'})) + { + $unikey .= '_'.$token->[2]->{'name'}; + } + # Append $unikey to metatable's keys entry. + if ($metatable{$uri.'keys'}) + { + $metatable{$uri.'keys'} .= ','.$unikey; + } + else + { + $metatable{$uri.'keys'} = $unikey; + } + # Insert contents into metatable entry for the unikey. + foreach my $t3 (@{$token->[3]}) + { + $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3}; + } + # If there was no text contained inside the tags, set = default. 
+ unless + ( + $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry) + ) + { + $metatable{$uri.''.$unikey} = + $metatable{$uri.''.$unikey.'.default'}; + } + } + } + # Return with a key-value table of XML tags and their tag contents. + return(\%metatable); + } + +=pod + +B - returns either the contents of the file or a -1. + +=over 4 + +Parameters: + +=item I<$file> - a complete filesystem path.to the file. + +=back + +=over 4 + +Returns: + +=item C - file contents or a -1. + +=back + +=cut + +sub get_file_contents ($) + { + my $file = shift(@_); + + # If file does not exist, then return a -1 value. + unless (-e $file) + { + return(-1); + } + + # Read in file contents. + my $file_handle = IO::File->new($file); + my $file_contents = ''; + while (<$file_handle>) + { + $file_contents .= $_; + } + + # Return file contents. + return($file_contents); + } + +=pod + +B - Declutters URLs (remove extraneous prefixed filesystem path). + +=over 4 + +Parameters: + +=item I<$filesystem_path> - a complete filesystem path. + +=back + +=over 4 + +Returns: + +=item C - remnants of the filesystem path (beginning portion removed). + +=back + +=cut + +sub declutter + { + my $filesystem_path = shift(@_); + + # Remove beginning portions of the filesystem path. + $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//; + $filesystem_path =~ s!^/!!; + $filesystem_path =~ s!^res/!!; + + # Return what is remaining for the filesystem path. + return($filesystem_path); + } + +=pod + +B - Is this the home server of an author's directory? + +=over 4 + +Parameters: + +=item I<$author_filesystem_path> - directory path for a user. + +=back + +=over 4 + +Returns: + +=item C - 1 if true; 0 if false. + +=back + +=cut + +sub query_home_server_status ($) + { + my $author_filesystem_path = shift(@_); + + # Remove beginning portion of this filesystem path. + $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!; + + # Construct path to the author's ordinary user directory. + my ($user_domain,$username) = split(m!/!,$author_filesystem_path); + my $user_directory_path = construct_path_to_user_directory($user_domain, + $username); + + # Return status of whether the user directory path is defined. + if (-e $user_directory_path) + { + return(1); # True. + } + else + { + return(0); # False. + } + } + +=pod + +B ($$) - makes a filesystem path to user dir. + +=over 4 + +Parameters: + +=item I<$user_domain> - the loncapa domain of the user. + +=item I<$username> - the unique username (user id) of the user. + +=back + +=over 4 + +Returns: + +=item C - representing the path on the filesystem. + +=back + +=cut + +sub construct_path_to_user_directory ($$) + { + my ($user_domain,$username) = @_; + + # Untaint. + $user_domain =~ s/\W//g; + $username =~ s/\W//g; + + # Create three levels of sub-directoried filesystem path + # based on the first three characters of the username. + my $sub_filesystem_path = $username.'__'; + $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!; + + # Use the sub-directoried levels and other variables to generate + # the complete filesystem path. + my $complete_filesystem_path = + join('/',($perlvar{'lonUsersDir'}, + $user_domain, + $sub_filesystem_path, + $username)); + + # Return the complete filesystem path. + return($complete_filesystem_path); + } + +=pod + +B (@) - turns seconds since epoch into datetime sql format. + +=over 4 + +Parameters: + +=item I<$epochtime> - time in seconds since epoch (may need to be sanitized). + +=back + +=over 4 + +Returns: + +=item C - datetime sql formatted string. 
+ +=back + +=cut + +sub sql_formatted_time ($) + { + # Sanitize the time argument and convert to localtime array. + my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = + localtime(&sanitize_time(shift(@_))); + + # Convert month from (0..11) to (1..12). + $mon += 1; + + # Make the year compatible with A.D. specification. + $year += 1900; + + # Return a date which is compatible with MySQL's "DATETIME" format. + return(join('-',($year,$mon,$mday)). + ' '. + join(':',($hour,$min,$sec)) + ); + } + + +# ==================================== The following two subroutines are needed +# for accommodating incorrect time formats inside the metadata. + +=pod + +B (@) - turns time metadata into seconds since epoch. + +=over 4 + +Parameters: + +=item I<%time_metadata> - a key-value listing characterizing month, year, etc. + +=back + +=over 4 + +Returns: + +=item C - seconds since epoch. + +=back + +=cut + +sub make_seconds_since_epoch (@) + { + # Keytable of time metadata. + my %time_metadata = @_; + + # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC). + return(POSIX::mktime( + ($time_metadata{'seconds'}, + $time_metadata{'minutes'}, + $time_metadata{'hours'}, + $time_metadata{'day'}, + $time_metadata{'month'}-1, + $time_metadata{'year'}-1900, + 0, + 0, + $time_metadata{'dlsav'}) + ) + ); + } + +=pod + +B - if time looks sql-formatted, make it seconds since epoch. + +Somebody described this subroutine as +"retro-fixing of un-backward-compatible time format". + +What this means, is that a part of this code expects to get UTC seconds +since the epoch (beginning of 1970). Yet, some of the .meta files have +sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch +integers (e.g. 1044147435). These time strings do not encode the timezone +and, in this sense, can be considered "un-backwards-compatible". + +=over 4 + +Parameters: + +=item I<$potentially_badformat_string> - string to "retro-fix". + +=back + +=over 4 + +Returns: + +=item C - seconds since epoch. + +=back + +=cut + +sub sanitize_time ($) + { + my $timestamp = shift(@_); + # If timestamp is in this unexpected format.... + if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) + { + # then convert into seconds since epoch (the expected format). + $timestamp = &make_seconds_since_epoch( + 'year' => $1, + 'month' => $2, + 'day' => $3, + 'hours' => $4, + 'minutes' => $5, + 'seconds' => $6 + ); + } + # Otherwise we assume timestamp to be as expected. + return($timestamp); + } + +=pod + +=head1 AUTHOR + +Written to help the loncapa project. + +Scott Harrison, sharrison@users.sourceforge.net + +This is distributed under the same terms as loncapa (i.e. "freeware"). + +=cut
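
For reference, here is a minimal standalone sketch (not part of the patch itself) of the timestamp normalization that the new sanitize_time() and sql_formatted_time() subroutines perform: a metadata timestamp may arrive either as seconds since the epoch or as an SQL-style "YYYY-MM-DD HH:MM:SS" string, and both must end up in MySQL DATETIME format. The helper names to_epoch() and to_sql_datetime() below are illustrative only and do not exist in searchcat.pl.

#!/usr/bin/perl
use strict;
use warnings;
use POSIX qw(mktime);

# Normalize a metadata timestamp to seconds since the epoch.
sub to_epoch {
    my ($timestamp) = @_;
    if ($timestamp =~ /^(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)$/) {
        # POSIX::mktime() expects 0-based months and years since 1900.
        return mktime($6, $5, $4, $3, $2 - 1, $1 - 1900);
    }
    return $timestamp;    # Already seconds since the epoch.
}

# Render epoch seconds in MySQL DATETIME format ("YYYY-MM-DD HH:MM:SS").
sub to_sql_datetime {
    my ($epoch) = @_;
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($epoch);
    return sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                   $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

print to_sql_datetime(to_epoch('2001-04-01 12:00:00')), "\n";   # SQL-style input
print to_sql_datetime(to_epoch(1044147435)), "\n";              # epoch-seconds input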