--- loncom/metadata_database/searchcat.pl	2003/01/04 19:23:31	1.27
+++ loncom/metadata_database/searchcat.pl	2003/02/03 05:39:37	1.28
@@ -2,29 +2,30 @@
 # The LearningOnline Network
 # searchcat.pl "Search Catalog" batch script
 #
-# $Id: searchcat.pl,v 1.27 2003/01/04 19:23:31 www Exp $
+# $Id: searchcat.pl,v 1.28 2003/02/03 05:39:37 harris41 Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
-# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
+# This file is part of the LearningOnline Network with a
+# Computer assisted personalized approach (loncapa).
 #
-# LON-CAPA is free software; you can redistribute it and/or modify
+# Loncapa is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
 #
-# LON-CAPA is distributed in the hope that it will be useful,
+# Loncapa is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with LON-CAPA; if not, write to the Free Software
+# along with loncapa; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 #
 # /home/httpd/html/adm/gpl.txt
 #
-# http://www.lon-capa.org/
+# http://www.loncapa.org/
 #
 # YEAR=2001
 # 04/14/2001, 04/16/2001 Scott Harrison
@@ -32,11 +33,51 @@
 # YEAR=2002
 # 05/11/2002 Scott Harrison
 #
+# YEAR=2003
+# Scott Harrison
+#
 ###
-# This script goes through a LON-CAPA resource
-# directory and gathers metadata.
-# The metadata is entered into a SQL database.
+=pod
+
+=head1 NAME
+
+B<searchcat.pl> - put authoritative filesystem data into sql database.
+
+=head1 SYNOPSIS
+
+Ordinarily this script is to be called from a loncapa cron job
+(CVS source location: F<loncom/metadata_database/searchcat.pl>; typical
+filesystem installation location: F</home/httpd/perl/searchcat.pl>).
+
+Here is the cron job entry.
+
+C<# Repopulate and refresh the metadata database used for the search catalog.>
+
+C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
+
+This script only allows itself to be run as the user C<www>.
+
+=head1 DESCRIPTION
+
+This script goes through a loncapa resource directory and gathers metadata.
+The metadata is entered into a SQL database.
+
+This script also does general database maintenance such as reformatting
+the C<metadata> table if it is deprecated.
+
+This script also builds dynamic temporal metadata and stores this inside
+a F<nohist_resevaldata.db> database file.
+
+This script is playing an increasingly important role for a loncapa
+library server. The proper operation of this script is critical for a smooth
+and correct user experience.
+
+=cut
+
+# ========================================================== Setting things up.
+
+# ------------------------------------------------------ Use external modules.
 use lib '/home/httpd/lib/perl/';
 use LONCAPA::Configuration;
@@ -47,346 +88,891 @@
 use DBI;
 use GDBM_File;
 use POSIX qw(strftime mktime);
+# ----------------- Code to enable 'find' subroutine listing of the .meta files
+use File::Find;
+
+# List of .meta files (used on a per-user basis).
 my @metalist;
+# --------------- Read loncapa_apache.conf and loncapa.conf and get variables.
+my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf'); +my %perlvar = %{$perlvarref}; +undef($perlvarref); # Remove since sensitive and not needed. +delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed. + +# ------------------------------------- Only run if machine is a library server +if ($perlvar{'lonRole'} ne 'library') + { + exit(0); + } + +# ------------------------------ Make sure this process is running as user=www. +my $wwwid = getpwnam('www'); +if ($wwwid != $<) + { + $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + $subj = "LON: $perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ". + "mailto $emailto -s '$subj' > /dev/null"); + exit(1); + } + +# ------------------------------------------------------ Initialize log output. +open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); +print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n"); + +my $dbh; # Database object reference handle. + +# ----------------------------- Verify connection to loncapa:metadata database. +unless ( + $dbh = DBI->connect('DBI:mysql:loncapa','www', + $perlvar{'lonSqlAccess'}, + { RaiseError => 0,PrintError => 0}) + ) + { + print(LOG '**** ERROR **** Cannot connect to database!'."\n"); + exit(0); + } + +# ------------------------------ Create loncapa:metadata table if non-existent. +my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('. + 'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '. + 'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '. + 'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '. + 'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '. + 'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '. + 'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '. + 'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '. + 'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '. + 'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '. + 'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM'; + +$dbh->do($make_metadata_table); # Generate the table. + +# ----------------------------- Verify format of the loncapa:metadata database. +# (delete and recreate database if necessary). + +# Make a positive control for verifying table structure. +my $make_metadata_table_CONTROL = $make_metadata_table; +$make_metadata_table_CONTROL =~ + s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/; + +$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); +$dbh->do($make_metadata_table_CONTROL); + +my $table_description; # selectall reference to the table description. + +my $CONTROL_table_string; # What the table description should look like. +my $table_string; # What the table description does look like. + +# Calculate the CONTROL table description (what it should be). +$table_description = $dbh->selectall_arrayref('describe CONTROL_metadata'); +foreach my $table_row (@{$table_description}) + { + $CONTROL_table_string .= join(',',@{$table_row})."\n"; + } + +# Calculate the current table description (what it currently looks like). +$table_description = $dbh->selectall_arrayref('describe metadata'); +foreach my $table_row (@{$table_description}) + { + $table_string .= join(',',@{$table_row})."\n"; + } + +if ($table_string ne $CONTROL_table_string) + { + # Log this incident. + print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'. + '.'."\n"); + # Delete the table. 
+ $dbh->do('DROP TABLE IF EXISTS metadata'); + # Generate the table. + $dbh->do($make_metadata_table); + } -# ----------------------------------------------------- Un-Escape Special Chars +$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay. Done with control. -sub unescape { - my $str=shift; +# ----------------------------------------------- Set utilitysemaphore to zero. +$dbh->do('UPDATE metadata SET utilitysemaphore = 0'); + +# ========================================================= Main functionality. + +# - Determine home authors on this server based on resources dir and user tree. + +# RESOURCES: the resources directory (subdirs correspond to author usernames). +opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or + (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n") + and exit(0)); + +# query_home_server_status will look for user home directories on this machine. +my @homeusers = + grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'. + $perlvar{'lonDefDomain'}.'/'.$_) + } grep {!/^\.\.?$/} readdir(RESOURCES); +closedir(RESOURCES); + +unless (@homeusers) + { + print(LOG '=== No home users found on this server.'."\n"); + } + +# Consider each author individually. +foreach my $user (@homeusers) + { + # Make a log entry. + print(LOG "\n".'=== User: '.$user."\n\n"); + + # Get filesystem path to this user's directory. + my $user_directory = + &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user); + + # Remove left-over db-files from a potentially crashed searchcat run. + unlink($user_directory.'/nohist_new_resevaldata.db'); + + # Cleanup the metalist array. + undef(@metalist); + @metalist = (); + + # This will add entries to the @metalist array. + &File::Find::find(\&wanted, + $perlvar{'lonDocRoot'}.'/res/'. + $perlvar{'lonDefDomain'}.'/'.$user); + + # -- process file to get metadata and put into search catalog SQL database + # Also, build and store dynamic metadata. + # Also, delete record entries before refreshing. + foreach my $m (@metalist) + { + # Log this action. + print(LOG "- ".$m."\n"); + + # Get metadata from the file. + my $ref = get_metadata_from_file($m); + + # Make a datarecord identifier for this resource. + my $m2 = '/res/'.declutter($m); + $m2 =~ s/\.meta$//; + + # Build and store dynamic metadata inside nohist_resevaldata.db. + build_on_the_fly_dynamic_metadata($m2); + + # Delete record if it already exists. + my $q2 = 'select * from metadata where url like binary '."'".$m2."'"; + my $sth = $dbh->prepare($q2); + $sth->execute(); + my $r1 = $sth->fetchall_arrayref; + if (@$r1) + { + $sth = + $dbh->prepare('delete from metadata where url like binary '. + "'".$m2."'"); + $sth->execute(); + } + + # Add new/replacement record into the loncapa:metadata table. + $sth = $dbh->prepare('insert into metadata values ('. + '"'.delete($ref->{'title'}).'"'.','. + '"'.delete($ref->{'author'}).'"'.','. + '"'.delete($ref->{'subject'}).'"'.','. + '"'.$m2.'"'.','. + '"'.delete($ref->{'keywords'}).'"'.','. + '"'.'current'.'"'.','. + '"'.delete($ref->{'notes'}).'"'.','. + '"'.delete($ref->{'abstract'}).'"'.','. + '"'.delete($ref->{'mime'}).'"'.','. + '"'.delete($ref->{'language'}).'"'.','. + '"'.sql_formatted_time( + delete($ref->{'creationdate'})).'"'.','. + '"'.sql_formatted_time( + delete($ref->{'lastrevisiondate'})).'"'.','. + '"'.delete($ref->{'owner'}).'"'.','. + '"'.delete($ref->{'copyright'}).'"'.','. 
+ '1'.')'); + $sth->execute(); + } + +# ----------------------- Clean up database, remove stale SQL database records. + $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0'); + +# -------------------------------------------------- Copy over the new db-files + system('mv '.$user_directory.'/nohist_new_resevaldata.db '. + $user_directory.'/nohist_resevaldata.db'); + } + +# --------------------------------------------------- Close database connection +$dbh->disconnect; +print LOG "\n==== Searchcat completed ".localtime()." ====\n"; +close(LOG); +exit(0); + +# ================================================================ Subroutines. + +=pod + +=head1 SUBROUTINES + +=cut + +=pod + +B - translate to unstrange escaped syntax to strange characters. + +=over 4 + +Parameters: + +=item I<$str> - string with unweird characters. + +=back + +=over 4 + +Returns: + +=item C - string with potentially weird characters. + +=back + +=cut + +sub unescape ($) + { + my $str = shift(@_); $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; - return $str; -} + return($str); + } + +=pod + +B - translate strange characters to unstrange escaped syntax. + +=over 4 + +Parameters: -# -------------------------------------------------------- Escape Special Chars +=item I<$str> - string with potentially weird characters to unweird-ify. -sub escape { - my $str=shift; +=back + +=over 4 + +Returns: + +=item C - unweird-ified string. + +=back + +=cut + +sub escape ($) + { + my $str = shift(@_); $str =~ s/(\W)/"%".unpack('H2',$1)/eg; - return $str; -} + return($str); + } + +=pod + +B - evaluate and store dynamic metadata. + +Dynamic metadata is stored in a nohist_resevaldata GDBM database. +Most of the calculations in this subroutine are totally pointless +and not useful for anything that this subroutine does. +(THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.) +The only thing that this subroutine really makes happen is adjusting +a 'count' value inside the F as well +as updating F with information from +F. + +=over 4 + +Parameters: + +=item I<$url> - the filesystem path (url may be a misnomer...) + +=back + +=over 4 + +Returns: +=item C - key-value table of dynamically evaluated metadata. -# ------------------------------------------- Code to evaluate dynamic metadata +=back -sub dynamicmeta { +=cut - my $url=&declutter(shift); - $url=~s/\.meta$//; - my %returnhash=(); - my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); - my $prodir=&propath($adomain,$aauthor); +sub build_on_the_fly_dynamic_metadata ($) + { + # BEWARE ALL WHO TRY TO UNDERSTAND THIS ABSURDLY HORRIBLE SUBROUTINE. + + # Do all sorts of mumbo-jumbo to compute the user's directory. + my $url = &declutter(shift(@_)); + $url =~ s/\.meta$//; + my %returnhash = (); + my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!); + my $user_directory = &construct_path_to_user_directory($adomain,$aauthor); + + # Attempt a GDBM database instantiation inside users directory and proceed. if ((tie(%evaldata,'GDBM_File', - $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) && + $user_directory. 
+ '/nohist_resevaldata.db',&GDBM_READER(),0640)) && (tie(%newevaldata,'GDBM_File', - $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { - my %sum=(); - my %cnt=(); - my %listitems=('count' => 'add', - 'course' => 'add', - 'avetries' => 'avg', - 'stdno' => 'add', - 'difficulty' => 'avg', - 'clear' => 'avg', - 'technical' => 'avg', - 'helpful' => 'avg', - 'correct' => 'avg', - 'depth' => 'avg', - 'comments' => 'app', - 'usage' => 'cnt' - ); - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - foreach (keys %evaldata) { - my $key=&unescape($_); - if ($key=~/$regexp/) { - my $ctype=$1; - if (defined($cnt{$ctype})) { - $cnt{$ctype}++; - } else { - $cnt{$ctype}=1; - } - unless ($listitems{$ctype} eq 'app') { - if (defined($sum{$ctype})) { - $sum{$ctype}+=$evaldata{$_}; - } else { - $sum{$ctype}=$evaldata{$_}; - } - } else { - if (defined($sum{$ctype})) { - if ($evaldata{$_}) { - $sum{$ctype}.='
'.$evaldata{$_}; - } - } else { - $sum{$ctype}=''.$evaldata{$_}; - } - } - if ($ctype ne 'count') { - $newevaldata{$_}=$evaldata{$_}; - } - } + $user_directory. + '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) + { + # For different variables, track the running sum and counts. + my %sum = (); + my %cnt = (); + + # Define computed items as a sum (add) or an average (avg) or a raw + # count (cnt) or 'app'? + my %listitems=('count' => 'add', + 'course' => 'add', + 'avetries' => 'avg', + 'stdno' => 'add', + 'difficulty' => 'avg', + 'clear' => 'avg', + 'technical' => 'avg', + 'helpful' => 'avg', + 'correct' => 'avg', + 'depth' => 'avg', + 'comments' => 'app', + 'usage' => 'cnt' + ); + + # Untaint the url and use as part of a regular expression. + my $regexp = $url; + $regexp =~ s/(\W)/\\$1/g; + $regexp = '___'.$regexp.'___([a-z]+)$'; + + # Check existing nohist database for this url. + # THE ONLY TIME THIS IS IMPORTANT FOR THIS AWFUL SUBROUTINE + # IS FOR 'count' ENTRIES + # AND FOR REFRESHING non-'count' ENTRIES INSIDE nohist_new DATABASE. + foreach (keys %evaldata) + { + my $key = &unescape($_); + if ($key =~ /$regexp/) # If url-based entry exists. + { + my $ctype = $1; # Set to specific category type. + + # Do an increment for this category type. + if (defined($cnt{$ctype})) + { + $cnt{$ctype}++; + } + else + { + $cnt{$ctype} = 1; + } + unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN? + { + # Increment the sum based on the evaluated data in the db. + if (defined($sum{$ctype})) + { + $sum{$ctype} += $evaldata{$_}; + } + else + { + $sum{$ctype} = $evaldata{$_}; + } + } + else # 'app' mode, means to use '
' as a separator + { + if (defined($sum{$ctype})) + { + if ($evaldata{$_}) + { + $sum{$ctype} .= '
'.$evaldata{$_}; + } + } + else + { + $sum{$ctype} = ''.$evaldata{$_}; + } + } + if ($ctype ne 'count') + { + # ALERT! THIS HORRIBLE LOOP IS ACTUALLY DOING SOMETHING + # USEFUL! + $newevaldata{$_} = $evaldata{$_}; + } + } + } + + # THE ONLY OTHER TIME THIS LOOP IS USEFUL IS FOR THE 'count' HASH + # ELEMENT. + foreach (keys %cnt) + { + if ($listitems{$_} eq 'avg') + { + $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; + } + elsif ($listitems{$_} eq 'cnt') + { + $returnhash{$_} = $cnt{$_}; + } + else + { + $returnhash{$_} = $sum{$_}; + } + } + + # A RARE MOMENT OF DOING ANYTHING USEFUL INSIDE THIS + # BLEEPING SUBROUTINE. + if ($returnhash{'count'}) + { + my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count'; + $newevaldata{$newkey} = $returnhash{'count'}; + } + + untie(%evaldata); # Close/release the original nohist database. + untie(%newevaldata); # Close/release the new nohist database. } - foreach (keys %cnt) { - if ($listitems{$_} eq 'avg') { - $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; - } elsif ($listitems{$_} eq 'cnt') { - $returnhash{$_}=$cnt{$_}; - } else { - $returnhash{$_}=$sum{$_}; - } - } - if ($returnhash{'count'}) { - my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey}=$returnhash{'count'}; - } - untie(%evaldata); - untie(%newevaldata); - } - return %returnhash; -} - -# ----------------- Code to enable 'find' subroutine listing of the .meta files -require "find.pl"; -sub wanted { + return(%returnhash); + # Celebrate! We have now accomplished some simple calculations using + # 1000% bloated functionality in our subroutine. Go wash your eyeballs + # out now. + } + +=pod + +B - used by B subroutine. + +This evaluates whether a file is wanted, and pushes it onto the +I<@metalist> array. This subroutine was, for the most part, auto-generated +by the B command. + +=over 4 + +Parameters: + +=item I<$file> - a path to the file. + +=back + +=over 4 + +Returns: + +=item C - true or false based on logical statement. + +=back + +=cut + +sub wanted ($) + { (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f _ && + -f $_ && /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,"$dir/$_"); -} + push(@metalist,$File::Find::dir.'/'.$_); + } -# --------------- Read loncapa_apache.conf and loncapa.conf and get variables -my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); -my %perlvar=%{$perlvarref}; -undef $perlvarref; # remove since sensitive and not needed -delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed +=pod -# ------------------------------------- Only run if machine is a library server -exit unless $perlvar{'lonRole'} eq 'library'; +B - read xml-tagged file and return parsed metadata. -# ----------------------------- Make sure this process is running from user=www +I -my $wwwid=getpwnam('www'); -if ($wwwid!=$<) { - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; - system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ - mailto $emailto -s '$subj' > /dev/null"); - exit 1; -} +=over 4 +Parameters: -# ---------------------------------------------------------- We are in business +=item I<$file> - a path.to the file. 
-open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); -print LOG '==== Searchcat Run '.localtime()."====\n\n"; -my $dbh; -# ------------------------------------- Make sure that database can be accessed -{ - unless ( - $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) - ) { - print LOG "Cannot connect to database!\n"; - exit; - } - my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (". - "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ". - "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ". - "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ". - "copyright TEXT, FULLTEXT idx_title (title), ". - "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ". - "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ". - "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ". - "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ". - "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ". - "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM"; - # It would sure be nice to have some logging mechanism. - $dbh->do($make_metadata_table); -} +=back -# ------------------------------------------------------------- get .meta files -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); -my @homeusers=grep - {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")} - grep {!/^\.\.?$/} readdir(RESOURCES); -closedir RESOURCES; -foreach my $user (@homeusers) { - print LOG "\n=== User: ".$user."\n\n"; -# Remove left-over db-files from potentially crashed searchcat run - my $prodir=&propath($perlvar{'lonDefDomain'},$user); - unlink($prodir.'/nohist_new_resevaldata.db'); -# Use find.pl - undef @metalist; - @metalist=(); - &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); - -# -- process each file to get metadata and put into search catalog SQL database -# Also, check to see if already there. -# I could just delete (without searching first), but this works for now. -foreach my $m (@metalist) { - print LOG "- ".$m."\n"; - my $ref=&metadata($m); - my $m2='/res/'.&declutter($m); - $m2=~s/\.meta$//; - &dynamicmeta($m2); - my $q2="select * from metadata where url like binary '$m2'"; - my $sth = $dbh->prepare($q2); - $sth->execute(); - my $r1=$sth->fetchall_arrayref; - if (@$r1) { - $sth=$dbh->prepare("delete from metadata where url like binary '$m2'"); - $sth->execute(); - } - $sth=$dbh->prepare('insert into metadata values ('. - '"'.delete($ref->{'title'}).'"'.','. - '"'.delete($ref->{'author'}).'"'.','. - '"'.delete($ref->{'subject'}).'"'.','. - '"'.$m2.'"'.','. - '"'.delete($ref->{'keywords'}).'"'.','. - '"'.'current'.'"'.','. - '"'.delete($ref->{'notes'}).'"'.','. - '"'.delete($ref->{'abstract'}).'"'.','. - '"'.delete($ref->{'mime'}).'"'.','. - '"'.delete($ref->{'language'}).'"'.','. - '"'.sqltime(delete($ref->{'creationdate'})).'"'.','. - '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','. - '"'.delete($ref->{'owner'}).'"'.','. - '"'.delete($ref->{'copyright'}).'"'.')'); - $sth->execute(); -} - -# ----------------------------------------------------------- Clean up database -# Need to, perhaps, remove stale SQL database records. -# ... not yet implemented +=over 4 +Returns: -# -------------------------------------------------- Copy over the new db-files - system('mv '.$prodir.'/nohist_new_resevaldata.db '. 
- $prodir.'/nohist_resevaldata.db'); -} -# --------------------------------------------------- Close database connection -$dbh->disconnect; -print LOG "\n==== Searchcat completed ".localtime()." ====\n"; -close(LOG); -exit 0; -# ============================================================================= +=item C - a hash array (keys and values). + +=back -# ---------------------------------------------------------------- Get metadata -# significantly altered from subroutine present in lonnet -sub metadata { - my ($uri,$what)=@_; - my %metacache; - $uri=&declutter($uri); - my $filename=$uri; - $uri=~s/\.meta$//; - $uri=''; - unless ($metacache{$uri.'keys'}) { - unless ($filename=~/\.meta$/) { $filename.='.meta'; } - my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); - my $parser=HTML::TokeParser->new(\$metastring); - my $token; - while ($token=$parser->get_token) { - if ($token->[0] eq 'S') { - my $entry=$token->[1]; - my $unikey=$entry; - if (defined($token->[2]->{'part'})) { - $unikey.='_'.$token->[2]->{'part'}; +=cut + +sub get_metadata_from_file ($) + { + my ($filename) = @_; + my %metatable; # Used to store return value of hash-tabled metadata. + $filename = &declutter($filename); # Remove non-identifying filesystem info + my $uri = ''; # The URI is not relevant in this scenario. + unless ($filename =~ m/\.meta$/) # Unless ending with .meta. + { + $filename .= '.meta'; # Append a .meta suffix. + } + # Get the file contents. + my $metadata_string = + &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename); + + # Parse the file based on its XML tags. + my $parser = HTML::TokeParser->new(\$metadata_string); + my $token; + while ($token = $parser->get_token) # Loop through tokens. + { + if ($token->[0] eq 'S') # If it is a start token. + { + my $entry = $token->[1]; + my $unikey = $entry; # A unique identifier for this xml tag key. + if (defined($token->[2]->{'part'})) + { + $unikey .= '_'.$token->[2]->{'part'}; } - if (defined($token->[2]->{'name'})) { - $unikey.='_'.$token->[2]->{'name'}; + if (defined($token->[2]->{'name'})) + { + $unikey .= '_'.$token->[2]->{'name'}; } - if ($metacache{$uri.'keys'}) { - $metacache{$uri.'keys'}.=','.$unikey; - } else { - $metacache{$uri.'keys'}=$unikey; + # Append $unikey to metatable's keys entry. + if ($metatable{$uri.'keys'}) + { + $metatable{$uri.'keys'} .= ','.$unikey; } - map { - $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; - } @{$token->[3]}; - unless ( - $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry) - ) { $metacache{$uri.''.$unikey}= - $metacache{$uri.''.$unikey.'.default'}; - } - } - } - } - return \%metacache; -} - -# ------------------------------------------------------------ Serves up a file -# returns either the contents of the file or a -1 -sub getfile { - my $file=shift; - if (! -e $file ) { return -1; }; - my $fh=IO::File->new($file); - my $a=''; - while (<$fh>) { $a .=$_; } - return $a -} - -# ------------------------------------------------------------- Declutters URLs -sub declutter { - my $thisfn=shift; - $thisfn=~s/^$perlvar{'lonDocRoot'}//; - $thisfn=~s/^\///; - $thisfn=~s/^res\///; - return $thisfn; -} - -# --------------------------------------- Is this the home server of an author? 
-# (copied from lond, modification of the return value) -sub ishome { - my $author=shift; - $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; - my ($udom,$uname)=split(/\//,$author); - my $proname=propath($udom,$uname); - if (-e $proname) { - return 1; - } else { - return 0; - } -} - -# -------------------------------------------- Return path to profile directory -# (copied from lond) -sub propath { - my ($udom,$uname)=@_; - $udom=~s/\W//g; - $uname=~s/\W//g; - my $subdir=$uname.'__'; - $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; - my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; - return $proname; -} + else + { + $metatable{$uri.'keys'} = $unikey; + } + # Insert contents into metatable entry for the unikey. + foreach my $t3 (@{$token->[3]}) + { + $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3}; + } + # If there was no text contained inside the tags, set = default. + unless + ( + $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry) + ) + { + $metatable{$uri.''.$unikey} = + $metatable{$uri.''.$unikey.'.default'}; + } + } + } + # Return with a key-value table of XML tags and their tag contents. + return(\%metatable); + } -# ---------------------------- convert 'time' format into a datetime sql format -sub sqltime { +=pod + +B - returns either the contents of the file or a -1. + +=over 4 + +Parameters: + +=item I<$file> - a complete filesystem path.to the file. + +=back + +=over 4 + +Returns: + +=item C - file contents or a -1. + +=back + +=cut + +sub get_file_contents ($) + { + my $file = shift(@_); + + # If file does not exist, then return a -1 value. + unless (-e $file) + { + return(-1); + } + + # Read in file contents. + my $file_handle = IO::File->new($file); + my $file_contents = ''; + while (<$file_handle>) + { + $file_contents .= $_; + } + + # Return file contents. + return($file_contents); + } + +=pod + +B - Declutters URLs (remove extraneous prefixed filesystem path). + +=over 4 + +Parameters: + +=item I<$filesystem_path> - a complete filesystem path. + +=back + +=over 4 + +Returns: + +=item C - remnants of the filesystem path (beginning portion removed). + +=back + +=cut + +sub declutter + { + my $filesystem_path = shift(@_); + + # Remove beginning portions of the filesystem path. + $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//; + $filesystem_path =~ s!^/!!; + $filesystem_path =~ s!^res/!!; + + # Return what is remaining for the filesystem path. + return($filesystem_path); + } + +=pod + +B - Is this the home server of an author's directory? + +=over 4 + +Parameters: + +=item I<$author_filesystem_path> - directory path for a user. + +=back + +=over 4 + +Returns: + +=item C - 1 if true; 0 if false. + +=back + +=cut + +sub query_home_server_status ($) + { + my $author_filesystem_path = shift(@_); + + # Remove beginning portion of this filesystem path. + $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!; + + # Construct path to the author's ordinary user directory. + my ($user_domain,$username) = split(m!/!,$author_filesystem_path); + my $user_directory_path = construct_path_to_user_directory($user_domain, + $username); + + # Return status of whether the user directory path is defined. + if (-e $user_directory_path) + { + return(1); # True. + } + else + { + return(0); # False. + } + } + +=pod + +B ($$) - makes a filesystem path to user dir. + +=over 4 + +Parameters: + +=item I<$user_domain> - the loncapa domain of the user. + +=item I<$username> - the unique username (user id) of the user. 
+ +=back + +=over 4 + +Returns: + +=item C - representing the path on the filesystem. + +=back + +=cut + +sub construct_path_to_user_directory ($$) + { + my ($user_domain,$username) = @_; + + # Untaint. + $user_domain =~ s/\W//g; + $username =~ s/\W//g; + + # Create three levels of sub-directoried filesystem path + # based on the first three characters of the username. + my $sub_filesystem_path = $username.'__'; + $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!; + + # Use the sub-directoried levels and other variables to generate + # the complete filesystem path. + my $complete_filesystem_path = + join('/',($perlvar{'lonUsersDir'}, + $user_domain, + $sub_filesystem_path, + $username)); + + # Return the complete filesystem path. + return($complete_filesystem_path); + } + +=pod + +B (@) - turns seconds since epoch into datetime sql format. + +=over 4 + +Parameters: + +=item I<$epochtime> - time in seconds since epoch (may need to be sanitized). + +=back + +=over 4 + +Returns: + +=item C - datetime sql formatted string. + +=back + +=cut + +sub sql_formatted_time ($) + { + # Sanitize the time argument and convert to localtime array. my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime(&unsqltime(@_[0])); - $mon++; $year+=1900; - return "$year-$mon-$mday $hour:$min:$sec"; -} - -sub maketime { - my %th=@_; - return POSIX::mktime( - ($th{'seconds'},$th{'minutes'},$th{'hours'}, - $th{'day'},$th{'month'}-1,$th{'year'}-1900,0,0,$th{'dlsav'})); -} - - -######################################### -# -# Retro-fixing of un-backward-compatible time format - -sub unsqltime { - my $timestamp=shift; - if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) { - $timestamp=&maketime( - 'year'=>$1,'month'=>$2,'day'=>$3, - 'hours'=>$4,'minutes'=>$5,'seconds'=>$6); - } - return $timestamp; -} + localtime(&sanitize_time(shift(@_))); + + # Convert month from (0..11) to (1..12). + $mon += 1; + + # Make the year compatible with A.D. specification. + $year += 1900; + + # Return a date which is compatible with MySQL's "DATETIME" format. + return(join('-',($year,$mon,$mday)). + ' '. + join(':',($hour,$min,$sec)) + ); + } + + +# ==================================== The following two subroutines are needed +# for accommodating incorrect time formats inside the metadata. + +=pod + +B (@) - turns time metadata into seconds since epoch. + +=over 4 + +Parameters: + +=item I<%time_metadata> - a key-value listing characterizing month, year, etc. + +=back + +=over 4 + +Returns: + +=item C - seconds since epoch. + +=back + +=cut + +sub make_seconds_since_epoch (@) + { + # Keytable of time metadata. + my %time_metadata = @_; + + # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC). + return(POSIX::mktime( + ($time_metadata{'seconds'}, + $time_metadata{'minutes'}, + $time_metadata{'hours'}, + $time_metadata{'day'}, + $time_metadata{'month'}-1, + $time_metadata{'year'}-1900, + 0, + 0, + $time_metadata{'dlsav'}) + ) + ); + } + +=pod + +B - if time looks sql-formatted, make it seconds since epoch. + +Somebody described this subroutine as +"retro-fixing of un-backward-compatible time format". + +What this means, is that a part of this code expects to get UTC seconds +since the epoch (beginning of 1970). Yet, some of the .meta files have +sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch +integers (e.g. 1044147435). These time strings do not encode the timezone +and, in this sense, can be considered "un-backwards-compatible". 
+ +=over 4 + +Parameters: + +=item I<$potentially_badformat_string> - string to "retro-fix". + +=back + +=over 4 + +Returns: + +=item C - seconds since epoch. + +=back + +=cut + +sub sanitize_time ($) + { + my $timestamp = shift(@_); + # If timestamp is in this unexpected format.... + if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) + { + # then convert into seconds since epoch (the expected format). + $timestamp = &make_seconds_since_epoch( + 'year' => $1, + 'month' => $2, + 'day' => $3, + 'hours' => $4, + 'minutes' => $5, + 'seconds' => $6 + ); + } + # Otherwise we assume timestamp to be as expected. + return($timestamp); + } + +=pod + +=head1 AUTHOR + +Written to help the loncapa project. + +Scott Harrison, sharrison@users.sourceforge.net + +This is distributed under the same terms as loncapa (i.e. "freeware"). +=cut
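For readers of this patch, here is a minimal standalone Perl sketch (not part of the patch; the helper names epoch_from_timestamp and datetime_from_epoch are invented for illustration) of the time-format round trip that sanitize_time, make_seconds_since_epoch, and sql_formatted_time perform: an sql-style "YYYY-MM-DD HH:MM:SS" string is first normalized back to seconds since the epoch, and epoch seconds are then rendered into MySQL DATETIME form.

    #!/usr/bin/perl
    use strict;
    use warnings;
    use POSIX qw(mktime);

    # Turn a possibly sql-formatted timestamp into seconds since the epoch
    # (mirrors the intent of sanitize_time/make_seconds_since_epoch above).
    sub epoch_from_timestamp {
        my ($timestamp) = @_;
        if ($timestamp =~ /^(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)$/) {
            # mktime wants (sec,min,hour,mday,mon-1,year-1900,...); isdst=-1
            # lets the C library decide daylight saving time.
            return mktime($6, $5, $4, $3, $2 - 1, $1 - 1900, 0, 0, -1);
        }
        return $timestamp;    # already plain epoch seconds
    }

    # Render epoch seconds as a MySQL DATETIME string
    # (mirrors the intent of sql_formatted_time above).
    sub datetime_from_epoch {
        my ($epoch) = @_;
        my ($sec, $min, $hour, $mday, $mon, $year) = localtime($epoch);
        return sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                       $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
    }

    # Both calls print a DATETIME string regardless of the input format.
    print datetime_from_epoch(epoch_from_timestamp('2001-04-01 12:00:00')), "\n";
    print datetime_from_epoch(epoch_from_timestamp(1044147435)), "\n";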