--- loncom/metadata_database/searchcat.pl 2003/02/03 13:42:16 1.29 +++ loncom/metadata_database/searchcat.pl 2004/04/08 15:57:32 1.55 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.29 2003/02/03 13:42:16 albertel Exp $ +# $Id: searchcat.pl,v 1.55 2004/04/08 15:57:32 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -26,15 +26,6 @@ # # http://www.lon-capa.org/ # -# YEAR=2001 -# 04/14/2001, 04/16/2001 Scott Harrison -# -# YEAR=2002 -# 05/11/2002 Scott Harrison -# -# YEAR=2003 -# Scott Harrison -# ### =pod @@ -52,7 +43,6 @@ filesystem installation location: F - C<10 1 * * 7 www /home/httpd/perl/searchcat.pl> This script only allows itself to be run as the user C. @@ -65,8 +55,8 @@ The metadata is entered into a SQL datab This script also does general database maintenance such as reformatting the C table if it is deprecated. -This script also builds dynamic temporal metadata and stores this inside -a F database file. +This script evaluates dynamic metadata from the authors' +F database file in order to store it in MySQL. This script is playing an increasingly important role for a loncapa library server. The proper operation of this script is critical for a smooth @@ -74,896 +64,563 @@ and correct user experience. =cut -# ========================================================== Setting things up. - -# ------------------------------------------------------ Use external modules. +use strict; +use DBI; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; +use LONCAPA::lonmetadata; use IO::File; use HTML::TokeParser; -use DBI; use GDBM_File; use POSIX qw(strftime mktime); - -# ----------------- Code to enable 'find' subroutine listing of the .meta files use File::Find; -# List of .meta files (used on a per-user basis). -my @metalist; - -# --------------- Read loncapa_apache.conf and loncapa.conf and get variables. -my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf'); -my %perlvar = %{$perlvarref}; -undef($perlvarref); # Remove since sensitive and not needed. -delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed. - -# ------------------------------------- Only run if machine is a library server -if ($perlvar{'lonRole'} ne 'library') - { - exit(0); - } - -# ------------------------------ Make sure this process is running as user=www. -my $wwwid = getpwnam('www'); -if ($wwwid != $<) - { - $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj = "LON: $perlvar{'lonHostID'} User ID mismatch"; - system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ". - "mailto $emailto -s '$subj' > /dev/null"); - exit(1); - } - -# ------------------------------------------------------ Initialize log output. +## +## Use variables for table names so we can test this routine a little easier +my $oldname = 'metadata'; +my $newname = 'newmetadata'; + +# +# Read loncapa_apache.conf and loncapa.conf +my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); +my %perlvar=%{$perlvarref}; +undef $perlvarref; +delete $perlvar{'lonReceipt'}; # remove since sensitive (really?) & not needed +# +# Only run if machine is a library server +exit if ($perlvar{'lonRole'} ne 'library'); +# +# Make sure this process is running from user=www +my $wwwid=getpwnam('www'); +if ($wwwid!=$<) { + my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\ + mailto $emailto -s '$subj' > /dev/null"); + exit 1; +} +# +# Let people know we are running open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); -print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n"); - -my $dbh; # Database object reference handle. - -# ----------------------------- Verify connection to loncapa:metadata database. -unless ( - $dbh = DBI->connect('DBI:mysql:loncapa','www', - $perlvar{'lonSqlAccess'}, - { RaiseError => 0,PrintError => 0}) - ) - { - print(LOG '**** ERROR **** Cannot connect to database!'."\n"); - exit(0); - } - -# ------------------------------ Create loncapa:metadata table if non-existent. -my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('. - 'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '. - 'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '. - 'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '. - 'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '. - 'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '. - 'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '. - 'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '. - 'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '. - 'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '. - 'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM'; - -$dbh->do($make_metadata_table); # Generate the table. - -# ----------------------------- Verify format of the loncapa:metadata database. -# (delete and recreate database if necessary). - -# Make a positive control for verifying table structure. -my $make_metadata_table_CONTROL = $make_metadata_table; -$make_metadata_table_CONTROL =~ - s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/; - -$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); -$dbh->do($make_metadata_table_CONTROL); - -my $table_description; # selectall reference to the table description. - -my $CONTROL_table_string; # What the table description should look like. -my $table_string; # What the table description does look like. - -# Calculate the CONTROL table description (what it should be). -$table_description = $dbh->selectall_arrayref('describe CONTROL_metadata'); -foreach my $table_row (@{$table_description}) - { - $CONTROL_table_string .= join(',',@{$table_row})."\n"; - } - -# Calculate the current table description (what it currently looks like). -$table_description = $dbh->selectall_arrayref('describe metadata'); -foreach my $table_row (@{$table_description}) - { - $table_string .= join(',',@{$table_row})."\n"; - } - -if ($table_string ne $CONTROL_table_string) - { - # Log this incident. - print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'. - '.'."\n"); - # Delete the table. - $dbh->do('DROP TABLE IF EXISTS metadata'); - # Generate the table. - $dbh->do($make_metadata_table); - } - -$dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay. Done with control. - -# ----------------------------------------------- Set utilitysemaphore to zero. -$dbh->do('UPDATE metadata SET utilitysemaphore = 0'); - -# ========================================================= Main functionality. - -# - Determine home authors on this server based on resources dir and user tree. - -# RESOURCES: the resources directory (subdirs correspond to author usernames). -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or - (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n") - and exit(0)); - -# query_home_server_status will look for user home directories on this machine. -my @homeusers = - grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'. - $perlvar{'lonDefDomain'}.'/'.$_) - } grep {!/^\.\.?$/} readdir(RESOURCES); -closedir(RESOURCES); - -unless (@homeusers) - { - print(LOG '=== No home users found on this server.'."\n"); - } - -# Consider each author individually. -foreach my $user (@homeusers) - { - # Make a log entry. - print(LOG "\n".'=== User: '.$user."\n\n"); - - # Get filesystem path to this user's directory. - my $user_directory = - &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user); - - # Remove left-over db-files from a potentially crashed searchcat run. - unlink($user_directory.'/nohist_new_resevaldata.db'); - - # Cleanup the metalist array. - undef(@metalist); - @metalist = (); - - # This will add entries to the @metalist array. - &File::Find::find(\&wanted, - $perlvar{'lonDocRoot'}.'/res/'. - $perlvar{'lonDefDomain'}.'/'.$user); - - # -- process file to get metadata and put into search catalog SQL database - # Also, build and store dynamic metadata. - # Also, delete record entries before refreshing. - foreach my $m (@metalist) - { - # Log this action. - print(LOG "- ".$m."\n"); - - # Get metadata from the file. - my $ref = get_metadata_from_file($m); - - # Make a datarecord identifier for this resource. - my $m2 = '/res/'.declutter($m); - $m2 =~ s/\.meta$//; - - # Build and store dynamic metadata inside nohist_resevaldata.db. - build_on_the_fly_dynamic_metadata($m2); - - # Delete record if it already exists. - my $q2 = 'select * from metadata where url like binary '."'".$m2."'"; - my $sth = $dbh->prepare($q2); - $sth->execute(); - my $r1 = $sth->fetchall_arrayref; - if (@$r1) - { - $sth = - $dbh->prepare('delete from metadata where url like binary '. - "'".$m2."'"); - $sth->execute(); - } - - # Add new/replacement record into the loncapa:metadata table. - $sth = $dbh->prepare('insert into metadata values ('. - '"'.delete($ref->{'title'}).'"'.','. - '"'.delete($ref->{'author'}).'"'.','. - '"'.delete($ref->{'subject'}).'"'.','. - '"'.$m2.'"'.','. - '"'.delete($ref->{'keywords'}).'"'.','. - '"'.'current'.'"'.','. - '"'.delete($ref->{'notes'}).'"'.','. - '"'.delete($ref->{'abstract'}).'"'.','. - '"'.delete($ref->{'mime'}).'"'.','. - '"'.delete($ref->{'language'}).'"'.','. - '"'.sql_formatted_time( - delete($ref->{'creationdate'})).'"'.','. - '"'.sql_formatted_time( - delete($ref->{'lastrevisiondate'})).'"'.','. - '"'.delete($ref->{'owner'}).'"'.','. - '"'.delete($ref->{'copyright'}).'"'.','. - '1'.')'); - $sth->execute(); - } - -# ----------------------- Clean up database, remove stale SQL database records. - $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0'); - -# -------------------------------------------------- Copy over the new db-files - system('mv '.$user_directory.'/nohist_new_resevaldata.db '. - $user_directory.'/nohist_resevaldata.db'); - } - -# --------------------------------------------------- Close database connection -$dbh->disconnect; -print LOG "\n==== Searchcat completed ".localtime()." ====\n"; +print LOG '==== Searchcat Run '.localtime()."====\n"; +# +# Connect to database +my $dbh; +if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'}, + { RaiseError =>0,PrintError=>0}))) { + print LOG "Cannot connect to database!\n"; + die "MySQL Error: Cannot connect to database!\n"; +} +# This can return an error and still be okay, so we do not bother checking. +# (perhaps it should be more robust and check for specific errors) +$dbh->do('DROP TABLE IF EXISTS '.$newname); +# +# Create the new table +my $request = &LONCAPA::lonmetadata::create_metadata_storage($newname); +$dbh->do($request); +if ($dbh->err) { + $dbh->disconnect(); + print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; + die $dbh->errstr; +} +# +# find out which users we need to examine +opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); +my @homeusers = + grep { + &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_"); + } grep { + !/^\.\.?$/; + } readdir(RESOURCES); +closedir RESOURCES; +# +# Loop through the users +foreach my $user (@homeusers) { + print LOG "=== User: ".$user."\n"; + my $prodir=&propath($perlvar{'lonDefDomain'},$user); + # + # Use File::Find to get the files we need to read/modify + find( + {preprocess => \&only_meta_files, +# wanted => \&print_filename, +# wanted => \&log_metadata, + wanted => \&process_meta_file, + }, + "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user"); +} +# +# Rename the table +$dbh->do('DROP TABLE IF EXISTS '.$oldname); +if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { + print LOG "MySQL Error Rename: ".$dbh->errstr."\n"; + die $dbh->errstr; +} +if (! $dbh->disconnect) { + print LOG "MySQL Error Disconnect: ".$dbh->errstr."\n"; + die $dbh->errstr; +} +## +## Finished! +print LOG "==== Searchcat completed ".localtime()." ====\n"; close(LOG); -exit(0); - -# ================================================================ Subroutines. - -=pod - -=head1 SUBROUTINES - -=cut - -=pod - -B - translate to unstrange escaped syntax to strange characters. - -=over 4 - -Parameters: - -=item I<$str> - string with unweird characters. -=back +&write_type_count(); +&write_copyright_count(); -=over 4 +exit 0; -Returns: - -=item C - string with potentially weird characters. - -=back - -=cut - -sub unescape ($) - { - my $str = shift(@_); +######################################################## +######################################################## +### ### +### File::Find support routines ### +### ### +######################################################## +######################################################## +## +## &only_meta_files +## +## Called by File::Find. +## Takes a list of files/directories in and returns a list of files/directories +## to search. +sub only_meta_files { + my @PossibleFiles = @_; + my @ChosenFiles; + foreach my $file (@PossibleFiles) { + if ( ($file =~ /\.meta$/ && # Ends in meta + $file !~ /\.\d+\.[^\.]+\.meta$/ # is not for a prior version + ) || (-d $file )) { # directories are okay + # but we do not want /. or /.. + push(@ChosenFiles,$file); + } + } + return @ChosenFiles; +} + +## +## +## Debugging routines, use these for 'wanted' in the File::Find call +## +sub print_filename { + my ($file) = $_; + my $fullfilename = $File::Find::name; + if (-d $file) { + print LOG " Got directory ".$fullfilename."\n"; + } else { + print LOG " Got file ".$fullfilename."\n"; + } + $_=$file; +} + +sub log_metadata { + my ($file) = $_; + my $fullfilename = $File::Find::name; + return if (-d $fullfilename); # No need to do anything here for directories + print LOG $fullfilename."\n"; + my $ref=&metadata($fullfilename); + if (! defined($ref)) { + print LOG " No data\n"; + return; + } + while (my($key,$value) = each(%$ref)) { + print LOG " ".$key." => ".$value."\n"; + } + &count_copyright($ref->{'copyright'}); + $_=$file; +} + + +## +## process_meta_file +## Called by File::Find. +## Only input is the filename in $_. +sub process_meta_file { + my ($file) = $_; + my $filename = $File::Find::name; + return if (-d $filename); # No need to do anything here for directories + # + print LOG $filename."\n"; + # + my $ref=&metadata($filename); + # + # $url is the original file url, not the metadata file + my $url='/res/'.&declutter($filename); + $url=~s/\.meta$//; + print LOG " ".$url."\n"; + # + # Ignore some files based on their metadata + if ($ref->{'obsolete'}) { + print LOG "obsolete\n"; + return; + } + &count_copyright($ref->{'copyright'}); + if ($ref->{'copyright'} eq 'private') { + print LOG "private\n"; + return; + } + # + # Find the dynamic metadata + my %dyn; + if ($url=~ m:/default$:) { + $url=~ s:/default$:/:; + } else { + # %dyn=&dynamicmeta($url); + &count_type($url); + } + # + $ref->{'creationdate'} = &sqltime($ref->{'creationdate'}); + $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'}); + my %Data = ( + %$ref, + %dyn, + 'url'=>$url, + 'version'=>'current'); + my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, + \%Data); + if ($err) { + print LOG "\nMySQL Error Insert: ".$err."\n"; + die $err; + } + if ($count < 1) { + print LOG "Unable to insert record into MySQL database for $url\n"; + die "Unable to insert record into MySQl database for $url"; + } else { + print LOG "Count = ".$count."\n"; + } + # + # Reset $_ before leaving + $_ = $file; +} + +######################################################## +######################################################## +### ### +### &metadata($uri) ### +### Retrieve metadata for the given file ### +### ### +######################################################## +######################################################## +sub metadata { + my ($uri)=@_; + my %metacache=(); + $uri=&declutter($uri); + my $filename=$uri; + $uri=~s/\.meta$//; + $uri=''; + if ($filename !~ /\.meta$/) { + $filename.='.meta'; + } + my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename); + return undef if (! defined($metastring)); + my $parser=HTML::TokeParser->new(\$metastring); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + my $unikey=$entry; + if (defined($token->[2]->{'part'})) { + $unikey.='_'.$token->[2]->{'part'}; + } + if (defined($token->[2]->{'name'})) { + $unikey.='_'.$token->[2]->{'name'}; + } + if ($metacache{$uri.'keys'}) { + $metacache{$uri.'keys'}.=','.$unikey; + } else { + $metacache{$uri.'keys'}=$unikey; + } + foreach ( @{$token->[3]}) { + $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_}; + } + if (! ($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){ + $metacache{$uri.''.$unikey} = + $metacache{$uri.''.$unikey.'.default'}; + } + } # End of ($token->[0] eq 'S') + } + return \%metacache; +} + +## +## &getfile($filename) +## Slurps up an entire file into a scalar. +## Returns undef if the file does not exist +sub getfile { + my $file = shift(); + if (! -e $file ) { + return undef; + } + my $fh=IO::File->new($file); + my $contents = ''; + while (<$fh>) { + $contents .= $_; + } + return $contents; +} + +######################################################## +######################################################## +### ### +### Dynamic Metadata ### +### ### +######################################################## +######################################################## +sub dynamicmeta { + my $url = &declutter(shift()); + $url =~ s/\.meta$//; + my %data = ('count' => 0, + 'course' => 0, + 'course_list' => '', + 'avetries' => 'NULL', + 'avetries_list' => '', + 'stdno' => 0, + 'stdno_list' => '', + 'usage' => 0, + 'usage_list' => '', + 'goto' => 0, + 'goto_list' => '', + 'comefrom' => 0, + 'comefrom_list' => '', + 'difficulty' => 'NULL', + 'difficulty_list' => '', + 'sequsage' => '0', + 'sequsage_list' => '', + 'clear' => 'NULL', + 'technical' => 'NULL', + 'correct' => 'NULL', + 'helpful' => 'NULL', + 'depth' => 'NULL', + 'comments' => '', + ); + my ($dom,$auth)=($url=~/^(\w+)\/(\w+)\//); + my $prodir=&propath($dom,$auth); + # + # Get metadata except counts + my %evaldata; + if (! tie(%evaldata,'GDBM_File', + $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { + return (undef); + } + my %sum=(); + my %count=(); + my %concat=(); + my %listitems=( + 'course' => 'add', + 'goto' => 'add', + 'comefrom' => 'add', + 'avetries' => 'average', + 'stdno' => 'add', + 'difficulty' => 'average', + 'clear' => 'average', + 'technical' => 'average', + 'helpful' => 'average', + 'correct' => 'average', + 'depth' => 'average', + 'comments' => 'append', + 'usage' => 'count' + ); + # + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; + while (my ($esckey,$value)=each %evaldata) { + my $key=&unescape($esckey); + if ($key=~/$regexp/) { + my ($item,$purl,$cat)=split(/___/,$key); + $count{$cat}++; + if ($listitems{$cat} ne 'append') { + if (defined($sum{$cat})) { + $sum{$cat}+=&unescape($value); + $concat{$cat}.=','.$item; + } else { + $sum{$cat}=&unescape($value); + $concat{$cat}=$item; + } + } else { + if (defined($sum{$cat})) { + if ($evaldata{$esckey}=~/\w/) { + $sum{$cat}.='
'.&unescape($evaldata{$esckey}); + } + } else { + $sum{$cat}=''.&unescape($evaldata{$esckey}); + } + } + } + } + untie(%evaldata); + # transfer gathered data to returnhash, calculate averages where applicable + my %returnhash; + while (my $cat=each(%count)) { + if ($count{$cat} eq 'nan') { next; } + if ($sum{$cat} eq 'nan') { next; } + if ($listitems{$cat} eq 'average') { + if ($count{$cat}) { + $returnhash{$cat}=int(($sum{$cat}/$count{$cat})*100.0+0.5)/100.0; + } else { + $returnhash{$cat}='NULL'; + } + } elsif ($listitems{$cat} eq 'count') { + $returnhash{$cat}=$count{$cat}; + } else { + $returnhash{$cat}=$sum{$cat}; + } + $returnhash{$cat.'_list'}=$concat{$cat}; + } + # + # get count + if (tie(my %evaldata,'GDBM_File', + $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { + my $escurl=&escape($url); + if (! exists($evaldata{$escurl})) { + $returnhash{'count'}=0; + } else { + $returnhash{'count'}=$evaldata{$escurl}; + } + untie %evaldata; + } + return %returnhash; +} + +######################################################## +######################################################## +### ### +### Counts ### +### ### +######################################################## +######################################################## +{ + +my %countext; + +sub count_type { + my $file=shift; + $file=~/\.(\w+)$/; + my $ext=lc($1); + $countext{$ext}++; +} + +sub write_type_count { + open(RESCOUNT,'>/home/httpd/html/lon-status/rescount.txt'); + while (my ($extension,$count) = each(%countext)) { + print RESCOUNT $extension.'='.$count.'&'; + } + print RESCOUNT 'time='.time."\n"; + close(RESCOUNT); +} + +} # end of scope for %countext + +{ + +my %copyrights; + +sub count_copyright { + $copyrights{@_[0]}++; +} + +sub write_copyright_count { + open(COPYCOUNT,'>/home/httpd/html/lon-status/copyrightcount.txt'); + while (my ($copyright,$count) = each(%copyrights)) { + print COPYCOUNT $copyright.'='.$count.'&'; + } + print COPYCOUNT 'time='.time."\n"; + close(COPYCOUNT); +} + +} # end of scope for %copyrights + +######################################################## +######################################################## +### ### +### Miscellanous Utility Routines ### +### ### +######################################################## +######################################################## +## +## &ishome($username) +## Returns 1 if $username is a LON-CAPA author, 0 otherwise +## (copied from lond, modification of the return value) +sub ishome { + my $author=shift; + $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/; + my ($udom,$uname)=split(/\//,$author); + my $proname=propath($udom,$uname); + if (-e $proname) { + return 1; + } else { + return 0; + } +} + +## +## &propath($udom,$uname) +## Returns the path to the users LON-CAPA directory +## (copied from lond) +sub propath { + my ($udom,$uname)=@_; + $udom=~s/\W//g; + $uname=~s/\W//g; + my $subdir=$uname.'__'; + $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/; + my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname"; + return $proname; +} + +## +## &sqltime($timestamp) +## +## Convert perl $timestamp to MySQL time. MySQL expects YYYY-MM-DD HH:MM:SS +## +sub sqltime { + my ($time) = @_; + my $mysqltime; + if ($time =~ + /(\d+)-(\d+)-(\d+) # YYYY-MM-DD + \s # a space + (\d+):(\d+):(\d+) # HH:MM::SS + /x ) { + # Some of the .meta files have the time in mysql + # format already, so just make sure they are 0 padded and + # pass them back. + $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', + $1,$2,$3,$4,$5,$6); + } elsif ($time =~ /^\d+$/) { + my @TimeData = gmtime($time); + # Alter the month to be 1-12 instead of 0-11 + $TimeData[4]++; + # Alter the year to be from 0 instead of from 1900 + $TimeData[5]+=1900; + $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', + @TimeData[5,4,3,2,1,0]); + } else { + print LOG " Unable to decode time ".$time."\n"; + $mysqltime = 0; + } + return $mysqltime; +} + +## +## &declutter($filename) +## Given a filename, returns a url for the filename. +sub declutter { + my $thisfn=shift; + $thisfn=~s/^$perlvar{'lonDocRoot'}//; + $thisfn=~s/^\///; + $thisfn=~s/^res\///; + return $thisfn; +} + +## +## Escape / Unescape special characters +sub unescape { + my $str=shift; $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; - return($str); - } - -=pod - -B - translate strange characters to unstrange escaped syntax. - -=over 4 - -Parameters: - -=item I<$str> - string with potentially weird characters to unweird-ify. + return $str; +} -=back - -=over 4 - -Returns: - -=item C - unweird-ified string. - -=back - -=cut - -sub escape ($) - { - my $str = shift(@_); +sub escape { + my $str=shift; $str =~ s/(\W)/"%".unpack('H2',$1)/eg; - return($str); - } - -=pod - -B - evaluate and store dynamic metadata. - -Dynamic metadata is stored in a nohist_resevaldata GDBM database. -The only thing that this subroutine really makes happen is adjusting -a 'count' value inside the F as well -as updating F with information from -F. - -It may need optmization, but since it gets called once a week. . . -=over 4 - -Parameters: - -=item I<$url> - the filesystem path (url may be a misnomer...) - -=back - -=over 4 - -Returns: - -=item C - key-value table of dynamically evaluated metadata. - -=back - -=cut - -sub build_on_the_fly_dynamic_metadata ($) - { - # some elements in here maybe non-obvious - - # Need to compute the user's directory. - my $url = &declutter(shift(@_)); - $url =~ s/\.meta$//; - my %returnhash = (); - my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!); - my $user_directory = &construct_path_to_user_directory($adomain,$aauthor); - - # Attempt a GDBM database instantiation inside users directory and proceed. - if ((tie(%evaldata,'GDBM_File', - $user_directory. - '/nohist_resevaldata.db',&GDBM_READER(),0640)) && - (tie(%newevaldata,'GDBM_File', - $user_directory. - '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) - { - # For different variables, track the running sum and counts. - my %sum = (); - my %cnt = (); - - # Define computed items as a sum (add) or an average (avg) or a raw - # count (cnt) or 'app'? - my %listitems=('count' => 'add', - 'course' => 'add', - 'avetries' => 'avg', - 'stdno' => 'add', - 'difficulty' => 'avg', - 'clear' => 'avg', - 'technical' => 'avg', - 'helpful' => 'avg', - 'correct' => 'avg', - 'depth' => 'avg', - 'comments' => 'app', - 'usage' => 'cnt' - ); - - # Untaint the url and use as part of a regular expression. - my $regexp = $url; - $regexp =~ s/(\W)/\\$1/g; - $regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs - - # Check existing nohist database for this url. - # this is modfying the 'count' entries - # and copying all othe entries over - foreach (keys %evaldata) - { - my $key = &unescape($_); - if ($key =~ /$regexp/) # If url-based entry exists. - { - my $ctype = $1; # Set to specific category type. - - # Do an increment for this category type. - if (defined($cnt{$ctype})) - { - $cnt{$ctype}++; - } - else - { - $cnt{$ctype} = 1; - } - unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN? - { - # Increment the sum based on the evaluated data in the db. - if (defined($sum{$ctype})) - { - $sum{$ctype} += $evaldata{$_}; - } - else - { - $sum{$ctype} = $evaldata{$_}; - } - } - else # 'app' mode, means to use '
' as a separator - { - if (defined($sum{$ctype})) - { - if ($evaldata{$_}) - { - $sum{$ctype} .= '
'.$evaldata{$_}; - } - } - else - { - $sum{$ctype} = ''.$evaldata{$_}; - } - } - if ($ctype ne 'count') - { - # this is copying all data except 'count' attributes - $newevaldata{$_} = $evaldata{$_}; - } - } - } - - # the only other time this loop is useful is for the 'count' hash - # element - foreach (keys %cnt) - { - if ($listitems{$_} eq 'avg') - { - $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; - } - elsif ($listitems{$_} eq 'cnt') - { - $returnhash{$_} = $cnt{$_}; - } - else - { - $returnhash{$_} = $sum{$_}; - } - } - - # seems to be doing something useful - if ($returnhash{'count'}) - { - my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey} = $returnhash{'count'}; - } - - untie(%evaldata); # Close/release the original nohist database. - untie(%newevaldata); # Close/release the new nohist database. - } - return(%returnhash); - } - -=pod - -B - used by B subroutine. - -This evaluates whether a file is wanted, and pushes it onto the -I<@metalist> array. This subroutine was, for the most part, auto-generated -by the B command. - -=over 4 - -Parameters: - -=item I<$file> - a path to the file. - -=back - -=over 4 - -Returns: - -=item C - true or false based on logical statement. - -=back - -=cut - -sub wanted ($) - { - (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && - -f $_ && - /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ && - push(@metalist,$File::Find::dir.'/'.$_); - } - -=pod - -B - read xml-tagged file and return parsed metadata. - -I - -=over 4 - -Parameters: - -=item I<$file> - a path.to the file. - -=back - -=over 4 - -Returns: - -=item C - a hash array (keys and values). - -=back - -=cut - -sub get_metadata_from_file ($) - { - my ($filename) = @_; - my %metatable; # Used to store return value of hash-tabled metadata. - $filename = &declutter($filename); # Remove non-identifying filesystem info - my $uri = ''; # The URI is not relevant in this scenario. - unless ($filename =~ m/\.meta$/) # Unless ending with .meta. - { - $filename .= '.meta'; # Append a .meta suffix. - } - # Get the file contents. - my $metadata_string = - &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename); - - # Parse the file based on its XML tags. - my $parser = HTML::TokeParser->new(\$metadata_string); - my $token; - while ($token = $parser->get_token) # Loop through tokens. - { - if ($token->[0] eq 'S') # If it is a start token. - { - my $entry = $token->[1]; - my $unikey = $entry; # A unique identifier for this xml tag key. - if (defined($token->[2]->{'part'})) - { - $unikey .= '_'.$token->[2]->{'part'}; - } - if (defined($token->[2]->{'name'})) - { - $unikey .= '_'.$token->[2]->{'name'}; - } - # Append $unikey to metatable's keys entry. - if ($metatable{$uri.'keys'}) - { - $metatable{$uri.'keys'} .= ','.$unikey; - } - else - { - $metatable{$uri.'keys'} = $unikey; - } - # Insert contents into metatable entry for the unikey. - foreach my $t3 (@{$token->[3]}) - { - $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3}; - } - # If there was no text contained inside the tags, set = default. - unless - ( - $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry) - ) - { - $metatable{$uri.''.$unikey} = - $metatable{$uri.''.$unikey.'.default'}; - } - } - } - # Return with a key-value table of XML tags and their tag contents. - return(\%metatable); - } - -=pod - -B - returns either the contents of the file or a -1. - -=over 4 - -Parameters: - -=item I<$file> - a complete filesystem path.to the file. - -=back - -=over 4 - -Returns: - -=item C - file contents or a -1. - -=back - -=cut - -sub get_file_contents ($) - { - my $file = shift(@_); - - # If file does not exist, then return a -1 value. - unless (-e $file) - { - return(-1); - } - - # Read in file contents. - my $file_handle = IO::File->new($file); - my $file_contents = ''; - while (<$file_handle>) - { - $file_contents .= $_; - } - - # Return file contents. - return($file_contents); - } - -=pod - -B - Declutters URLs (remove extraneous prefixed filesystem path). - -=over 4 - -Parameters: - -=item I<$filesystem_path> - a complete filesystem path. - -=back - -=over 4 - -Returns: - -=item C - remnants of the filesystem path (beginning portion removed). - -=back - -=cut - -sub declutter - { - my $filesystem_path = shift(@_); - - # Remove beginning portions of the filesystem path. - $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//; - $filesystem_path =~ s!^/!!; - $filesystem_path =~ s!^res/!!; - - # Return what is remaining for the filesystem path. - return($filesystem_path); - } - -=pod - -B - Is this the home server of an author's directory? - -=over 4 - -Parameters: - -=item I<$author_filesystem_path> - directory path for a user. - -=back - -=over 4 - -Returns: - -=item C - 1 if true; 0 if false. - -=back - -=cut - -sub query_home_server_status ($) - { - my $author_filesystem_path = shift(@_); - - # Remove beginning portion of this filesystem path. - $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!; - - # Construct path to the author's ordinary user directory. - my ($user_domain,$username) = split(m!/!,$author_filesystem_path); - my $user_directory_path = construct_path_to_user_directory($user_domain, - $username); - - # Return status of whether the user directory path is defined. - if (-e $user_directory_path) - { - return(1); # True. - } - else - { - return(0); # False. - } - } - -=pod - -B ($$) - makes a filesystem path to user dir. - -=over 4 - -Parameters: - -=item I<$user_domain> - the loncapa domain of the user. - -=item I<$username> - the unique username (user id) of the user. - -=back - -=over 4 - -Returns: - -=item C - representing the path on the filesystem. - -=back - -=cut - -sub construct_path_to_user_directory ($$) - { - my ($user_domain,$username) = @_; - - # Untaint. - $user_domain =~ s/\W//g; - $username =~ s/\W//g; - - # Create three levels of sub-directoried filesystem path - # based on the first three characters of the username. - my $sub_filesystem_path = $username.'__'; - $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!; - - # Use the sub-directoried levels and other variables to generate - # the complete filesystem path. - my $complete_filesystem_path = - join('/',($perlvar{'lonUsersDir'}, - $user_domain, - $sub_filesystem_path, - $username)); - - # Return the complete filesystem path. - return($complete_filesystem_path); - } - -=pod - -B (@) - turns seconds since epoch into datetime sql format. - -=over 4 - -Parameters: - -=item I<$epochtime> - time in seconds since epoch (may need to be sanitized). - -=back - -=over 4 - -Returns: - -=item C - datetime sql formatted string. - -=back - -=cut - -sub sql_formatted_time ($) - { - # Sanitize the time argument and convert to localtime array. - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime(&sanitize_time(shift(@_))); - - # Convert month from (0..11) to (1..12). - $mon += 1; - - # Make the year compatible with A.D. specification. - $year += 1900; - - # Return a date which is compatible with MySQL's "DATETIME" format. - return(join('-',($year,$mon,$mday)). - ' '. - join(':',($hour,$min,$sec)) - ); - } - - -# ==================================== The following two subroutines are needed -# for accommodating incorrect time formats inside the metadata. - -=pod - -B (@) - turns time metadata into seconds since epoch. - -=over 4 - -Parameters: - -=item I<%time_metadata> - a key-value listing characterizing month, year, etc. - -=back - -=over 4 - -Returns: - -=item C - seconds since epoch. - -=back - -=cut - -sub make_seconds_since_epoch (@) - { - # Keytable of time metadata. - my %time_metadata = @_; - - # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC). - return(POSIX::mktime( - ($time_metadata{'seconds'}, - $time_metadata{'minutes'}, - $time_metadata{'hours'}, - $time_metadata{'day'}, - $time_metadata{'month'}-1, - $time_metadata{'year'}-1900, - 0, - 0, - $time_metadata{'dlsav'}) - ) - ); - } - -=pod - -B - if time looks sql-formatted, make it seconds since epoch. - -Somebody described this subroutine as -"retro-fixing of un-backward-compatible time format". - -What this means, is that a part of this code expects to get UTC seconds -since the epoch (beginning of 1970). Yet, some of the .meta files have -sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch -integers (e.g. 1044147435). These time strings do not encode the timezone -and, in this sense, can be considered "un-backwards-compatible". - -=over 4 - -Parameters: - -=item I<$potentially_badformat_string> - string to "retro-fix". - -=back - -=over 4 - -Returns: - -=item C - seconds since epoch. - -=back - -=cut - -sub sanitize_time ($) - { - my $timestamp = shift(@_); - # If timestamp is in this unexpected format.... - if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) - { - # then convert into seconds since epoch (the expected format). - $timestamp = &make_seconds_since_epoch( - 'year' => $1, - 'month' => $2, - 'day' => $3, - 'hours' => $4, - 'minutes' => $5, - 'seconds' => $6 - ); - } - # Otherwise we assume timestamp to be as expected. - return($timestamp); - } - -=pod - -=head1 AUTHOR - -Written to help the loncapa project. - -Scott Harrison, sharrison@users.sourceforge.net - -This is distributed under the same terms as loncapa (i.e. "freeware"). - -=cut + return $str; +}