#!/usr/bin/perl
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
# $Id: searchcat.pl,v 1.73 2007/01/02 07:26:11 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
###

=pod

=head1 NAME

B<searchcat.pl> - put authoritative filesystem data into sql database.

=head1 SYNOPSIS

Ordinarily this script is to be called from a loncapa cron job
(CVS source location: F<loncapa/loncom/cron/loncapa>; typical
filesystem installation location: F</etc/cron.d/loncapa>).

Here is the cron job entry.

C<# Repopulate and refresh the metadata database used for the search catalog.>
C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>

This script only allows itself to be run as the user C<www>.

=head1 DESCRIPTION

This script goes through a loncapa resource directory and gathers metadata.
The metadata is entered into a SQL database.

This script also does general database maintenance such as reformatting
the C<metadata> table if it is deprecated.

This script evaluates dynamic metadata from the authors'
F<nohist_resevaldata.db> database file in order to store it in MySQL.

This script is playing an increasingly important role for a loncapa
library server.  The proper operation of this script is critical for a smooth
and correct user experience.

=cut

use strict;
use DBI;
use lib '/home/httpd/lib/perl/';
use LONCAPA::lonmetadata;
use Getopt::Long;
use IO::File;
use HTML::TokeParser;
use GDBM_File;
use POSIX qw(strftime mktime);
use Apache::lonnet();
use File::Find;

#
# Set up configuration options
my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
GetOptions (
            'help'      => \$help,
            'simulate'  => \$simulate,
            'only=s'    => \$oneuser,
            'verbose=s' => \$verbose,
            'debug'     => \$debug,
            );

if ($help) {
    print <<"ENDHELP";
$0
Rebuild and update the LON-CAPA metadata database.
Options:
    -help          Print this help
    -simulate      Do not modify the database.
    -only=user     Only compute for the given user.  Implies -simulate
    -verbose=val   Sets logging level, val must be a number
    -debug         Turns on debugging output
ENDHELP
    exit 0;
}

if (! defined($debug)) {
    $debug = 0;
}

if (! defined($verbose)) {
    $verbose = 0;
}

if (defined($oneuser)) {
    $simulate=1;
}

##
## Use variables for table names so we can test this routine a little easier
my %oldnames = (
                 'metadata'    => 'metadata',
                 'portfolio'   => 'portfolio_metadata',
                 'access'      => 'portfolio_access',
                 'addedfields' => 'portfolio_addedfields',
               );

my %newnames;
# new table names - append pid to have unique temporary tables
foreach my $key (keys(%oldnames)) {
    $newnames{$key} = 'new'.$oldnames{$key}.$$;
}

#
# Only run if machine is a library server
exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library');

#
# Make sure this process is running from user=www
my $wwwid=getpwnam('www');
# getpwnam() yields undef when there is no 'www' account; undef compares
# numerically equal to uid 0, so it has to be rejected explicitly or a root
# invocation would slip through.
if (!defined($wwwid) || $wwwid!=$<) {
    my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}";
    my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch";
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | mail -s '$subj' $emailto > /dev/null");
    exit 1;
}

#
# Let people know we are running
if (! open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log')) {
    # Keep going without a log (as before), but say so on STDERR.
    warn "Unable to open searchcat.log for appending: $!\n";
}
&log(0,'==== Searchcat Run '.localtime()."====");

if ($debug) {
    &log(0,'simulating') if ($simulate);
    &log(0,'only processing user '.$oneuser) if ($oneuser);
    &log(0,'verbosity level = '.$verbose);
}

#
# Connect to database
my $dbh;
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'},
                          { RaiseError =>0,PrintError=>0}))) {
    &log(0,"Cannot connect to database!");
    die "MySQL Error: Cannot connect to database!\n";
}

# This can return an error and still be okay, so we do not bother checking.
# (perhaps it should be more robust and check for specific errors)
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
    }
}

#
# Create the new metadata and portfolio tables
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        my $request =
            &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key});
        $dbh->do($request);
        if ($dbh->err) {
            # Read the message before disconnecting: DBI resets err/errstr
            # on subsequent method calls, so the text would be lost.
            my $errstr = $dbh->errstr;
            $dbh->disconnect();
            &log(0,"MySQL Error Create: ".$errstr);
            die $errstr;
        }
    }
}

#
# find out which users we need to examine
my @domains = sort(&Apache::lonnet::current_machine_domains());
&log(9,'domains ="'.join('","',@domains).'"');

foreach my $dom (@domains) {
    &log(9,'domain = '.$dom);
    my $resdir = "$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom";
    my @homeusers;
    if (opendir(RESOURCES,$resdir)) {
        @homeusers =
            grep {
                &ishome("$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom/$_");
            } grep {
                !/^\.\.?$/;
            } readdir(RESOURCES);
        closedir RESOURCES;
    } else {
        # Nothing to list; carry on with an empty user list for this domain.
        &log(0,"Unable to open resource directory $resdir: $!");
    }
    &log(5,'users = '.$dom.':'.join(',',@homeusers));
    #
    if ($oneuser) {
        @homeusers=($oneuser);
    }
    #
    # Loop through the users
    foreach my $user (@homeusers) {
        &log(0,"=== User: ".$user);
        &process_dynamic_metadata($user,$dom);
        #
        # Use File::Find to get the files we need to read/modify
        find(
             {preprocess => \&only_meta_files,
              #wanted     => \&print_filename,
              #wanted     => \&log_metadata,
              wanted     => \&process_meta_file,
              no_chdir   => 1, },
             join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) );
    }
    # Search for public portfolio files
    my %portusers;
    if ($oneuser) {
        %portusers = (
                        $oneuser => '',
                      );
    } else {
        my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom;
        &descend_tree($dir,0,\%portusers);
    }
    foreach my $uname (keys(%portusers)) {
        my $urlstart = '/uploaded/'.$dom.'/'.$uname;
        my $pathstart = &propath($dom,$uname).'/userfiles';
        my $is_course = &Apache::lonnet::is_course($dom,$uname);
        my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname);
        my %access = &Apache::lonnet::get_access_controls($curr_perm);
        foreach my $file (keys(%access)) {
            my ($group,$url,$fullpath);
            if ($is_course) {
                # Course portfolio entries are keyed as "<group>/<path>".
                ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/);
                $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path;
                $url = $urlstart.'/groups/'.$group.'/portfolio'.$path;
            } else {
                $fullpath = $pathstart.'/portfolio'.$file;
                $url = $urlstart.'/portfolio'.$file;
            }
            if (ref($access{$file}) eq 'HASH') {
                &process_portfolio_access_data($url,$access{$file});
            }
            &process_portfolio_metadata($url,$fullpath,$is_course,$dom,
                                        $uname,$group);
        }
    }
}

#
# Rename the tables
if (! $simulate) {
    foreach my $key (keys(%oldnames)) {
        if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) {
            $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key});
            if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) {
                &log(0,"MySQL Error Rename: ".$dbh->errstr);
                die $dbh->errstr;
            } else {
                &log(1,"MySQL table rename successful for $key.");
            }
        }
    }
}
if (! $dbh->disconnect) {
    &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
    die $dbh->errstr;
}
##
## Finished!
&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);

&write_type_count();
&write_copyright_count();

exit 0;

##
## Status logging routine.  Inputs: $level, $message
##
## $level 0 should be used for normal output and error messages
##
## $message does not need to end with \n.  In the case of errors
## the message should contain as much information as possible to
## help in diagnosing the problem.
##
## NOTE: this sub shares its name with the log() builtin, so it must always
## be invoked as &log(...).
##
sub log {
    my ($level,$message)=@_;
    $level = 0 if (! defined($level));
    if ($verbose >= $level) {
        print LOG $message.$/;
    }
}

##
## &descend_tree($dir,$depth,$alldomusers)
## Recursively walks $dir.  Entries found once $depth has reached 4 that
## contain a file_permissions.db file are recorded as keys (with empty
## string values) in the hash referenced by $alldomusers.  Entries whose
## names start with '.' are ignored.
sub descend_tree {
    my ($dir,$depth,$alldomusers) = @_;
    if (-d $dir) {
        opendir(DIR,$dir);
        my @contents = grep(!/^\./,readdir(DIR));
        closedir(DIR);
        $depth ++;
        foreach my $item (@contents) {
            if ($depth < 4) {
                &descend_tree($dir.'/'.$item,$depth,$alldomusers);
            } else {
                if (-e $dir.'/'.$item.'/file_permissions.db') {
                    $$alldomusers{$item} = '';
                }
            }
        }
    }
}

##
## &process_portfolio_access_data($url,$access_hash)
## Stores one row in the new portfolio_access table for every access control
## key in %$access_hash whose scope is 'public' or 'guest'.  Keys are parsed
## as "<num>:<scope>_<end>_<start>".  Nothing is written when simulating.
sub process_portfolio_access_data {
    my ($url,$access_hash) = @_;
    foreach my $key (keys(%{$access_hash})) {
        my $acc_data;
        $acc_data->{url} = $url;
        $acc_data->{keynum} = $key;
        my ($num,$scope,$end,$start) =
            ($key =~ /^([^:]+):([a-z]+)_(\d*)_?(\d*)$/);
        next if (($scope ne 'public') && ($scope ne 'guest'));
        $acc_data->{scope} = $scope;
        if ($end != 0) {
            $acc_data->{end} = &LONCAPA::lonmetadata::sqltime($end);
        }
        $acc_data->{start} = &LONCAPA::lonmetadata::sqltime($start);
        if (! $simulate) {
            my ($count,$err) =
                &LONCAPA::lonmetadata::store_metadata($dbh,
                                                      $newnames{'access'},
                                                      'portfolio_access',$acc_data);
            if ($err) {
                &log(0,"MySQL Error Insert: ".$err);
            }
            if ($count < 1) {
                &log(0,"Unable to insert record into MySQL database for $url");
            }
        }
    }
}

##
## &process_portfolio_metadata($url,$fullpath,$is_course,$dom,$uname,$group)
## Reads the metadata for one portfolio file and, unless simulating, stores
## it in the new portfolio_metadata table; any non-standard fields are stored
## one row per field in the new portfolio_addedfields table.
sub process_portfolio_metadata {
    my ($url,$fullpath,$is_course,$dom,$uname,$group) = @_;
    my ($ref,$crs,$addedfields) = &portfolio_metadata($fullpath,$dom,$uname,
                                                      $group);
    &getfiledates($ref,$fullpath);
    if ($is_course) {
        $ref->{'groupname'} = $group;
    }
    my %Data;
    if (ref($ref) eq 'HASH') {
        %Data = %{$ref};
    }
    %Data = (
             %Data,
             'url'=>$url,
             'version'=>'current',
            );
    if (! $simulate) {
        my ($count,$err) =
            &LONCAPA::lonmetadata::store_metadata($dbh,
                                                  $newnames{'portfolio'},
                                                  'portfolio_metadata',\%Data);
        if ($err) {
            &log(0,"MySQL Error Insert: ".$err);
        }
        if ($count < 1) {
            &log(0,"Unable to insert record into MySQL portfolio_metadata database table for $url");
        }
        if (ref($addedfields) eq 'HASH') {
            if (keys(%{$addedfields}) > 0) {
                foreach my $key (keys(%{$addedfields})) {
                    my $added_data = {
                                       'url'              => $url,
                                       'field'            => $key,
                                       'value'            => $addedfields->{$key},
                                       'courserestricted' => $crs,
                                     };
                    ($count,$err) =
                        &LONCAPA::lonmetadata::store_metadata($dbh,
                                                              $newnames{'addedfields'},
                                                              'portfolio_addedfields',
                                                              $added_data);
                    if ($err) {
                        &log(0,"MySQL Error Insert: ".$err);
                    }
                    if ($count < 1) {
                        &log(0,"Unable to insert record into MySQL portfolio_addedfields database table for url = $url and field = $key");
                    }
                }
            }
        }
    }
    return;
}

########################################################
########################################################
###                                                  ###
###          File::Find support routines             ###
###                                                  ###
########################################################
########################################################
##
## &only_meta_files
##
## Called by File::Find.
## Takes a list of files/directories in and returns a list of files/directories
## to search.
sub only_meta_files {
    my @PossibleFiles = @_;
    my @ChosenFiles;
    foreach my $file (@PossibleFiles) {
        if ( ($file =~ /\.meta$/ &&            # Ends in meta
              $file !~ /\.\d+\.[^\.]+\.meta$/  # is not for a prior version
              ) || (-d $File::Find::dir."/".$file )) { # directories are okay
                 # but we do not want /. or /..
            push(@ChosenFiles,$file);
        }
    }
    return @ChosenFiles;
}

##
##
## Debugging routines, use these for 'wanted' in the File::Find call
##
sub print_filename {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    if ($debug) {
        if (-d $file) {
            &log(5," Got directory ".$fullfilename);
        } else {
            &log(5," Got file ".$fullfilename);
        }
    }
    $_=$file;
}

sub log_metadata {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    return if (-d $fullfilename); # No need to do anything here for directories
    if ($debug) {
        &log(6,$fullfilename);
        my $ref = &metadata($fullfilename);
        if (! defined($ref)) {
            &log(6," No data");
            return;
        }
        while (my($key,$value) = each(%$ref)) {
            &log(6," ".$key." => ".$value);
        }
        &count_copyright($ref->{'copyright'});
    }
    $_=$file;
}

##
## process_meta_file
##   Called by File::Find.
##   Only input is the filename in $_.
sub process_meta_file {
    my ($file) = $_;
    my $filename = $File::Find::name; # full filename
    return if (-d $filename); # No need to do anything here for directories
    #
    &log(3,$filename) if ($debug);
    #
    my $ref = &metadata($filename);
    #
    # $url is the original file url, not the metadata file
    my $target = $filename;
    $target =~ s/\.meta$//;
    my $url='/res/'.&declutter($target);
    &log(3," ".$url) if ($debug);
    #
    # Ignore some files based on their metadata
    if ($ref->{'obsolete'}) {
        &log(3,"obsolete") if ($debug);
        return;
    }
    &count_copyright($ref->{'copyright'});
    if ($ref->{'copyright'} eq 'private') {
        &log(3,"private") if ($debug);
        return;
    }
    #
    # Find the dynamic metadata
    my %dyn;
    if ($url=~ m:/default$:) {
        $url=~ s:/default$:/:;
        &log(3,"Skipping dynamic data") if ($debug);
    } else {
        &log(3,"Retrieving dynamic data") if ($debug);
        %dyn=&get_dynamic_metadata($url);
        &count_type($url);
    }
    &getfiledates($ref,$target);
    #
    my %Data = (
                %$ref,
                %dyn,
                'url'=>$url,
                'version'=>'current');
    if (! $simulate) {
        my ($count,$err) =
            &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'},
                                                  'metadata',\%Data);
        if ($err) {
            &log(0,"MySQL Error Insert: ".$err);
        }
        if ($count < 1) {
            &log(0,"Unable to insert record into MySQL database for $url");
        }
    }
    #
    # Reset $_ before leaving
    $_ = $file;
}

########################################################
########################################################
###                                                  ###
###  &metadata($uri)                                 ###
###   Retrieve metadata for the given file           ###
###                                                  ###
########################################################
########################################################
##
## Returns a reference to a hash built from the start tags in the .meta
## file, or undef when the file cannot be read.  The hash holds:
##   'keys'            comma separated list of every entry seen
##   '<entry>'         text content of the tag (falls back to its
##                     'default' attribute when the text is empty)
##   '<entry>.<attr>'  each attribute of the tag
## where <entry> is the tag name, suffixed with _<part> and _<name> when
## those attributes are present.
sub metadata {
    my ($uri) = @_;
    my %metacache=();
    my $filename=&declutter($uri);
    if ($filename !~ /\.meta$/) {
        $filename.='.meta';
    }
    my $metastring=&getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename);
    return undef if (! defined($metastring));
    my $parser=HTML::TokeParser->new(\$metastring);
    my $token;
    while ($token=$parser->get_token) {
        if ($token->[0] eq 'S') {
            my $entry=$token->[1];
            my $unikey=$entry;
            if (defined($token->[2]->{'part'})) {
                $unikey.='_'.$token->[2]->{'part'};
            }
            if (defined($token->[2]->{'name'})) {
                $unikey.='_'.$token->[2]->{'name'};
            }
            if ($metacache{'keys'}) {
                $metacache{'keys'}.=','.$unikey;
            } else {
                $metacache{'keys'}=$unikey;
            }
            foreach my $attr (@{$token->[3]}) {
                $metacache{$unikey.'.'.$attr}=$token->[2]->{$attr};
            }
            if (! ($metacache{$unikey}=$parser->get_text('/'.$entry))) {
                $metacache{$unikey} = $metacache{$unikey.'.default'};
            }
        } # End of ($token->[0] eq 'S')
    }
    return \%metacache;
}

###############################################################
###############################################################
###                                                         ###
###  &portfolio_metadata($filepath,$dom,$uname,$group)      ###
###   Retrieve metadata for the given file                  ###
###   Returns array -                                       ###
###      contains reference to metadatahash and             ###
###         optional reference to addedfields hash          ###
###                                                         ###
###############################################################
###############################################################
sub portfolio_metadata {
    my ($fullpath,$dom,$uname,$group)=@_;
    my ($mime) = ( $fullpath=~/\.(\w+)$/ );
    my %metacache=();
    if ($fullpath !~ /\.meta$/) {
        $fullpath .= '.meta';
    }
    my (@standard_fields,%addedfields);
    my $colsref =
        $LONCAPA::lonmetadata::Portfolio_metadata_table_description;
    if (ref($colsref) eq 'ARRAY') {
        my @columns = @{$colsref};
        foreach my $coldata (@columns) {
            push(@standard_fields,$coldata->{'name'});
        }
    }
    my $metastring=&getfile($fullpath);
    if (! defined($metastring)) {
        # No readable .meta file: synthesize the minimal record.
        $metacache{'keys'}= 'owner,domain,mime';
        $metacache{'owner'} = $uname.':'.$dom;
        $metacache{'domain'} = $dom;
        $metacache{'mime'} = $mime;
        if (defined($group)) {
            $metacache{'keys'} .= ',courserestricted';
            $metacache{'courserestricted'} = 'course.'.$dom.'_'.$uname;
        }
    } else {
        my $parser=HTML::TokeParser->new(\$metastring);
        my $token;
        while ($token=$parser->get_token) {
            if ($token->[0] eq 'S') {
                my $entry=$token->[1];
                if ($metacache{'keys'}) {
                    $metacache{'keys'}.=','.$entry;
                } else {
                    $metacache{'keys'}=$entry;
                }
                my $value = $parser->get_text('/'.$entry);
                if (!grep(/^\Q$entry\E$/,@standard_fields)) {
                    # Not a column of the portfolio metadata table: keep it
                    # as an added field, unless the value is just the field
                    # name itself (lower-cased, whitespace as underscores).
                    my $clean_value = lc($value);
                    $clean_value =~ s/\s/_/g;
                    if ($clean_value ne $entry) {
                        if (defined($addedfields{$entry})) {
                            $addedfields{$entry} .=','.$value;
                        } else {
                            $addedfields{$entry} = $value;
                        }
                    }
                } else {
                    $metacache{$entry} = $value;
                }
            } # End of ($token->[0] eq 'S')
        }
    }
    if (keys(%addedfields) > 0) {
        foreach my $key (sort keys(%addedfields)) {
            $metacache{'addedfieldnames'} .= $key.',';
            $metacache{'addedfieldvalues'} .= $addedfields{$key}.'&&&';
        }
        $metacache{'addedfieldnames'} =~ s/,$//;
        $metacache{'addedfieldvalues'} =~ s/\&\&\&$//;
        if ($metacache{'keys'}) {
            $metacache{'keys'}.=',addedfieldnames';
        } else {
            $metacache{'keys'}='addedfieldnames';
        }
        $metacache{'keys'}.=',addedfieldvalues';
    }
    return (\%metacache,$metacache{'courserestricted'},\%addedfields);
}

##
## &getfile($filename)
##   Slurps up an entire file into a scalar.
##   Returns undef if the file does not exist or cannot be opened.
sub getfile {
    my $file = shift();
    if (! -e $file ) {
        return undef;
    }
    my $fh=IO::File->new($file);
    if (! defined($fh)) {
        # Existing but unreadable file: report it and treat it as missing
        # instead of reading from an undefined handle.
        &log(0,"Unable to open $file: $!");
        return undef;
    }
    my $contents = '';
    while (my $line = <$fh>) {
        $contents .= $line;
    }
    $fh->close();
    return $contents;
}

##
## &getfiledates()
## Converts creationdate and modifieddates to SQL format
## Applies stat() to file to retrieve dates if missing
## (element 9 of stat(), the modification time, is used for both dates).
sub getfiledates {
    my ($ref,$target) = @_;
    if (! defined($ref->{'creationdate'}) ||
        $ref->{'creationdate'} =~ /^\s*$/) {
        $ref->{'creationdate'} = (stat($target))[9];
    }
    if (! defined($ref->{'lastrevisiondate'}) ||
        $ref->{'lastrevisiondate'} =~ /^\s*$/) {
        $ref->{'lastrevisiondate'} = (stat($target))[9];
    }
    $ref->{'creationdate'} =
        &LONCAPA::lonmetadata::sqltime($ref->{'creationdate'});
    $ref->{'lastrevisiondate'} =
        &LONCAPA::lonmetadata::sqltime($ref->{'lastrevisiondate'});
}

########################################################
########################################################
###                                                  ###
###    Dynamic Metadata                              ###
###                                                  ###
########################################################
########################################################
##
## Dynamic metadata description (incomplete)
##
## For a full description of all fields,
## see LONCAPA::lonmetadata
##
##   Field             Type
##-----------------------------------------------------------
##   count             integer
##   course            integer
##   course_list       comma separated list of course ids
##   avetries          real
##   avetries_list     comma separated list of real numbers
##   stdno             real
##   stdno_list        comma separated list of real numbers
##   usage             integer
##   usage_list        comma separated list of resources
##   goto              scalar
##   goto_list         comma separated list of resources
##   comefrom          scalar
##   comefrom_list     comma separated list of resources
##   difficulty        real
##   difficulty_list   comma separated list of real numbers
##   sequsage          scalar
##   sequsage_list     comma separated list of resources
##   clear             real
##   technical         real
##   correct           real
##   helpful           real
##   depth             real
##   comments          html of all the comments made
##
{

my %DynamicData;
my %Counts;

##
## &process_dynamic_metadata($user,$dom)
## Reloads %DynamicData and %Counts from the user's
## nohist_resevaldata.db and nohist_accesscount.db files.
## Returns 1 on success, 0 if either database cannot be tied.
sub process_dynamic_metadata {
    my ($user,$dom) = @_;
    undef(%DynamicData);
    undef(%Counts);
    #
    my $prodir = &propath($dom,$user);
    #
    # Read in the dynamic metadata
    my %evaldata;
    if (! tie(%evaldata,'GDBM_File',
              $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
        return 0;
    }
    #
    %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
    untie(%evaldata);
    $DynamicData{'domain'} = $dom;
    #print('user = '.$user.' domain = '.$dom.$/);
    #
    # Read in the access count data
    &log(7,'Reading access count data') if ($debug);
    my %countdata;
    if (! tie(%countdata,'GDBM_File',
              $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
        return 0;
    }
    while (my ($key,$count) = each(%countdata)) {
        next if ($key !~ /^$dom/);
        $key = &unescape($key);
        &log(8,' Count '.$key.' = '.$count) if ($debug);
        $Counts{$key}=$count;
    }
    untie(%countdata);
    if ($debug) {
        &log(7,scalar(keys(%Counts)).
             " Counts read for ".$user."@".$dom);
        &log(7,scalar(keys(%DynamicData)).
             " Dynamic metadata read for ".$user."@".$dom);
    }
    #
    return 1;
}

##
## &get_dynamic_metadata($url)
## Returns the dynamic metadata hash for $url (leading /res/ removed),
## including the access count under the key 'count'.
sub get_dynamic_metadata {
    my ($url) = @_;
    $url =~ s:^/res/::;
    my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                               \%DynamicData);
    # find the count
    $data{'count'} = $Counts{$url};
    #
    # Log the dynamic metadata
    if ($debug) {
        while (my($k,$v)=each(%data)) {
            &log(8," ".$k." => ".$v);
        }
    }
    return %data;
}

} # End of %DynamicData and %Counts scope

########################################################
########################################################
###                                                  ###
###  Counts                                          ###
###                                                  ###
########################################################
########################################################
{

my %countext;

## Tallies one resource under the lower-cased extension of $file.
sub count_type {
    my $file=shift;
    $file=~/\.(\w+)$/;
    my $ext=lc($1);
    $countext{$ext}++;
}

## Writes the per-extension tallies, as ext=count& pairs followed by
## time=<epoch>, to lon-status/rescount.txt.
sub write_type_count {
    my $outfile = '/home/httpd/html/lon-status/rescount.txt';
    my $fh;
    if (! open($fh,'>',$outfile)) {
        # The log is already closed at this point, so report on STDERR.
        warn "Unable to write $outfile: $!\n";
        return;
    }
    while (my ($extension,$count) = each(%countext)) {
        print $fh $extension.'='.$count.'&';
    }
    print $fh 'time='.time."\n";
    close($fh);
}

} # end of scope for %countext

{

my %copyrights;

## Tallies one resource under the given copyright value.
sub count_copyright {
    $copyrights{$_[0]}++;
}

## Writes the per-copyright tallies, as copyright=count& pairs followed by
## time=<epoch>, to lon-status/copyrightcount.txt.
sub write_copyright_count {
    my $outfile = '/home/httpd/html/lon-status/copyrightcount.txt';
    my $fh;
    if (! open($fh,'>',$outfile)) {
        # The log is already closed at this point, so report on STDERR.
        warn "Unable to write $outfile: $!\n";
        return;
    }
    while (my ($copyright,$count) = each(%copyrights)) {
        print $fh $copyright.'='.$count.'&';
    }
    print $fh 'time='.time."\n";
    close($fh);
}

} # end of scope for %copyrights

########################################################
########################################################
###                                                  ###
###  Miscellaneous Utility Routines                  ###
###                                                  ###
########################################################
########################################################

##
## &ishome($username)
##   Returns 1 if $username is a LON-CAPA author, 0 otherwise
##   (copied from lond, modification of the return value)
##
##   The argument is a path of the form <docroot>/res/<domain>/<user>/...
##   It is reduced to "<domain>/<user>" and the result is 1 when that
##   user's directory (see &propath) exists.  <docroot> is the configured
##   lonDocRoot; the historical literal /home/httpd/html is still accepted.
sub ishome {
    my $author=shift;
    my @roots = ('/home/httpd/html');
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    if (defined($docroot) && $docroot ne '' && $docroot ne $roots[0]) {
        # Callers build the path from lonDocRoot, so try that first.
        unshift(@roots,$docroot);
    }
    foreach my $root (@roots) {
        last if ($author =~ s{\Q$root\E/res/([^/]*)/([^/]*).*}{$1/$2});
    }
    my ($udom,$uname)=split(/\//,$author);
    my $proname=propath($udom,$uname);
    if (-e $proname) {
        return 1;
    } else {
        return 0;
    }
}

##
## &propath($udom,$uname)
##   Returns the path to the users LON-CAPA directory
##   (copied from lond)
##
##   Non-word characters are stripped from both arguments.  The path is
##   <lonUsersDir>/<udom>/<c1>/<c2>/<c3>/<uname>, where c1..c3 are the first
##   three characters of the user name padded with underscores.
sub propath {
    my ($udom,$uname)=@_;
    $udom=~s/\W//g;
    $uname=~s/\W//g;
    my $subdir=$uname.'__';
    $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
    my $proname="$Apache::lonnet::perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
    return $proname;
}

##
## &declutter($filename)
##   Given a filename, returns a url for the filename.
##   Strips a leading lonDocRoot, then a leading '/', then a leading 'res/'.
sub declutter {
    my $thisfn=shift;
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    if (defined($docroot) && $docroot ne '') {
        # \Q...\E: the document root is a literal path, not a pattern.
        $thisfn=~s/^\Q$docroot\E//;
    }
    $thisfn=~s/^\///;
    $thisfn=~s/^res\///;
    return $thisfn;
}

##
## Escape / Unescape special characters
##   unescape turns each %XX hex pair back into its byte;
##   escape turns each non-word character into %xx (lower-case hex).
sub unescape {
    my $str=shift;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}

sub escape {
    my $str=shift;
    $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $str;
}

# NOTE(review): the words "500 Internal Server Error" were fused onto the end
# of this section, and the text of an HTTP error page follows in this file.
# None of it is part of the script; it should be removed from the file.

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.