#!/usr/bin/perl
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
# $Id: searchcat.pl,v 1.77 2007/07/25 23:17:43 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
###

=pod

=head1 NAME

B<searchcat.pl> - put authoritative filesystem data into sql database.

=head1 SYNOPSIS

Ordinarily this script is to be called from a loncapa cron job
(CVS source location: F<loncapa/loncom/cron/loncapa>; typical
filesystem installation location: F</etc/cron.d/loncapa>).

Here is the cron job entry.

C<# Repopulate and refresh the metadata database used for the search catalog.>
C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>

This script only allows itself to be run as the user C<www>.

=head1 DESCRIPTION

This script goes through a loncapa resource directory and gathers metadata.
The metadata is entered into a SQL database.

This script also does general database maintenance such as reformatting
the C<metadata> table if it is deprecated.

This script evaluates dynamic metadata from the authors'
F<nohist_resevaldata.db> database file in order to store it in MySQL.

This script is playing an increasingly important role for a loncapa
library server.  The proper operation of this script is critical for a smooth
and correct user experience.

=cut

use strict;
use DBI;
use lib '/home/httpd/lib/perl/';
use LONCAPA::lonmetadata;
use LONCAPA;
use Getopt::Long;
use IO::File;
use HTML::TokeParser;
use GDBM_File;
use POSIX qw(strftime mktime);
use Apache::lonnet();
use File::Find;

#
# Set up configuration options
my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
GetOptions (
            'help'      => \$help,
            'simulate'  => \$simulate,
            'only=s'    => \$oneuser,
            'verbose=s' => \$verbose,
            'debug'     => \$debug,
            );

if ($help) {
    print <<"ENDHELP";
$0
Rebuild and update the LON-CAPA metadata database.
Options:
    -help          Print this help
    -simulate      Do not modify the database.
    -only=user     Only compute for the given user.  Implies -simulate
    -verbose=val   Sets logging level, val must be a number
    -debug         Turns on debugging output
ENDHELP
    exit 0;
}

if (! defined($debug)) {
    $debug = 0;
}

if (! defined($verbose)) {
    $verbose = 0;
}

if (defined($oneuser)) {
    $simulate=1;
}

##
## Use variables for table names so we can test this routine a little easier
my %oldnames = (
                 'metadata'    => 'metadata',
                 'portfolio'   => 'portfolio_metadata',
                 'access'      => 'portfolio_access',
                 'addedfields' => 'portfolio_addedfields',
                 'allusers'    => 'allusers',
               );

my %newnames;
# new table names -  append pid to have unique temporary tables
foreach my $key (keys(%oldnames)) {
    $newnames{$key} = 'new'.$oldnames{$key}.$$;
}

#
# Only run if machine is a library server
exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library');
#
#  Make sure this process is running from user=www
#  An undefined $wwwid (no such user) would compare equal to uid 0 in a
#  numeric test, so it is rejected explicitly.
my $wwwid=getpwnam('www');
if (! defined($wwwid) || $wwwid!=$<) {
    my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}";
    my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch";
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |".
           " mail -s '$subj' $emailto > /dev/null");
    exit 1;
}
#
# Let people know we are running
# LOG stays a bareword handle because &log prints to it by name.  A failed
# open is reported on STDERR; the run continues without a log file.
if (! open(LOG,'>>',
           $Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log')) {
    warn "Unable to open ".$Apache::lonnet::perlvar{'lonDaemons'}.
         "/logs/searchcat.log: $!\n";
}
&log(0,'==== Searchcat Run '.localtime()."====");

if ($debug) {
    &log(0,'simulating') if ($simulate);
    &log(0,'only processing user '.$oneuser) if ($oneuser);
    &log(0,'verbosity level = '.$verbose);
}
#
# Connect to database
my $dbh;
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",
                           $Apache::lonnet::perlvar{'lonSqlAccess'},
                           { RaiseError =>0,PrintError=>0}))) {
    &log(0,"Cannot connect to database!");
    die "MySQL Error: Cannot connect to database!\n";
}
# This can return an error and still be okay, so we do not bother checking.
# (perhaps it should be more robust and check for specific errors)
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
    }
}

#
# Create the new metadata, portfolio and allusers tables
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        my $request =
            &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},
                                                           $oldnames{$key});
        $dbh->do($request);
        if ($dbh->err) {
            # Capture the message before disconnecting; the handle's error
            # state should not be read after the connection is closed.
            my $errstr = $dbh->errstr;
            &log(0,"MySQL Error Create: ".$errstr);
            $dbh->disconnect();
            die $errstr;
        }
    }
}

#
# find out which users we need to examine
my @domains = sort(&Apache::lonnet::current_machine_domains());
&log(9,'domains ="'.join('","',@domains).'"');

foreach my $dom (@domains) {
    &log(9,'domain = '.$dom);
    my $resdir = "$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom";
    my @homeusers;
    if (opendir(my $resdh,$resdir)) {
        @homeusers =
            grep {
                &ishome("$resdir/$_");
            } grep {
                !/^\.\.?$/;
            } readdir($resdh);
        closedir($resdh);
    } else {
        # No readable resource directory for this domain: no authors to
        # scan, but portfolio and allusers processing below still runs.
        &log(0,"Unable to read directory ".$resdir.": ".$!);
    }
    &log(5,'users = '.$dom.':'.join(',',@homeusers));
    #
    if ($oneuser) {
        @homeusers=($oneuser);
    }
    #
    # Loop through the users
    foreach my $user (@homeusers) {
        &log(0,"=== User: ".$user);
        &process_dynamic_metadata($user,$dom);
        #
        # Use File::Find to get the files we need to read/modify
        find(
             {preprocess => \&only_meta_files,
              #wanted     => \&print_filename,
              #wanted     => \&log_metadata,
              wanted     => \&process_meta_file,
              no_chdir   => 1, },
             join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) );
    }
    # Search for all users and public portfolio files
    my (%allusers,%portusers);
    if ($oneuser) {
        %portusers = (
                        $oneuser => '',
                      );
        %allusers = (
                        $oneuser => '',
                      );
    } else {
        my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom;
        &descend_tree($dom,$dir,0,\%portusers,\%allusers);
    }
    foreach my $uname (keys(%portusers)) {
        my $urlstart = '/uploaded/'.$dom.'/'.$uname;
        my $pathstart = &propath($dom,$uname).'/userfiles';
        my $is_course = &Apache::lonnet::is_course($dom,$uname);
        my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname);
        my %access = &Apache::lonnet::get_access_controls($curr_perm);
        foreach my $file (keys(%access)) {
            my ($group,$url,$fullpath);
            if ($is_course) {
                # Course portfolio keys are matched as <group>/<path>.
                ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/);
                $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path;
                $url = $urlstart.'/groups/'.$group.'/portfolio'.$path;
            } else {
                $fullpath = $pathstart.'/portfolio'.$file;
                $url = $urlstart.'/portfolio'.$file;
            }
            if (ref($access{$file}) eq 'HASH') {
                my %portaccesslog =
                    &LONCAPA::lonmetadata::process_portfolio_access_data($dbh,
                        $simulate,\%newnames,$url,$fullpath,$access{$file});
                &portfolio_logging(%portaccesslog);
            }
            my %portmetalog =
                &LONCAPA::lonmetadata::process_portfolio_metadata($dbh,
                    $simulate,\%newnames,$url,$fullpath,$is_course,$dom,
                    $uname,$group);
            &portfolio_logging(%portmetalog);
        }
    }
    # Update allusers
    foreach my $uname (keys(%allusers)) {
        my %userdata =
            &Apache::lonnet::get('environment',['firstname','lastname',
                'middlename','generation','id','permanentemail'],$dom,$uname);
        $userdata{'username'} = $uname;
        $userdata{'domain'} = $dom;
        my %alluserslog =
            &LONCAPA::lonmetadata::process_allusers_data($dbh,$simulate,
                \%newnames,$uname,$dom,\%userdata);
        foreach my $item (keys(%alluserslog)) {
            &log(0,$alluserslog{$item});
        }
    }
}

#
# Rename the tables
if (! $simulate) {
    foreach my $key (keys(%oldnames)) {
        if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) {
            $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key});
            if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) {
                &log(0,"MySQL Error Rename: ".$dbh->errstr);
                die $dbh->errstr;
            } else {
                &log(1,"MySQL table rename successful for $key.");
            }
        }
    }
}
if (! $dbh->disconnect) {
    &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
    die $dbh->errstr;
}
##
## Finished!
&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);

&write_type_count();
&write_copyright_count();

exit 0;

##
## Status logging routine.  Inputs: $level, $message
##
## $level 0 should be used for normal output and error messages
##
## $message does not need to end with \n.  In the case of errors
## the message should contain as much information as possible to
## help in diagnosing the problem.
##
## The message is written to LOG only when $verbose >= $level.
##
sub log {
    my ($level,$message)=@_;
    $level = 0 if (! defined($level));
    if ($verbose >= $level) {
        print LOG $message.$/;
    }
}

##
## &portfolio_logging(%portlog)
##
## Writes every message found in the hash-of-hashes %portlog to the log
## at level 0.  Top-level values that are not hash references are ignored.
##
sub portfolio_logging {
    my (%portlog) = @_;
    foreach my $key (keys(%portlog)) {
        if (ref($portlog{$key}) eq 'HASH') {
            foreach my $item (keys(%{$portlog{$key}})) {
                &log(0,$portlog{$key}{$item});
            }
        }
    }
}

##
## &descend_tree($dom,$dir,$depth,$allportusers,$alldomusers)
##
## Recursively walks $dir.  Entries reached at the fourth directory level
## are treated as user names: a name is recorded in %$allportusers when
## its directory contains file_permissions.db, and in %$alldomusers when
## &Apache::lonnet::is_course reports it is not a course.
## Entries whose names start with a dot are skipped.
##
sub descend_tree {
    my ($dom,$dir,$depth,$allportusers,$alldomusers) = @_;
    return if (! -d $dir);
    # A lexical handle keeps each recursion level's directory stream
    # separate; an unreadable directory simply contributes nothing.
    opendir(my $dirh,$dir) or return;
    my @contents = grep(!/^\./,readdir($dirh));
    closedir($dirh);
    $depth ++;
    foreach my $item (@contents) {
        if ($depth < 4) {
            &descend_tree($dom,$dir.'/'.$item,$depth,$allportusers,$alldomusers);
        } else {
            if (-e $dir.'/'.$item.'/file_permissions.db') {
                $$allportusers{$item} = '';
            }
            if (!&Apache::lonnet::is_course($dom,$item)) {
                $$alldomusers{$item} = '';
            }
        }
    }
}

########################################################
########################################################
###                                                  ###
###          File::Find support routines             ###
###                                                  ###
########################################################
########################################################
##
## &only_meta_files
##
## Called by
## File::Find.
## Takes a list of files/directories in and returns a list of files/directories
## to search.
##
## An entry is kept when it is a current-version .meta file (the name ends
## in .meta but not in .<digits>.<ext>.meta), or when it is a directory
## inside $File::Find::dir.
sub only_meta_files {
    my @candidates = @_;
    my $parent = $File::Find::dir;
    return grep {
        my $is_current_meta = (/\.meta$/ &&           # Ends in meta
                               !/\.\d+\.[^\.]+\.meta$/ # not a prior version
                              );
        # directories are okay
        $is_current_meta || (-d $parent."/".$_);
    } @candidates;
}

##
##
## Debugging routines, use these for 'wanted' in the File::Find call
##

##
## &print_filename
## When $debug is set, logs (level 5) whether the current File::Find entry
## is a directory or a file.  $_ is restored before returning.
sub print_filename {
    my $entry = $_;
    my $path  = $File::Find::name;
    if ($debug) {
        my $label = (-d $entry) ? " Got directory " : " Got file ";
        &log(5,$label.$path);
    }
    $_ = $entry;
}

##
## &log_metadata
## When $debug is set, logs (level 6) every key/value pair returned by
## &metadata for the current File::Find entry and tallies its copyright.
## Directories are ignored.
sub log_metadata {
    my $entry = $_;
    my $path  = $File::Find::name;
    return if (-d $path); # No need to do anything here for directories
    if ($debug) {
        &log(6,$path);
        my $meta = &metadata($path);
        if (! defined($meta)) {
            &log(6," No data");
            return;
        }
        foreach my $field (keys(%$meta)) {
            &log(6," ".$field." => ".$meta->{$field});
        }
        &count_copyright($meta->{'copyright'});
    }
    $_ = $entry;
}

##
## process_meta_file
## Called by File::Find.
## Only input is the filename in $_.
sub process_meta_file {
    my ($file) = $_;
    my $filename = $File::Find::name; # full filename
    return if (-d $filename); # No need to do anything here for directories
    #
    &log(3,$filename) if ($debug);
    #
    my $ref = &metadata($filename);
    if (! defined($ref)) {
        # &metadata returns undef when the file could not be read; do not
        # store an empty record for it.
        &log(0,"Unable to read metadata from ".$filename);
        $_ = $file;
        return;
    }
    #
    # $url is the original file url, not the metadata file
    my $target = $filename;
    $target =~ s/\.meta$//;
    my $url='/res/'.&declutter($target);
    &log(3," ".$url) if ($debug);
    #
    # Ignore some files based on their metadata
    if ($ref->{'obsolete'}) {
        &log(3,"obsolete") if ($debug);
        return;
    }
    &count_copyright($ref->{'copyright'});
    if ($ref->{'copyright'} eq 'private') {
        &log(3,"private") if ($debug);
        return;
    }
    #
    # Find the dynamic metadata
    my %dyn;
    if ($url=~ m:/default$:) {
        $url=~ s:/default$:/:;
        &log(3,"Skipping dynamic data") if ($debug);
    } else {
        &log(3,"Retrieving dynamic data") if ($debug);
        %dyn=&get_dynamic_metadata($url);
        &count_type($url);
    }
    &LONCAPA::lonmetadata::getfiledates($ref,$target);
    #
    my %Data = (
                %$ref,
                %dyn,
                'url'=>$url,
                'version'=>'current');
    if (! $simulate) {
        my ($count,$err) =
            &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'},
                                                  'metadata',\%Data);
        if ($err) {
            &log(0,"MySQL Error Insert: ".$err);
        }
        if ($count < 1) {
            &log(0,"Unable to insert record into MySQL database for $url");
        }
    }
    #
    # Reset $_ before leaving
    $_ = $file;
}

########################################################
########################################################
###                                                  ###
###  &metadata($uri)                                 ###
###   Retrieve metadata for the given file           ###
###                                                  ###
########################################################
########################################################
##
## Input: a filename or url; ".meta" is appended when it is missing.
## Returns: a reference to a hash holding
##     keys            comma separated list of all entry keys found
##     <key>           text of the entry (or its "default" attribute when
##                     the text is empty)
##     <key>.<attr>    each attribute of the entry's start tag
## where <key> is the tag name followed by _<part> and _<name> when those
## attributes are present.  Returns undef when the file contents could not
## be retrieved.
sub metadata {
    my ($uri) = @_;
    my %metacache=();
    my $filename=&declutter($uri);
    # The original code cleared its $uri key prefix to '' before building
    # any key, so all keys below are unprefixed.
    if ($filename !~ /\.meta$/) {
        $filename.='.meta';
    }
    my $metastring =
        &LONCAPA::lonmetadata::getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename);
    return undef if (! defined($metastring));
    my $parser=HTML::TokeParser->new(\$metastring);
    my $token;
    while ($token=$parser->get_token) {
        if ($token->[0] eq 'S') {
            my $entry=$token->[1];
            my $unikey=$entry;
            if (defined($token->[2]->{'part'})) {
                $unikey.='_'.$token->[2]->{'part'};
            }
            if (defined($token->[2]->{'name'})) {
                $unikey.='_'.$token->[2]->{'name'};
            }
            if ($metacache{'keys'}) {
                $metacache{'keys'}.=','.$unikey;
            } else {
                $metacache{'keys'}=$unikey;
            }
            foreach my $attr (@{$token->[3]}) {
                $metacache{$unikey.'.'.$attr}=$token->[2]->{$attr};
            }
            # Fall back to the "default" attribute when the entry is empty.
            if (! ($metacache{$unikey}=$parser->get_text('/'.$entry))){
                $metacache{$unikey} = $metacache{$unikey.'.default'};
            }
        } # End of ($token->[0] eq 'S')
    }
    return \%metacache;
}

########################################################
########################################################
###                                                  ###
###     Dynamic Metadata                             ###
###                                                  ###
########################################################
########################################################
##
## Dynamic metadata description (incomplete)
##
## For a full description of all fields,
## see LONCAPA::lonmetadata
##
##   Field             Type
##-----------------------------------------------------------
##   count             integer
##   course            integer
##   course_list       comma separated list of course ids
##   avetries          real
##   avetries_list     comma separated list of real numbers
##   stdno             real
##   stdno_list        comma separated list of real numbers
##   usage             integer
##   usage_list        comma separated list of resources
##   goto              scalar
##   goto_list         comma separated list of resources
##   comefrom          scalar
##   comefrom_list     comma separated list of resources
##   difficulty        real
##   difficulty_list   comma separated list of real numbers
##   sequsage          scalar
##   sequsage_list     comma separated list of resources
##   clear             real
##   technical         real
##   correct           real
##   helpful           real
##   depth             real
##   comments          html of all the comments made
##
{

my %DynamicData;
my %Counts;

##
## &process_dynamic_metadata($user,$dom)
##
## Loads the evaluation data (nohist_resevaldata.db) and access counts
## (nohist_accesscount.db) of one author into %DynamicData and %Counts,
## replacing whatever a previous call stored.
## Returns 1 on success, 0 when either database file could not be tied.
sub process_dynamic_metadata {
    my ($user,$dom) = @_;
    undef(%DynamicData);
    undef(%Counts);
    #
    my $prodir = &propath($dom,$user);
    #
    # Read in the dynamic metadata
    my %evaldata;
    if (! tie(%evaldata,'GDBM_File',
              $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
        return 0;
    }
    #
    %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
    untie(%evaldata);
    $DynamicData{'domain'} = $dom;
    #print('user = '.$user.' domain = '.$dom.$/);
    #
    # Read in the access count data
    &log(7,'Reading access count data') if ($debug);
    my %countdata;
    if (! tie(%countdata,'GDBM_File',
              $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
        return 0;
    }
    while (my ($key,$count) = each(%countdata)) {
        # \Q...\E: the domain name is a literal prefix, not a pattern.
        next if ($key !~ /^\Q$dom\E/);
        $key = &unescape($key);
        &log(8,' Count '.$key.' = '.$count) if ($debug);
        $Counts{$key}=$count;
    }
    untie(%countdata);
    if ($debug) {
        &log(7,scalar(keys(%Counts)).
             " Counts read for ".$user."@".$dom);
        &log(7,scalar(keys(%DynamicData)).
             " Dynamic metadata read for ".$user."@".$dom);
    }
    #
    return 1;
}

##
## &get_dynamic_metadata($url)
##
## Returns a hash of the dynamic metadata for $url (a leading /res/ is
## stripped), taken from the data loaded by the most recent call to
## &process_dynamic_metadata, with 'count' set from the access counts.
sub get_dynamic_metadata {
    my ($url) = @_;
    $url =~ s:^/res/::;
    my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                               \%DynamicData);
    # find the count
    $data{'count'} = $Counts{$url};
    #
    # Log the dynamic metadata
    if ($debug) {
        while (my($k,$v)=each(%data)) {
            &log(8," ".$k." => ".$v);
        }
    }
    return %data;
}

} # End of %DynamicData and %Counts scope

########################################################
########################################################
###                                                  ###
###  Counts                                          ###
###                                                  ###
########################################################
########################################################
{

my %countext;

##
## &count_type($file)
## Tallies the lower-cased filename extension of $file.  Names without an
## extension are not counted.
sub count_type {
    my $file=shift;
    # Only trust $1 when the match succeeded; after a failed match it
    # does not describe $file.
    if ($file=~/\.(\w+)$/) {
        my $ext=lc($1);
        $countext{$ext}++;
    }
}

##
## &write_type_count([$file])
## Writes the extension tallies as ext=count& pairs followed by
## time=<epoch> to $file (default: the lon-status rescount.txt file).
## Returns true on success; on failure warns and returns 0.
sub write_type_count {
    my ($file) = @_;
    $file = '/home/httpd/html/lon-status/rescount.txt' if (! defined($file));
    my $fh;
    if (! open($fh,'>',$file)) {
        warn "Unable to write $file: $!\n";
        return 0;
    }
    while (my ($extension,$count) = each(%countext)) {
        print $fh $extension.'='.$count.'&';
    }
    print $fh 'time='.time."\n";
    return close($fh);
}

} # end of scope for %countext

{

my %copyrights;

##
## &count_copyright($copyright)
## Tallies one resource under the given copyright value.  An undefined
## value is counted under the empty string.
sub count_copyright {
    my $copyright = $_[0];
    $copyright = '' if (! defined($copyright));
    $copyrights{$copyright}++;
}

##
## &write_copyright_count([$file])
## Writes the copyright tallies as copyright=count& pairs followed by
## time=<epoch> to $file (default: the lon-status copyrightcount.txt file).
## Returns true on success; on failure warns and returns 0.
sub write_copyright_count {
    my ($file) = @_;
    $file = '/home/httpd/html/lon-status/copyrightcount.txt'
        if (! defined($file));
    my $fh;
    if (! open($fh,'>',$file)) {
        warn "Unable to write $file: $!\n";
        return 0;
    }
    while (my ($copyright,$count) = each(%copyrights)) {
        print $fh $copyright.'='.$count.'&';
    }
    print $fh 'time='.time."\n";
    return close($fh);
}

} # end of scope for %copyrights

########################################################
########################################################
###                                                  ###
###   Miscellaneous Utility Routines                 ###
###                                                  ###
########################################################
########################################################
##
## &ishome($username)
## Returns 1 if $username is a LON-CAPA author, 0 otherwise
## (copied from lond, modification of the return value)
##
## The argument is a resource directory path; the domain and user name
## are taken from the two components following /home/httpd/html/res/.
sub ishome {
    my $author=shift;
    $author=~s{/home/httpd/html/res/([^/]*)/([^/]*).*}{$1/$2};
    my ($udom,$uname)=split(/\//,$author);
    my $proname=propath($udom,$uname);
    return (-e $proname) ? 1 : 0;
}

##
## &declutter($filename)
## Given a filename, returns a url for the filename.
## Strips a leading lonDocRoot, one leading slash and a leading res/.
sub declutter {
    my $thisfn=shift;
    # \Q...\E: the document root is a literal path, not a pattern.
    $thisfn=~s/^\Q$Apache::lonnet::perlvar{'lonDocRoot'}\E//;
    $thisfn=~s/^\///;
    $thisfn=~s/^res\///;
    return $thisfn;
}