File:
[LON-CAPA] /
loncom /
metadata_database /
searchcat.pl
Revision
1.78:
download - view:
text,
annotated -
select for diffs
Fri Mar 26 13:29:31 2010 UTC (14 years, 7 months ago) by
raeburn
Branches:
MAIN
CVS tags:
version_2_9_X,
version_2_9_1,
version_2_9_0,
version_2_8_99_1,
version_2_10_X,
version_2_10_1,
version_2_10_0_RC2,
version_2_10_0_RC1,
version_2_10_0,
loncapaMITrelate_1,
language_hyphenation_merge,
language_hyphenation,
PRINT_INCOMPLETE_base,
PRINT_INCOMPLETE,
HEAD,
BZ4492-merge,
BZ4492-feature_horizontal_radioresponse
- When populating allusers table:
- Use hash of courseIDs from single call to lonnet::courseiddump() for domain, instead of individual call to lonnet::is_course() to test if user is a course.
- Check if directory in lonUsers/$dom/$1/$2/$3/$uname is for a user
by testing for existence of passwd file.
#!/usr/bin/perl
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
# $Id: searchcat.pl,v 1.78 2010/03/26 13:29:31 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
###
=pod
=head1 NAME
B<searchcat.pl> - put authoritative filesystem data into sql database.
=head1 SYNOPSIS
Ordinarily this script is to be called from a loncapa cron job
(CVS source location: F<loncapa/loncom/cron/loncapa>; typical
filesystem installation location: F</etc/cron.d/loncapa>).
Here is the cron job entry.
C<# Repopulate and refresh the metadata database used for the search catalog.>
C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
This script only allows itself to be run as the user C<www>.
=head1 DESCRIPTION
This script goes through a loncapa resource directory and gathers metadata.
The metadata is entered into a SQL database.
This script also does general database maintenance such as reformatting
the C<loncapa:metadata> table if it is deprecated.
This script evaluates dynamic metadata from the authors'
F<nohist_resevaldata.db> database file in order to store it in MySQL.
This script is playing an increasingly important role for a loncapa
library server. The proper operation of this script is critical for a smooth
and correct user experience.
=cut
use strict;
use DBI;
use lib '/home/httpd/lib/perl/';
use LONCAPA::lonmetadata;
use LONCAPA;
use Getopt::Long;
use IO::File;
use HTML::TokeParser;
use GDBM_File;
use POSIX qw(strftime mktime);
use Apache::lonnet();
use File::Find;
#
# Command-line configuration.  Every option is optional.
my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
GetOptions(
    'help'      => \$help,
    'simulate'  => \$simulate,
    'only=s'    => \$oneuser,
    'verbose=s' => \$verbose,
    'debug'     => \$debug,
);
if ($help) {
    print <<"ENDHELP";
$0
Rebuild and update the LON-CAPA metadata database.
Options:
-help Print this help
-simulate Do not modify the database.
-only=user Only compute for the given user. Implies -simulate
-verbose=val Sets logging level, val must be a number
-debug Turns on debugging output
ENDHELP
    exit 0;
}
# Options that were not given on the command line default to 0 (off).
$debug   = 0 unless defined($debug);
$verbose = 0 unless defined($verbose);
# Restricting the run to one user forces simulation mode.
$simulate = 1 if defined($oneuser);
##
## Table names live in variables so this routine is a little easier to test.
## %oldnames maps a short key to the name of the live table.
my %oldnames = (
    'metadata'    => 'metadata',
    'portfolio'   => 'portfolio_metadata',
    'access'      => 'portfolio_access',
    'addedfields' => 'portfolio_addedfields',
    'allusers'    => 'allusers',
);
# %newnames maps the same keys to scratch table names of the form
# "new<table><pid>"; appending the pid keeps the temporary tables unique.
my %newnames = map { ($_ => 'new'.$oldnames{$_}.$$) } keys(%oldnames);
#
# Only run if machine is a library server
exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library');
my $hostid = $Apache::lonnet::perlvar{'lonHostID'};
#
# Make sure this process is running from user=www.
# getpwnam() in scalar context gives the uid, or undef when the account does
# not exist.  An undefined uid has to count as a mismatch: compared
# numerically it equals 0, which would let root pass this check.
my $wwwid=getpwnam('www');
if (! defined($wwwid) || $wwwid != $<) {
    # Notify the administrators by mail, then refuse to run.
    my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}";
    my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch";
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
mail -s '$subj' $emailto > /dev/null");
    exit 1;
}
#
# Let people know we are running.
# LOG remains a package filehandle because &log() prints to it by name.
# A failed open is reported on STDERR but is not fatal, so the catalog is
# still rebuilt even when the log file cannot be written.
if (! open(LOG,'>>',$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log')) {
    warn "searchcat.pl: unable to open searchcat.log for appending: $!\n";
}
&log(0,'==== Searchcat Run '.localtime()."====");
if ($debug) {
    &log(0,'simulating') if ($simulate);
    &log(0,'only processing user '.$oneuser) if ($oneuser);
    &log(0,'verbosity level = '.$verbose);
}
#
# Connect to database
my $dbh;
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'},
                           { RaiseError =>0,PrintError=>0}))) {
    &log(0,"Cannot connect to database!");
    die "MySQL Error: Cannot connect to database!\n";
}
# Drop any scratch tables that already carry our names.
# This can return an error and still be okay, so we do not bother checking.
# (perhaps it should be more robust and check for specific errors)
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
    }
}
#
# Create the new metadata, portfolio and allusers tables
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        my $request =
            &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key});
        $dbh->do($request);
        if ($dbh->err) {
            # Save the message before disconnecting: DBI resets the handle's
            # error state on subsequent method calls, so reading errstr after
            # disconnect() would lose it.
            my $error = $dbh->errstr;
            &log(0,"MySQL Error Create: ".$error);
            $dbh->disconnect();
            die $error;
        }
    }
}
#
# find out which users we need to examine
my @domains = sort(&Apache::lonnet::current_machine_domains());
&log(9,'domains ="'.join('","',@domains).'"');
foreach my $dom (@domains) {
    &log(9,'domain = '.$dom);
    # Candidate authors are the entries of <lonDocRoot>/res/<domain> for
    # which &ishome() is true.
    my $resdir = "$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom";
    my @homeusers;
    if (opendir(my $resources,$resdir)) {
        @homeusers =
            grep {
                &ishome("$resdir/$_");
            } grep {
                !/^\.\.?$/;
            } readdir($resources);
        closedir($resources);
    } else {
        # No readable resource directory: treat the domain as having no authors.
        &log(1,'Unable to read directory '.$resdir.': '.$!);
    }
    &log(5,'users = '.$dom.':'.join(',',@homeusers));
    #
    if ($oneuser) {
        @homeusers=($oneuser);
    }
    #
    # Loop through the users
    foreach my $user (@homeusers) {
        &log(0,"=== User: ".$user);
        &process_dynamic_metadata($user,$dom);
        #
        # Use File::Find to get the files we need to read/modify
        find(
             {preprocess => \&only_meta_files,
              #wanted     => \&print_filename,
              #wanted     => \&log_metadata,
              wanted     => \&process_meta_file,
              no_chdir   => 1,
             }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) );
    }
    # Search for all users and public portfolio files
    my (%allusers,%portusers,%courses);
    if ($oneuser) {
        %portusers = (
                        $oneuser => '',
                      );
        %allusers = (
                        $oneuser => '',
                      );
        # courseiddump lives in Apache::lonnet, which is loaded with an empty
        # import list, so the call has to be fully qualified.
        %courses = &Apache::lonnet::courseiddump($dom,'.',1,'.','.',$oneuser,undef,
                                                 undef,'.');
    } else {
        # get courseIDs for domain on current machine
        %courses=&Apache::lonnet::courseiddump($dom,'.',1,'.','.','.',1,[$hostid],'.');
        my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom;
        &descend_tree($dom,$dir,0,\%portusers,\%allusers);
    }
    foreach my $uname (keys(%portusers)) {
        my $urlstart = '/uploaded/'.$dom.'/'.$uname;
        my $pathstart = &propath($dom,$uname).'/userfiles';
        my $is_course = '';
        if (exists($courses{$dom.'_'.$uname})) {
            $is_course = 1;
        }
        my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname);
        my %access = &Apache::lonnet::get_access_controls($curr_perm);
        foreach my $file (keys(%access)) {
            my ($group,$url,$fullpath);
            if ($is_course) {
                # For courses the key is split into a leading group name and
                # the path below that group's portfolio.
                ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/);
                $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path;
                $url = $urlstart.'/groups/'.$group.'/portfolio'.$path;
            } else {
                $fullpath = $pathstart.'/portfolio'.$file;
                $url = $urlstart.'/portfolio'.$file;
            }
            if (ref($access{$file}) eq 'HASH') {
                my %portaccesslog =
                    &LONCAPA::lonmetadata::process_portfolio_access_data($dbh,
                        $simulate,\%newnames,$url,$fullpath,$access{$file});
                &portfolio_logging(%portaccesslog);
            }
            my %portmetalog = &LONCAPA::lonmetadata::process_portfolio_metadata($dbh,$simulate,\%newnames,$url,$fullpath,$is_course,$dom,$uname,$group);
            &portfolio_logging(%portmetalog);
        }
    }
    # Update allusers
    foreach my $uname (keys(%allusers)) {
        # Entries whose <domain>_<name> is a known course ID are not users.
        next if (exists($courses{$dom.'_'.$uname}));
        my %userdata =
            &Apache::lonnet::get('environment',['firstname','lastname',
                'middlename','generation','id','permanentemail'],$dom,$uname);
        $userdata{'username'} = $uname;
        $userdata{'domain'} = $dom;
        my %alluserslog =
            &LONCAPA::lonmetadata::process_allusers_data($dbh,$simulate,
                \%newnames,$uname,$dom,\%userdata);
        foreach my $item (keys(%alluserslog)) {
            &log(0,$alluserslog{$item});
        }
    }
}
#
# Rename the tables
if (! $simulate) {
    # Replace each live table with its freshly built scratch table.
    foreach my $key (keys(%oldnames)) {
        if (($oldnames{$key} ne '') && ($newnames{$key} ne '')) {
            $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key});
            if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) {
                &log(0,"MySQL Error Rename: ".$dbh->errstr);
                die $dbh->errstr;
            } else {
                &log(1,"MySQL table rename successful for $key.");
            }
        }
    }
} else {
    # Simulation: the scratch tables were still created, so remove them
    # rather than leaving pid-suffixed tables behind in the database.
    foreach my $key (keys(%newnames)) {
        if ($newnames{$key} ne '') {
            $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
        }
    }
}
if (! $dbh->disconnect) {
    &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
    die $dbh->errstr;
}
##
## Finished!
&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);
# The status files are written after the log is closed.
&write_type_count();
&write_copyright_count();
exit 0;
##
## Status logging routine.  Inputs: $level, $message
##
## The message is written to the LOG filehandle, followed by the output
## record separator $/, whenever the configured verbosity is at least $level.
## An undefined $level is treated as 0.
##
## $level 0 should be used for normal output and error messages
##
## $message does not need to end with \n.  In the case of errors
## the message should contain as much information as possible to
## help in diagnosing the problem.
##
## Note: call this as &log(...); a bare log(...) is Perl's builtin logarithm.
##
sub log {
    my ($level,$message)=@_;
    $level = 0 unless defined($level);
    print LOG $message.$/ unless ($verbose < $level);
}
##
## &portfolio_logging(%portlog)
##
## Expects a hash whose values are hash references; every value of each
## inner hash is written to the log at level 0.  Entries of %portlog that
## are not hash references are ignored.
sub portfolio_logging {
    my (%portlog) = @_;
    foreach my $entries (values(%portlog)) {
        next unless (ref($entries) eq 'HASH');
        foreach my $message (values(%{$entries})) {
            &log(0,$message);
        }
    }
}
##
## &descend_tree($dom,$dir,$depth,$allportusers,$alldomusers)
##
## Recursively walks the directory tree below $dir.  Entries whose names
## start with '.' are skipped.  Each call increments $depth; while the
## incremented depth is below 4 the routine recurses into every entry.
## At depth 4 each entry name is recorded (with an empty string value):
##   in %$allportusers if <entry>/file_permissions.db exists,
##   in %$alldomusers  if <entry>/passwd exists.
##
## Inputs: $dom           domain (only passed along to recursive calls)
##         $dir           directory to examine
##         $depth         number of levels already descended (start with 0)
##         $allportusers  hash reference, filled in place
##         $alldomusers   hash reference, filled in place
## Returns: nothing useful.
##
## A lexical directory handle is used so that recursive calls can never
## share a handle; a directory that cannot be opened is skipped.
sub descend_tree {
    my ($dom,$dir,$depth,$allportusers,$alldomusers) = @_;
    return unless (-d $dir);
    opendir(my $dirhandle,$dir) or return;
    my @contents = grep { !/^\./ } readdir($dirhandle);
    closedir($dirhandle);
    $depth++;
    foreach my $item (@contents) {
        my $path = $dir.'/'.$item;
        if ($depth < 4) {
            &descend_tree($dom,$path,$depth,$allportusers,$alldomusers);
            next;
        }
        if (-e $path.'/file_permissions.db') {
            $allportusers->{$item} = '';
        }
        if (-e $path.'/passwd') {
            $alldomusers->{$item} = '';
        }
    }
    return;
}
########################################################
########################################################
### ###
### File::Find support routines ###
### ###
########################################################
########################################################
##
## &only_meta_files
##
## Called by File::Find as its 'preprocess' hook.
## Takes the list of entry names found in $File::Find::dir and returns the
## ones worth visiting, in their original order:
##   - names ending in ".meta", unless they look like the metadata of a
##     prior version (".<digits>.<extension>.meta");
##   - anything that is a directory.
sub only_meta_files {
    return grep {
        (/\.meta$/ && !/\.\d+\.[^\.]+\.meta$/)   # current-version metadata file
            || (-d $File::Find::dir."/".$_)      # directories are okay
    } @_;
}
##
##
## Debugging routines, use these for 'wanted' in the File::Find call
##
# Debugging 'wanted' callback: when $debug is set, logs at level 5 whether
# the current entry ($_) is a directory or a file, together with its full
# name from $File::Find::name.  $_ is assigned its saved value on the way out.
sub print_filename {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    if ($debug) {
        my $label = (-d $file) ? " Got directory " : " Got file ";
        &log(5,$label.$fullfilename);
    }
    $_=$file;
}
# Debugging 'wanted' callback: when $debug is set, logs (level 6) the full
# file name and every key/value pair returned by &metadata for it, then
# counts the file's copyright value.  Directories are ignored.  When
# &metadata returns undef, " No data" is logged and the routine returns.
sub log_metadata {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    return if (-d $fullfilename); # nothing to report for directories
    if ($debug) {
        &log(6,$fullfilename);
        my $ref = &metadata($fullfilename);
        unless (defined($ref)) {
            &log(6," No data");
            return;
        }
        foreach my $key (keys(%{$ref})) {
            &log(6," ".$key." => ".$ref->{$key});
        }
        &count_copyright($ref->{'copyright'});
    }
    $_=$file;
}
##
## process_meta_file
## Called by File::Find as the 'wanted' callback.
## Only input is the filename in $_ (the full name is taken from
## $File::Find::name).
##
## Reads the .meta file, merges in the dynamic metadata for the resource
## and, unless simulating, stores the combined record in the new metadata
## table.  Nothing is stored for directories, for files whose metadata
## cannot be read, for obsolete resources or for resources with copyright
## 'private'.  Every non-obsolete resource is counted by copyright.
sub process_meta_file {
    my ($file) = $_;
    my $filename = $File::Find::name; # full filename
    return if (-d $filename); # No need to do anything here for directories
    #
    &log(3,$filename) if ($debug);
    #
    my $ref = &metadata($filename);
    if (! defined($ref)) {
        # &metadata returns undef when the file contents could not be
        # obtained; storing a record built from nothing would be useless.
        &log(0,"Unable to read metadata from ".$filename);
        return;
    }
    #
    # $url is the original file url, not the metadata file
    my $target = $filename;
    $target =~ s/\.meta$//;
    my $url='/res/'.&declutter($target);
    &log(3," ".$url) if ($debug);
    #
    # Ignore some files based on their metadata
    if ($ref->{'obsolete'}) {
        &log(3,"obsolete") if ($debug);
        return;
    }
    # Private resources are counted but not stored.
    &count_copyright($ref->{'copyright'});
    if ($ref->{'copyright'} eq 'private') {
        &log(3,"private") if ($debug);
        return;
    }
    #
    # Find the dynamic metadata
    my %dyn;
    if ($url=~ m:/default$:) {
        # A trailing "/default" is reduced to "/"; no dynamic data is
        # looked up and the type is not counted for these.
        $url=~ s:/default$:/:;
        &log(3,"Skipping dynamic data") if ($debug);
    } else {
        &log(3,"Retrieving dynamic data") if ($debug);
        %dyn=&get_dynamic_metadata($url);
        &count_type($url);
    }
    &LONCAPA::lonmetadata::getfiledates($ref,$target);
    #
    # Later entries win: dynamic data overrides the file's metadata, and
    # 'url'/'version' override both.
    my %Data = (
                %$ref,
                %dyn,
                'url'=>$url,
                'version'=>'current');
    if (! $simulate) {
        my ($count,$err) =
            &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'},
                                                  'metadata',\%Data);
        if ($err) {
            &log(0,"MySQL Error Insert: ".$err);
        }
        # $count may be undefined when the insert failed.
        if (! defined($count) || $count < 1) {
            &log(0,"Unable to insert record into MySQL database for $url");
        }
    }
    #
    # Reset $_ before leaving
    $_ = $file;
}
########################################################
########################################################
### ###
### &metadata($uri) ###
### Retrieve metadata for the given file ###
### ###
########################################################
########################################################
##
## Input:  $uri - name of a resource or of its .meta file; it is passed
##         through &declutter, ".meta" is appended when missing, and the
##         file is read from <lonDocRoot>/res/.
## Returns: undef if the file contents could not be obtained, otherwise a
##         reference to a hash holding, for every start tag found:
##           'keys'              comma separated list of all entry keys
##           '<key>'             the text up to the matching end tag, or
##                               the tag's 'default' attribute if that
##                               text is empty/false
##           '<key>.<attribute>' the value of each attribute of the tag
##         where <key> is the tag name, followed by "_<part>" and
##         "_<name>" when the tag carries those attributes.
sub metadata {
    my ($uri) = @_;
    my $filename = &declutter($uri);
    $filename .= '.meta' if ($filename !~ /\.meta$/);
    my $metastring =
        &LONCAPA::lonmetadata::getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename);
    return undef if (! defined($metastring));
    my %metacache=();
    my $parser=HTML::TokeParser->new(\$metastring);
    while (my $token=$parser->get_token) {
        next if ($token->[0] ne 'S');   # only start tags are of interest
        my ($entry,$attributes,$attrnames) = @{$token}[1..3];
        my $unikey=$entry;
        $unikey.='_'.$attributes->{'part'} if (defined($attributes->{'part'}));
        $unikey.='_'.$attributes->{'name'} if (defined($attributes->{'name'}));
        if ($metacache{'keys'}) {
            $metacache{'keys'}.=','.$unikey;
        } else {
            $metacache{'keys'}=$unikey;
        }
        foreach my $attr (@{$attrnames}) {
            $metacache{$unikey.'.'.$attr}=$attributes->{$attr};
        }
        # Fall back to the tag's default attribute when there is no text.
        if (! ($metacache{$unikey}=$parser->get_text('/'.$entry))) {
            $metacache{$unikey} = $metacache{$unikey.'.default'};
        }
    }
    return \%metacache;
}
########################################################
########################################################
### ###
### Dynamic Metadata ###
### ###
########################################################
########################################################
##
## Dynamic metadata description (incomplete)
##
## For a full description of all fields,
## see LONCAPA::lonmetadata
##
## Field Type
##-----------------------------------------------------------
## count integer
## course integer
## course_list comma separated list of course ids
## avetries real
## avetries_list comma separated list of real numbers
## stdno real
## stdno_list comma separated list of real numbers
## usage integer
## usage_list comma separated list of resources
## goto scalar
## goto_list comma separated list of resources
## comefrom scalar
## comefrom_list comma separated list of resources
## difficulty real
## difficulty_list comma separated list of real numbers
## sequsage scalar
## sequsage_list comma separated list of resources
## clear real
## technical real
## correct real
## helpful real
## depth real
## comments html of all the comments made
##
{
    # Per-author state shared by the two routines below; it is replaced on
    # every call to &process_dynamic_metadata.
    my %DynamicData;
    my %Counts;

    ##
    ## &process_dynamic_metadata($user,$dom)
    ## Loads the evaluation data (nohist_resevaldata.db) and the access
    ## counts (nohist_accesscount.db) from the directory given by
    ## &propath($dom,$user).
    ## Returns 1 on success, 0 if either database file could not be tied.
    sub process_dynamic_metadata {
        my ($user,$dom) = @_;
        undef(%DynamicData);
        undef(%Counts);
        #
        my $prodir = &propath($dom,$user);
        #
        # Read in the dynamic metadata
        my %evaldata;
        if (! tie(%evaldata,'GDBM_File',
                  $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
            return 0;
        }
        #
        %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
        untie(%evaldata);
        $DynamicData{'domain'} = $dom;
        #print('user = '.$user.' domain = '.$dom.$/);
        #
        # Read in the access count data
        &log(7,'Reading access count data') if ($debug);
        my %countdata;
        if (! tie(%countdata,'GDBM_File',
                  $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
            return 0;
        }
        while (my ($key,$count) = each(%countdata)) {
            # Keep only keys starting with this domain.  The domain is
            # quoted so that characters such as '.' are matched literally
            # instead of being read as regular expression syntax.
            next if ($key !~ /^\Q$dom\E/);
            $key = &unescape($key);
            &log(8,' Count '.$key.' = '.$count) if ($debug);
            $Counts{$key}=$count;
        }
        untie(%countdata);
        if ($debug) {
            &log(7,scalar(keys(%Counts)).
                 " Counts read for ".$user."@".$dom);
            &log(7,scalar(keys(%DynamicData)).
                 " Dynamic metadata read for ".$user."@".$dom);
        }
        #
        return 1;
    }

    ##
    ## &get_dynamic_metadata($url)
    ## Returns a hash of the dynamic metadata for $url (a leading "/res/"
    ## is removed first), including the access count under 'count'.
    sub get_dynamic_metadata {
        my ($url) = @_;
        $url =~ s:^/res/::;
        my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                                   \%DynamicData);
        # find the count
        $data{'count'} = $Counts{$url};
        #
        # Log the dynamic metadata
        if ($debug) {
            while (my($k,$v)=each(%data)) {
                &log(8," ".$k." => ".$v);
            }
        }
        return %data;
    }
} # End of %DynamicData and %Counts scope
########################################################
########################################################
### ###
### Counts ###
### ###
########################################################
########################################################
{
    # Number of resources seen per (lower-cased) file extension.
    my %countext;

    ##
    ## &count_type($file)
    ## Increments the counter for the extension of $file.  Names without
    ## an extension are not counted; previously such a name reused
    ## whatever an earlier, unrelated match had left in $1.
    sub count_type {
        my $file=shift;
        return unless (defined($file) && $file =~ /\.(\w+)$/);
        $countext{lc($1)}++;
        return;
    }

    ##
    ## &write_type_count()
    ## Writes the counters as "<extension>=<count>&" pairs followed by
    ## "time=<epoch seconds>" to the lon-status area.  A failure to open
    ## the file is reported on STDERR and nothing is written.
    sub write_type_count {
        my $statusfile = '/home/httpd/html/lon-status/rescount.txt';
        my $fh;
        if (! open($fh,'>',$statusfile)) {
            warn "searchcat.pl: unable to write $statusfile: $!\n";
            return;
        }
        while (my ($extension,$count) = each(%countext)) {
            print $fh $extension.'='.$count.'&';
        }
        print $fh 'time='.time."\n";
        close($fh);
        return;
    }
} # end of scope for %countext
{
    # Number of resources seen per copyright value.
    my %copyrights;

    ##
    ## &count_copyright($copyright)
    ## Increments the counter for the given copyright value.  An undefined
    ## value is counted under the empty string.
    sub count_copyright {
        my ($copyright) = @_;
        $copyright = '' if (! defined($copyright));
        $copyrights{$copyright}++;
        return;
    }

    ##
    ## &write_copyright_count()
    ## Writes the counters as "<copyright>=<count>&" pairs followed by
    ## "time=<epoch seconds>" to the lon-status area.  A failure to open
    ## the file is reported on STDERR and nothing is written.
    sub write_copyright_count {
        my $statusfile = '/home/httpd/html/lon-status/copyrightcount.txt';
        my $fh;
        if (! open($fh,'>',$statusfile)) {
            warn "searchcat.pl: unable to write $statusfile: $!\n";
            return;
        }
        while (my ($copyright,$count) = each(%copyrights)) {
            print $fh $copyright.'='.$count.'&';
        }
        print $fh 'time='.time."\n";
        close($fh);
        return;
    }
} # end of scope for %copyrights
########################################################
########################################################
### ###
### Miscellaneous Utility Routines ###
### ###
########################################################
########################################################
##
## &ishome($path)
## $path is a filesystem path of the form <lonDocRoot>/res/<domain>/<name>,
## optionally followed by further path components.
## Returns 1 if the directory given by propath(<domain>,<name>) exists,
## 0 otherwise.
## (copied from lond, modification of the return value)
##
## The document root is taken from the lonDocRoot setting, which is what
## the caller in this file uses to build the path; /home/httpd/html is
## used when that setting is empty.
sub ishome {
    my $author=shift;
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    if (! defined($docroot) || $docroot eq '') {
        $docroot = '/home/httpd/html';
    }
    # Reduce the path to "<domain>/<name>".
    $author=~s{\Q$docroot\E/res/([^/]*)/([^/]*).*}{$1/$2};
    my ($udom,$uname)=split(/\//,$author);
    # Without both a domain and a name there is nothing to look up.
    return 0 if (! defined($udom) || ! defined($uname));
    my $proname=propath($udom,$uname);
    if (-e $proname) {
        return 1;
    } else {
        return 0;
    }
}
##
## &declutter($filename)
## Given a filename, returns a url for the filename: a leading document
## root (the lonDocRoot setting), then one leading "/", then a leading
## "res/" are removed, in that order.
##
## The document root is matched literally; it is quoted so that characters
## such as '.' in the configured path are not read as regular expression
## syntax.
sub declutter {
    my $thisfn=shift;
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    if (defined($docroot) && $docroot ne '') {
        $thisfn=~s/^\Q$docroot\E//;
    }
    $thisfn=~s/^\///;
    $thisfn=~s/^res\///;
    return $thisfn;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>