1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
4: #
5: # $Id: searchcat.pl,v 1.61 2005/03/09 18:22:19 matthew Exp $
6: #
7: # Copyright Michigan State University Board of Trustees
8: #
9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
10: #
11: # LON-CAPA is free software; you can redistribute it and/or modify
12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
16: # LON-CAPA is distributed in the hope that it will be useful,
17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
22: # along with LON-CAPA; if not, write to the Free Software
23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
27: # http://www.lon-capa.org/
28: #
29: ###
30:
31: =pod
32:
33: =head1 NAME
34:
35: B<searchcat.pl> - put authoritative filesystem data into sql database.
36:
37: =head1 SYNOPSIS
38:
39: Ordinarily this script is to be called from a loncapa cron job
40: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
41: filesystem installation location: F</etc/cron.d/loncapa>).
42:
43: Here is the cron job entry.
44:
45: C<# Repopulate and refresh the metadata database used for the search catalog.>
46: C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
47:
48: This script only allows itself to be run as the user C<www>.
49:
50: =head1 DESCRIPTION
51:
52: This script goes through a loncapa resource directory and gathers metadata.
53: The metadata is entered into a SQL database.
54:
55: This script also does general database maintenance such as reformatting
56: the C<loncapa:metadata> table if it is deprecated.
57:
58: This script evaluates dynamic metadata from the authors'
59: F<nohist_resevaldata.db> database file in order to store it in MySQL.
60:
61: This script is playing an increasingly important role for a loncapa
62: library server. The proper operation of this script is critical for a smooth
63: and correct user experience.
64:
65: =cut
66:
67: use strict;
68:
69: use DBI;
70: use lib '/home/httpd/lib/perl/';
71: use LONCAPA::Configuration;
72: use LONCAPA::lonmetadata;
73:
74: use Getopt::Long;
75: use IO::File;
76: use HTML::TokeParser;
77: use GDBM_File;
78: use POSIX qw(strftime mktime);
79:
80: use File::Find;
81:
82: #
83: # Set up configuration options
#
# Command line state shared by the rest of the script:
#   $simulate - true: gather everything but never store or publish records
#   $oneuser  - restrict the run to a single author (forces $simulate)
#   $verbose  - numeric threshold consulted by &log
#   $debug    - enables the additional diagnostic &log calls
#   $logfile  - declared here but not set by any of the options below
my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
my $options_ok = GetOptions(
    'help'      => \$help,
    'simulate'  => \$simulate,
    'only=s'    => \$oneuser,
    'verbose=s' => \$verbose,
    'debug'     => \$debug,
);

# GetOptions returns false when it met an option it could not parse.
# Stop here: a mistyped "-simulate" must not turn into a real run that
# rewrites the metadata table.
if (! $options_ok) {
    print STDERR "$0: invalid command line options, try -help\n";
    exit 1;
}

if ($help) {
    print <<"ENDHELP";
$0
Rebuild and update the LON-CAPA metadata database.
Options:
-help Print this help
-simulate Do not modify the database.
-only=user Only compute for the given user. Implies -simulate
-verbose=val Sets logging level, val must be a number
-debug Turns on debugging output
ENDHELP
    exit 0;
}

# Options that were not given default to "off".
$debug   = 0 if (! defined($debug));
$verbose = 0 if (! defined($verbose));

# A single-user run would otherwise publish a table that only holds that
# user's records, so it is always treated as a simulation.
$simulate = 1 if (defined($oneuser));
118:
119: ##
120: ## Use variables for table names so we can test this routine a little easier
my $oldname = 'metadata';            # table read by the search catalog
my $newname = 'newmetadata'.$$;      # append pid to have unique temporary table

#
# Read loncapa_apache.conf and loncapa.conf
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar=%{$perlvarref};
undef $perlvarref;
delete $perlvar{'lonReceipt'}; # remove since sensitive (really?) & not needed
#
# Only run if machine is a library server
exit if ($perlvar{'lonRole'} ne 'library');
#
# Make sure this process is running from user=www.
# getpwnam returns undef when the account does not exist; undef compares
# numerically equal to 0, so without the defined() test root (uid 0) would
# be accepted on a machine that has no "www" user.
my $wwwid=getpwnam('www');
if (! defined($wwwid) || $wwwid != $<) {
    my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
    my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
    # Notify the administrators by mail; the newline after the pipe is
    # accepted by the shell as a continuation of the pipeline.
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
 mailto $emailto -s '$subj' > /dev/null");
    exit 1;
}
143: #
144: # Let people know we are running
# The LOG handle is the one written to by &log; it stays open until the
# end of the run.  A failure to open it is reported on STDERR but does not
# abort the run, because logging is not essential to rebuilding the table.
my $searchcat_log = $perlvar{'lonDaemons'}.'/logs/searchcat.log';
open(LOG,'>>',$searchcat_log)
    or warn "searchcat.pl: cannot append to $searchcat_log: $!\n";
&log(0,'==== Searchcat Run '.localtime()."====");

# Record the effective options when debugging.
if ($debug) {
    &log(0,'simulating') if ($simulate);
    &log(0,'only processing user '.$oneuser) if ($oneuser);
    &log(0,'verbosity level = '.$verbose);
}
154: #
155: # Connect to database
# RaiseError and PrintError are off, so every DBI call below has to be
# checked by hand.
my $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},
                       { RaiseError =>0,PrintError=>0});
if (! $dbh) {
    &log(0,"Cannot connect to database!");
    die "MySQL Error: Cannot connect to database!\n";
}
# This can return an error and still be okay, so we do not bother checking.
# (perhaps it should be more robust and check for specific errors)
$dbh->do('DROP TABLE IF EXISTS '.$newname);
#
# Create the new table
my $request = &LONCAPA::lonmetadata::create_metadata_storage($newname);
$dbh->do($request);
if ($dbh->err) {
    # Copy the message first: DBI resets err/errstr at the start of the
    # next method call, so reading it after disconnect() would lose it.
    my $create_error = $dbh->errstr;
    &log(0,"MySQL Error Create: ".$create_error);
    $dbh->disconnect();
    die $create_error;
}
174: #
175: # find out which users we need to examine
# Only the default domain of this server is examined.
my $dom = $perlvar{'lonDefDomain'};
my @homeusers;
if ($oneuser) {
    # -only=user: no need to scan the resource directory at all.
    @homeusers=($oneuser);
} else {
    # Every entry below <lonDocRoot>/res/<domain> is a candidate author;
    # keep the ones &ishome accepts.
    my $domaindir = "$perlvar{'lonDocRoot'}/res/$dom";
    if (opendir(my $resources,$domaindir)) {
        @homeusers =
            grep {
                &ishome("$domaindir/$_");
            } grep {
                !/^\.\.?$/;
            } readdir($resources);
        closedir($resources);
    } else {
        # Previously a failed opendir was indistinguishable from an
        # empty domain; say why no users are going to be processed.
        &log(0,"Unable to read resource directory ".$domaindir.": ".$!);
    }
}
189: #
190: # Loop through the users
# For each author: load the dynamic metadata for that author, then walk the
# author's resource tree and hand every current .meta file to
# &process_meta_file.
for my $user (@homeusers) {
    &log(0,"=== User: ".$user);
    &process_dynamic_metadata($user,$dom);
    #
    # File::Find callbacks.  For debugging, 'wanted' can be pointed at
    # \&print_filename or \&log_metadata instead.
    my %callbacks = (
        preprocess => \&only_meta_files,
        wanted     => \&process_meta_file,
    );
    my $userroot = "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user";
    find(\%callbacks,$userroot);
}
204: #
205: # Rename the table
if ($simulate) {
    # Nothing was stored during a simulation, but the scratch table was
    # still created.  Its name contains the pid, so no later run would
    # ever reuse or drop it; remove it now instead of leaking one table
    # per simulated run.
    $dbh->do('DROP TABLE IF EXISTS '.$newname);
} else {
    # Replace the live table with the freshly built one.
    # NOTE(review): the old table is dropped before the rename, so a failed
    # rename leaves no metadata table at all.
    $dbh->do('DROP TABLE IF EXISTS '.$oldname);
    if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) {
        &log(0,"MySQL Error Rename: ".$dbh->errstr);
        die $dbh->errstr;
    } else {
        &log(1,"MySQL table rename successful.");
    }
}

if (! $dbh->disconnect) {
    &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
    die $dbh->errstr;
}
##
## Finished!
&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);

# The per-extension and per-copyright tallies gathered during the walk are
# written out last, after the log has been closed.
&write_type_count();
&write_copyright_count();

exit 0;
229:
230: ##
231: ## Status logging routine. Inputs: $level, $message
232: ##
233: ## $level 0 should be used for normal output and error messages
234: ##
235: ## $message does not need to end with \n. In the case of errors
236: ## the message should contain as much information as possible to
237: ## help in diagnosing the problem.
238: ##
sub log {
    # Inputs: $level   - importance threshold, undef is treated as 0
    #         $message - text to record; the record separator is appended
    # The message is written to the LOG handle only when the script-wide
    # $verbose setting is at least $level.
    my ($level,$message)=@_;
    $level = 0 unless (defined($level));
    return if ($verbose < $level);
    print LOG $message.$/;
}
246:
247: ########################################################
248: ########################################################
249: ### ###
250: ### File::Find support routines ###
251: ### ###
252: ########################################################
253: ########################################################
254: ##
255: ## &only_meta_files
256: ##
257: ## Called by File::Find.
258: ## Takes a list of files/directories in and returns a list of files/directories
259: ## to search.
sub only_meta_files {
    # Input:  the directory entries File::Find is about to visit.
    # Output: the entries worth visiting, in their original order:
    #   - directories (so that the walk can descend into them), and
    #   - names ending in ".meta" that do not carry a numeric version
    #     component (".<digits>.<ext>.meta"), i.e. metadata of the
    #     current version only.
    return grep {
        (-d $_) || ( /\.meta$/ && !/\.\d+\.[^\.]+\.meta$/ );
    } @_;
}
273:
274: ##
275: ##
276: ## Debugging routines, use these for 'wanted' in the File::Find call
277: ##
sub print_filename {
    # Debugging 'wanted' callback for File::Find: when $debug is set, logs
    # (at level 5) whether the current entry is a directory or a file,
    # together with its full name.  $_ is put back before returning.
    my $entry = $_;
    my $fullname = $File::Find::name;
    if ($debug) {
        my $label = (-d $entry) ? " Got directory " : " Got file ";
        &log(5,$label.$fullname);
    }
    $_ = $entry;
}
290:
sub log_metadata {
    # Debugging 'wanted' callback for File::Find.  Directories are ignored.
    # When $debug is set, the metadata of the current file is parsed with
    # &metadata, every key/value pair is logged at level 6 and the
    # copyright is added to the copyright tally.
    my $entry = $_;
    my $path  = $File::Find::name;
    return if (-d $path); # nothing to report for directories
    if ($debug) {
        &log(6,$path);
        my $meta = &metadata($path);
        unless (defined($meta)) {
            &log(6," No data");
            return;
        }
        foreach my $field (keys(%$meta)) {
            &log(6," ".$field." => ".$meta->{$field});
        }
        &count_copyright($meta->{'copyright'});
    }
    $_ = $entry;
}
309:
310: ##
311: ## process_meta_file
312: ## Called by File::Find.
313: ## Only input is the filename in $_.
sub process_meta_file {
    # 'wanted' callback for File::Find.  The entry name arrives in $_ and
    # the full path in $File::Find::name.
    #
    # The helpers called while handling a file may overwrite the global $_.
    # Saving it here and restoring it after the helper returns guarantees
    # that it is put back on every path, including the early exits for
    # directories, obsolete and private resources.
    my $file = $_;
    &_store_meta_record($File::Find::name);
    $_ = $file;
}

# Private helper: read one .meta file and, unless simulating, store its
# record (static plus dynamic metadata) in the scratch table.
sub _store_meta_record {
    my ($filename) = @_;
    return if (-d $filename); # No need to do anything here for directories
    #
    &log(3,$filename) if ($debug);
    #
    my $ref=&metadata($filename);
    if (! defined($ref)) {
        # &metadata returns undef when the file could not be read; do not
        # invent an empty record for it.
        &log(0,"Unable to read metadata from ".$filename);
        return;
    }
    #
    # $url is the original file url, not the metadata file
    my $target = $filename;
    $target =~ s/\.meta$//;
    my $url='/res/'.&declutter($target);
    &log(3," ".$url) if ($debug);
    #
    # Ignore some files based on their metadata
    if ($ref->{'obsolete'}) {
        &log(3,"obsolete") if ($debug);
        return;
    }
    # Private resources are counted in the copyright tally but not stored.
    &count_copyright($ref->{'copyright'});
    if ($ref->{'copyright'} eq 'private') {
        &log(3,"private") if ($debug);
        return;
    }
    #
    # Find the dynamic metadata.  A url ending in "/default" is rewritten
    # to end in "/" and gets neither dynamic data nor a type count.
    my %dyn;
    if ($url=~ m:/default$:) {
        $url=~ s:/default$:/:;
        &log(3,"Skipping dynamic data") if ($debug);
    } else {
        &log(3,"Retrieving dynamic data") if ($debug);
        %dyn=&get_dynamic_metadata($url);
        &count_type($url);
    }
    #
    # Missing or blank dates fall back to the modification time of the
    # resource itself; both dates are then converted to MySQL format.
    foreach my $field ('creationdate','lastrevisiondate') {
        if (! defined($ref->{$field}) ||
            $ref->{$field} =~ /^\s*$/) {
            $ref->{$field} = (stat($target))[9];
        }
        $ref->{$field} = &sqltime($ref->{$field});
    }
    # Later entries win: dynamic data overrides static data, and 'url' and
    # 'version' override both.
    my %Data = (
                %$ref,
                %dyn,
                'url'=>$url,
                'version'=>'current');
    if (! $simulate) {
        my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname,
                                                                 \%Data);
        if ($err) {
            &log(0,"MySQL Error Insert: ".$err);
        }
        if ($count < 1) {
            &log(0,"Unable to insert record into MySQL database for $url");
        }
    }
    return;
}
380:
381: ########################################################
382: ########################################################
383: ### ###
384: ### &metadata($uri) ###
385: ### Retrieve metadata for the given file ###
386: ### ###
387: ########################################################
388: ########################################################
sub metadata {
    # Input:  path (or url) of a resource or of its .meta file.
    # Output: reference to a hash describing the metadata file, or undef
    #         when &getfile returns undef for it.
    #
    # For every start tag <entry part="P" name="N" ...>text</entry> the
    # hash receives, with KEY = entry[_P][_N]:
    #   KEY        => the text up to the matching end tag, or the value of
    #                 the 'default' attribute when that text is false
    #   KEY.attr   => the value of each attribute of the tag
    #   keys       => comma separated list of every KEY seen
    my ($uri)=@_;
    my $filename = &declutter($uri);
    $filename .= '.meta' unless ($filename =~ /\.meta$/);
    my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
    return undef if (! defined($metastring));
    my %metacache=();
    my $parser=HTML::TokeParser->new(\$metastring);
    while (my $token=$parser->get_token) {
        next if ($token->[0] ne 'S'); # only start tags carry data
        my $entry      = $token->[1];
        my $attributes = $token->[2];
        # Build the unique key for this entry.
        my $unikey = $entry;
        foreach my $qualifier ('part','name') {
            if (defined($attributes->{$qualifier})) {
                $unikey .= '_'.$attributes->{$qualifier};
            }
        }
        # Maintain the list of keys.
        if ($metacache{'keys'}) {
            $metacache{'keys'} .= ','.$unikey;
        } else {
            $metacache{'keys'} = $unikey;
        }
        # Attributes, in the order they appeared in the tag.
        foreach my $attrname (@{$token->[3]}) {
            $metacache{$unikey.'.'.$attrname} = $attributes->{$attrname};
        }
        # Element text, with the 'default' attribute as fallback.
        my $text = $parser->get_text('/'.$entry);
        $metacache{$unikey} = $text;
        if (! $text) {
            $metacache{$unikey} = $metacache{$unikey.'.default'};
        }
    }
    return \%metacache;
}
429:
430: ##
431: ## &getfile($filename)
432: ## Slurps up an entire file into a scalar.
433: ## Returns undef if the file does not exist
sub getfile {
    # Input:  $file - name of the file to read.
    # Output: the complete contents as one string ('' for an empty file),
    #         or undef when the file does not exist or cannot be opened.
    #
    # The file is opened with an explicit read mode so that the name is
    # never interpreted as an open() mode or pipe, and it is read through a
    # localized $/ rather than a bare "while (<$fh>)", which would
    # overwrite the caller's $_.
    my $file = shift();
    if (! -e $file ) {
        return undef;
    }
    open(my $fh,'<',$file) or return undef;
    local $/;               # slurp mode, restored when the sub returns
    my $contents = <$fh>;
    close($fh);
    return defined($contents) ? $contents : '';
}
446:
447: ########################################################
448: ########################################################
449: ### ###
450: ### Dynamic Metadata ###
451: ### ###
452: ########################################################
453: ########################################################
454: ##
455: ## Dynamic metadata description (incomplete)
456: ##
457: ## For a full description of all fields,
458: ## see LONCAPA::lonmetadata
459: ##
460: ## Field Type
461: ##-----------------------------------------------------------
462: ## count integer
463: ## course integer
464: ## course_list comma separated list of course ids
465: ## avetries real
466: ## avetries_list comma separated list of real numbers
467: ## stdno real
468: ## stdno_list comma separated list of real numbers
469: ## usage integer
470: ## usage_list comma separated list of resources
471: ## goto scalar
472: ## goto_list comma separated list of resources
473: ## comefrom scalar
474: ## comefrom_list comma separated list of resources
475: ## difficulty real
476: ## difficulty_list comma separated list of real numbers
477: ## sequsage scalar
478: ## sequsage_list comma separated list of resources
479: ## clear real
480: ## technical real
481: ## correct real
482: ## helpful real
483: ## depth real
484: ## comments html of all the comments made
485: ##
{

# State shared by the two routines below; both hashes are rebuilt for every
# user by &process_dynamic_metadata.
my %DynamicData;   # result of LONCAPA::lonmetadata::process_reseval_data
my %Counts;        # unescaped access-count key => count

##
## &process_dynamic_metadata($user,$dom)
## Loads nohist_resevaldata.db and nohist_accesscount.db from the user's
## directory.  Returns 1 when both files could be read, 0 as soon as one of
## them cannot be tied (data loaded before that point is kept).
sub process_dynamic_metadata {
    my ($user,$dom) = @_;
    %DynamicData = ();
    %Counts = ();
    #
    my $prodir = &propath($dom,$user);
    #
    # Read in the dynamic metadata
    my %evaldata;
    tie(%evaldata,'GDBM_File',
        $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)
        or return 0;
    %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
    untie(%evaldata);
    #
    # Read in the access count data
    &log(7,'Reading access count data') if ($debug);
    my %countdata;
    tie(%countdata,'GDBM_File',
        $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)
        or return 0;
    while (my ($rawkey,$count) = each(%countdata)) {
        # Only keys that start with the domain are of interest.
        next unless ($rawkey =~ /^$dom/);
        my $key = &unescape($rawkey);
        &log(8,' Count '.$key.' = '.$count) if ($debug);
        $Counts{$key}=$count;
    }
    untie(%countdata);
    if ($debug) {
        my $who = $user."@".$dom;
        &log(7,scalar(keys(%Counts)).
             " Counts read for ".$who);
        &log(7,scalar(keys(%DynamicData)).
             " Dynamic metadata read for ".$who);
    }
    #
    return 1;
}

##
## &get_dynamic_metadata($url)
## Returns the dynamic metadata for $url (a leading /res/ is ignored) as a
## hash, including the access count under 'count'.  Returns an empty list
## when no evaluation data was loaded for the url.
sub get_dynamic_metadata {
    my ($url) = @_;
    $url =~ s:^/res/::;
    unless (exists($DynamicData{$url})) {
        &log(7,' No dynamic data for '.$url) if ($debug);
        return ();
    }
    my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                               \%DynamicData);
    # find the count
    $data{'count'} = $Counts{$url};
    #
    # Log the dynamic metadata
    if ($debug) {
        foreach my $field (keys(%data)) {
            &log(8," ".$field." => ".$data{$field});
        }
    }
    return %data;
}

} # End of %DynamicData and %Counts scope
554:
555: ########################################################
556: ########################################################
557: ### ###
558: ### Counts ###
559: ### ###
560: ########################################################
561: ########################################################
{

# Number of resources seen per lower-cased file extension.
my %countext;

##
## &count_type($file)
## Adds one to the tally for the extension of $file.  Names without an
## extension are tallied under the empty string.
sub count_type {
    my $file=shift;
    # $1 is only meaningful after a successful match; without this test a
    # name with no extension would be counted under whatever an earlier,
    # unrelated match left behind in $1.
    my $ext = '';
    if (defined($file) && $file =~ /\.(\w+)$/) {
        $ext = lc($1);
    }
    $countext{$ext}++;
}

##
## &write_type_count([$outfile])
## Writes the tally as "ext=count&...&time=<epoch>" followed by a newline.
## $outfile defaults to the lon-status location.  Returns 1 on success and
## 0 (after a warning on STDERR) when the file cannot be written.
sub write_type_count {
    my ($outfile) = @_;
    if (! defined($outfile)) {
        $outfile = '/home/httpd/html/lon-status/rescount.txt';
    }
    my $rescount;
    if (! open($rescount,'>',$outfile)) {
        warn "searchcat.pl: cannot write $outfile: $!\n";
        return 0;
    }
    while (my ($extension,$count) = each(%countext)) {
        print $rescount $extension.'='.$count.'&';
    }
    print $rescount 'time='.time."\n";
    # Buffered write errors only show up at close.
    if (! close($rescount)) {
        warn "searchcat.pl: error closing $outfile: $!\n";
        return 0;
    }
    return 1;
}

} # end of scope for %countext
583:
{

# Number of resources seen per copyright value.
my %copyrights;

##
## &count_copyright($copyright)
## Adds one to the tally for $copyright.  An undefined value is tallied
## under the empty string.
sub count_copyright {
    # $_[0], not the one-element slice @_[0], is the scalar argument.
    my $copyright = $_[0];
    $copyright = '' if (! defined($copyright));
    $copyrights{$copyright}++;
}

##
## &write_copyright_count([$outfile])
## Writes the tally as "copyright=count&...&time=<epoch>" followed by a
## newline.  $outfile defaults to the lon-status location.  Returns 1 on
## success and 0 (after a warning on STDERR) when the file cannot be
## written.
sub write_copyright_count {
    my ($outfile) = @_;
    if (! defined($outfile)) {
        $outfile = '/home/httpd/html/lon-status/copyrightcount.txt';
    }
    my $copycount;
    if (! open($copycount,'>',$outfile)) {
        warn "searchcat.pl: cannot write $outfile: $!\n";
        return 0;
    }
    while (my ($copyright,$count) = each(%copyrights)) {
        print $copycount $copyright.'='.$count.'&';
    }
    print $copycount 'time='.time."\n";
    # Buffered write errors only show up at close.
    if (! close($copycount)) {
        warn "searchcat.pl: error closing $outfile: $!\n";
        return 0;
    }
    return 1;
}

} # end of scope for %copyrights
602:
603: ########################################################
604: ########################################################
605: ### ###
606: ### Miscellaneous Utility Routines ###
607: ### ###
608: ########################################################
609: ########################################################
610: ##
611: ## &ishome($path)
612: ## Given a resource path (.../res/domain/user/...), returns 1 if the LON-CAPA user directory for that domain/user exists, 0 otherwise
613: ## (copied from lond, modification of the return value)
sub ishome {
    # Input:  a path of the form <lonDocRoot>/res/<domain>/<user>[/...]
    # Output: 1 when the directory returned by &propath for that
    #         domain/user exists, 0 otherwise.
    #
    # The prefix that is stripped is the configured document root, the same
    # value the caller used to build the path; the historical location is
    # only a fallback for a missing setting.
    my $author=shift;
    my $docroot = $perlvar{'lonDocRoot'};
    if (! defined($docroot) || $docroot eq '') {
        $docroot = '/home/httpd/html';
    }
    $author=~s/\Q$docroot\E\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
    my ($udom,$uname)=split(/\//,$author);
    my $proname=propath($udom,$uname);
    return (-e $proname) ? 1 : 0;
}
625:
626: ##
627: ## &propath($udom,$uname)
628: ## Returns the path to the users LON-CAPA directory
629: ## (copied from lond)
sub propath {
    # Inputs: $udom, $uname - domain and user name; every non-word
    #         character is removed from both before use.
    # Output: <lonUsersDir>/<domain>/<c1>/<c2>/<c3>/<user>, where c1..c3
    #         are the first three characters of the user name padded with
    #         underscores.
    my ($udom,$uname)=@_;
    foreach my $part ($udom,$uname) {
        $part =~ s/\W//g;   # the loop variable aliases $udom / $uname
    }
    my $subdir = $uname.'__';
    $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
    return join('/',$perlvar{'lonUsersDir'},$udom,$subdir,$uname);
}
639:
640: ##
641: ## &sqltime($timestamp)
642: ##
643: ## Convert perl $timestamp to MySQL time. MySQL expects YYYY-MM-DD HH:MM:SS
644: ##
sub sqltime {
    # Input:  $time - either a date already written as
    #         "YYYY-MM-DD HH:MM:SS" (padding optional) or a number of
    #         seconds since the epoch.
    # Output: "YYYY-MM-DD HH:MM:SS" (zero padded, epoch values are
    #         rendered in GMT), or 0 when the input is undefined, blank or
    #         cannot be decoded.
    #
    # Undefined and blank values are handled first.  Anything that is left
    # over after the two recognised formats is reported in the log; the
    # former numeric test ($time == 0) was true for every non-numeric
    # string, so undecodable dates were never reported.
    my ($time) = @_;
    my $layout = '%04d-%02d-%02d %02d:%02d:%02d';
    return 0 if (! defined($time) || $time =~ /^\s*$/);
    if ($time =~
        /(\d+)-(\d+)-(\d+) # YYYY-MM-DD
         \s                # a space
         (\d+):(\d+):(\d+) # HH:MM:SS
        /x ) {
        # Some of the .meta files have the time in mysql
        # format already, so just make sure they are 0 padded and
        # pass them back.
        return sprintf($layout,$1,$2,$3,$4,$5,$6);
    }
    if ($time =~ /^\d+$/) {
        my ($sec,$min,$hour,$mday,$mon,$year) = gmtime($time);
        # gmtime counts months from 0 and years from 1900.
        return sprintf($layout,$year+1900,$mon+1,$mday,$hour,$min,$sec);
    }
    &log(0," sqltime:Unable to decode time ".$time);
    return 0;
}
674:
675: ##
676: ## &declutter($filename)
677: ## Given a filename, returns a url for the filename.
sub declutter {
    # Input:  a filename, possibly starting with the document root.
    # Output: the name with the leading document root, one leading "/" and
    #         a leading "res/" removed (each only if present).
    my $thisfn=shift;
    # \Q...\E: the document root is a literal path, not a pattern, so any
    # regex metacharacter in it must not be interpreted.
    $thisfn=~s/^\Q$perlvar{'lonDocRoot'}\E//;
    $thisfn=~s/^\///;
    $thisfn=~s/^res\///;
    return $thisfn;
}
685:
686: ##
687: ## Escape / Unescape special characters
sub unescape {
    # Returns a copy of the argument in which every "%XX" (two hex digits,
    # either case) is replaced by the character with that code.  A "%"
    # that is not followed by two hex digits is left alone.
    my ($encoded) = @_;
    $encoded =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $encoded;
}
693:
sub escape {
    # Returns a copy of the argument in which every non-word character is
    # replaced by "%" followed by its two-digit lower-case hex code.
    my ($plain) = @_;
    $plain =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $plain;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>