1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
4: #
5: # $Id: searchcat.pl,v 1.76 2007/04/11 22:44:18 albertel Exp $
6: #
7: # Copyright Michigan State University Board of Trustees
8: #
9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
10: #
11: # LON-CAPA is free software; you can redistribute it and/or modify
12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
16: # LON-CAPA is distributed in the hope that it will be useful,
17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
22: # along with LON-CAPA; if not, write to the Free Software
23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
27: # http://www.lon-capa.org/
28: #
29: ###
30:
31: =pod
32:
33: =head1 NAME
34:
35: B<searchcat.pl> - put authoritative filesystem data into sql database.
36:
37: =head1 SYNOPSIS
38:
39: Ordinarily this script is to be called from a loncapa cron job
40: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
41: filesystem installation location: F</etc/cron.d/loncapa>).
42:
43: Here is the cron job entry.
44:
45: C<# Repopulate and refresh the metadata database used for the search catalog.>
46: C<10 1 * * 7 www /home/httpd/perl/searchcat.pl>
47:
48: This script only allows itself to be run as the user C<www>.
49:
50: =head1 DESCRIPTION
51:
52: This script goes through a loncapa resource directory and gathers metadata.
53: The metadata is entered into a SQL database.
54:
55: This script also does general database maintenance such as reformatting
56: the C<loncapa:metadata> table if it is deprecated.
57:
58: This script evaluates dynamic metadata from the authors'
59: F<nohist_resevaldata.db> database file in order to store it in MySQL.
60:
61: This script is playing an increasingly important role for a loncapa
62: library server. The proper operation of this script is critical for a smooth
63: and correct user experience.
64:
65: =cut
66:
67: use strict;
68: use DBI;
69: use lib '/home/httpd/lib/perl/';
70: use LONCAPA::lonmetadata;
71: use LONCAPA;
72: use Getopt::Long;
73: use IO::File;
74: use HTML::TokeParser;
75: use GDBM_File;
76: use POSIX qw(strftime mktime);
77:
78: use Apache::lonnet();
79:
80: use File::Find;
81:
#
# Command line handling.
#
# These lexicals are file-scoped and are read by the rest of the script:
#   $simulate - when true, nothing is written to the database
#   $oneuser  - restrict processing to this user (forces $simulate)
#   $verbose  - numeric logging threshold used by &log
#   $debug    - enables the extra debugging log calls
my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
GetOptions(
    'help'      => \$help,
    'simulate'  => \$simulate,
    'only=s'    => \$oneuser,
    'verbose=s' => \$verbose,
    'debug'     => \$debug,
);

if ($help) {
    my @usage = (
        "$0",
        'Rebuild and update the LON-CAPA metadata database.',
        'Options:',
        '-help Print this help',
        '-simulate Do not modify the database.',
        '-only=user Only compute for the given user. Implies -simulate',
        '-verbose=val Sets logging level, val must be a number',
        '-debug Turns on debugging output',
    );
    print join("\n",@usage)."\n";
    exit 0;
}

# Options that were not supplied default to "off".
$debug   = 0 unless (defined($debug));
$verbose = 0 unless (defined($verbose));

# Restricting the run to a single user must never touch the live tables.
$simulate = 1 if (defined($oneuser));
118:
##
## Table names are kept in variables so this routine is easier to test.
## %oldnames maps a short key to the live table name.
my %oldnames = (
    'metadata'    => 'metadata',
    'portfolio'   => 'portfolio_metadata',
    'access'      => 'portfolio_access',
    'addedfields' => 'portfolio_addedfields',
);

# %newnames maps the same keys to scratch tables: 'new' + live name + pid,
# so each run gets its own uniquely named temporary tables.
my %newnames = map { ($_ => 'new'.$oldnames{$_}.$$) } keys(%oldnames);
133:
#
# This script is only meaningful on a library server; exit quietly elsewhere.
if ($Apache::lonnet::perlvar{'lonRole'} ne 'library') {
    exit;
}
#
# Refuse to run unless the real user id is that of user 'www'.
# On a mismatch the configured admin and system addresses are mailed
# and the script exits with status 1.
my $wwwid=getpwnam('www');
unless ($wwwid == $<) {
    my $recipients = $Apache::lonnet::perlvar{'lonAdmEMail'}.','.
                     $Apache::lonnet::perlvar{'lonSysEMail'};
    my $subject = 'LON: '.$Apache::lonnet::perlvar{'lonHostID'}.' User ID mismatch';
    my $command =
        "echo 'User ID mismatch. searchcat.pl must be run as user www.' |\n".
        "mail -s '$subject' $recipients > /dev/null";
    system($command);
    exit 1;
}
#
# Let people know we are running.
#
# The LOG handle is opened for append and is used by &log for the rest of
# the run.  The open is done in three-argument form so the path can never be
# interpreted as a mode, and a failure is reported on STDERR instead of
# silently discarding every later log message.  A failed open is not fatal:
# the run continues without a log file.
my $logpath = $Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log';
if (! open(LOG,'>>',$logpath)) {
    warn "searchcat.pl: cannot append to log file $logpath: $!\n";
}
&log(0,'==== Searchcat Run '.localtime()."====");


# With -debug, record the effective options at the top of the run.
if ($debug) {
    &log(0,'simulating') if ($simulate);
    &log(0,'only processing user '.$oneuser) if ($oneuser);
    &log(0,'verbosity level = '.$verbose);
}
#
# Connect to database.
# RaiseError and PrintError are both off, so every DBI call below has to be
# checked by hand.
my $dbh;
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'},
                           { RaiseError =>0,PrintError=>0}))) {
    &log(0,"Cannot connect to database!");
    die "MySQL Error: Cannot connect to database!\n";
}
# This can return an error and still be okay, so we do not bother checking.
# (perhaps it should be more robust and check for specific errors)
foreach my $key (keys(%newnames)) {
    if ($newnames{$key} ne '') {
        $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
    }
}

#
# Create the new metadata and portfolio tables
foreach my $key (keys(%newnames)) {
    next if ($newnames{$key} eq '');
    my $request =
        &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key});
    $dbh->do($request);
    if ($dbh->err) {
        # Save the message before calling any other DBI method: DBI resets
        # err/errstr at the start of most method calls, so reading errstr
        # after disconnect() would lose the reason for the failure.
        my $errstr = $dbh->errstr;
        $dbh->disconnect();
        &log(0,"MySQL Error Create: ".$errstr);
        die $errstr;
    }
}
188:
#
# Work out which domains this machine serves, then walk each of them.
my @domains = sort(&Apache::lonnet::current_machine_domains());
&log(9,'domains ="'.join('","',@domains).'"');

foreach my $dom (@domains) {
    &log(9,'domain = '.$dom);
    #
    # Authors are the entries of <docroot>/res/<domain> that &ishome accepts.
    my $domresdir = "$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom";
    opendir(RESOURCES,$domresdir);
    my @homeusers;
    foreach my $entry (readdir(RESOURCES)) {
        next if ($entry =~ /^\.\.?$/);
        next unless (&ishome($domresdir.'/'.$entry));
        push(@homeusers,$entry);
    }
    closedir(RESOURCES);
    &log(5,'users = '.$dom.':'.join(',',@homeusers));
    #
    # -only=user replaces the discovered list.
    @homeusers = ($oneuser) if ($oneuser);
    #
    # Per user: load the dynamic metadata, then let File::Find hand every
    # current .meta file below the user's resource directory to
    # &process_meta_file.  (&print_filename and &log_metadata are
    # alternative 'wanted' routines intended for debugging.)
    foreach my $user (@homeusers) {
        &log(0,"=== User: ".$user);
        &process_dynamic_metadata($user,$dom);
        my $userresdir =
            join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user));
        my %findopts = (
            preprocess => \&only_meta_files,
            wanted     => \&process_meta_file,
            no_chdir   => 1,
        );
        find(\%findopts,$userresdir);
    }
    #
    # Search for public portfolio files.  The candidate users are either
    # the single requested user or everything &descend_tree collects below
    # the domain's users directory.
    my %portusers;
    if ($oneuser) {
        $portusers{$oneuser} = '';
    } else {
        &descend_tree($Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom,0,\%portusers);
    }
    foreach my $uname (keys(%portusers)) {
        my $urlstart  = '/uploaded/'.$dom.'/'.$uname;
        my $pathstart = &propath($dom,$uname).'/userfiles';
        my $is_course = &Apache::lonnet::is_course($dom,$uname);
        my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname);
        my %access = &Apache::lonnet::get_access_controls($curr_perm);
        foreach my $file (keys(%access)) {
            # $group stays undefined for non-course owners.
            my $group;
            my $suffix;
            if ($is_course) {
                # Course keys are split into a leading group name and the
                # remaining path, which starts with a slash.
                my $path;
                ($group,$path) = ($file =~ /^(\w+)(\/.+)$/);
                $suffix = '/groups/'.$group.'/portfolio'.$path;
            } else {
                $suffix = '/portfolio'.$file;
            }
            my $fullpath = $pathstart.$suffix;
            my $url      = $urlstart.$suffix;
            if (ref($access{$file}) eq 'HASH') {
                my %portaccesslog =
                    &LONCAPA::lonmetadata::process_portfolio_access_data(
                        $dbh,$simulate,\%newnames,$url,$fullpath,$access{$file});
                &portfolio_logging(%portaccesslog);
            }
            my %portmetalog =
                &LONCAPA::lonmetadata::process_portfolio_metadata(
                    $dbh,$simulate,\%newnames,$url,$fullpath,
                    $is_course,$dom,$uname,$group);
            &portfolio_logging(%portmetalog);
        }
    }
}
261:
#
# Put the freshly built tables into service.
#
# Normal run: each live table is dropped and the matching scratch table is
# renamed into its place.  Any rename failure is logged and is fatal.
#
# Simulated run (-simulate / -only): the scratch tables created earlier are
# dropped again.  Without this every simulated run leaves a set of
# pid-suffixed tables behind, contradicting "Do not modify the database".
if (! $simulate) {
    foreach my $key (keys(%oldnames)) {
        next if (($oldnames{$key} eq '') || ($newnames{$key} eq ''));
        $dbh->do('DROP TABLE IF EXISTS '.$oldnames{$key});
        if (! $dbh->do('RENAME TABLE '.$newnames{$key}.' TO '.$oldnames{$key})) {
            # Read errstr once, before anything else touches the handle.
            my $errstr = $dbh->errstr;
            &log(0,"MySQL Error Rename: ".$errstr);
            die $errstr;
        }
        &log(1,"MySQL table rename successful for $key.");
    }
} else {
    foreach my $key (keys(%newnames)) {
        next if ($newnames{$key} eq '');
        # Best effort, like the initial cleanup: the result is not checked.
        $dbh->do('DROP TABLE IF EXISTS '.$newnames{$key});
    }
}
if (! $dbh->disconnect) {
    my $errstr = $dbh->errstr;
    &log(0,"MySQL Error Disconnect: ".$errstr);
    die $errstr;
}
##
## Finished!
&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);

# The status files are written after the log is closed.
&write_type_count();
&write_copyright_count();

exit 0;
290:
##
## &log($level,$message)
##
## Status logging routine.  The message is appended to the LOG filehandle,
## followed by the input record separator, whenever the file-level $verbose
## setting is at least $level.  An undefined $level is treated as 0.
##
## $level 0 should be used for normal output and error messages.
##
## $message does not need to end with \n.  In the case of errors
## the message should contain as much information as possible to
## help in diagnosing the problem.
##
sub log {
    my ($level,$message) = @_;
    $level = 0 unless (defined($level));
    print LOG $message.$/ if ($verbose >= $level);
}
307:
##
## &portfolio_logging(%portlog)
##
## Takes a hash whose values may be hash references and writes every value
## of those inner hashes to the log at level 0.  Top-level values that are
## not hash references are ignored.
sub portfolio_logging {
    my (%portlog) = @_;
    foreach my $section (values(%portlog)) {
        next unless (ref($section) eq 'HASH');
        foreach my $message (values(%{$section})) {
            &log(0,$message);
        }
    }
}
318:
##
## &descend_tree($dir,$depth,$alldomusers)
##
## Recursively walks $dir, skipping entries whose name starts with a dot.
## Entries on the first three levels below the starting directory are
## descended into.  On the fourth level, every entry that contains a
## file_permissions.db is recorded as a key (with an empty string value)
## in the hash referenced by $alldomusers.
## $depth is the number of levels already descended; callers start at 0.
sub descend_tree {
    my ($dir,$depth,$alldomusers) = @_;
    return if (! -d $dir);
    opendir(DIR,$dir);
    my @entries = grep { $_ !~ /^\./ } readdir(DIR);
    # The handle is closed before recursing, so the shared bareword DIR
    # is never in use by two levels at once.
    closedir(DIR);
    my $level = $depth + 1;
    foreach my $entry (@entries) {
        my $candidate = $dir.'/'.$entry;
        if ($level < 4) {
            &descend_tree($candidate,$level,$alldomusers);
            next;
        }
        next unless (-e $candidate.'/file_permissions.db');
        $alldomusers->{$entry} = '';
    }
}
338:
339: ########################################################
340: ########################################################
341: ### ###
342: ### File::Find support routines ###
343: ### ###
344: ########################################################
345: ########################################################
##
## &only_meta_files(@names)
##
## 'preprocess' callback for File::Find.  Receives the names found in
## $File::Find::dir and returns the ones worth visiting:
##   * names ending in .meta, unless they look like the metadata of a prior
##     version (name.<digits>.<extension>.meta), and
##   * anything that is a directory inside $File::Find::dir.
## This test does not reject '.' and '..' itself.
sub only_meta_files {
    my @PossibleFiles = @_;
    my $is_current_meta = sub {
        my ($name) = @_;
        return 0 unless ($name =~ /\.meta$/);
        return ($name !~ /\.\d+\.[^\.]+\.meta$/);
    };
    return grep {
        $is_current_meta->($_) || (-d $File::Find::dir."/".$_)
    } @PossibleFiles;
}
365:
##
##
## Debugging routines, use these for 'wanted' in the File::Find call
##
## &print_filename
## With $debug set, logs (level 5) the full name File::Find is currently
## visiting, labelled as a directory or a file.  $_ is put back to the
## value it had on entry before returning.
sub print_filename {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    if ($debug) {
        my $label = (-d $file) ? " Got directory " : " Got file ";
        &log(5,$label.$fullfilename);
    }
    $_=$file;
}
382:
##
## &log_metadata
## Debugging 'wanted' routine for File::Find.  Directories are ignored.
## With $debug set, the file name and every key/value pair returned by
## &metadata are logged at level 6 and the file's copyright is counted.
## When &metadata returns nothing, " No data" is logged instead.
sub log_metadata {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    return if (-d $fullfilename); # nothing to report for directories
    if ($debug) {
        &log(6,$fullfilename);
        my $ref = &metadata($fullfilename);
        unless (defined($ref)) {
            &log(6," No data");
            return;
        }
        foreach my $key (keys(%{$ref})) {
            &log(6," ".$key." => ".$ref->{$key});
        }
        &count_copyright($ref->{'copyright'});
    }
    $_=$file;
}
401:
##
## &process_meta_file
## 'wanted' callback for File::Find.
## Inputs come from File::Find: $_ and $File::Find::name (the full name).
##
## For each .meta file the static metadata is read, merged with the dynamic
## metadata of the resource it describes and, unless simulating, stored in
## the new metadata table.  Obsolete resources are skipped entirely; private
## resources are counted by copyright but not stored.
##
## NOTE(review): &metadata may return undef.  The $ref->{'obsolete'} lookup
## below then turns $ref into an empty hash, so such a file still ends up
## stored with little more than its url and version - confirm that is wanted.
sub process_meta_file {
    my ($file) = $_;
    my $filename = $File::Find::name; # full filename
    return if (-d $filename);         # directories carry no metadata
    #
    &log(3,$filename) if ($debug);
    #
    my $ref = &metadata($filename);
    #
    # $url names the resource itself, not its metadata file
    (my $target = $filename) =~ s/\.meta$//;
    my $url = '/res/'.&declutter($target);
    &log(3," ".$url) if ($debug);
    #
    # Some files are ignored based on their metadata
    if ($ref->{'obsolete'}) {
        &log(3,"obsolete") if ($debug);
        return;
    }
    &count_copyright($ref->{'copyright'});
    if ($ref->{'copyright'} eq 'private') {
        &log(3,"private") if ($debug);
        return;
    }
    #
    # Dynamic metadata: a url ending in /default is rewritten to end in /
    # and gets none; everything else is looked up and counted by type.
    my %dyn;
    if ($url =~ s:/default$:/:) {
        &log(3,"Skipping dynamic data") if ($debug);
    } else {
        &log(3,"Retrieving dynamic data") if ($debug);
        %dyn = &get_dynamic_metadata($url);
        &count_type($url);
    }
    &LONCAPA::lonmetadata::getfiledates($ref,$target);
    #
    # Later entries win: dynamic data overrides static data, and url/version
    # are always set here.
    my %Data = (
        %{$ref},
        %dyn,
        'url'     => $url,
        'version' => 'current',
    );
    unless ($simulate) {
        my ($count,$err) =
            &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'},
                                                  'metadata',\%Data);
        &log(0,"MySQL Error Insert: ".$err) if ($err);
        &log(0,"Unable to insert record into MySQL database for $url")
            if ($count < 1);
    }
    #
    # Reset $_ before leaving
    $_ = $file;
}
464:
465: ########################################################
466: ########################################################
467: ### ###
468: ### &metadata($uri) ###
469: ### Retrieve metadata for the given file ###
470: ### ###
471: ########################################################
472: ########################################################
##
## &metadata($uri)
##
## Reads the .meta file belonging to $uri (".meta" is appended when missing)
## from below <lonDocRoot>/res/ and returns a reference to a hash built from
## its start tags, or undef when the file contents cannot be fetched.
##
## For every start tag a key is formed from the tag name, extended with
## "_<part>" and "_<name>" when those attributes are present.  The hash holds:
##   'keys'         comma separated list of all keys seen, in file order
##   <key>          the text up to the matching end tag, or the value of the
##                  'default' attribute when that text is empty
##   <key>.<attr>   the value of each attribute of the tag
sub metadata {
    my ($uri) = @_;
    my %metacache=();
    my $filename = &declutter($uri);
    $filename .= '.meta' unless ($filename =~ /\.meta$/);
    my $metastring =
        &LONCAPA::lonmetadata::getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename);
    return undef if (! defined($metastring));
    my $parser=HTML::TokeParser->new(\$metastring);
    while (my $token = $parser->get_token) {
        next unless ($token->[0] eq 'S');   # only start tags are of interest
        my ($entry,$attr,$attrseq) = @{$token}[1,2,3];
        my $unikey = $entry;
        foreach my $qualifier ('part','name') {
            next unless (defined($attr->{$qualifier}));
            $unikey .= '_'.$attr->{$qualifier};
        }
        $metacache{'keys'} = $metacache{'keys'}
                           ? $metacache{'keys'}.','.$unikey
                           : $unikey;
        foreach my $attrname (@{$attrseq}) {
            $metacache{$unikey.'.'.$attrname} = $attr->{$attrname};
        }
        # Empty element text falls back to the tag's 'default' attribute.
        my $text = $parser->get_text('/'.$entry);
        $metacache{$unikey} = $text ? $text : $metacache{$unikey.'.default'};
    }
    return \%metacache;
}
514:
515: ########################################################
516: ########################################################
517: ### ###
518: ### Dynamic Metadata ###
519: ### ###
520: ########################################################
521: ########################################################
522: ##
523: ## Dynamic metadata description (incomplete)
524: ##
525: ## For a full description of all fields,
526: ## see LONCAPA::lonmetadata
527: ##
528: ## Field Type
529: ##-----------------------------------------------------------
530: ## count integer
531: ## course integer
532: ## course_list comma separated list of course ids
533: ## avetries real
534: ## avetries_list comma separated list of real numbers
535: ## stdno real
536: ## stdno_list comma separated list of real numbers
537: ## usage integer
538: ## usage_list comma separated list of resources
539: ## goto scalar
540: ## goto_list comma separated list of resources
541: ## comefrom scalar
542: ## comefrom_list comma separated list of resources
543: ## difficulty real
544: ## difficulty_list comma separated list of real numbers
545: ## sequsage scalar
546: ## sequsage_list comma separated list of resources
547: ## clear real
548: ## technical real
549: ## correct real
550: ## helpful real
551: ## depth real
552: ## comments html of all the comments made
553: ##
{

# State shared by the two routines below.  Both hashes are refilled by every
# call to &process_dynamic_metadata and read by &get_dynamic_metadata.
my %DynamicData;
my %Counts;

##
## &process_dynamic_metadata($user,$dom)
##
## Loads the dynamic metadata of one author into %DynamicData and %Counts.
## The data comes from two GDBM files in the directory &propath returns for
## the user: nohist_resevaldata.db and nohist_accesscount.db.
## Returns 1 on success and 0 as soon as either file cannot be tied.
sub process_dynamic_metadata {
    my ($user,$dom) = @_;
    undef(%DynamicData);
    undef(%Counts);
    #
    my $prodir = &propath($dom,$user);
    #
    # Evaluation data
    my %evaldata;
    tie(%evaldata,'GDBM_File',
        $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640) or return 0;
    #
    %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
    untie(%evaldata);
    $DynamicData{'domain'} = $dom;
    #
    # Access counts; only keys that start with the domain are kept, stored
    # under their unescaped form.
    &log(7,'Reading access count data') if ($debug);
    my %countdata;
    tie(%countdata,'GDBM_File',
        $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640) or return 0;
    while (my ($key,$count) = each(%countdata)) {
        if ($key =~ /^$dom/) {
            $key = &unescape($key);
            &log(8,' Count '.$key.' = '.$count) if ($debug);
            $Counts{$key} = $count;
        }
    }
    untie(%countdata);
    if ($debug) {
        my $who = $user.'@'.$dom;
        &log(7,scalar(keys(%Counts))." Counts read for ".$who);
        &log(7,scalar(keys(%DynamicData))." Dynamic metadata read for ".$who);
    }
    #
    return 1;
}

##
## &get_dynamic_metadata($url)
##
## Returns, as a hash, the dynamic metadata for $url (a leading /res/ is
## removed first), computed from the data loaded by the most recent
## &process_dynamic_metadata call, plus a 'count' entry from the access
## counts.  With $debug set every pair is logged at level 8.
sub get_dynamic_metadata {
    my ($url) = @_;
    $url =~ s:^/res/::;
    my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                               \%DynamicData);
    # find the count
    $data{'count'} = $Counts{$url};
    #
    # Log the dynamic metadata
    if ($debug) {
        foreach my $field (keys(%data)) {
            &log(8," ".$field." => ".$data{$field});
        }
    }
    return %data;
}

} # End of %DynamicData and %Counts scope
620:
621: ########################################################
622: ########################################################
623: ### ###
624: ### Counts ###
625: ### ###
626: ########################################################
627: ########################################################
{

# Number of resources seen so far, keyed by lower-cased filename extension.
my %countext;

##
## &count_type($file)
##
## Adds one to the tally for the extension of $file (the word characters
## after the last dot, lower-cased).  Names without such an extension are
## tallied under the empty string.
sub count_type {
    my $file=shift;
    # $1 is only meaningful after a successful match; reading it after a
    # failed one would pick up the capture of some earlier, unrelated match.
    my $ext = (defined($file) && $file =~ /\.(\w+)$/) ? lc($1) : '';
    $countext{$ext}++;
}

##
## &write_type_count([$outfile])
##
## Writes the tallies as "ext=count&" pairs followed by "time=<epoch>" and a
## newline.  $outfile is optional and defaults to the lon-status file
## rescount.txt.  Returns 1 on success; when the file cannot be opened a
## warning is issued and 0 is returned.
sub write_type_count {
    my ($outfile) = @_;
    $outfile = '/home/httpd/html/lon-status/rescount.txt'
        if (! defined($outfile));
    my $fh;
    if (! open($fh,'>',$outfile)) {
        warn "searchcat.pl: cannot write $outfile: $!\n";
        return 0;
    }
    while (my ($extension,$count) = each(%countext)) {
        print $fh $extension.'='.$count.'&';
    }
    print $fh 'time='.time."\n";
    close($fh);
    return 1;
}

} # end of scope for %countext
649:
{

# Number of resources seen so far, keyed by copyright value.
my %copyrights;

##
## &count_copyright($copyright)
##
## Adds one to the tally for $copyright.  An undefined value is tallied
## under the empty string.
sub count_copyright {
    my ($copyright) = @_;   # a scalar element, not the one-element slice @_[0]
    $copyright = '' if (! defined($copyright));
    $copyrights{$copyright}++;
}

##
## &write_copyright_count([$outfile])
##
## Writes the tallies as "copyright=count&" pairs followed by "time=<epoch>"
## and a newline.  $outfile is optional and defaults to the lon-status file
## copyrightcount.txt.  Returns 1 on success; when the file cannot be opened
## a warning is issued and 0 is returned.
sub write_copyright_count {
    my ($outfile) = @_;
    $outfile = '/home/httpd/html/lon-status/copyrightcount.txt'
        if (! defined($outfile));
    my $fh;
    if (! open($fh,'>',$outfile)) {
        warn "searchcat.pl: cannot write $outfile: $!\n";
        return 0;
    }
    while (my ($copyright,$count) = each(%copyrights)) {
        print $fh $copyright.'='.$count.'&';
    }
    print $fh 'time='.time."\n";
    close($fh);
    return 1;
}

} # end of scope for %copyrights
668:
669: ########################################################
670: ########################################################
671: ### ###
672: ### Miscellanous Utility Routines ###
673: ### ###
674: ########################################################
675: ########################################################
##
## &ishome($path)
##
## $path is the full name of a user's resource directory, i.e.
## <document root>/res/<domain>/<user>[/...].  Returns 1 if the directory
## that propath() gives for that domain and user exists, 0 otherwise.
## (copied from lond, modification of the return value)
##
## The document root is taken from the lonDocRoot setting, which is what the
## caller uses to build $path; the historical /home/httpd/html prefix is
## still recognised as well.
sub ishome {
    my $author=shift;
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    if (! defined($docroot) || $docroot eq '') {
        $docroot = '/home/httpd/html';
    }
    # Reduce the path to "<domain>/<user>".  \Q...\E keeps characters in the
    # configured path from being read as regular expression syntax.
    $author =~ s{(?:\Q$docroot\E|/home/httpd/html)/res/([^/]*)/([^/]*).*}{$1/$2};
    my ($udom,$uname)=split(/\//,$author);
    my $proname=propath($udom,$uname);
    return (-e $proname) ? 1 : 0;
}
691:
##
## &declutter($filename)
##
## Given a filename, returns it with the leading document root (the
## lonDocRoot setting), one leading slash and a leading "res/" removed,
## i.e. the part of a resource path that follows /res/.
sub declutter {
    my $thisfn=shift;
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    if (defined($docroot) && $docroot ne '') {
        # \Q...\E: the document root is a literal path, not a pattern, so a
        # character such as '.' in it must not match arbitrary characters.
        $thisfn =~ s/^\Q$docroot\E//;
    }
    $thisfn=~s/^\///;
    $thisfn=~s/^res\///;
    return $thisfn;
}
702:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>