Annotation of loncom/metadata_database/searchcat.pl, revision 1.79

1.1       harris41    1: #!/usr/bin/perl
                      2: # The LearningOnline Network
                      3: # searchcat.pl "Search Catalog" batch script
1.16      harris41    4: #
1.79    ! raeburn     5: # $Id: searchcat.pl,v 1.78 2010/03/26 13:29:31 raeburn Exp $
1.16      harris41    6: #
                      7: # Copyright Michigan State University Board of Trustees
                      8: #
1.29      albertel    9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
1.16      harris41   10: #
1.29      albertel   11: # LON-CAPA is free software; you can redistribute it and/or modify
1.16      harris41   12: # it under the terms of the GNU General Public License as published by
                     13: # the Free Software Foundation; either version 2 of the License, or
                     14: # (at your option) any later version.
                     15: #
1.29      albertel   16: # LON-CAPA is distributed in the hope that it will be useful,
1.16      harris41   17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     19: # GNU General Public License for more details.
                     20: #
                     21: # You should have received a copy of the GNU General Public License
1.29      albertel   22: # along with LON-CAPA; if not, write to the Free Software
1.16      harris41   23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     24: #
                     25: # /home/httpd/html/adm/gpl.txt
                     26: #
1.29      albertel   27: # http://www.lon-capa.org/
1.16      harris41   28: #
                     29: ###
1.33      matthew    30: 
1.32      www        31: =pod
1.1       harris41   32: 
1.32      www        33: =head1 NAME
                     34: 
                     35: B<searchcat.pl> - put authoritative filesystem data into sql database.
                     36: 
                     37: =head1 SYNOPSIS
                     38: 
                     39: Ordinarily this script is to be called from a loncapa cron job
                     40: (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
                     41: filesystem installation location: F</etc/cron.d/loncapa>).
                     42: 
                     43: Here is the cron job entry.
                     44: 
                     45: C<# Repopulate and refresh the metadata database used for the search catalog.>
                     46: C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
                     47: 
                     48: This script only allows itself to be run as the user C<www>.
                     49: 
                     50: =head1 DESCRIPTION
                     51: 
                     52: This script goes through a loncapa resource directory and gathers metadata.
                     53: The metadata is entered into a SQL database.
                     54: 
                     55: This script also does general database maintenance such as reformatting
                     56: the C<loncapa:metadata> table if it is deprecated.
                     57: 
                     58: This script evaluates dynamic metadata from the authors'
1.48      www        59: F<nohist_resevaldata.db> database file in order to store it in MySQL.
1.32      www        60: 
                     61: This script is playing an increasingly important role for a loncapa
                     62: library server.  The proper operation of this script is critical for a smooth
                     63: and correct user experience.
                     64: 
                     65: =cut
1.1       harris41   66: 
1.45      www        67: use strict;
1.55      matthew    68: use DBI;
1.17      harris41   69: use lib '/home/httpd/lib/perl/';
1.55      matthew    70: use LONCAPA::lonmetadata;
1.76      albertel   71: use LONCAPA;
1.56      matthew    72: use Getopt::Long;
1.1       harris41   73: use IO::File;
                     74: use HTML::TokeParser;
1.21      www        75: use GDBM_File;
1.24      www        76: use POSIX qw(strftime mktime);
1.56      matthew    77: 
1.63      matthew    78: use Apache::lonnet();
1.62      matthew    79: 
1.55      matthew    80: use File::Find;
1.1       harris41   81: 
#
# Set up configuration options.
# These are file-scoped lexicals consulted throughout the rest of the script.
my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
GetOptions (
            'help'      => \$help,
            'simulate'  => \$simulate,
            'only=s'    => \$oneuser,
            # The logging level is compared numerically in &log and the help
            # text requires a number, so only integers are accepted here.
            'verbose=i' => \$verbose,
            'debug'     => \$debug,
            );

if ($help) {
    print <<"ENDHELP";
$0
Rebuild and update the LON-CAPA metadata database. 
Options:
    -help          Print this help
    -simulate      Do not modify the database.
    -only=user     Only compute for the given user.  Implies -simulate   
    -verbose=val   Sets logging level, val must be a number
    -debug         Turns on debugging output
ENDHELP
    exit 0;
}

# Options that were not given default to "off" / level 0.
$debug   = 0 if (! defined($debug));
$verbose = 0 if (! defined($verbose));

# Processing a single user never modifies the database.
if (defined($oneuser)) {
    $simulate=1;
}
                    118: 
##
## Use variables for table names so we can test this routine a little easier
my %oldnames = (
                 'metadata'    => 'metadata',
                 'portfolio'   => 'portfolio_metadata',
                 'access'      => 'portfolio_access',
                 'addedfields' => 'portfolio_addedfields',
                 'allusers'    => 'allusers',
               );

# New table names: 'new' + the old name + this process id, which gives each
# run its own uniquely named temporary tables.
my %newnames = map { $_ => 'new'.$oldnames{$_}.$$ } keys(%oldnames);
1.45      www       134: 
#
# Only run if machine is a library server
exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library');
my $hostid = $Apache::lonnet::perlvar{'lonHostID'};

#
#  Make sure this process is running from user=www
#  getpwnam() yields undef when there is no 'www' account.  An undef uid
#  compares numerically equal to 0, which would let root pass the check,
#  so a missing account is treated as a mismatch as well.
my $wwwid=getpwnam('www');
if ((! defined($wwwid)) || ($wwwid!=$<)) {
    # Notify the administrators by mail, then give up with a failure status.
    my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}";
    my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch";
    system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
 mail -s '$subj' $emailto > /dev/null");
    exit 1;
}
                    150: #
                    151: # Let people know we are running
1.63      matthew   152: open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log');
1.56      matthew   153: &log(0,'==== Searchcat Run '.localtime()."====");
1.57      matthew   154: 
                    155: 
1.56      matthew   156: if ($debug) {
                    157:     &log(0,'simulating') if ($simulate);
                    158:     &log(0,'only processing user '.$oneuser) if ($oneuser);
                    159:     &log(0,'verbosity level = '.$verbose);
                    160: }
#
# Connect to database
my $dbh;
if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'},
                          { RaiseError =>0,PrintError=>0}))) {
    &log(0,"Cannot connect to database!");
    die "MySQL Error: Cannot connect to database!\n";
}
# This can return an error and still be okay, so we do not bother checking.
# (perhaps it should be more robust and check for specific errors)
foreach my $table (values(%newnames)) {
    next if ($table eq '');
    $dbh->do('DROP TABLE IF EXISTS '.$table);
}

#
# Create the new metadata, portfolio and allusers tables
foreach my $key (keys(%newnames)) {
    next if ($newnames{$key} eq '');
    my $request =
         &LONCAPA::lonmetadata::create_metadata_storage($newnames{$key},$oldnames{$key});
    $dbh->do($request);
    if ($dbh->err) {
        # Save the message first: DBI resets err/errstr at the start of
        # most method calls, so it would be gone after disconnect().
        my $error = $dbh->errstr;
        $dbh->disconnect();
        &log(0,"MySQL Error Create: ".$error);
        die $error;
    }
}
1.69      raeburn   191: 
#
# find out which users we need to examine
#
# For every domain hosted on this machine:
#   1. collect the authors whose resources live here and store the metadata
#      of their published resources (File::Find + &process_meta_file),
#   2. store access-control data and metadata for portfolio files,
#   3. store name/ID/e-mail data for all users in the allusers table,
#   4. unless simulating or restricted to one user, bring the domain's
#      ids.db in line with the IDs found in the users' environments.
my @domains = sort(&Apache::lonnet::current_machine_domains());
&log(9,'domains ="'.join('","',@domains).'"');

foreach my $dom (@domains) {
    &log(9,'domain = '.$dom);
    my $resdir = "$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom";
    my @homeusers;
    # A domain without a readable resource directory simply has no authors
    # to process; the portfolio and allusers steps below still run.
    if (opendir(my $resources,$resdir)) {
        @homeusers =
            grep {
                &ishome("$resdir/$_");
            } grep {
                !/^\.\.?$/;
            } readdir($resources);
        closedir($resources);
    } else {
        &log(1,"Unable to open resource directory $resdir: $!");
    }
    &log(5,'users = '.$dom.':'.join(',',@homeusers));
    #
    if ($oneuser) {
        @homeusers=($oneuser);
    }
    #
    # Loop through the users
    foreach my $user (@homeusers) {
        &log(0,"=== User: ".$user);
        &process_dynamic_metadata($user,$dom);
        #
        # Use File::Find to get the files we need to read/modify
        find(
             {preprocess => \&only_meta_files,
              #wanted     => \&print_filename,
              #wanted     => \&log_metadata,
              wanted     => \&process_meta_file,
              no_chdir   => 1,
             }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) );
    }
    # Search for all users and public portfolio files
    my (%allusers,%portusers,%courses);
    if ($oneuser) {
        %portusers = (
                        $oneuser => '',
                       );
        %allusers = (
                        $oneuser => '',
                       );
        # NOTE(review): unlike the call in the else branch, this one has no
        # Apache::lonnet:: prefix -- confirm a courseiddump is in scope here.
        %courses = &courseiddump($dom,'.',1,'.','.',$oneuser,undef,
                                 undef,'.');
    } else {
        # get courseIDs for domain on current machine
        %courses=&Apache::lonnet::courseiddump($dom,'.',1,'.','.','.',1,[$hostid],'.');
        my $dir = $Apache::lonnet::perlvar{lonUsersDir}.'/'.$dom;
        &descend_tree($dom,$dir,0,\%portusers,\%allusers);
    }
    foreach my $uname (keys(%portusers)) {
        my $urlstart = '/uploaded/'.$dom.'/'.$uname;
        my $pathstart = &propath($dom,$uname).'/userfiles';
        my $is_course = '';
        if (exists($courses{$dom.'_'.$uname})) {
            $is_course = 1;
        }
        my $curr_perm = &Apache::lonnet::get_portfile_permissions($dom,$uname);
        my %access = &Apache::lonnet::get_access_controls($curr_perm);
        foreach my $file (keys(%access)) {
            my ($group,$url,$fullpath);
            if ($is_course) {
                # Course portfolio entries are keyed as <group>/<path>.
                ($group, my ($path)) = ($file =~ /^(\w+)(\/.+)$/);
                $fullpath = $pathstart.'/groups/'.$group.'/portfolio'.$path;
                $url = $urlstart.'/groups/'.$group.'/portfolio'.$path;
            } else {
                $fullpath = $pathstart.'/portfolio'.$file;
                $url = $urlstart.'/portfolio'.$file;
            }
            if (ref($access{$file}) eq 'HASH') {
                my %portaccesslog = 
                    &LONCAPA::lonmetadata::process_portfolio_access_data($dbh,
                           $simulate,\%newnames,$url,$fullpath,$access{$file});
                &portfolio_logging(%portaccesslog);
            }
            my %portmetalog = &LONCAPA::lonmetadata::process_portfolio_metadata($dbh,$simulate,\%newnames,$url,$fullpath,$is_course,$dom,$uname,$group);
            &portfolio_logging(%portmetalog);
        }
    }
    #
    # Read the current contents of ids.db (ID => username) so it can be
    # compared with the ID stored in each user's environment.
    my (%names_by_id,%ids_by_name,%idstodelete,%idstoadd,%duplicates);
    unless ($simulate || $oneuser) {
        my $idshashref = &tie_domain_hash($dom, "ids", &GDBM_WRCREAT());
        if (ref($idshashref) eq 'HASH') {
            # Keys and values are unescaped before use, so both lookup
            # tables are built from the unescaped forms only.
            while (my ($id,$uname) = each(%{$idshashref}) ) {
                $id = &unescape($id);
                $uname = &unescape($uname); 
                $names_by_id{$id} = $uname;
                push(@{$ids_by_name{$uname}},$id);
            }
            &untie_domain_hash($idshashref);
        }
    }
    # Update allusers
    foreach my $uname (keys(%allusers)) {
        # Courses have user directories too, but are not users.
        next if (exists($courses{$dom.'_'.$uname}));
        my %userdata = 
            &Apache::lonnet::get('environment',['firstname','lastname',
                'middlename','generation','id','permanentemail'],$dom,$uname);
        unless ($simulate || $oneuser) {
            # $addid is the (lowercased) ID to add to ids.db; it is cleared
            # again when ids.db already has that ID for this user.
            my $addid;
            if ($userdata{'id'} ne '') {
                $addid = $userdata{'id'};
                $addid=~tr/A-Z/a-z/;
            }
            # IDs are written to ids.db in lowercase, so a stored ID also
            # counts as current when it equals the lowercased form.
            # Otherwise mixed-case IDs would be deleted and re-added on
            # every run.
            my $lcid = $addid;
            if (exists($ids_by_name{$uname})) {
                if (ref($ids_by_name{$uname}) eq 'ARRAY') {
                    if (scalar(@{$ids_by_name{$uname}}) > 1) {
                        &log(0,"Multiple employee/student IDs found in ids.db for $uname:$dom -- ".join(', ',@{$ids_by_name{$uname}}));
                    }
                    foreach my $id (@{$ids_by_name{$uname}}) {
                        if (($id eq $userdata{'id'}) ||
                            (defined($lcid) && ($id eq $lcid))) {
                            undef($addid);
                        } else { 
                            # Stale ID for this user.
                            $idstodelete{$id} = $uname;
                        }
                    }
                }
            }
            if ($addid ne '') {
                if (exists($idstoadd{$addid})) {
                    # Another user processed earlier already claims this
                    # ID; record this user as a duplicate.
                    push(@{$duplicates{$addid}},$uname);
                } else {
                    if ((exists($names_by_id{$addid})) && ($names_by_id{$addid} ne $uname)) {
                        &log(0,"In ids.db ($dom) $addid => $names_by_id{$addid} will be replaced by $addid => $uname");
                    }
                    $idstoadd{$addid} = $uname;
                }
            }
        }
        
        $userdata{'username'} = $uname;
        $userdata{'domain'} = $dom;
        my %alluserslog = 
            &LONCAPA::lonmetadata::process_allusers_data($dbh,$simulate,
                \%newnames,$uname,$dom,\%userdata);
        foreach my $item (keys(%alluserslog)) {
            &log(0,$alluserslog{$item});
        }
    }
    #
    # Apply the collected ids.db changes: deletions first, then additions.
    unless ($simulate || $oneuser) {
        if (keys(%idstodelete) > 0) {
            my %resulthash = &Apache::lonnet::iddel($dom,\%idstodelete,$hostid);
            if ($resulthash{$hostid} eq 'ok') {
                foreach my $id (sort(keys(%idstodelete))) {
                    &log(0,"Record deleted from ids.db for $dom -- $id => ".$idstodelete{$id});
                }
            } else {
                &log(0,"Error: '$resulthash{$hostid}' occurred when attempting to delete records from ids.db for $dom");
            }
        }
        if (keys(%idstoadd) > 0) {
            my $putresult = &Apache::lonnet::put_dom('ids',\%idstoadd,$dom,$hostid);
            if ($putresult eq 'ok') {
                foreach my $id (sort(keys(%idstoadd))) {
                    &log(0,"Record added to ids.db for $dom -- $id => ".$idstoadd{$id});
                }
            } else {
                &log(0,"Error: '$putresult' occurred when attempting to add records to ids.db for $dom"); 
            }
        }
        if (keys(%duplicates) > 0) {
            foreach my $id (sort(keys(%duplicates))) {
                # Report the users that were skipped because the ID was
                # already claimed (the first claimant is in %idstoadd).
                &log(0,"Duplicate IDs found for entries to add to ids.db in $dom -- $id => ".join(', ',@{$duplicates{$id}}));
            }
        }
    }
}
1.69      raeburn   363: 
#
# Rename the tables
# Each freshly built table replaces its live counterpart: the old table is
# dropped and the temporary one renamed into its place.  Skipped entirely
# when simulating.
unless ($simulate) {
    foreach my $key (keys(%oldnames)) {
        my ($live,$fresh) = ($oldnames{$key},$newnames{$key});
        next if (($live eq '') || ($fresh eq ''));
        $dbh->do('DROP TABLE IF EXISTS '.$live);
        if ($dbh->do('RENAME TABLE '.$fresh.' TO '.$live)) {
            &log(1,"MySQL table rename successful for $key.");
        } else {
            &log(0,"MySQL Error Rename: ".$dbh->errstr);
            die $dbh->errstr;
        }
    }
}
unless ($dbh->disconnect) {
    &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
    die $dbh->errstr;
}
##
## Finished!
&log(0,"==== Searchcat completed ".localtime()." ====");
close(LOG);

# The counts are written out after the log has been closed.
&write_type_count();
&write_copyright_count();

exit 0;
1.28      harris41  392: 
##
## Status logging routine.  Inputs: $level, $message
## 
## $level 0 should be used for normal output and error messages
## (an undefined $level is treated as 0).  The message is written to the
## LOG filehandle only when the configured verbosity is at least $level.
##
## $message does not need to end with \n.  In the case of errors
## the message should contain as much information as possible to
## help in diagnosing the problem.
##
sub log {
    my ($level,$message)=@_;
    $level = 0 if (! defined($level));
    $message = '' if (! defined($message));
    if ($verbose >= $level) {
        # Terminate with a literal newline rather than $/: $/ is the
        # *input* record separator and may be localized (e.g. to undef
        # for slurping) by whoever is calling us.
        print LOG $message."\n";
    }
}
                    409: 
##
## portfolio_logging
##   Input: a hash whose values may be hash references.  Every value of
##   each such inner hash is written to the log at level 0; entries that
##   are not hash references are ignored.
sub portfolio_logging {
    my (%portlog) = @_;
    foreach my $entries (values(%portlog)) {
        next unless (ref($entries) eq 'HASH');
        foreach my $message (values(%{$entries})) {
            &log(0,$message);
        }
    }
}
                    420: 
##
## descend_tree
##   Recursively walks a directory tree looking for user directories.
##   Inputs: $dom          - domain (only passed along while recursing)
##           $dir          - directory to examine
##           $depth        - number of levels already descended (0 to start)
##           $allportusers - hash ref; gets a key for every user directory
##                           containing a file_permissions.db
##           $alldomusers  - hash ref; gets a key for every user directory
##                           containing a passwd file
##   Entries found four levels below the starting directory are treated as
##   user directories; everything above that is descended into.  Names
##   starting with a dot are skipped.  Unreadable or missing directories
##   contribute nothing.
sub descend_tree {
    my ($dom,$dir,$depth,$allportusers,$alldomusers) = @_;
    return unless (-d $dir);
    # Lexical handle: this sub is recursive, so no package-global dirhandle.
    opendir(my $dirh,$dir) or return;
    my @contents = grep(!/^\./,readdir($dirh));
    closedir($dirh);
    $depth ++;
    foreach my $item (@contents) {
        my $path = $dir.'/'.$item;
        if ($depth < 4) {
            &descend_tree($dom,$path,$depth,$allportusers,$alldomusers);
        } else {
            if (-e $path.'/file_permissions.db') {
                $allportusers->{$item} = '';
            }
            if (-e $path.'/passwd') {
                $alldomusers->{$item} = '';
            }
        }
    }
    return;
}
                    442: 
1.55      matthew   443: ########################################################
                    444: ########################################################
                    445: ###                                                  ###
                    446: ###          File::Find support routines             ###
                    447: ###                                                  ###
                    448: ########################################################
                    449: ########################################################
                    450: ##
                    451: ## &only_meta_files
                    452: ##
                    453: ## Called by File::Find.
                    454: ## Takes a list of files/directories in and returns a list of files/directories
                    455: ## to search.
                    456: sub only_meta_files {
                    457:     my @PossibleFiles = @_;
                    458:     my @ChosenFiles;
                    459:     foreach my $file (@PossibleFiles) {
                    460:         if ( ($file =~ /\.meta$/ &&            # Ends in meta
                    461:               $file !~ /\.\d+\.[^\.]+\.meta$/  # is not for a prior version
1.67      albertel  462:              ) || (-d $File::Find::dir."/".$file )) { # directories are okay
1.55      matthew   463:                  # but we do not want /. or /..
                    464:             push(@ChosenFiles,$file);
                    465:         }
1.38      www       466:     }
1.55      matthew   467:     return @ChosenFiles;
1.38      www       468: }
                    469: 
1.55      matthew   470: ##
                    471: ##
                    472: ## Debugging routines, use these for 'wanted' in the File::Find call
                    473: ##
                    474: sub print_filename {
                    475:     my ($file) = $_;
                    476:     my $fullfilename = $File::Find::name;
1.56      matthew   477:     if ($debug) {
                    478:         if (-d $file) {
                    479:             &log(5," Got directory ".$fullfilename);
                    480:         } else {
                    481:             &log(5," Got file ".$fullfilename);
                    482:         }
1.38      www       483:     }
1.55      matthew   484:     $_=$file;
1.38      www       485: }
1.28      harris41  486: 
# log_metadata
#   Debugging 'wanted' routine for File::Find.  Directories are ignored.
#   When debugging is on, logs (at level 6) the file name followed by every
#   key/value pair of its metadata, and counts its copyright setting.
sub log_metadata {
    my ($file) = $_;
    my $fullfilename = $File::Find::name;
    return if (-d $fullfilename); # No need to do anything here for directories
    if ($debug) {
        &log(6,$fullfilename);
        my $ref = &metadata($fullfilename);
        unless (defined($ref)) {
            &log(6,"    No data");
            return;
        }
        foreach my $key (keys(%{$ref})) {
            &log(6,"    ".$key." => ".$ref->{$key});
        }
        &count_copyright($ref->{'copyright'});
    }
    $_=$file;
}
1.21      www       505: 
##
## process_meta_file
##   Called by File::Find. 
##   Only input is the filename in $_ (the full name is taken from
##   $File::Find::name).
##   Reads the metadata file, skips resources marked obsolete or private,
##   merges in dynamic metadata and file dates, and, unless simulating,
##   stores the result in the new metadata table.
sub process_meta_file {
    my ($file) = $_;
    my $filename = $File::Find::name; # full filename
    return if (-d $filename); # No need to do anything here for directories
    #
    &log(3,$filename) if ($debug);
    #
    my $ref = &metadata($filename);
    #
    # $url is the original file url, not the metadata file
    (my $target = $filename) =~ s/\.meta$//;
    my $url='/res/'.&declutter($target);
    &log(3,"    ".$url) if ($debug);
    #
    # Ignore some files based on their metadata
    if ($ref->{'obsolete'}) { 
        &log(3,"obsolete") if ($debug);
        return; 
    }
    # Private resources are still counted, but not stored.
    &count_copyright($ref->{'copyright'});
    if ($ref->{'copyright'} eq 'private') { 
        &log(3,"private") if ($debug);
        return; 
    }
    #
    # Find the dynamic metadata
    # A url ending in /default is rewritten to end in / and gets no
    # dynamic data.
    my %dyn;
    if ($url =~ s:/default$:/:) {
        &log(3,"Skipping dynamic data") if ($debug);
    } else {
        &log(3,"Retrieving dynamic data") if ($debug);
        %dyn=&get_dynamic_metadata($url);
        &count_type($url);
    }
    &LONCAPA::lonmetadata::getfiledates($ref,$target);
    #
    # Later entries win: dynamic data overrides the static metadata, and
    # url/version are always set here.
    my %Data = (
                %$ref,
                %dyn,
                'url'=>$url,
                'version'=>'current');
    unless ($simulate) {
        my ($count,$err) = 
          &LONCAPA::lonmetadata::store_metadata($dbh,$newnames{'metadata'},
                                                'metadata',\%Data);
        &log(0,"MySQL Error Insert: ".$err) if ($err);
        &log(0,"Unable to insert record into MySQL database for $url")
            if ($count < 1);
    }
    #
    # Reset $_ before leaving
    $_ = $file;
}
                    568: 
                    569: ########################################################
                    570: ########################################################
                    571: ###                                                  ###
                    572: ###  &metadata($uri)                                 ###
                    573: ###   Retrieve metadata for the given file           ###
                    574: ###                                                  ###
                    575: ########################################################
                    576: ########################################################
                    577: sub metadata {
1.69      raeburn   578:     my ($uri) = @_;
1.55      matthew   579:     my %metacache=();
                    580:     $uri=&declutter($uri);
                    581:     my $filename=$uri;
                    582:     $uri=~s/\.meta$//;
                    583:     $uri='';
                    584:     if ($filename !~ /\.meta$/) { 
                    585:         $filename.='.meta';
                    586:     }
1.75      raeburn   587:     my $metastring = 
                    588:         &LONCAPA::lonmetadata::getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename);
1.55      matthew   589:     return undef if (! defined($metastring));
                    590:     my $parser=HTML::TokeParser->new(\$metastring);
                    591:     my $token;
                    592:     while ($token=$parser->get_token) {
                    593:         if ($token->[0] eq 'S') {
                    594:             my $entry=$token->[1];
                    595:             my $unikey=$entry;
                    596:             if (defined($token->[2]->{'part'})) { 
                    597:                 $unikey.='_'.$token->[2]->{'part'}; 
                    598:             }
                    599:             if (defined($token->[2]->{'name'})) { 
                    600:                 $unikey.='_'.$token->[2]->{'name'}; 
                    601:             }
                    602:             if ($metacache{$uri.'keys'}) {
                    603:                 $metacache{$uri.'keys'}.=','.$unikey;
                    604:             } else {
                    605:                 $metacache{$uri.'keys'}=$unikey;
                    606:             }
                    607:             foreach ( @{$token->[3]}) {
                    608:                 $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
1.69      raeburn   609:             }
1.55      matthew   610:             if (! ($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){
                    611:                 $metacache{$uri.''.$unikey} = 
                    612:                     $metacache{$uri.''.$unikey.'.default'};
                    613:             }
                    614:         } # End of ($token->[0] eq 'S')
                    615:     }
                    616:     return \%metacache;
1.31      harris41  617: }
1.28      harris41  618: 
1.55      matthew   619: ########################################################
                    620: ########################################################
                    621: ###                                                  ###
                    622: ###    Dynamic Metadata                              ###
                    623: ###                                                  ###
                    624: ########################################################
                    625: ########################################################
1.56      matthew   626: ##
1.58      www       627: ## Dynamic metadata description (incomplete)
                    628: ##
                    629: ## For a full description of all fields,
                    630: ## see LONCAPA::lonmetadata
1.56      matthew   631: ##
                    632: ##   Field             Type
                    633: ##-----------------------------------------------------------
                    634: ##   count             integer
                    635: ##   course            integer
1.58      www       636: ##   course_list       comma separated list of course ids
1.56      matthew   637: ##   avetries          real                                
1.58      www       638: ##   avetries_list     comma separated list of real numbers
1.56      matthew   639: ##   stdno             real
1.58      www       640: ##   stdno_list        comma separated list of real numbers
1.56      matthew   641: ##   usage             integer   
1.58      www       642: ##   usage_list        comma separated list of resources
1.56      matthew   643: ##   goto              scalar
1.58      www       644: ##   goto_list         comma separated list of resources
1.56      matthew   645: ##   comefrom          scalar
1.58      www       646: ##   comefrom_list     comma separated list of resources
1.56      matthew   647: ##   difficulty        real
1.58      www       648: ##   difficulty_list   comma separated list of real numbers
1.56      matthew   649: ##   sequsage          scalar
1.58      www       650: ##   sequsage_list     comma separated list of resources
1.56      matthew   651: ##   clear             real
                    652: ##   technical         real
                    653: ##   correct           real
                    654: ##   helpful           real
                    655: ##   depth             real
                    656: ##   comments          html of all the comments made
                    657: ##
                    658: {
                    659: 
                    660: my %DynamicData;
                    661: my %Counts;
                    662: 
                    663: sub process_dynamic_metadata {
                    664:     my ($user,$dom) = @_;
                    665:     undef(%DynamicData);
                    666:     undef(%Counts);
                    667:     #
                    668:     my $prodir = &propath($dom,$user);
1.55      matthew   669:     #
1.56      matthew   670:     # Read in the dynamic metadata
1.55      matthew   671:     my %evaldata;
                    672:     if (! tie(%evaldata,'GDBM_File',
                    673:               $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
1.56      matthew   674:         return 0;
1.55      matthew   675:     }
1.56      matthew   676:     #
1.57      matthew   677:     %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
1.55      matthew   678:     untie(%evaldata);
1.62      matthew   679:     $DynamicData{'domain'} = $dom;
1.64      albertel  680:     #print('user = '.$user.' domain = '.$dom.$/);
1.56      matthew   681:     #
                    682:     # Read in the access count data
                    683:     &log(7,'Reading access count data') if ($debug);
                    684:     my %countdata;
                    685:     if (! tie(%countdata,'GDBM_File',
                    686:               $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
                    687:         return 0;
                    688:     }
                    689:     while (my ($key,$count) = each(%countdata)) {
                    690:         next if ($key !~ /^$dom/);
                    691:         $key = &unescape($key);
                    692:         &log(8,'    Count '.$key.' = '.$count) if ($debug);
                    693:         $Counts{$key}=$count;
                    694:     }
                    695:     untie(%countdata);
                    696:     if ($debug) {
                    697:         &log(7,scalar(keys(%Counts)).
                    698:              " Counts read for ".$user."@".$dom);
                    699:         &log(7,scalar(keys(%DynamicData)).
                    700:              " Dynamic metadata read for ".$user."@".$dom);
                    701:     }
                    702:     #
                    703:     return 1;
                    704: }
                    705: 
                    706: sub get_dynamic_metadata {
                    707:     my ($url) = @_;
                    708:     $url =~ s:^/res/::;
1.57      matthew   709:     my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                    710:                                                                \%DynamicData);
1.56      matthew   711:     # find the count
                    712:     $data{'count'} = $Counts{$url};
                    713:     #
                    714:     # Log the dynamic metadata
                    715:     if ($debug) {
                    716:         while (my($k,$v)=each(%data)) {
                    717:             &log(8,"    ".$k." => ".$v);
                    718:         }
1.44      www       719:     }
1.56      matthew   720:     return %data;
1.30      www       721: }
1.28      harris41  722: 
1.56      matthew   723: } # End of %DynamicData and %Counts scope
                    724: 
1.55      matthew   725: ########################################################
                    726: ########################################################
                    727: ###                                                  ###
                    728: ###   Counts                                         ###
                    729: ###                                                  ###
                    730: ########################################################
                    731: ########################################################
                    732: {
1.1       harris41  733: 
1.55      matthew   734: my %countext;
1.15      harris41  735: 
##
## &count_type($file)
##   Adds one to the tally kept in %countext for the lower-cased
##   extension of $file.  Names without a '.ext' suffix are tallied
##   under the empty string.
##
sub count_type {
    my ($file) = @_;
    # $1 is only meaningful when the match succeeded; after a failed match
    # it still holds the capture of an earlier successful match, which
    # would be counted as the wrong type.
    my $ext = ($file =~ /\.(\w+)$/) ? lc($1) : '';
    $countext{$ext}++;
}
1.1       harris41  742: 
##
## &write_type_count()
##   Writes the tallies collected in %countext to
##   /home/httpd/html/lon-status/rescount.txt as one line of
##   'extension=count&' pairs followed by 'time=<epoch seconds>'.
##   A file that cannot be opened or closed is logged, not fatal.
##
sub write_type_count {
    my $statusfile = '/home/httpd/html/lon-status/rescount.txt';
    my $fh;
    if (! open($fh,'>',$statusfile)) {
        &log(0,"Unable to open $statusfile for writing: $!");
        return;
    }
    while (my ($extension,$count) = each(%countext)) {
        print $fh $extension.'='.$count.'&';
    }
    print $fh 'time='.time."\n";
    # Buffered write errors only show up when the handle is closed
    if (! close($fh)) {
        &log(0,"Unable to close $statusfile: $!");
    }
    return;
}
1.27      www       751: 
1.55      matthew   752: } # end of scope for %countext
1.34      matthew   753: 
1.55      matthew   754: {
1.34      matthew   755: 
1.55      matthew   756: my %copyrights;
1.44      www       757: 
1.55      matthew   758: sub count_copyright {
                    759:     $copyrights{@_[0]}++;
1.31      harris41  760: }
1.33      matthew   761: 
##
## &write_copyright_count()
##   Writes the tallies collected in %copyrights to
##   /home/httpd/html/lon-status/copyrightcount.txt as one line of
##   'copyright=count&' pairs followed by 'time=<epoch seconds>'.
##   A file that cannot be opened or closed is logged, not fatal.
##
sub write_copyright_count {
    my $statusfile = '/home/httpd/html/lon-status/copyrightcount.txt';
    my $fh;
    if (! open($fh,'>',$statusfile)) {
        &log(0,"Unable to open $statusfile for writing: $!");
        return;
    }
    while (my ($copyright,$count) = each(%copyrights)) {
        print $fh $copyright.'='.$count.'&';
    }
    print $fh 'time='.time."\n";
    # Buffered write errors only show up when the handle is closed
    if (! close($fh)) {
        &log(0,"Unable to close $statusfile: $!");
    }
    return;
}
1.28      harris41  770: 
1.55      matthew   771: } # end of scope for %copyrights
1.28      harris41  772: 
1.55      matthew   773: ########################################################
                    774: ########################################################
                    775: ###                                                  ###
                    776: ###   Miscellaneous Utility Routines                 ###
                    777: ###                                                  ###
                    778: ########################################################
                    779: ########################################################
                    780: ##
                    781: ## &ishome($username)
                    782: ##   Returns 1 if $username is a LON-CAPA author, 0 otherwise
                    783: ##   (copied from lond, modification of the return value)
1.31      harris41  784: sub ishome {
                    785:     my $author=shift;
1.76      albertel  786:     $author=~s{/home/httpd/html/res/([^/]*)/([^/]*).*}{$1/$2};
1.31      harris41  787:     my ($udom,$uname)=split(/\//,$author);
                    788:     my $proname=propath($udom,$uname);
                    789:     if (-e $proname) {
                    790: 	return 1;
                    791:     } else {
                    792:         return 0;
                    793:     }
                    794: }
1.28      harris41  795: 
##
## &declutter($filename)
##   Given a filename, returns a url for the filename: a leading
##   $Apache::lonnet::perlvar{'lonDocRoot'}, then a leading '/', then a
##   leading 'res/' are removed, in that order.
sub declutter {
    my ($thisfn) = @_;
    my $docroot = $Apache::lonnet::perlvar{'lonDocRoot'};
    # Quote the document root so that regex metacharacters in the path
    # are matched literally.
    if (defined($docroot) && $docroot ne '') {
        $thisfn =~ s/^\Q$docroot\E//;
    }
    $thisfn =~ s{^/}{};
    $thisfn =~ s{^res/}{};
    return $thisfn;
}
1.28      harris41  806: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>