Diff for /loncom/metadata_database/searchcat.pl between versions 1.29 and 1.67

version 1.29, 2003/02/03 13:42:16 version 1.67, 2006/02/05 19:46:31
Line 26 Line 26
 #  #
 # http://www.lon-capa.org/  # http://www.lon-capa.org/
 #  #
 # YEAR=2001  
 # 04/14/2001, 04/16/2001 Scott Harrison  
 #  
 # YEAR=2002  
 # 05/11/2002 Scott Harrison  
 #  
 # YEAR=2003  
 # Scott Harrison  
 #  
 ###  ###
   
 =pod  =pod
Line 52  filesystem installation location: F</etc Line 43  filesystem installation location: F</etc
 Here is the cron job entry.  Here is the cron job entry.
   
 C<# Repopulate and refresh the metadata database used for the search catalog.>  C<# Repopulate and refresh the metadata database used for the search catalog.>
   
 C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>  C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
   
 This script only allows itself to be run as the user C<www>.  This script only allows itself to be run as the user C<www>.
Line 65  The metadata is entered into a SQL datab Line 55  The metadata is entered into a SQL datab
 This script also does general database maintenance such as reformatting  This script also does general database maintenance such as reformatting
 the C<loncapa:metadata> table if it is deprecated.  the C<loncapa:metadata> table if it is deprecated.
   
 This script also builds dynamic temporal metadata and stores this inside  This script evaluates dynamic metadata from the authors'
 a F<nohist_resevaldata.db> database file.  F<nohist_resevaldata.db> database file in order to store it in MySQL.
   
 This script is playing an increasingly important role for a loncapa  This script is playing an increasingly important role for a loncapa
 library server.  The proper operation of this script is critical for a smooth  library server.  The proper operation of this script is critical for a smooth
Line 74  and correct user experience. Line 64  and correct user experience.
   
 =cut  =cut
   
 # ========================================================== Setting things up.  use strict;
   BEGIN {
 # ------------------------------------------------------  Use external modules.      eval "use Apache2::compat();";
   };
   use DBI;
 use lib '/home/httpd/lib/perl/';  use lib '/home/httpd/lib/perl/';
 use LONCAPA::Configuration;  use LONCAPA::lonmetadata;
   
   use Getopt::Long;
 use IO::File;  use IO::File;
 use HTML::TokeParser;  use HTML::TokeParser;
 use DBI;  
 use GDBM_File;  use GDBM_File;
 use POSIX qw(strftime mktime);  use POSIX qw(strftime mktime);
   
 # ----------------- Code to enable 'find' subroutine listing of the .meta files  use Apache::lonnet();
 use File::Find;  
   
 # List of .meta files (used on a per-user basis).  
 my @metalist;  
   
 # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.  
 my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');  
 my %perlvar = %{$perlvarref};  
 undef($perlvarref); # Remove since sensitive and not needed.  
 delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.  
   
 # ------------------------------------- Only run if machine is a library server  
 if ($perlvar{'lonRole'} ne 'library')  
   {  
     exit(0);  
   }  
   
 # ------------------------------ Make sure this process is running as user=www.  
 my $wwwid = getpwnam('www');  
 if ($wwwid != $<)  
   {  
     $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";  
     $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";  
     system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".  
    "mailto $emailto -s '$subj' > /dev/null");  
     exit(1);  
   }  
   
 # ------------------------------------------------------ Initialize log output.  
 open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');  
 print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");  
   
 my $dbh; # Database object reference handle.  
   
 # ----------------------------- Verify connection to loncapa:metadata database.  
 unless (  
  $dbh = DBI->connect('DBI:mysql:loncapa','www',  
     $perlvar{'lonSqlAccess'},  
     { RaiseError => 0,PrintError => 0})  
  )  
   {   
     print(LOG '**** ERROR **** Cannot connect to database!'."\n");  
     exit(0);  
   }  
   
 # ------------------------------ Create loncapa:metadata table if non-existent.  
 my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.  
     'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.  
     'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.  
     'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.  
     'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.  
     'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.  
     'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.  
     'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.  
     'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.  
     'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.  
     'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';  
   
 $dbh->do($make_metadata_table); # Generate the table.  
   
 # ----------------------------- Verify format of the loncapa:metadata database.  
 #                               (delete and recreate database if necessary).  
   
 # Make a positive control for verifying table structure.  
 my $make_metadata_table_CONTROL = $make_metadata_table;  
 $make_metadata_table_CONTROL =~  
     s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;  
   
 $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');  
 $dbh->do($make_metadata_table_CONTROL);  
   
 my $table_description; # selectall reference to the table description.  
   
 my $CONTROL_table_string; # What the table description should look like.  
 my $table_string; # What the table description does look like.  
   
 # Calculate the CONTROL table description (what it should be).  
 $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');  
 foreach my $table_row (@{$table_description})  
   {  
     $CONTROL_table_string .= join(',',@{$table_row})."\n";  
   }  
   
 # Calculate the current table description (what it currently looks like).  
 $table_description = $dbh->selectall_arrayref('describe metadata');  
 foreach my $table_row (@{$table_description})  
   {  
     $table_string .= join(',',@{$table_row})."\n";  
   }  
   
 if ($table_string ne $CONTROL_table_string)  
   {  
     # Log this incident.  
     print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.  
   '.'."\n");  
     # Delete the table.  
     $dbh->do('DROP TABLE IF EXISTS metadata');  
     # Generate the table.  
     $dbh->do($make_metadata_table);  
   }  
   
 $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.  
   
 # ----------------------------------------------- Set utilitysemaphore to zero.  
 $dbh->do('UPDATE metadata SET utilitysemaphore = 0');  
   
 # ========================================================= Main functionality.  
   
 # - Determine home authors on this server based on resources dir and user tree.  
   
 # RESOURCES: the resources directory (subdirs correspond to author usernames).  
 opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or  
     (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")  
      and exit(0));  
   
 # query_home_server_status will look for user home directories on this machine.  
 my @homeusers =  
     grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.  
     $perlvar{'lonDefDomain'}.'/'.$_)  
   } grep {!/^\.\.?$/} readdir(RESOURCES);  
 closedir(RESOURCES);  
   
 unless (@homeusers)  
   {  
     print(LOG '=== No home users found on this server.'."\n");  
   }  
   
 # Consider each author individually.  
 foreach my $user (@homeusers)  
   {  
     # Make a log entry.  
     print(LOG "\n".'=== User: '.$user."\n\n");  
   
     # Get filesystem path to this user's directory.  
     my $user_directory =  
  &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);  
   
     # Remove left-over db-files from a potentially crashed searchcat run.  
     unlink($user_directory.'/nohist_new_resevaldata.db');  
   
     # Cleanup the metalist array.  
     undef(@metalist);  
     @metalist = ();  
   
     # This will add entries to the @metalist array.  
     &File::Find::find(\&wanted,  
       $perlvar{'lonDocRoot'}.'/res/'.  
       $perlvar{'lonDefDomain'}.'/'.$user);  
   
     # -- process file to get metadata and put into search catalog SQL database  
     # Also, build and store dynamic metadata.  
     # Also, delete record entries before refreshing.  
     foreach my $m (@metalist)  
       {  
  # Log this action.  
  print(LOG "- ".$m."\n");  
   
  # Get metadata from the file.  
  my $ref = get_metadata_from_file($m);  
   
  # Make a datarecord identifier for this resource.  
  my $m2 = '/res/'.declutter($m);  
  $m2 =~ s/\.meta$//;  
   
  # Build and store dynamic metadata inside nohist_resevaldata.db.  
  build_on_the_fly_dynamic_metadata($m2);  
   
  # Delete record if it already exists.  
  my $q2 = 'select * from metadata where url like binary '."'".$m2."'";  
  my $sth = $dbh->prepare($q2);  
  $sth->execute();  
  my $r1 = $sth->fetchall_arrayref;  
  if (@$r1)  
   {  
     $sth =   
  $dbh->prepare('delete from metadata where url like binary '.  
       "'".$m2."'");  
     $sth->execute();  
   }  
   
  # Add new/replacement record into the loncapa:metadata table.  
  $sth = $dbh->prepare('insert into metadata values ('.  
      '"'.delete($ref->{'title'}).'"'.','.  
      '"'.delete($ref->{'author'}).'"'.','.  
      '"'.delete($ref->{'subject'}).'"'.','.  
      '"'.$m2.'"'.','.  
      '"'.delete($ref->{'keywords'}).'"'.','.  
      '"'.'current'.'"'.','.  
      '"'.delete($ref->{'notes'}).'"'.','.  
      '"'.delete($ref->{'abstract'}).'"'.','.  
      '"'.delete($ref->{'mime'}).'"'.','.  
      '"'.delete($ref->{'language'}).'"'.','.  
      '"'.sql_formatted_time(  
        delete($ref->{'creationdate'})).'"'.','.  
      '"'.sql_formatted_time(  
    delete($ref->{'lastrevisiondate'})).'"'.','.  
      '"'.delete($ref->{'owner'}).'"'.','.  
      '"'.delete($ref->{'copyright'}).'"'.','.  
      '1'.')');  
  $sth->execute();  
       }  
   
 # ----------------------- Clean up database, remove stale SQL database records.  
     $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');  
   
 # -------------------------------------------------- Copy over the new db-files  
     system('mv '.$user_directory.'/nohist_new_resevaldata.db '.  
          $user_directory.'/nohist_resevaldata.db');  
   }  
   
 # --------------------------------------------------- Close database connection  
 $dbh->disconnect;  
 print LOG "\n==== Searchcat completed ".localtime()." ====\n";  
 close(LOG);  
 exit(0);  
   
 # ================================================================ Subroutines.  
   
 =pod  use File::Find;
   
 =head1 SUBROUTINES  
   
 =cut  
   
 =pod  
   
 B<unescape> - translate to unstrange escaped syntax to strange characters.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$str> - string with unweird characters.  
   
 =back  #
   # Set up configuration options
   my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
   GetOptions (
               'help'     => \$help,
               'simulate' => \$simulate,
               'only=s'   => \$oneuser,
               'verbose=s'  => \$verbose,
               'debug' => \$debug,
               );
   
   if ($help) {
       print <<"ENDHELP";
   $0
   Rebuild and update the LON-CAPA metadata database. 
   Options:
       -help          Print this help
       -simulate      Do not modify the database.
       -only=user     Only compute for the given user.  Implies -simulate   
       -verbose=val   Sets logging level, val must be a number
       -debug         Turns on debugging output
   ENDHELP
       exit 0;
   }
   
   if (! defined($debug)) {
       $debug = 0;
   }
   
   if (! defined($verbose)) {
       $verbose = 0;
   }
   
   if (defined($oneuser)) {
       $simulate=1;
   }
   
   ##
   ## Use variables for table names so we can test this routine a little easier
   my $oldname = 'metadata';
   my $newname = 'newmetadata'.$$; # append pid to have unique temporary table
   
 =over 4  #
   # Only run if machine is a library server
   exit if ($Apache::lonnet::perlvar{'lonRole'} ne 'library');
   #
   #  Make sure this process is running from user=www
   my $wwwid=getpwnam('www');
   if ($wwwid!=$<) {
       my $emailto="$Apache::lonnet::perlvar{'lonAdmEMail'},$Apache::lonnet::perlvar{'lonSysEMail'}";
       my $subj="LON: $Apache::lonnet::perlvar{'lonHostID'} User ID mismatch";
       system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
    mail -s '$subj' $emailto > /dev/null");
       exit 1;
   }
   #
   # Let people know we are running
   open(LOG,'>>'.$Apache::lonnet::perlvar{'lonDaemons'}.'/logs/searchcat.log');
   &log(0,'==== Searchcat Run '.localtime()."====");
   
 Returns:  
   
 =item C<string> - string with potentially weird characters.  if ($debug) {
       &log(0,'simulating') if ($simulate);
       &log(0,'only processing user '.$oneuser) if ($oneuser);
       &log(0,'verbosity level = '.$verbose);
   }
   #
   # Connect to database
   my $dbh;
   if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$Apache::lonnet::perlvar{'lonSqlAccess'},
                             { RaiseError =>0,PrintError=>0}))) {
       &log(0,"Cannot connect to database!");
       die "MySQL Error: Cannot connect to database!\n";
   }
   # This can return an error and still be okay, so we do not bother checking.
   # (perhaps it should be more robust and check for specific errors)
   $dbh->do('DROP TABLE IF EXISTS '.$newname);
   #
   # Create the new table
   my $request = &LONCAPA::lonmetadata::create_metadata_storage($newname);
   $dbh->do($request);
   if ($dbh->err) {
       $dbh->disconnect();
       &log(0,"MySQL Error Create: ".$dbh->errstr);
       die $dbh->errstr;
   }
   #
   # find out which users we need to examine
   my @domains = sort(&Apache::lonnet::current_machine_domains());
   &log(9,'domains ="'.join('","',@domains).'"');
   
   foreach my $dom (@domains) {
       &log(9,'domain = '.$dom);
       opendir(RESOURCES,"$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom");
       my @homeusers = 
           grep {
               &ishome("$Apache::lonnet::perlvar{'lonDocRoot'}/res/$dom/$_");
           } grep { 
               !/^\.\.?$/;
           } readdir(RESOURCES);
       closedir RESOURCES;
       &log(5,'users = '.$dom.':'.join(',',@homeusers));
       #
       if ($oneuser) {
           @homeusers=($oneuser);
       }
       #
       # Loop through the users
       foreach my $user (@homeusers) {
           &log(0,"=== User: ".$user);
           &process_dynamic_metadata($user,$dom);
           #
           # Use File::Find to get the files we need to read/modify
           find(
                {preprocess => \&only_meta_files,
                 #wanted     => \&print_filename,
                 #wanted     => \&log_metadata,
                 wanted     => \&process_meta_file,
                 no_chdir   => 1,
                }, join('/',($Apache::lonnet::perlvar{'lonDocRoot'},'res',$dom,$user)) );
       }
   }
   #
   # Rename the table
   if (! $simulate) {
       $dbh->do('DROP TABLE IF EXISTS '.$oldname);
       if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) {
           &log(0,"MySQL Error Rename: ".$dbh->errstr);
           die $dbh->errstr;
       } else {
           &log(1,"MySQL table rename successful.");
       }
   }
   if (! $dbh->disconnect) {
       &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
       die $dbh->errstr;
   }
   ##
   ## Finished!
   &log(0,"==== Searchcat completed ".localtime()." ====");
   close(LOG);
   
 =back  &write_type_count();
   &write_copyright_count();
   
 =cut  exit 0;
   
 sub unescape ($)  ##
   {  ## Status logging routine.  Inputs: $level, $message
     my $str = shift(@_);  ## 
   ## $level 0 should be used for normal output and error messages
   ##
   ## $message does not need to end with \n.  In the case of errors
   ## the message should contain as much information as possible to
   ## help in diagnosing the problem.
   ##
   sub log {
       my ($level,$message)=@_;
       $level = 0 if (! defined($level));
       if ($verbose >= $level) {
           print LOG $message.$/;
       }
   }
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###          File::Find support routines             ###
   ###                                                  ###
   ########################################################
   ########################################################
   ##
   ## &only_meta_files
   ##
   ## Called by File::Find.
   ## Takes a list of files/directories in and returns a list of files/directories
   ## to search.
   sub only_meta_files {
       my @PossibleFiles = @_;
       my @ChosenFiles;
       foreach my $file (@PossibleFiles) {
           if ( ($file =~ /\.meta$/ &&            # Ends in meta
                 $file !~ /\.\d+\.[^\.]+\.meta$/  # is not for a prior version
                ) || (-d $File::Find::dir."/".$file )) { # directories are okay
                    # but we do not want /. or /..
               push(@ChosenFiles,$file);
           }
       }
       return @ChosenFiles;
   }
   
   ##
   ##
   ## Debugging routines, use these for 'wanted' in the File::Find call
   ##
   sub print_filename {
       my ($file) = $_;
       my $fullfilename = $File::Find::name;
       if ($debug) {
           if (-d $file) {
               &log(5," Got directory ".$fullfilename);
           } else {
               &log(5," Got file ".$fullfilename);
           }
       }
       $_=$file;
   }
   
   sub log_metadata {
       my ($file) = $_;
       my $fullfilename = $File::Find::name;
       return if (-d $fullfilename); # No need to do anything here for directories
       if ($debug) {
           &log(6,$fullfilename);
           my $ref=&metadata($fullfilename);
           if (! defined($ref)) {
               &log(6,"    No data");
               return;
           }
           while (my($key,$value) = each(%$ref)) {
               &log(6,"    ".$key." => ".$value);
           }
           &count_copyright($ref->{'copyright'});
       }
       $_=$file;
   }
   
   ##
   ## process_meta_file
   ##   Called by File::Find. 
   ##   Only input is the filename in $_.  
   sub process_meta_file {
       my ($file) = $_;
       my $filename = $File::Find::name; # full filename
       return if (-d $filename); # No need to do anything here for directories
       #
       &log(3,$filename) if ($debug);
       #
       my $ref=&metadata($filename);
       #
       # $url is the original file url, not the metadata file
       my $target = $filename;
       $target =~ s/\.meta$//;
       my $url='/res/'.&declutter($target);
       &log(3,"    ".$url) if ($debug);
       #
       # Ignore some files based on their metadata
       if ($ref->{'obsolete'}) { 
           &log(3,"obsolete") if ($debug);
           return; 
       }
       &count_copyright($ref->{'copyright'});
       if ($ref->{'copyright'} eq 'private') { 
           &log(3,"private") if ($debug);
           return; 
       }
       #
       # Find the dynamic metadata
       my %dyn;
       if ($url=~ m:/default$:) {
           $url=~ s:/default$:/:;
           &log(3,"Skipping dynamic data") if ($debug);
       } else {
           &log(3,"Retrieving dynamic data") if ($debug);
           %dyn=&get_dynamic_metadata($url);
           &count_type($url);
       }
       #
       if (! defined($ref->{'creationdate'}) ||
           $ref->{'creationdate'} =~ /^\s*$/) {
           $ref->{'creationdate'} = (stat($target))[9];
       }
       if (! defined($ref->{'lastrevisiondate'}) ||
           $ref->{'lastrevisiondate'} =~ /^\s*$/) {
           $ref->{'lastrevisiondate'} = (stat($target))[9];
       }
       $ref->{'creationdate'}     = &sqltime($ref->{'creationdate'});
       $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'});
       my %Data = (
                   %$ref,
                   %dyn,
                   'url'=>$url,
                   'version'=>'current');
       if (! $simulate) {
           my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname,
                                                                    \%Data);
           if ($err) {
               &log(0,"MySQL Error Insert: ".$err);
           }
           if ($count < 1) {
               &log(0,"Unable to insert record into MySQL database for $url");
           }
       }
       #
       # Reset $_ before leaving
       $_ = $file;
   }
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###  &metadata($uri)                                 ###
   ###   Retrieve metadata for the given file           ###
   ###                                                  ###
   ########################################################
   ########################################################
   sub metadata {
       my ($uri)=@_;
       my %metacache=();
       $uri=&declutter($uri);
       my $filename=$uri;
       $uri=~s/\.meta$//;
       $uri='';
       if ($filename !~ /\.meta$/) { 
           $filename.='.meta';
       }
       my $metastring=&getfile($Apache::lonnet::perlvar{'lonDocRoot'}.'/res/'.$filename);
       return undef if (! defined($metastring));
       my $parser=HTML::TokeParser->new(\$metastring);
       my $token;
       while ($token=$parser->get_token) {
           if ($token->[0] eq 'S') {
               my $entry=$token->[1];
               my $unikey=$entry;
               if (defined($token->[2]->{'part'})) { 
                   $unikey.='_'.$token->[2]->{'part'}; 
               }
               if (defined($token->[2]->{'name'})) { 
                   $unikey.='_'.$token->[2]->{'name'}; 
               }
               if ($metacache{$uri.'keys'}) {
                   $metacache{$uri.'keys'}.=','.$unikey;
               } else {
                   $metacache{$uri.'keys'}=$unikey;
               }
               foreach ( @{$token->[3]}) {
                   $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
               } 
               if (! ($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){
                   $metacache{$uri.''.$unikey} = 
                       $metacache{$uri.''.$unikey.'.default'};
               }
           } # End of ($token->[0] eq 'S')
       }
       return \%metacache;
   }
   
   ##
   ## &getfile($filename)
   ##   Slurps up an entire file into a scalar.  
   ##   Returns undef if the file does not exist
   sub getfile {
       my $file = shift();
       if (! -e $file ) { 
           return undef; 
       }
       my $fh=IO::File->new($file);
       my $contents = '';
       while (<$fh>) { 
           $contents .= $_;
       }
       return $contents;
   }
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###    Dynamic Metadata                              ###
   ###                                                  ###
   ########################################################
   ########################################################
   ##
   ## Dynamic metadata description (incomplete)
   ##
   ## For a full description of all fields,
   ## see LONCAPA::lonmetadata
   ##
   ##   Field             Type
   ##-----------------------------------------------------------
   ##   count             integer
   ##   course            integer
   ##   course_list       comma separated list of course ids
   ##   avetries          real                                
   ##   avetries_list     comma separated list of real numbers
   ##   stdno             real
   ##   stdno_list        comma separated list of real numbers
   ##   usage             integer   
   ##   usage_list        comma separated list of resources
   ##   goto              scalar
   ##   goto_list         comma separated list of resources
   ##   comefrom          scalar
   ##   comefrom_list     comma separated list of resources
   ##   difficulty        real
   ##   difficulty_list   comma separated list of real numbers
   ##   sequsage          scalar
   ##   sequsage_list     comma separated list of resources
   ##   clear             real
   ##   technical         real
   ##   correct           real
   ##   helpful           real
   ##   depth             real
   ##   comments          html of all the comments made
   ##
   {
   
   my %DynamicData;
   my %Counts;
   
   sub process_dynamic_metadata {
       my ($user,$dom) = @_;
       undef(%DynamicData);
       undef(%Counts);
       #
       my $prodir = &propath($dom,$user);
       #
       # Read in the dynamic metadata
       my %evaldata;
       if (! tie(%evaldata,'GDBM_File',
                 $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
           return 0;
       }
       #
       %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
       untie(%evaldata);
       $DynamicData{'domain'} = $dom;
       #print('user = '.$user.' domain = '.$dom.$/);
       #
       # Read in the access count data
       &log(7,'Reading access count data') if ($debug);
       my %countdata;
       if (! tie(%countdata,'GDBM_File',
                 $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
           return 0;
       }
       while (my ($key,$count) = each(%countdata)) {
           next if ($key !~ /^$dom/);
           $key = &unescape($key);
           &log(8,'    Count '.$key.' = '.$count) if ($debug);
           $Counts{$key}=$count;
       }
       untie(%countdata);
       if ($debug) {
           &log(7,scalar(keys(%Counts)).
                " Counts read for ".$user."@".$dom);
           &log(7,scalar(keys(%DynamicData)).
                " Dynamic metadata read for ".$user."@".$dom);
       }
       #
       return 1;
   }
   
   sub get_dynamic_metadata {
       my ($url) = @_;
       $url =~ s:^/res/::;
       my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                                  \%DynamicData);
       # find the count
       $data{'count'} = $Counts{$url};
       #
       # Log the dynamic metadata
       if ($debug) {
           while (my($k,$v)=each(%data)) {
               &log(8,"    ".$k." => ".$v);
           }
       }
       return %data;
   }
   
   } # End of %DynamicData and %Counts scope
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###   Counts                                         ###
   ###                                                  ###
   ########################################################
   ########################################################
   {
   
   my %countext;
   
   sub count_type {
       my $file=shift;
       $file=~/\.(\w+)$/;
       my $ext=lc($1);
       $countext{$ext}++;
   }
   
   sub write_type_count {
       open(RESCOUNT,'>/home/httpd/html/lon-status/rescount.txt');
       while (my ($extension,$count) = each(%countext)) {
    print RESCOUNT $extension.'='.$count.'&';
       }
       print RESCOUNT 'time='.time."\n";
       close(RESCOUNT);
   }
   
   } # end of scope for %countext
   
   {
   
   my %copyrights;
   
   sub count_copyright {
       $copyrights{@_[0]}++;
   }
   
   sub write_copyright_count {
       open(COPYCOUNT,'>/home/httpd/html/lon-status/copyrightcount.txt');
       while (my ($copyright,$count) = each(%copyrights)) {
    print COPYCOUNT $copyright.'='.$count.'&';
       }
       print COPYCOUNT 'time='.time."\n";
       close(COPYCOUNT);
   }
   
   } # end of scope for %copyrights
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###   Miscellanous Utility Routines                  ###
   ###                                                  ###
   ########################################################
   ########################################################
   ##
   ## &ishome($username)
   ##   Returns 1 if $username is a LON-CAPA author, 0 otherwise
   ##   (copied from lond, modification of the return value)
   sub ishome {
       my $author=shift;
       $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
       my ($udom,$uname)=split(/\//,$author);
       my $proname=propath($udom,$uname);
       if (-e $proname) {
    return 1;
       } else {
           return 0;
       }
   }
   
   ##
   ## &propath($udom,$uname)
   ##   Returns the path to the users LON-CAPA directory
   ##   (copied from lond)
   sub propath {
       my ($udom,$uname)=@_;
       $udom=~s/\W//g;
       $uname=~s/\W//g;
       my $subdir=$uname.'__';
       $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
       my $proname="$Apache::lonnet::perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
       return $proname;
   } 
   
   ##
   ## &sqltime($timestamp)
   ##
   ## Convert perl $timestamp to MySQL time.  MySQL expects YYYY-MM-DD HH:MM:SS
   ##
   sub sqltime {
       my ($time) = @_;
       my $mysqltime;
       if ($time =~ 
           /(\d+)-(\d+)-(\d+) # YYYY-MM-DD
           \s                 # a space
           (\d+):(\d+):(\d+)  # HH:MM::SS
           /x ) { 
           # Some of the .meta files have the time in mysql
           # format already, so just make sure they are 0 padded and
           # pass them back.
           $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                                $1,$2,$3,$4,$5,$6);
       } elsif ($time =~ /^\d+$/) {
           my @TimeData = gmtime($time);
           # Alter the month to be 1-12 instead of 0-11
           $TimeData[4]++;
           # Alter the year to be from 0 instead of from 1900
           $TimeData[5]+=1900;
           $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                                @TimeData[5,4,3,2,1,0]);
       } elsif (! defined($time) || $time == 0) {
           $mysqltime = 0;
       } else {
           &log(0,"    sqltime:Unable to decode time ".$time);
           $mysqltime = 0;
       }
       return $mysqltime;
   }
   
   ##
   ## &declutter($filename)
   ##   Given a filename, returns a url for the filename.
   sub declutter {
       my $thisfn=shift;
       $thisfn=~s/^$Apache::lonnet::perlvar{'lonDocRoot'}//;
       $thisfn=~s/^\///;
       $thisfn=~s/^res\///;
       return $thisfn;
   }
   
   ##
   ## Escape / Unescape special characters
   sub unescape {
       my $str=shift;
     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;      $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
     return($str);      return $str;
   }  }
   
 =pod  
   
 B<escape> - translate strange characters to unstrange escaped syntax.  
   
 =over 4  
   
 Parameters:  sub escape {
       my $str=shift;
 =item I<$str> - string with potentially weird characters to unweird-ify.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - unweird-ified string.  
   
 =back  
   
 =cut  
   
 sub escape ($)  
   {  
     my $str = shift(@_);  
     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;      $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
     return($str);      return $str;
   }  }
   
 =pod  
   
 B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.  
   
 Dynamic metadata is stored in a nohist_resevaldata GDBM database.  
 The only thing that this subroutine really makes happen is adjusting  
 a 'count' value inside the F<nohist_new_resevaldata.db> as well  
 as updating F<nohist_new_resevaldata.db> with information from  
 F<nohist_resevaldata.db>.  
   
 It may need optmization, but since it gets called once a week. . .  
 =over 4  
   
 Parameters:  
   
 =item I<$url> - the filesystem path (url may be a misnomer...)  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<hash> - key-value table of dynamically evaluated metadata.  
   
 =back  
   
 =cut  
   
 sub build_on_the_fly_dynamic_metadata ($)  
   {  
     # some elements in here maybe non-obvious  
   
     # Need to compute the user's directory.  
     my $url = &declutter(shift(@_));  
     $url =~ s/\.meta$//;  
     my %returnhash = ();  
     my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);  
     my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);  
   
     # Attempt a GDBM database instantiation inside users directory and proceed.  
     if ((tie(%evaldata,'GDBM_File',  
             $user_directory.  
      '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&  
         (tie(%newevaldata,'GDBM_File',  
      $user_directory.  
      '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))  
       {  
  # For different variables, track the running sum and counts.  
  my %sum = ();  
  my %cnt = ();  
   
  # Define computed items as a sum (add) or an average (avg) or a raw  
  # count (cnt) or 'app'?  
  my %listitems=('count'        => 'add',  
        'course'       => 'add',  
        'avetries'     => 'avg',  
        'stdno'        => 'add',  
        'difficulty'   => 'avg',  
        'clear'        => 'avg',  
        'technical'    => 'avg',  
        'helpful'      => 'avg',  
        'correct'      => 'avg',  
        'depth'        => 'avg',  
        'comments'     => 'app',  
        'usage'        => 'cnt'  
        );  
   
  # Untaint the url and use as part of a regular expression.  
  my $regexp = $url;  
  $regexp =~ s/(\W)/\\$1/g;  
  $regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs  
   
  # Check existing nohist database for this url.  
         # this is modfying the 'count' entries  
         # and copying all othe entries over  
  foreach (keys %evaldata)  
   {  
     my $key = &unescape($_);  
     if ($key =~ /$regexp/) # If url-based entry exists.  
       {  
  my $ctype = $1; # Set to specific category type.  
   
  # Do an increment for this category type.  
  if (defined($cnt{$ctype}))  
   {  
     $cnt{$ctype}++;   
   }  
  else  
   {  
     $cnt{$ctype} = 1;   
   }  
                 unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?  
   {  
     # Increment the sum based on the evaluated data in the db.  
     if (defined($sum{$ctype}))  
       {  
  $sum{$ctype} += $evaldata{$_};  
       }  
     else  
       {  
  $sum{$ctype} = $evaldata{$_};  
       }  
    }  
  else # 'app' mode, means to use '<hr />' as a separator  
   {  
     if (defined($sum{$ctype}))  
       {  
  if ($evaldata{$_})  
   {  
     $sum{$ctype} .= '<hr />'.$evaldata{$_};  
   }  
       }  
     else  
       {  
  $sum{$ctype} = ''.$evaldata{$_};  
       }  
   }  
  if ($ctype ne 'count')  
   {  
                     # this is copying all data except 'count' attributes  
     $newevaldata{$_} = $evaldata{$_};  
   }  
       }  
   }  
   
         # the only other time this loop is useful is for the 'count' hash  
         # element  
  foreach (keys %cnt)  
   {  
     if ($listitems{$_} eq 'avg')  
       {  
  $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;  
       }  
     elsif ($listitems{$_} eq 'cnt')  
       {  
  $returnhash{$_} = $cnt{$_};  
       }  
     else  
       {  
  $returnhash{$_} = $sum{$_};  
       }  
   }  
   
         # seems to be doing something useful  
  if ($returnhash{'count'})  
   {  
     my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';  
     $newevaldata{$newkey} = $returnhash{'count'};  
   }  
   
  untie(%evaldata); # Close/release the original nohist database.  
  untie(%newevaldata); # Close/release the new nohist database.  
       }  
     return(%returnhash);  
   }  
   
 =pod  
   
 B<wanted> - used by B<File::Find::find> subroutine.  
   
 This evaluates whether a file is wanted, and pushes it onto the  
 I<@metalist> array.  This subroutine was, for the most part, auto-generated  
 by the B<find2perl> command.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$file> - a path to the file.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<boolean> - true or false based on logical statement.  
   
 =back  
   
 =cut  
   
 sub wanted ($)  
   {  
     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&  
     -f $_ &&  
     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&  
     push(@metalist,$File::Find::dir.'/'.$_);  
   }  
   
 =pod  
   
 B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.  
   
 I<Note that this is significantly altered from a subroutine present in lonnet.>  
   
 =over 4  
   
 Parameters:  
   
 =item I<$file> - a path.to the file.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<hash reference> - a hash array (keys and values).  
   
 =back  
   
 =cut  
   
 sub get_metadata_from_file ($)  
   {  
     my ($filename) = @_;  
     my %metatable; # Used to store return value of hash-tabled metadata.  
     $filename = &declutter($filename); # Remove non-identifying filesystem info  
     my $uri = ''; # The URI is not relevant in this scenario.  
     unless ($filename =~ m/\.meta$/) # Unless ending with .meta.  
       {  
  $filename .= '.meta'; # Append a .meta suffix.  
       }  
     # Get the file contents.  
     my $metadata_string =  
  &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);  
   
     # Parse the file based on its XML tags.  
     my $parser = HTML::TokeParser->new(\$metadata_string);  
     my $token;  
     while ($token = $parser->get_token) # Loop through tokens.  
       {  
  if ($token->[0] eq 'S') # If it is a start token.  
   {  
     my $entry = $token->[1];  
     my $unikey = $entry; # A unique identifier for this xml tag key.  
     if (defined($token->[2]->{'part'}))  
       {   
  $unikey .= '_'.$token->[2]->{'part'};   
       }  
     if (defined($token->[2]->{'name'}))  
       {   
  $unikey .= '_'.$token->[2]->{'name'};   
       }  
     # Append $unikey to metatable's keys entry.  
     if ($metatable{$uri.'keys'})  
       {  
  $metatable{$uri.'keys'} .= ','.$unikey;  
       }  
     else  
       {  
  $metatable{$uri.'keys'} = $unikey;  
       }  
     # Insert contents into metatable entry for the unikey.  
     foreach my $t3 (@{$token->[3]})  
       {  
  $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};  
       }  
     # If there was no text contained inside the tags, set = default.  
     unless  
       (  
         $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)  
       )  
       {  
  $metatable{$uri.''.$unikey} =  
     $metatable{$uri.''.$unikey.'.default'};  
       }  
   }  
       }  
     # Return with a key-value table of XML tags and their tag contents.  
     return(\%metatable);  
   }  
   
 =pod  
   
 B<get_file_contents> - returns either the contents of the file or a -1.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$file> - a complete filesystem path.to the file.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - file contents or a -1.  
   
 =back  
   
 =cut  
   
 sub get_file_contents ($)  
   {  
     my $file = shift(@_);  
   
     # If file does not exist, then return a -1 value.  
     unless (-e $file)  
       {  
  return(-1);  
       }  
   
     # Read in file contents.  
     my $file_handle = IO::File->new($file);  
     my $file_contents = '';  
     while (<$file_handle>)  
       {  
  $file_contents .= $_;  
       }  
   
     # Return file contents.  
     return($file_contents);  
   }  
   
 =pod  
   
 B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).  
   
 =over 4  
   
 Parameters:  
   
 =item I<$filesystem_path> - a complete filesystem path.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - remnants of the filesystem path (beginning portion removed).  
   
 =back  
   
 =cut  
   
 sub declutter  
   {  
     my $filesystem_path = shift(@_);  
   
     # Remove beginning portions of the filesystem path.  
     $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;  
     $filesystem_path =~ s!^/!!;  
     $filesystem_path =~ s!^res/!!;  
   
     # Return what is remaining for the filesystem path.  
     return($filesystem_path);  
   }  
   
 =pod  
   
 B<query_home_server_status> - Is this the home server of an author's directory?  
   
 =over 4  
   
 Parameters:  
   
 =item I<$author_filesystem_path> - directory path for a user.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<boolean> - 1 if true; 0 if false.  
   
 =back  
   
 =cut  
   
 sub query_home_server_status ($)  
   {  
     my $author_filesystem_path = shift(@_);  
   
     # Remove beginning portion of this filesystem path.  
     $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;  
   
     # Construct path to the author's ordinary user directory.  
     my ($user_domain,$username) = split(m!/!,$author_filesystem_path);  
     my $user_directory_path = construct_path_to_user_directory($user_domain,  
        $username);  
   
     # Return status of whether the user directory path is defined.  
     if (-e $user_directory_path)  
       {  
  return(1); # True.  
       }  
     else  
       {  
         return(0); # False.  
       }  
   }  
   
 =pod  
   
 B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$user_domain> - the loncapa domain of the user.  
   
 =item I<$username> - the unique username (user id) of the user.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - representing the path on the filesystem.  
   
 =back  
   
 =cut  
   
 sub construct_path_to_user_directory ($$)  
   {  
     my ($user_domain,$username) = @_;  
   
     # Untaint.  
     $user_domain =~ s/\W//g;  
     $username =~ s/\W//g;  
   
     # Create three levels of sub-directoried filesystem path  
     # based on the first three characters of the username.  
     my $sub_filesystem_path = $username.'__';  
     $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;  
   
     # Use the sub-directoried levels and other variables to generate  
     # the complete filesystem path.  
     my $complete_filesystem_path =  
  join('/',($perlvar{'lonUsersDir'},  
   $user_domain,  
   $sub_filesystem_path,  
   $username));  
   
     # Return the complete filesystem path.  
     return($complete_filesystem_path);  
   }  
   
 =pod  
   
 B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - datetime sql formatted string.  
   
 =back  
   
 =cut  
   
 sub sql_formatted_time ($)  
   {  
     # Sanitize the time argument and convert to localtime array.  
     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =  
  localtime(&sanitize_time(shift(@_)));  
   
     # Convert month from (0..11) to (1..12).  
     $mon += 1;  
   
     # Make the year compatible with A.D. specification.  
     $year += 1900;  
   
     # Return a date which is compatible with MySQL's "DATETIME" format.  
     return(join('-',($year,$mon,$mday)).  
    ' '.  
    join(':',($hour,$min,$sec))  
    );  
   }  
   
   
 # ==================================== The following two subroutines are needed  
 #                 for accommodating incorrect time formats inside the metadata.  
   
 =pod  
   
 B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.  
   
 =over 4  
   
 Parameters:  
   
 =item I<%time_metadata> - a key-value listing characterizing month, year, etc.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<integer> - seconds since epoch.  
   
 =back  
   
 =cut  
   
 sub make_seconds_since_epoch (@)  
   {  
     # Keytable of time metadata.  
     my %time_metadata = @_;  
   
     # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).  
     return(POSIX::mktime(  
  ($time_metadata{'seconds'},  
   $time_metadata{'minutes'},  
   $time_metadata{'hours'},  
   $time_metadata{'day'},  
   $time_metadata{'month'}-1,  
   $time_metadata{'year'}-1900,  
   0,  
   0,  
   $time_metadata{'dlsav'})  
  )  
    );  
   }  
   
 =pod  
   
 B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.  
   
 Somebody described this subroutine as  
 "retro-fixing of un-backward-compatible time format".  
   
 What this means, is that a part of this code expects to get UTC seconds  
 since the epoch (beginning of 1970).  Yet, some of the .meta files have  
 sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch  
 integers (e.g. 1044147435).  These time strings do not encode the timezone  
 and, in this sense, can be considered "un-backwards-compatible".  
   
 =over 4  
   
 Parameters:  
   
 =item I<$potentially_badformat_string> - string to "retro-fix".  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<integer> - seconds since epoch.  
   
 =back  
   
 =cut  
   
 sub sanitize_time ($)  
   {  
     my $timestamp = shift(@_);  
     # If timestamp is in this unexpected format....  
     if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)  
       {  
  # then convert into seconds since epoch (the expected format).  
  $timestamp = &make_seconds_since_epoch(  
        'year' => $1,  
        'month' => $2,  
        'day' => $3,  
        'hours' => $4,  
        'minutes' => $5,  
        'seconds' => $6  
        );  
       }  
     # Otherwise we assume timestamp to be as expected.  
     return($timestamp);  
   }  
   
 =pod  
   
 =head1 AUTHOR  
   
 Written to help the loncapa project.  
   
 Scott Harrison, sharrison@users.sourceforge.net  
   
 This is distributed under the same terms as loncapa (i.e. "freeware").  
   
 =cut  

Removed from v.1.29  
changed lines
  Added in v.1.67


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>