Diff for /loncom/metadata_database/searchcat.pl between versions 1.28 and 1.57

version 1.28, 2003/02/03 05:39:37 version 1.57, 2004/04/12 21:11:45
Line 6 Line 6
 #  #
 # Copyright Michigan State University Board of Trustees  # Copyright Michigan State University Board of Trustees
 #  #
 # This file is part of the LearningOnline Network with a  # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
 # Computer assisted personalized approach (loncapa).  
 #  #
 # Loncapa is free software; you can redistribute it and/or modify  # LON-CAPA is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by  # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation; either version 2 of the License, or  # the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.  # (at your option) any later version.
 #  #
 # Loncapa is distributed in the hope that it will be useful,  # LON-CAPA is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of  # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.  # GNU General Public License for more details.
 #  #
 # You should have received a copy of the GNU General Public License  # You should have received a copy of the GNU General Public License
 # along with loncapa; if not, write to the Free Software  # along with LON-CAPA; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #  #
 # /home/httpd/html/adm/gpl.txt  # /home/httpd/html/adm/gpl.txt
 #  #
 # http://www.loncapa.org/  # http://www.lon-capa.org/
 #  
 # YEAR=2001  
 # 04/14/2001, 04/16/2001 Scott Harrison  
 #  
 # YEAR=2002  
 # 05/11/2002 Scott Harrison  
 #  
 # YEAR=2003  
 # Scott Harrison  
 #  #
 ###  ###
   
Line 53  filesystem installation location: F</etc Line 43  filesystem installation location: F</etc
 Here is the cron job entry.  Here is the cron job entry.
   
 C<# Repopulate and refresh the metadata database used for the search catalog.>  C<# Repopulate and refresh the metadata database used for the search catalog.>
   
 C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>  C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
   
 This script only allows itself to be run as the user C<www>.  This script only allows itself to be run as the user C<www>.
Line 66  The metadata is entered into a SQL datab Line 55  The metadata is entered into a SQL datab
 This script also does general database maintenance such as reformatting  This script also does general database maintenance such as reformatting
 the C<loncapa:metadata> table if it is deprecated.  the C<loncapa:metadata> table if it is deprecated.
   
 This script also builds dynamic temporal metadata and stores this inside  This script evaluates dynamic metadata from the authors'
 a F<nohist_resevaldata.db> database file.  F<nohist_resevaldata.db> database file in order to store it in MySQL.
   
 This script is playing an increasingly important role for a loncapa  This script is playing an increasingly important role for a loncapa
 library server.  The proper operation of this script is critical for a smooth  library server.  The proper operation of this script is critical for a smooth
Line 75  and correct user experience. Line 64  and correct user experience.
   
 =cut  =cut
   
 # ========================================================== Setting things up.  use strict;
   
 # ------------------------------------------------------  Use external modules.  
   
   use DBI;
 use lib '/home/httpd/lib/perl/';  use lib '/home/httpd/lib/perl/';
 use LONCAPA::Configuration;  use LONCAPA::Configuration;
   use LONCAPA::lonmetadata;
   
   use Getopt::Long;
 use IO::File;  use IO::File;
 use HTML::TokeParser;  use HTML::TokeParser;
 use DBI;  
 use GDBM_File;  use GDBM_File;
 use POSIX qw(strftime mktime);  use POSIX qw(strftime mktime);
   
 # ----------------- Code to enable 'find' subroutine listing of the .meta files  
 use File::Find;  use File::Find;
   
 # List of .meta files (used on a per-user basis).  #
 my @metalist;  # Set up configuration options
   my ($simulate,$oneuser,$help,$verbose,$logfile,$debug);
 # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.  GetOptions (
 my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');              'help'     => \$help,
 my %perlvar = %{$perlvarref};              'simulate' => \$simulate,
 undef($perlvarref); # Remove since sensitive and not needed.              'only=s'   => \$oneuser,
 delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.              'verbose=s'  => \$verbose,
               'debug' => \$debug,
 # ------------------------------------- Only run if machine is a library server              );
 if ($perlvar{'lonRole'} ne 'library')  
   {  if ($help) {
     exit(0);      print <<"ENDHELP";
   }  $0
   Rebuild and update the LON-CAPA metadata database. 
 # ------------------------------ Make sure this process is running as user=www.  Options:
 my $wwwid = getpwnam('www');      -help          Print this help
 if ($wwwid != $<)      -simulate      Do not modify the database.
   {      -only=user     Only compute for the given user.  Implies -simulate   
     $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";      -verbose=val   Sets logging level, val must be a number
     $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";      -debug         Turns on debugging output
     system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".  ENDHELP
    "mailto $emailto -s '$subj' > /dev/null");      exit 0;
     exit(1);  }
   }  
   if (! defined($debug)) {
 # ------------------------------------------------------ Initialize log output.      $debug = 0;
   }
   
   if (! defined($verbose)) {
       $verbose = 0;
   }
   
   if (defined($oneuser)) {
       $simulate=1;
   }
   
   ##
   ## Use variables for table names so we can test this routine a little easier
   my $oldname = 'metadata';
   my $newname = 'newmetadata';
   
   #
   # Read loncapa_apache.conf and loncapa.conf
   my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
   my %perlvar=%{$perlvarref};
   undef $perlvarref;
   delete $perlvar{'lonReceipt'}; # remove since sensitive (really?) & not needed
   #
   # Only run if machine is a library server
   exit if ($perlvar{'lonRole'} ne 'library');
   #
   #  Make sure this process is running from user=www
   my $wwwid=getpwnam('www');
   if ($wwwid!=$<) {
       my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
       my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
       system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
    mailto $emailto -s '$subj' > /dev/null");
       exit 1;
   }
   #
   # Let people know we are running
 open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');  open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
 print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");  &log(0,'==== Searchcat Run '.localtime()."====");
   
 my $dbh; # Database object reference handle.  
   
 # ----------------------------- Verify connection to loncapa:metadata database.  if ($debug) {
 unless (      &log(0,'simulating') if ($simulate);
  $dbh = DBI->connect('DBI:mysql:loncapa','www',      &log(0,'only processing user '.$oneuser) if ($oneuser);
     $perlvar{'lonSqlAccess'},      &log(0,'verbosity level = '.$verbose);
     { RaiseError => 0,PrintError => 0})  }
  )  #
   {   # Connect to database
     print(LOG '**** ERROR **** Cannot connect to database!'."\n");  my $dbh;
     exit(0);  if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},
   }                            { RaiseError =>0,PrintError=>0}))) {
       &log(0,"Cannot connect to database!");
 # ------------------------------ Create loncapa:metadata table if non-existent.      die "MySQL Error: Cannot connect to database!\n";
 my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.  }
     'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.  # This can return an error and still be okay, so we do not bother checking.
     'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.  # (perhaps it should be more robust and check for specific errors)
     'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.  $dbh->do('DROP TABLE IF EXISTS '.$newname);
     'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.  #
     'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.  # Create the new table
     'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.  my $request = &LONCAPA::lonmetadata::create_metadata_storage($newname);
     'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.  $dbh->do($request);
     'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.  if ($dbh->err) {
     'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.      $dbh->disconnect();
     'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';      &log(0,"MySQL Error Create: ".$dbh->errstr);
       die $dbh->errstr;
 $dbh->do($make_metadata_table); # Generate the table.  }
   #
 # ----------------------------- Verify format of the loncapa:metadata database.  # find out which users we need to examine
 #                               (delete and recreate database if necessary).  my $dom = $perlvar{'lonDefDomain'};
   opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$dom");
 # Make a positive control for verifying table structure.  my @homeusers = 
 my $make_metadata_table_CONTROL = $make_metadata_table;      grep {
 $make_metadata_table_CONTROL =~          &ishome("$perlvar{'lonDocRoot'}/res/$dom/$_");
     s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;      } grep { 
           !/^\.\.?$/;
 $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');      } readdir(RESOURCES);
 $dbh->do($make_metadata_table_CONTROL);  closedir RESOURCES;
   #
 my $table_description; # selectall reference to the table description.  if ($oneuser) {
       @homeusers=($oneuser);
 my $CONTROL_table_string; # What the table description should look like.  }
 my $table_string; # What the table description does look like.  #
   # Loop through the users
 # Calculate the CONTROL table description (what it should be).  foreach my $user (@homeusers) {
 $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');      &log(0,"=== User: ".$user);
 foreach my $table_row (@{$table_description})      &process_dynamic_metadata($user,$dom);
   {      #
     $CONTROL_table_string .= join(',',@{$table_row})."\n";      # Use File::Find to get the files we need to read/modify
   }      find(
            {preprocess => \&only_meta_files,
 # Calculate the current table description (what it currently looks like).  #          wanted     => \&print_filename,
 $table_description = $dbh->selectall_arrayref('describe metadata');  #          wanted     => \&log_metadata,
 foreach my $table_row (@{$table_description})            wanted     => \&process_meta_file,
   {            }, 
     $table_string .= join(',',@{$table_row})."\n";           "$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
   }  }
   #
 if ($table_string ne $CONTROL_table_string)  # Rename the table
   {  if (! $simulate) {
     # Log this incident.      $dbh->do('DROP TABLE IF EXISTS '.$oldname);
     print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.      if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) {
   '.'."\n");          &log(0,"MySQL Error Rename: ".$dbh->errstr);
     # Delete the table.          die $dbh->errstr;
     $dbh->do('DROP TABLE IF EXISTS metadata');      } else {
     # Generate the table.          &log(1,"MySQL table rename successful.");
     $dbh->do($make_metadata_table);      }
   }  }
   
 $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.  if (! $dbh->disconnect) {
       &log(0,"MySQL Error Disconnect: ".$dbh->errstr);
 # ----------------------------------------------- Set utilitysemaphore to zero.      die $dbh->errstr;
 $dbh->do('UPDATE metadata SET utilitysemaphore = 0');  }
   ##
 # ========================================================= Main functionality.  ## Finished!
   &log(0,"==== Searchcat completed ".localtime()." ====");
 # - Determine home authors on this server based on resources dir and user tree.  
   
 # RESOURCES: the resources directory (subdirs correspond to author usernames).  
 opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or  
     (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")  
      and exit(0));  
   
 # query_home_server_status will look for user home directories on this machine.  
 my @homeusers =  
     grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.  
     $perlvar{'lonDefDomain'}.'/'.$_)  
   } grep {!/^\.\.?$/} readdir(RESOURCES);  
 closedir(RESOURCES);  
   
 unless (@homeusers)  
   {  
     print(LOG '=== No home users found on this server.'."\n");  
   }  
   
 # Consider each author individually.  
 foreach my $user (@homeusers)  
   {  
     # Make a log entry.  
     print(LOG "\n".'=== User: '.$user."\n\n");  
   
     # Get filesystem path to this user's directory.  
     my $user_directory =  
  &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);  
   
     # Remove left-over db-files from a potentially crashed searchcat run.  
     unlink($user_directory.'/nohist_new_resevaldata.db');  
   
     # Cleanup the metalist array.  
     undef(@metalist);  
     @metalist = ();  
   
     # This will add entries to the @metalist array.  
     &File::Find::find(\&wanted,  
       $perlvar{'lonDocRoot'}.'/res/'.  
       $perlvar{'lonDefDomain'}.'/'.$user);  
   
     # -- process file to get metadata and put into search catalog SQL database  
     # Also, build and store dynamic metadata.  
     # Also, delete record entries before refreshing.  
     foreach my $m (@metalist)  
       {  
  # Log this action.  
  print(LOG "- ".$m."\n");  
   
  # Get metadata from the file.  
  my $ref = get_metadata_from_file($m);  
   
  # Make a datarecord identifier for this resource.  
  my $m2 = '/res/'.declutter($m);  
  $m2 =~ s/\.meta$//;  
   
  # Build and store dynamic metadata inside nohist_resevaldata.db.  
  build_on_the_fly_dynamic_metadata($m2);  
   
  # Delete record if it already exists.  
  my $q2 = 'select * from metadata where url like binary '."'".$m2."'";  
  my $sth = $dbh->prepare($q2);  
  $sth->execute();  
  my $r1 = $sth->fetchall_arrayref;  
  if (@$r1)  
   {  
     $sth =   
  $dbh->prepare('delete from metadata where url like binary '.  
       "'".$m2."'");  
     $sth->execute();  
   }  
   
  # Add new/replacement record into the loncapa:metadata table.  
  $sth = $dbh->prepare('insert into metadata values ('.  
      '"'.delete($ref->{'title'}).'"'.','.  
      '"'.delete($ref->{'author'}).'"'.','.  
      '"'.delete($ref->{'subject'}).'"'.','.  
      '"'.$m2.'"'.','.  
      '"'.delete($ref->{'keywords'}).'"'.','.  
      '"'.'current'.'"'.','.  
      '"'.delete($ref->{'notes'}).'"'.','.  
      '"'.delete($ref->{'abstract'}).'"'.','.  
      '"'.delete($ref->{'mime'}).'"'.','.  
      '"'.delete($ref->{'language'}).'"'.','.  
      '"'.sql_formatted_time(  
        delete($ref->{'creationdate'})).'"'.','.  
      '"'.sql_formatted_time(  
    delete($ref->{'lastrevisiondate'})).'"'.','.  
      '"'.delete($ref->{'owner'}).'"'.','.  
      '"'.delete($ref->{'copyright'}).'"'.','.  
      '1'.')');  
  $sth->execute();  
       }  
   
 # ----------------------- Clean up database, remove stale SQL database records.  
     $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');  
   
 # -------------------------------------------------- Copy over the new db-files  
     system('mv '.$user_directory.'/nohist_new_resevaldata.db '.  
          $user_directory.'/nohist_resevaldata.db');  
   }  
   
 # --------------------------------------------------- Close database connection  
 $dbh->disconnect;  
 print LOG "\n==== Searchcat completed ".localtime()." ====\n";  
 close(LOG);  close(LOG);
 exit(0);  
   
 # ================================================================ Subroutines.  
   
 =pod  
   
 =head1 SUBROUTINES  
   
 =cut  
   
 =pod  
   
 B<unescape> - translate to unstrange escaped syntax to strange characters.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$str> - string with unweird characters.  
   
 =back  
   
 =over 4  
   
 Returns:  &write_type_count();
   &write_copyright_count();
   
 =item C<string> - string with potentially weird characters.  exit 0;
   
 =back  ##
   ## Status logging routine.  Inputs: $level, $message
 =cut  ## 
   ## $level 0 should be used for normal output and error messages
 sub unescape ($)  ##
   {  ## $message does not need to end with \n.  In the case of errors
     my $str = shift(@_);  ## the message should contain as much information as possible to
   ## help in diagnosing the problem.
   ##
   sub log {
       my ($level,$message)=@_;
       $level = 0 if (! defined($level));
       if ($verbose >= $level) {
           print LOG $message.$/;
       }
   }
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###          File::Find support routines             ###
   ###                                                  ###
   ########################################################
   ########################################################
   ##
   ## &only_meta_files
   ##
   ## Called by File::Find.
   ## Takes a list of files/directories in and returns a list of files/directories
   ## to search.
   sub only_meta_files {
       my @PossibleFiles = @_;
       my @ChosenFiles;
       foreach my $file (@PossibleFiles) {
           if ( ($file =~ /\.meta$/ &&            # Ends in meta
                 $file !~ /\.\d+\.[^\.]+\.meta$/  # is not for a prior version
                ) || (-d $file )) { # directories are okay
                    # but we do not want /. or /..
               push(@ChosenFiles,$file);
           }
       }
       return @ChosenFiles;
   }
   
   ##
   ##
   ## Debugging routines, use these for 'wanted' in the File::Find call
   ##
   sub print_filename {
       my ($file) = $_;
       my $fullfilename = $File::Find::name;
       if ($debug) {
           if (-d $file) {
               &log(5," Got directory ".$fullfilename);
           } else {
               &log(5," Got file ".$fullfilename);
           }
       }
       $_=$file;
   }
   
   sub log_metadata {
       my ($file) = $_;
       my $fullfilename = $File::Find::name;
       return if (-d $fullfilename); # No need to do anything here for directories
       if ($debug) {
           &log(6,$fullfilename);
           my $ref=&metadata($fullfilename);
           if (! defined($ref)) {
               &log(6,"    No data");
               return;
           }
           while (my($key,$value) = each(%$ref)) {
               &log(6,"    ".$key." => ".$value);
           }
           &count_copyright($ref->{'copyright'});
       }
       $_=$file;
   }
   
   ##
   ## process_meta_file
   ##   Called by File::Find. 
   ##   Only input is the filename in $_.  
   sub process_meta_file {
       my ($file) = $_;
       my $filename = $File::Find::name; # full filename
       return if (-d $filename); # No need to do anything here for directories
       #
       &log(3,$filename) if ($debug);
       #
       my $ref=&metadata($filename);
       #
       # $url is the original file url, not the metadata file
       my $url='/res/'.&declutter($filename);
       $url=~s/\.meta$//;
       &log(3,"    ".$url) if ($debug);
       #
       # Ignore some files based on their metadata
       if ($ref->{'obsolete'}) { 
           &log(3,"obsolete") if ($debug);
           return; 
       }
       &count_copyright($ref->{'copyright'});
       if ($ref->{'copyright'} eq 'private') { 
           &log(3,"private") if ($debug);
           return; 
       }
       #
       # Find the dynamic metadata
       my %dyn;
       if ($url=~ m:/default$:) {
           $url=~ s:/default$:/:;
           &log(3,"Skipping dynamic data") if ($debug);
       } else {
           &log(3,"Retrieving dynamic data") if ($debug);
           %dyn=&get_dynamic_metadata($url);
           &count_type($url);
       }
       #
       $ref->{'creationdate'}     = &sqltime($ref->{'creationdate'});
       $ref->{'lastrevisiondate'} = &sqltime($ref->{'lastrevisiondate'});
       my %Data = (
                   %$ref,
                   %dyn,
                   'url'=>$url,
                   'version'=>'current');
       if (! $simulate) {
           my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname,
                                                                    \%Data);
           if ($err) {
               &log(0,"MySQL Error Insert: ".$err);
           }
           if ($count < 1) {
               &log(0,"Unable to insert record into MySQL database for $url");
           }
       }
       #
       # Reset $_ before leaving
       $_ = $file;
   }
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###  &metadata($uri)                                 ###
   ###   Retrieve metadata for the given file           ###
   ###                                                  ###
   ########################################################
   ########################################################
   sub metadata {
       my ($uri)=@_;
       my %metacache=();
       $uri=&declutter($uri);
       my $filename=$uri;
       $uri=~s/\.meta$//;
       $uri='';
       if ($filename !~ /\.meta$/) { 
           $filename.='.meta';
       }
       my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
       return undef if (! defined($metastring));
       my $parser=HTML::TokeParser->new(\$metastring);
       my $token;
       while ($token=$parser->get_token) {
           if ($token->[0] eq 'S') {
               my $entry=$token->[1];
               my $unikey=$entry;
               if (defined($token->[2]->{'part'})) { 
                   $unikey.='_'.$token->[2]->{'part'}; 
               }
               if (defined($token->[2]->{'name'})) { 
                   $unikey.='_'.$token->[2]->{'name'}; 
               }
               if ($metacache{$uri.'keys'}) {
                   $metacache{$uri.'keys'}.=','.$unikey;
               } else {
                   $metacache{$uri.'keys'}=$unikey;
               }
               foreach ( @{$token->[3]}) {
                   $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
               } 
               if (! ($metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry))){
                   $metacache{$uri.''.$unikey} = 
                       $metacache{$uri.''.$unikey.'.default'};
               }
           } # End of ($token->[0] eq 'S')
       }
       return \%metacache;
   }
   
   ##
   ## &getfile($filename)
   ##   Slurps up an entire file into a scalar.  
   ##   Returns undef if the file does not exist
   sub getfile {
       my $file = shift();
       if (! -e $file ) { 
           return undef; 
       }
       my $fh=IO::File->new($file);
       my $contents = '';
       while (<$fh>) { 
           $contents .= $_;
       }
       return $contents;
   }
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###    Dynamic Metadata                              ###
   ###                                                  ###
   ########################################################
   ########################################################
   ##
   ## Dynamic metadata description
   ##
   ##   Field             Type
   ##-----------------------------------------------------------
   ##   count             integer
   ##   course            integer
   ##   course_list       comma seperated list of course ids
   ##   avetries          real                                
   ##   avetries_list     comma seperated list of real numbers
   ##   stdno             real
   ##   stdno_list        comma seperated list of real numbers
   ##   usage             integer   
   ##   usage_list        comma seperated list of resources
   ##   goto              scalar
   ##   goto_list         comma seperated list of resources
   ##   comefrom          scalar
   ##   comefrom_list     comma seperated list of resources
   ##   difficulty        real
   ##   difficulty_list   comma seperated list of real numbers
   ##   sequsage          scalar
   ##   sequsage_list     comma seperated list of resources
   ##   clear             real
   ##   technical         real
   ##   correct           real
   ##   helpful           real
   ##   depth             real
   ##   comments          html of all the comments made
   ##
   {
   
   my %DynamicData;
   my %Counts;
   
   sub process_dynamic_metadata {
       my ($user,$dom) = @_;
       undef(%DynamicData);
       undef(%Counts);
       #
       my $prodir = &propath($dom,$user);
       #
       # Read in the dynamic metadata
       my %evaldata;
       if (! tie(%evaldata,'GDBM_File',
                 $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
           return 0;
       }
       #
       %DynamicData = &LONCAPA::lonmetadata::process_reseval_data(\%evaldata);
       untie(%evaldata);
       #
       # Read in the access count data
       &log(7,'Reading access count data') if ($debug);
       my %countdata;
       if (! tie(%countdata,'GDBM_File',
                 $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
           return 0;
       }
       while (my ($key,$count) = each(%countdata)) {
           next if ($key !~ /^$dom/);
           $key = &unescape($key);
           &log(8,'    Count '.$key.' = '.$count) if ($debug);
           $Counts{$key}=$count;
       }
       untie(%countdata);
       if ($debug) {
           &log(7,scalar(keys(%Counts)).
                " Counts read for ".$user."@".$dom);
           &log(7,scalar(keys(%DynamicData)).
                " Dynamic metadata read for ".$user."@".$dom);
       }
       #
       return 1;
   }
   
   sub get_dynamic_metadata {
       my ($url) = @_;
       $url =~ s:^/res/::;
       if (! exists($DynamicData{$url})) {
           &log(7,'    No dynamic data for '.$url) if ($debug);
           return ();
       }
       my %data = &LONCAPA::lonmetadata::process_dynamic_metadata($url,
                                                                  \%DynamicData);
       # find the count
       $data{'count'} = $Counts{$url};
       #
       # Log the dynamic metadata
       if ($debug) {
           while (my($k,$v)=each(%data)) {
               &log(8,"    ".$k." => ".$v);
           }
       }
       return %data;
   }
   
   } # End of %DynamicData and %Counts scope
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###   Counts                                         ###
   ###                                                  ###
   ########################################################
   ########################################################
   {
   
   my %countext;
   
   sub count_type {
       my $file=shift;
       $file=~/\.(\w+)$/;
       my $ext=lc($1);
       $countext{$ext}++;
   }
   
   sub write_type_count {
       open(RESCOUNT,'>/home/httpd/html/lon-status/rescount.txt');
       while (my ($extension,$count) = each(%countext)) {
    print RESCOUNT $extension.'='.$count.'&';
       }
       print RESCOUNT 'time='.time."\n";
       close(RESCOUNT);
   }
   
   } # end of scope for %countext
   
   {
   
   my %copyrights;
   
   sub count_copyright {
       $copyrights{@_[0]}++;
   }
   
   sub write_copyright_count {
       open(COPYCOUNT,'>/home/httpd/html/lon-status/copyrightcount.txt');
       while (my ($copyright,$count) = each(%copyrights)) {
    print COPYCOUNT $copyright.'='.$count.'&';
       }
       print COPYCOUNT 'time='.time."\n";
       close(COPYCOUNT);
   }
   
   } # end of scope for %copyrights
   
   ########################################################
   ########################################################
   ###                                                  ###
   ###   Miscellanous Utility Routines                  ###
   ###                                                  ###
   ########################################################
   ########################################################
   ##
   ## &ishome($username)
   ##   Returns 1 if $username is a LON-CAPA author, 0 otherwise
   ##   (copied from lond, modification of the return value)
   sub ishome {
       my $author=shift;
       $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
       my ($udom,$uname)=split(/\//,$author);
       my $proname=propath($udom,$uname);
       if (-e $proname) {
    return 1;
       } else {
           return 0;
       }
   }
   
   ##
   ## &propath($udom,$uname)
   ##   Returns the path to the users LON-CAPA directory
   ##   (copied from lond)
   sub propath {
       my ($udom,$uname)=@_;
       $udom=~s/\W//g;
       $uname=~s/\W//g;
       my $subdir=$uname.'__';
       $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
       my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
       return $proname;
   } 
   
   ##
   ## &sqltime($timestamp)
   ##
   ## Convert perl $timestamp to MySQL time.  MySQL expects YYYY-MM-DD HH:MM:SS
   ##
   sub sqltime {
       my ($time) = @_;
       my $mysqltime;
       if ($time =~ 
           /(\d+)-(\d+)-(\d+) # YYYY-MM-DD
           \s                 # a space
           (\d+):(\d+):(\d+)  # HH:MM::SS
           /x ) { 
           # Some of the .meta files have the time in mysql
           # format already, so just make sure they are 0 padded and
           # pass them back.
           $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                                $1,$2,$3,$4,$5,$6);
       } elsif ($time =~ /^\d+$/) {
           my @TimeData = gmtime($time);
           # Alter the month to be 1-12 instead of 0-11
           $TimeData[4]++;
           # Alter the year to be from 0 instead of from 1900
           $TimeData[5]+=1900;
           $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d',
                                @TimeData[5,4,3,2,1,0]);
       } elsif (! defined($time) || $time == 0) {
           $mysqltime = 0;
       } else {
           &log(0,"    sqltime:Unable to decode time ".$time);
           $mysqltime = 0;
       }
       return $mysqltime;
   }
   
   ##
   ## &declutter($filename)
   ##   Given a filename, returns a url for the filename.
   sub declutter {
       my $thisfn=shift;
       $thisfn=~s/^$perlvar{'lonDocRoot'}//;
       $thisfn=~s/^\///;
       $thisfn=~s/^res\///;
       return $thisfn;
   }
   
   ##
   ## Escape / Unescape special characters
   sub unescape {
       my $str=shift;
     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;      $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
     return($str);      return $str;
   }  }
   
 =pod  
   
 B<escape> - translate strange characters to unstrange escaped syntax.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$str> - string with potentially weird characters to unweird-ify.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - unweird-ified string.  sub escape {
       my $str=shift;
 =back  
   
 =cut  
   
 sub escape ($)  
   {  
     my $str = shift(@_);  
     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;      $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
     return($str);      return $str;
   }  }
   
 =pod  
   
 B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.  
   
 Dynamic metadata is stored in a nohist_resevaldata GDBM database.  
 Most of the calculations in this subroutine are totally pointless  
 and not useful for anything that this subroutine does.  
 (THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.)  
 The only thing that this subroutine really makes happen is adjusting  
 a 'count' value inside the F<nohist_new_resevaldata.db> as well  
 as updating F<nohist_new_resevaldata.db> with information from  
 F<nohist_resevaldata.db>.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$url> - the filesystem path (url may be a misnomer...)  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<hash> - key-value table of dynamically evaluated metadata.  
   
 =back  
   
 =cut  
   
 sub build_on_the_fly_dynamic_metadata ($)  
   {  
     # BEWARE ALL WHO TRY TO UNDERSTAND THIS ABSURDLY HORRIBLE SUBROUTINE.  
         
     # Do all sorts of mumbo-jumbo to compute the user's directory.  
     my $url = &declutter(shift(@_));  
     $url =~ s/\.meta$//;  
     my %returnhash = ();  
     my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);  
     my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);  
   
     # Attempt a GDBM database instantiation inside users directory and proceed.  
     if ((tie(%evaldata,'GDBM_File',  
             $user_directory.  
      '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&  
         (tie(%newevaldata,'GDBM_File',  
      $user_directory.  
      '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))  
       {  
  # For different variables, track the running sum and counts.  
  my %sum = ();  
  my %cnt = ();  
   
  # Define computed items as a sum (add) or an average (avg) or a raw  
  # count (cnt) or 'app'?  
  my %listitems=('count'        => 'add',  
        'course'       => 'add',  
        'avetries'     => 'avg',  
        'stdno'        => 'add',  
        'difficulty'   => 'avg',  
        'clear'        => 'avg',  
        'technical'    => 'avg',  
        'helpful'      => 'avg',  
        'correct'      => 'avg',  
        'depth'        => 'avg',  
        'comments'     => 'app',  
        'usage'        => 'cnt'  
        );  
   
  # Untaint the url and use as part of a regular expression.  
  my $regexp = $url;  
  $regexp =~ s/(\W)/\\$1/g;  
  $regexp = '___'.$regexp.'___([a-z]+)$';  
   
  # Check existing nohist database for this url.  
  # THE ONLY TIME THIS IS IMPORTANT FOR THIS AWFUL SUBROUTINE  
  # IS FOR 'count' ENTRIES  
  # AND FOR REFRESHING non-'count' ENTRIES INSIDE nohist_new DATABASE.  
  foreach (keys %evaldata)  
   {  
     my $key = &unescape($_);  
     if ($key =~ /$regexp/) # If url-based entry exists.  
       {  
  my $ctype = $1; # Set to specific category type.  
   
  # Do an increment for this category type.  
  if (defined($cnt{$ctype}))  
   {  
     $cnt{$ctype}++;   
   }  
  else  
   {  
     $cnt{$ctype} = 1;   
   }  
                 unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?  
   {  
     # Increment the sum based on the evaluated data in the db.  
     if (defined($sum{$ctype}))  
       {  
  $sum{$ctype} += $evaldata{$_};  
       }  
     else  
       {  
  $sum{$ctype} = $evaldata{$_};  
       }  
    }  
  else # 'app' mode, means to use '<hr />' as a separator  
   {  
     if (defined($sum{$ctype}))  
       {  
  if ($evaldata{$_})  
   {  
     $sum{$ctype} .= '<hr />'.$evaldata{$_};  
   }  
       }  
     else  
       {  
  $sum{$ctype} = ''.$evaldata{$_};  
       }  
   }  
  if ($ctype ne 'count')  
   {  
     # ALERT!  THIS HORRIBLE LOOP IS ACTUALLY DOING SOMETHING  
     # USEFUL!  
     $newevaldata{$_} = $evaldata{$_};  
   }  
       }  
   }  
   
  # THE ONLY OTHER TIME THIS LOOP IS USEFUL IS FOR THE 'count' HASH  
  # ELEMENT.  
  foreach (keys %cnt)  
   {  
     if ($listitems{$_} eq 'avg')  
       {  
  $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;  
       }  
     elsif ($listitems{$_} eq 'cnt')  
       {  
  $returnhash{$_} = $cnt{$_};  
       }  
     else  
       {  
  $returnhash{$_} = $sum{$_};  
       }  
   }  
   
  # A RARE MOMENT OF DOING ANYTHING USEFUL INSIDE THIS  
  # BLEEPING SUBROUTINE.  
  if ($returnhash{'count'})  
   {  
     my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';  
     $newevaldata{$newkey} = $returnhash{'count'};  
   }  
   
  untie(%evaldata); # Close/release the original nohist database.  
  untie(%newevaldata); # Close/release the new nohist database.  
       }  
     return(%returnhash);  
     # Celebrate!  We have now accomplished some simple calculations using  
     # 1000% bloated functionality in our subroutine.  Go wash your eyeballs  
     # out now.  
   }  
   
 =pod  
   
 B<wanted> - used by B<File::Find::find> subroutine.  
   
 This evaluates whether a file is wanted, and pushes it onto the  
 I<@metalist> array.  This subroutine was, for the most part, auto-generated  
 by the B<find2perl> command.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$file> - a path to the file.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<boolean> - true or false based on logical statement.  
   
 =back  
   
 =cut  
   
 sub wanted ($)  
   {  
     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&  
     -f $_ &&  
     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&  
     push(@metalist,$File::Find::dir.'/'.$_);  
   }  
   
 =pod  
   
 B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.  
   
 I<Note that this is significantly altered from a subroutine present in lonnet.>  
   
 =over 4  
   
 Parameters:  
   
 =item I<$file> - a path.to the file.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<hash reference> - a hash array (keys and values).  
   
 =back  
   
 =cut  
   
 sub get_metadata_from_file ($)  
   {  
     my ($filename) = @_;  
     my %metatable; # Used to store return value of hash-tabled metadata.  
     $filename = &declutter($filename); # Remove non-identifying filesystem info  
     my $uri = ''; # The URI is not relevant in this scenario.  
     unless ($filename =~ m/\.meta$/) # Unless ending with .meta.  
       {  
  $filename .= '.meta'; # Append a .meta suffix.  
       }  
     # Get the file contents.  
     my $metadata_string =  
  &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);  
   
     # Parse the file based on its XML tags.  
     my $parser = HTML::TokeParser->new(\$metadata_string);  
     my $token;  
     while ($token = $parser->get_token) # Loop through tokens.  
       {  
  if ($token->[0] eq 'S') # If it is a start token.  
   {  
     my $entry = $token->[1];  
     my $unikey = $entry; # A unique identifier for this xml tag key.  
     if (defined($token->[2]->{'part'}))  
       {   
  $unikey .= '_'.$token->[2]->{'part'};   
       }  
     if (defined($token->[2]->{'name'}))  
       {   
  $unikey .= '_'.$token->[2]->{'name'};   
       }  
     # Append $unikey to metatable's keys entry.  
     if ($metatable{$uri.'keys'})  
       {  
  $metatable{$uri.'keys'} .= ','.$unikey;  
       }  
     else  
       {  
  $metatable{$uri.'keys'} = $unikey;  
       }  
     # Insert contents into metatable entry for the unikey.  
     foreach my $t3 (@{$token->[3]})  
       {  
  $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};  
       }  
     # If there was no text contained inside the tags, set = default.  
     unless  
       (  
         $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)  
       )  
       {  
  $metatable{$uri.''.$unikey} =  
     $metatable{$uri.''.$unikey.'.default'};  
       }  
   }  
       }  
     # Return with a key-value table of XML tags and their tag contents.  
     return(\%metatable);  
   }  
   
 =pod  
   
 B<get_file_contents> - returns either the contents of the file or a -1.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$file> - a complete filesystem path.to the file.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - file contents or a -1.  
   
 =back  
   
 =cut  
   
 sub get_file_contents ($)  
   {  
     my $file = shift(@_);  
   
     # If file does not exist, then return a -1 value.  
     unless (-e $file)  
       {  
  return(-1);  
       }  
   
     # Read in file contents.  
     my $file_handle = IO::File->new($file);  
     my $file_contents = '';  
     while (<$file_handle>)  
       {  
  $file_contents .= $_;  
       }  
   
     # Return file contents.  
     return($file_contents);  
   }  
   
 =pod  
   
 B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).  
   
 =over 4  
   
 Parameters:  
   
 =item I<$filesystem_path> - a complete filesystem path.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - remnants of the filesystem path (beginning portion removed).  
   
 =back  
   
 =cut  
   
 sub declutter  
   {  
     my $filesystem_path = shift(@_);  
   
     # Remove beginning portions of the filesystem path.  
     $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;  
     $filesystem_path =~ s!^/!!;  
     $filesystem_path =~ s!^res/!!;  
   
     # Return what is remaining for the filesystem path.  
     return($filesystem_path);  
   }  
   
 =pod  
   
 B<query_home_server_status> - Is this the home server of an author's directory?  
   
 =over 4  
   
 Parameters:  
   
 =item I<$author_filesystem_path> - directory path for a user.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<boolean> - 1 if true; 0 if false.  
   
 =back  
   
 =cut  
   
 sub query_home_server_status ($)  
   {  
     my $author_filesystem_path = shift(@_);  
   
     # Remove beginning portion of this filesystem path.  
     $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;  
   
     # Construct path to the author's ordinary user directory.  
     my ($user_domain,$username) = split(m!/!,$author_filesystem_path);  
     my $user_directory_path = construct_path_to_user_directory($user_domain,  
        $username);  
   
     # Return status of whether the user directory path is defined.  
     if (-e $user_directory_path)  
       {  
  return(1); # True.  
       }  
     else  
       {  
         return(0); # False.  
       }  
   }  
   
 =pod  
   
 B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$user_domain> - the loncapa domain of the user.  
   
 =item I<$username> - the unique username (user id) of the user.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - representing the path on the filesystem.  
   
 =back  
   
 =cut  
   
 sub construct_path_to_user_directory ($$)  
   {  
     my ($user_domain,$username) = @_;  
   
     # Untaint.  
     $user_domain =~ s/\W//g;  
     $username =~ s/\W//g;  
   
     # Create three levels of sub-directoried filesystem path  
     # based on the first three characters of the username.  
     my $sub_filesystem_path = $username.'__';  
     $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;  
   
     # Use the sub-directoried levels and other variables to generate  
     # the complete filesystem path.  
     my $complete_filesystem_path =  
  join('/',($perlvar{'lonUsersDir'},  
   $user_domain,  
   $sub_filesystem_path,  
   $username));  
   
     # Return the complete filesystem path.  
     return($complete_filesystem_path);  
   }  
   
 =pod  
   
 B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.  
   
 =over 4  
   
 Parameters:  
   
 =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<string> - datetime sql formatted string.  
   
 =back  
   
 =cut  
   
 sub sql_formatted_time ($)  
   {  
     # Sanitize the time argument and convert to localtime array.  
     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =  
  localtime(&sanitize_time(shift(@_)));  
   
     # Convert month from (0..11) to (1..12).  
     $mon += 1;  
   
     # Make the year compatible with A.D. specification.  
     $year += 1900;  
   
     # Return a date which is compatible with MySQL's "DATETIME" format.  
     return(join('-',($year,$mon,$mday)).  
    ' '.  
    join(':',($hour,$min,$sec))  
    );  
   }  
   
   
 # ==================================== The following two subroutines are needed  
 #                 for accommodating incorrect time formats inside the metadata.  
   
 =pod  
   
 B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.  
   
 =over 4  
   
 Parameters:  
   
 =item I<%time_metadata> - a key-value listing characterizing month, year, etc.  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<integer> - seconds since epoch.  
   
 =back  
   
 =cut  
   
 sub make_seconds_since_epoch (@)  
   {  
     # Keytable of time metadata.  
     my %time_metadata = @_;  
   
     # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).  
     return(POSIX::mktime(  
  ($time_metadata{'seconds'},  
   $time_metadata{'minutes'},  
   $time_metadata{'hours'},  
   $time_metadata{'day'},  
   $time_metadata{'month'}-1,  
   $time_metadata{'year'}-1900,  
   0,  
   0,  
   $time_metadata{'dlsav'})  
  )  
    );  
   }  
   
 =pod  
   
 B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.  
   
 Somebody described this subroutine as  
 "retro-fixing of un-backward-compatible time format".  
   
 What this means, is that a part of this code expects to get UTC seconds  
 since the epoch (beginning of 1970).  Yet, some of the .meta files have  
 sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch  
 integers (e.g. 1044147435).  These time strings do not encode the timezone  
 and, in this sense, can be considered "un-backwards-compatible".  
   
 =over 4  
   
 Parameters:  
   
 =item I<$potentially_badformat_string> - string to "retro-fix".  
   
 =back  
   
 =over 4  
   
 Returns:  
   
 =item C<integer> - seconds since epoch.  
   
 =back  
   
 =cut  
   
 sub sanitize_time ($)  
   {  
     my $timestamp = shift(@_);  
     # If timestamp is in this unexpected format....  
     if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)  
       {  
  # then convert into seconds since epoch (the expected format).  
  $timestamp = &make_seconds_since_epoch(  
        'year' => $1,  
        'month' => $2,  
        'day' => $3,  
        'hours' => $4,  
        'minutes' => $5,  
        'seconds' => $6  
        );  
       }  
     # Otherwise we assume timestamp to be as expected.  
     return($timestamp);  
   }  
   
 =pod  
   
 =head1 AUTHOR  
   
 Written to help the loncapa project.  
   
 Scott Harrison, sharrison@users.sourceforge.net  
   
 This is distributed under the same terms as loncapa (i.e. "freeware").  
   
 =cut  

Removed from v.1.28  
changed lines
  Added in v.1.57


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>