Diff for /loncom/metadata_database/searchcat.pl between versions 1.19 and 1.29

version 1.19, 2002/07/01 18:23:00 version 1.29, 2003/02/03 13:42:16
Line 32 Line 32
 # YEAR=2002  # YEAR=2002
 # 05/11/2002 Scott Harrison  # 05/11/2002 Scott Harrison
 #  #
   # YEAR=2003
   # Scott Harrison
   #
 ###  ###
   
 # This script goes through a LON-CAPA resource  =pod
 # directory and gathers metadata.  
 # The metadata is entered into a SQL database.  =head1 NAME
   
   B<searchcat.pl> - put authoritative filesystem data into sql database.
   
   =head1 SYNOPSIS
   
   Ordinarily this script is to be called from a loncapa cron job
   (CVS source location: F<loncapa/loncom/cron/loncapa>; typical
   filesystem installation location: F</etc/cron.d/loncapa>).
   
   Here is the cron job entry.
   
   C<# Repopulate and refresh the metadata database used for the search catalog.>
   
   C<10 1 * * 7    www    /home/httpd/perl/searchcat.pl>
   
   This script only allows itself to be run as the user C<www>.
   
   =head1 DESCRIPTION
   
   This script goes through a loncapa resource directory and gathers metadata.
   The metadata is entered into a SQL database.
   
   This script also does general database maintenance such as reformatting
   the C<loncapa:metadata> table if it is deprecated.
   
   This script also builds dynamic temporal metadata and stores this inside
   a F<nohist_resevaldata.db> database file.
   
   This script is playing an increasingly important role for a loncapa
   library server.  The proper operation of this script is critical for a smooth
   and correct user experience.
   
   =cut
   
   # ========================================================== Setting things up.
   
   # ------------------------------------------------------  Use external modules.
   
 use lib '/home/httpd/lib/perl/';  use lib '/home/httpd/lib/perl/';
 use LONCAPA::Configuration;  use LONCAPA::Configuration;
Line 44  use LONCAPA::Configuration; Line 84  use LONCAPA::Configuration;
 use IO::File;  use IO::File;
 use HTML::TokeParser;  use HTML::TokeParser;
 use DBI;  use DBI;
   use GDBM_File;
   use POSIX qw(strftime mktime);
   
 my @metalist;  
 # ----------------- Code to enable 'find' subroutine listing of the .meta files  # ----------------- Code to enable 'find' subroutine listing of the .meta files
 require "find.pl";  use File::Find;
 sub wanted {  
     (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&  
     -f _ &&  
     /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&  
     push(@metalist,"$dir/$_");  
 }  
   
 # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables  # List of .meta files (used on a per-user basis).
 my $perlvarref=LONCAPA::Configuration::read_conf('loncapa_apache.conf',  my @metalist;
                                                  'loncapa.conf');  
 my %perlvar=%{$perlvarref};  
 undef $perlvarref; # remove since sensitive and not needed  
 delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed  
   
 # ------------------------------------- Only run if machine is a library server  # ---------------  Read loncapa_apache.conf and loncapa.conf and get variables.
 exit unless $perlvar{'lonRole'} eq 'library';  my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
   my %perlvar = %{$perlvarref};
   undef($perlvarref); # Remove since sensitive and not needed.
   delete($perlvar{'lonReceipt'}); # Remove since sensitive and not needed.
   
 my $dbh;  # ------------------------------------- Only run if machine is a library server
 # ------------------------------------- Make sure that database can be accessed  if ($perlvar{'lonRole'} ne 'library')
 {    {
     unless (      exit(0);
     $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})    }
     ) {   
  print "Cannot connect to database!\n";  # ------------------------------ Make sure this process is running as user=www.
  exit;  my $wwwid = getpwnam('www');
     }  if ($wwwid != $<)
     my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".    {
         "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".      $emailto = "$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
         "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".      $subj = "LON: $perlvar{'lonHostID'} User ID mismatch";
         "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ".      system("echo 'User ID mismatch. searchcat.pl must be run as user www.' | ".
         "copyright TEXT, FULLTEXT idx_title (title), ".     "mailto $emailto -s '$subj' > /dev/null");
         "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ".      exit(1);
         "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ".    }
         "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ".  
         "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ".  # ------------------------------------------------------ Initialize log output.
         "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ".  open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
         "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";  print(LOG '==== Searchcat Run '.localtime().' ===='."\n\n");
     # It would sure be nice to have some logging mechanism.  
   my $dbh; # Database object reference handle.
   
   # ----------------------------- Verify connection to loncapa:metadata database.
   unless (
    $dbh = DBI->connect('DBI:mysql:loncapa','www',
       $perlvar{'lonSqlAccess'},
       { RaiseError => 0,PrintError => 0})
    )
     { 
       print(LOG '**** ERROR **** Cannot connect to database!'."\n");
       exit(0);
     }
   
   # ------------------------------ Create loncapa:metadata table if non-existent.
   my $make_metadata_table = 'CREATE TABLE IF NOT EXISTS metadata ('.
       'title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, '.
       'version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, '.
       'creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, '.
       'copyright TEXT, utilitysemaphore BOOL, FULLTEXT idx_title (title), '.
       'FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), '.
       'FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), '.
       'FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), '.
       'FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), '.
       'FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), '.
       'FULLTEXT idx_copyright (copyright)) TYPE=MYISAM';
   
   $dbh->do($make_metadata_table); # Generate the table.
   
   # ----------------------------- Verify format of the loncapa:metadata database.
   #                               (delete and recreate database if necessary).
   
   # Make a positive control for verifying table structure.
   my $make_metadata_table_CONTROL = $make_metadata_table;
   $make_metadata_table_CONTROL =~
       s/^(CREATE TABLE IF NOT EXISTS) metadata/$1 CONTROL_metadata/;
   
   $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata');
   $dbh->do($make_metadata_table_CONTROL);
   
   my $table_description; # selectall reference to the table description.
   
   my $CONTROL_table_string; # What the table description should look like.
   my $table_string; # What the table description does look like.
   
   # Calculate the CONTROL table description (what it should be).
   $table_description = $dbh->selectall_arrayref('describe CONTROL_metadata');
   foreach my $table_row (@{$table_description})
     {
       $CONTROL_table_string .= join(',',@{$table_row})."\n";
     }
   
   # Calculate the current table description (what it currently looks like).
   $table_description = $dbh->selectall_arrayref('describe metadata');
   foreach my $table_row (@{$table_description})
     {
       $table_string .= join(',',@{$table_row})."\n";
     }
   
   if ($table_string ne $CONTROL_table_string)
     {
       # Log this incident.
       print(LOG '**** WARNING **** Table structure mismatch, need to regenerate'.
     '.'."\n");
       # Delete the table.
       $dbh->do('DROP TABLE IF EXISTS metadata');
       # Generate the table.
     $dbh->do($make_metadata_table);      $dbh->do($make_metadata_table);
 }    }
   
   $dbh->do('DROP TABLE IF EXISTS CONTROL_metadata'); # Okay.  Done with control.
   
   # ----------------------------------------------- Set utilitysemaphore to zero.
   $dbh->do('UPDATE metadata SET utilitysemaphore = 0');
   
   # ========================================================= Main functionality.
   
 # ------------------------------------------------------------- get .meta files  # - Determine home authors on this server based on resources dir and user tree.
 opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");  
 my @homeusers=grep  # RESOURCES: the resources directory (subdirs correspond to author usernames).
           {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}  opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}") or
           grep {!/^\.\.?$/} readdir(RESOURCES);      (print(LOG '=== /res/--lonDefDomain-- directory is not accessible'."\n")
 closedir RESOURCES;       and exit(0));
 foreach my $user (@homeusers) {  
     &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");  # query_home_server_status will look for user home directories on this machine.
 }  my @homeusers =
       grep {&query_home_server_status($perlvar{'lonDocRoot'}.'/res/'.
 # -- process each file to get metadata and put into search catalog SQL database      $perlvar{'lonDefDomain'}.'/'.$_)
 # Also, check to see if already there.    } grep {!/^\.\.?$/} readdir(RESOURCES);
 # I could just delete (without searching first), but this works for now.  closedir(RESOURCES);
 foreach my $m (@metalist) {  
     my $ref=&metadata($m);  unless (@homeusers)
     my $m2='/res/'.&declutter($m);    {
     $m2=~s/\.meta$//;      print(LOG '=== No home users found on this server.'."\n");
     my $q2="select * from metadata where url like binary '$m2'";    }
     my $sth = $dbh->prepare($q2);  
     $sth->execute();  # Consider each author individually.
     my $r1=$sth->fetchall_arrayref;  foreach my $user (@homeusers)
     if (@$r1) {    {
  $sth=$dbh->prepare("delete from metadata where url like binary '$m2'");      # Make a log entry.
         $sth->execute();      print(LOG "\n".'=== User: '.$user."\n\n");
     }  
     $sth=$dbh->prepare('insert into metadata values ('.      # Get filesystem path to this user's directory.
   '"'.delete($ref->{'title'}).'"'.','.      my $user_directory =
   '"'.delete($ref->{'author'}).'"'.','.   &construct_path_to_user_directory($perlvar{'lonDefDomain'},$user);
   '"'.delete($ref->{'subject'}).'"'.','.  
   '"'.$m2.'"'.','.      # Remove left-over db-files from a potentially crashed searchcat run.
   '"'.delete($ref->{'keywords'}).'"'.','.      unlink($user_directory.'/nohist_new_resevaldata.db');
   '"'.'current'.'"'.','.  
   '"'.delete($ref->{'notes'}).'"'.','.      # Cleanup the metalist array.
   '"'.delete($ref->{'abstract'}).'"'.','.      undef(@metalist);
   '"'.delete($ref->{'mime'}).'"'.','.      @metalist = ();
   '"'.delete($ref->{'language'}).'"'.','.  
   '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.      # This will add entries to the @metalist array.
   '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.      &File::Find::find(\&wanted,
   '"'.delete($ref->{'owner'}).'"'.','.        $perlvar{'lonDocRoot'}.'/res/'.
   '"'.delete($ref->{'copyright'}).'"'.')');        $perlvar{'lonDefDomain'}.'/'.$user);
     $sth->execute();  
 }      # -- process file to get metadata and put into search catalog SQL database
       # Also, build and store dynamic metadata.
 # ----------------------------------------------------------- Clean up database      # Also, delete record entries before refreshing.
 # Need to, perhaps, remove stale SQL database records.      foreach my $m (@metalist)
 # ... not yet implemented        {
    # Log this action.
    print(LOG "- ".$m."\n");
   
    # Get metadata from the file.
    my $ref = get_metadata_from_file($m);
   
    # Make a datarecord identifier for this resource.
    my $m2 = '/res/'.declutter($m);
    $m2 =~ s/\.meta$//;
   
    # Build and store dynamic metadata inside nohist_resevaldata.db.
    build_on_the_fly_dynamic_metadata($m2);
   
    # Delete record if it already exists.
    my $q2 = 'select * from metadata where url like binary '."'".$m2."'";
    my $sth = $dbh->prepare($q2);
    $sth->execute();
    my $r1 = $sth->fetchall_arrayref;
    if (@$r1)
     {
       $sth = 
    $dbh->prepare('delete from metadata where url like binary '.
         "'".$m2."'");
       $sth->execute();
     }
   
    # Add new/replacement record into the loncapa:metadata table.
    $sth = $dbh->prepare('insert into metadata values ('.
        '"'.delete($ref->{'title'}).'"'.','.
        '"'.delete($ref->{'author'}).'"'.','.
        '"'.delete($ref->{'subject'}).'"'.','.
        '"'.$m2.'"'.','.
        '"'.delete($ref->{'keywords'}).'"'.','.
        '"'.'current'.'"'.','.
        '"'.delete($ref->{'notes'}).'"'.','.
        '"'.delete($ref->{'abstract'}).'"'.','.
        '"'.delete($ref->{'mime'}).'"'.','.
        '"'.delete($ref->{'language'}).'"'.','.
        '"'.sql_formatted_time(
          delete($ref->{'creationdate'})).'"'.','.
        '"'.sql_formatted_time(
      delete($ref->{'lastrevisiondate'})).'"'.','.
        '"'.delete($ref->{'owner'}).'"'.','.
        '"'.delete($ref->{'copyright'}).'"'.','.
        '1'.')');
    $sth->execute();
         }
   
   # ----------------------- Clean up database, remove stale SQL database records.
       $dbh->do('DELETE FROM metadata WHERE utilitysemaphore = 0');
   
   # -------------------------------------------------- Copy over the new db-files
       system('mv '.$user_directory.'/nohist_new_resevaldata.db '.
            $user_directory.'/nohist_resevaldata.db');
     }
   
 # --------------------------------------------------- Close database connection  # --------------------------------------------------- Close database connection
 $dbh->disconnect;  $dbh->disconnect;
   print LOG "\n==== Searchcat completed ".localtime()." ====\n";
   close(LOG);
   exit(0);
   
   # ================================================================ Subroutines.
   
   =pod
   
   =head1 SUBROUTINES
   
   =cut
   
   =pod
   
   B<unescape> - translate to unstrange escaped syntax to strange characters.
   
   =over 4
   
   Parameters:
   
   =item I<$str> - string with unweird characters.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - string with potentially weird characters.
   
   =back
   
   =cut
   
   sub unescape ($)
     {
       my $str = shift(@_);
       $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
       return($str);
     }
   
   =pod
   
   B<escape> - translate strange characters to unstrange escaped syntax.
   
   =over 4
   
   Parameters:
   
   =item I<$str> - string with potentially weird characters to unweird-ify.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - unweird-ified string.
   
   =back
   
   =cut
   
   sub escape ($)
     {
       my $str = shift(@_);
       $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
       return($str);
     }
   
   =pod
   
   B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
   
   Dynamic metadata is stored in a nohist_resevaldata GDBM database.
   The only thing that this subroutine really makes happen is adjusting
   a 'count' value inside the F<nohist_new_resevaldata.db> as well
   as updating F<nohist_new_resevaldata.db> with information from
   F<nohist_resevaldata.db>.
   
 # ---------------------------------------------------------------- Get metadata  It may need optmization, but since it gets called once a week. . .
 # significantly altered from subroutine present in lonnet  =over 4
 sub metadata {  
     my ($uri,$what)=@_;  Parameters:
     my %metacache;  
     $uri=&declutter($uri);  =item I<$url> - the filesystem path (url may be a misnomer...)
     my $filename=$uri;  
     $uri=~s/\.meta$//;  =back
     $uri='';  
     unless ($metacache{$uri.'keys'}) {  =over 4
         unless ($filename=~/\.meta$/) { $filename.='.meta'; }  
  my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);  Returns:
         my $parser=HTML::TokeParser->new(\$metastring);  
         my $token;  =item C<hash> - key-value table of dynamically evaluated metadata.
         while ($token=$parser->get_token) {  
            if ($token->[0] eq 'S') {  =back
       my $entry=$token->[1];  
               my $unikey=$entry;  =cut
               if (defined($token->[2]->{'part'})) {   
                  $unikey.='_'.$token->[2]->{'part'};   sub build_on_the_fly_dynamic_metadata ($)
       }    {
               if (defined($token->[2]->{'name'})) {       # some elements in here maybe non-obvious
                  $unikey.='_'.$token->[2]->{'name'};   
       }      # Need to compute the user's directory.
               if ($metacache{$uri.'keys'}) {      my $url = &declutter(shift(@_));
                  $metacache{$uri.'keys'}.=','.$unikey;      $url =~ s/\.meta$//;
               } else {      my %returnhash = ();
                  $metacache{$uri.'keys'}=$unikey;      my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
       }      my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
               map {  
   $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};      # Attempt a GDBM database instantiation inside users directory and proceed.
               } @{$token->[3]};      if ((tie(%evaldata,'GDBM_File',
               unless (              $user_directory.
                  $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)       '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
       ) { $metacache{$uri.''.$unikey}=          (tie(%newevaldata,'GDBM_File',
       $metacache{$uri.''.$unikey.'.default'};       $user_directory.
        '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
         {
    # For different variables, track the running sum and counts.
    my %sum = ();
    my %cnt = ();
   
    # Define computed items as a sum (add) or an average (avg) or a raw
    # count (cnt) or 'app'?
    my %listitems=('count'        => 'add',
          'course'       => 'add',
          'avetries'     => 'avg',
          'stdno'        => 'add',
          'difficulty'   => 'avg',
          'clear'        => 'avg',
          'technical'    => 'avg',
          'helpful'      => 'avg',
          'correct'      => 'avg',
          'depth'        => 'avg',
          'comments'     => 'app',
          'usage'        => 'cnt'
          );
   
    # Untaint the url and use as part of a regular expression.
    my $regexp = $url;
    $regexp =~ s/(\W)/\\$1/g;
    $regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs
   
    # Check existing nohist database for this url.
           # this is modfying the 'count' entries
           # and copying all othe entries over
    foreach (keys %evaldata)
     {
       my $key = &unescape($_);
       if ($key =~ /$regexp/) # If url-based entry exists.
         {
    my $ctype = $1; # Set to specific category type.
   
    # Do an increment for this category type.
    if (defined($cnt{$ctype}))
     {
       $cnt{$ctype}++; 
     }
    else
     {
       $cnt{$ctype} = 1; 
     }
                   unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
     {
       # Increment the sum based on the evaluated data in the db.
       if (defined($sum{$ctype}))
         {
    $sum{$ctype} += $evaldata{$_};
       }        }
           }      else
        }        {
     }   $sum{$ctype} = $evaldata{$_};
     return \%metacache;        }
 }     }
    else # 'app' mode, means to use '<hr />' as a separator
 # ------------------------------------------------------------ Serves up a file    {
 # returns either the contents of the file or a -1      if (defined($sum{$ctype}))
 sub getfile {        {
   my $file=shift;   if ($evaldata{$_})
   if (! -e $file ) { return -1; };    {
   my $fh=IO::File->new($file);      $sum{$ctype} .= '<hr />'.$evaldata{$_};
   my $a='';    }
   while (<$fh>) { $a .=$_; }        }
   return $a      else
 }        {
    $sum{$ctype} = ''.$evaldata{$_};
 # ------------------------------------------------------------- Declutters URLs        }
 sub declutter {    }
     my $thisfn=shift;   if ($ctype ne 'count')
     $thisfn=~s/^$perlvar{'lonDocRoot'}//;    {
     $thisfn=~s/^\///;                      # this is copying all data except 'count' attributes
     $thisfn=~s/^res\///;      $newevaldata{$_} = $evaldata{$_};
     return $thisfn;    }
 }        }
     }
 # --------------------------------------- Is this the home server of an author?  
 # (copied from lond, modification of the return value)          # the only other time this loop is useful is for the 'count' hash
 sub ishome {          # element
     my $author=shift;   foreach (keys %cnt)
     $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;    {
     my ($udom,$uname)=split(/\//,$author);      if ($listitems{$_} eq 'avg')
     my $proname=propath($udom,$uname);        {
     if (-e $proname) {   $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
  return 1;        }
     } else {      elsif ($listitems{$_} eq 'cnt')
         return 0;        {
     }   $returnhash{$_} = $cnt{$_};
 }        }
       else
 # -------------------------------------------- Return path to profile directory        {
 # (copied from lond)   $returnhash{$_} = $sum{$_};
 sub propath {        }
     my ($udom,$uname)=@_;    }
     $udom=~s/\W//g;  
     $uname=~s/\W//g;          # seems to be doing something useful
     my $subdir=$uname.'__';   if ($returnhash{'count'})
     $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;    {
     my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";      my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
     return $proname;      $newevaldata{$newkey} = $returnhash{'count'};
 }     }
   
    untie(%evaldata); # Close/release the original nohist database.
    untie(%newevaldata); # Close/release the new nohist database.
         }
       return(%returnhash);
     }
   
   =pod
   
   B<wanted> - used by B<File::Find::find> subroutine.
   
   This evaluates whether a file is wanted, and pushes it onto the
   I<@metalist> array.  This subroutine was, for the most part, auto-generated
   by the B<find2perl> command.
   
   =over 4
   
   Parameters:
   
   =item I<$file> - a path to the file.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<boolean> - true or false based on logical statement.
   
   =back
   
   =cut
   
   sub wanted ($)
     {
       (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
       -f $_ &&
       /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
       push(@metalist,$File::Find::dir.'/'.$_);
     }
   
   =pod
   
   B<get_metadata_from_file> - read xml-tagged file and return parsed metadata.
   
   I<Note that this is significantly altered from a subroutine present in lonnet.>
   
   =over 4
   
   Parameters:
   
   =item I<$file> - a path.to the file.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<hash reference> - a hash array (keys and values).
   
   =back
   
   =cut
   
   sub get_metadata_from_file ($)
     {
       my ($filename) = @_;
       my %metatable; # Used to store return value of hash-tabled metadata.
       $filename = &declutter($filename); # Remove non-identifying filesystem info
       my $uri = ''; # The URI is not relevant in this scenario.
       unless ($filename =~ m/\.meta$/) # Unless ending with .meta.
         {
    $filename .= '.meta'; # Append a .meta suffix.
         }
       # Get the file contents.
       my $metadata_string =
    &get_file_contents($perlvar{'lonDocRoot'}.'/res/'.$filename);
   
       # Parse the file based on its XML tags.
       my $parser = HTML::TokeParser->new(\$metadata_string);
       my $token;
       while ($token = $parser->get_token) # Loop through tokens.
         {
    if ($token->[0] eq 'S') # If it is a start token.
     {
       my $entry = $token->[1];
       my $unikey = $entry; # A unique identifier for this xml tag key.
       if (defined($token->[2]->{'part'}))
         { 
    $unikey .= '_'.$token->[2]->{'part'}; 
         }
       if (defined($token->[2]->{'name'}))
         { 
    $unikey .= '_'.$token->[2]->{'name'}; 
         }
       # Append $unikey to metatable's keys entry.
       if ($metatable{$uri.'keys'})
         {
    $metatable{$uri.'keys'} .= ','.$unikey;
         }
       else
         {
    $metatable{$uri.'keys'} = $unikey;
         }
       # Insert contents into metatable entry for the unikey.
       foreach my $t3 (@{$token->[3]})
         {
    $metatable{$uri.''.$unikey.'.'.$_} = $token->[2]->{$t3};
         }
       # If there was no text contained inside the tags, set = default.
       unless
         (
           $metatable{$uri.''.$unikey} = $parser->get_text('/'.$entry)
         )
         {
    $metatable{$uri.''.$unikey} =
       $metatable{$uri.''.$unikey.'.default'};
         }
     }
         }
       # Return with a key-value table of XML tags and their tag contents.
       return(\%metatable);
     }
   
   =pod
   
   B<get_file_contents> - returns either the contents of the file or a -1.
   
   =over 4
   
   Parameters:
   
   =item I<$file> - a complete filesystem path.to the file.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - file contents or a -1.
   
   =back
   
   =cut
   
   sub get_file_contents ($)
     {
       my $file = shift(@_);
   
       # If file does not exist, then return a -1 value.
       unless (-e $file)
         {
    return(-1);
         }
   
       # Read in file contents.
       my $file_handle = IO::File->new($file);
       my $file_contents = '';
       while (<$file_handle>)
         {
    $file_contents .= $_;
         }
   
       # Return file contents.
       return($file_contents);
     }
   
   =pod
   
   B<declutter> - Declutters URLs (remove extraneous prefixed filesystem path).
   
   =over 4
   
   Parameters:
   
   =item I<$filesystem_path> - a complete filesystem path.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - remnants of the filesystem path (beginning portion removed).
   
   =back
   
   =cut
   
   sub declutter
     {
       my $filesystem_path = shift(@_);
   
       # Remove beginning portions of the filesystem path.
       $filesystem_path =~ s/^$perlvar{'lonDocRoot'}//;
       $filesystem_path =~ s!^/!!;
       $filesystem_path =~ s!^res/!!;
   
       # Return what is remaining for the filesystem path.
       return($filesystem_path);
     }
   
   =pod
   
   B<query_home_server_status> - Is this the home server of an author's directory?
   
   =over 4
   
   Parameters:
   
   =item I<$author_filesystem_path> - directory path for a user.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<boolean> - 1 if true; 0 if false.
   
   =back
   
   =cut
   
   sub query_home_server_status ($)
     {
       my $author_filesystem_path = shift(@_);
   
       # Remove beginning portion of this filesystem path.
       $author_filesystem_path =~ s!/home/httpd/html/res/([^/]*)/([^/]*).*!$1/$2!;
   
       # Construct path to the author's ordinary user directory.
       my ($user_domain,$username) = split(m!/!,$author_filesystem_path);
       my $user_directory_path = construct_path_to_user_directory($user_domain,
          $username);
   
       # Return status of whether the user directory path is defined.
       if (-e $user_directory_path)
         {
    return(1); # True.
         }
       else
         {
           return(0); # False.
         }
     }
   
   =pod
   
   B<construct_path_to_user_directory> ($$) - makes a filesystem path to user dir.
   
   =over 4
   
   Parameters:
   
   =item I<$user_domain> - the loncapa domain of the user.
   
   =item I<$username> - the unique username (user id) of the user.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - representing the path on the filesystem.
   
 # ---------------------------- convert 'time' format into a datetime sql format  =back
 sub sqltime {  
   =cut
   
   sub construct_path_to_user_directory ($$)
     {
       my ($user_domain,$username) = @_;
   
       # Untaint.
       $user_domain =~ s/\W//g;
       $username =~ s/\W//g;
   
       # Create three levels of sub-directoried filesystem path
       # based on the first three characters of the username.
       my $sub_filesystem_path = $username.'__';
       $sub_filesystem_path =~ s!(.)(.)(.).*!$1/$2/$3/!;
   
       # Use the sub-directoried levels and other variables to generate
       # the complete filesystem path.
       my $complete_filesystem_path =
    join('/',($perlvar{'lonUsersDir'},
     $user_domain,
     $sub_filesystem_path,
     $username));
   
       # Return the complete filesystem path.
       return($complete_filesystem_path);
     }
   
   =pod
   
   B<sql_formatted_time> (@) - turns seconds since epoch into datetime sql format.
   
   =over 4
   
   Parameters:
   
   =item I<$epochtime> - time in seconds since epoch (may need to be sanitized).
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<string> - datetime sql formatted string.
   
   =back
   
   =cut
   
   sub sql_formatted_time ($)
     {
       # Sanitize the time argument and convert to localtime array.
     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =      my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
  localtime(@_[0]);   localtime(&sanitize_time(shift(@_)));
     $mon++; $year+=1900;  
     return "$year-$mon-$mday $hour:$min:$sec";      # Convert month from (0..11) to (1..12).
 }      $mon += 1;
   
       # Make the year compatible with A.D. specification.
       $year += 1900;
   
       # Return a date which is compatible with MySQL's "DATETIME" format.
       return(join('-',($year,$mon,$mday)).
      ' '.
      join(':',($hour,$min,$sec))
      );
     }
   
   
   # ==================================== The following two subroutines are needed
   #                 for accommodating incorrect time formats inside the metadata.
   
   =pod
   
   B<make_seconds_since_epoch> (@) - turns time metadata into seconds since epoch.
   
   =over 4
   
   Parameters:
   
   =item I<%time_metadata> - a key-value listing characterizing month, year, etc.
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<integer> - seconds since epoch.
   
   =back
   
   =cut
   
   sub make_seconds_since_epoch (@)
     {
       # Keytable of time metadata.
       my %time_metadata = @_;
   
       # Return seconds since the epoch (January 1, 1970, 00:00:00 UTC).
       return(POSIX::mktime(
    ($time_metadata{'seconds'},
     $time_metadata{'minutes'},
     $time_metadata{'hours'},
     $time_metadata{'day'},
     $time_metadata{'month'}-1,
     $time_metadata{'year'}-1900,
     0,
     0,
     $time_metadata{'dlsav'})
    )
      );
     }
   
   =pod
   
   B<sanitize_time> - if time looks sql-formatted, make it seconds since epoch.
   
   Somebody described this subroutine as
   "retro-fixing of un-backward-compatible time format".
   
   What this means, is that a part of this code expects to get UTC seconds
   since the epoch (beginning of 1970).  Yet, some of the .meta files have
   sql-formatted time strings (2001-04-01, etc.) instead of seconds-since-epoch
   integers (e.g. 1044147435).  These time strings do not encode the timezone
   and, in this sense, can be considered "un-backwards-compatible".
   
   =over 4
   
   Parameters:
   
   =item I<$potentially_badformat_string> - string to "retro-fix".
   
   =back
   
   =over 4
   
   Returns:
   
   =item C<integer> - seconds since epoch.
   
   =back
   
   =cut
   
   sub sanitize_time ($)
     {
       my $timestamp = shift(@_);
       # If timestamp is in this unexpected format....
       if ($timestamp =~ /^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/)
         {
    # then convert into seconds since epoch (the expected format).
    $timestamp = &make_seconds_since_epoch(
          'year' => $1,
          'month' => $2,
          'day' => $3,
          'hours' => $4,
          'minutes' => $5,
          'seconds' => $6
          );
         }
       # Otherwise we assume timestamp to be as expected.
       return($timestamp);
     }
   
   =pod
   
   =head1 AUTHOR
   
   Written to help the loncapa project.
   
   Scott Harrison, sharrison@users.sourceforge.net
   
   This is distributed under the same terms as loncapa (i.e. "freeware").
   
   =cut

Removed from v.1.19  
changed lines
  Added in v.1.29


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>