File:  [LON-CAPA] / loncom / metadata_database / LONCAPA / lonmetadata.pm
Revision 1.5: download - view: text, annotated - select for diffs
Mon Apr 12 21:11:45 2004 UTC (20 years, 1 month ago) by matthew
Branches: MAIN
CVS tags: HEAD
Moved nohist_reseval parsing functionality from searchcat.pl to lonmetadata.pm.
searchcat.pl: stop dying on insert errors.
lonmetadata.pm: &store_metadata now translates 'nan' to 'NULL' because MySQL
won't take 'nan' in a numeric field.
Added &process_reseval_data and &process_dynamic_metadata routines.

# The LearningOnline Network with CAPA
#
# $Id: lonmetadata.pm,v 1.5 2004/04/12 21:11:45 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
######################################################################

package LONCAPA::lonmetadata;

use strict;
use DBI;

######################################################################
######################################################################

=pod 

=head1 Name

lonmetadata

=head1 Synopsis

lonmetadata holds a description of the metadata table and provides
wrappers for the storage and retrieval of metadata to/from the database.

=head1 Description

=head1 Methods

=over 4

=cut

######################################################################
######################################################################

=pod

=item Old table creation command

CREATE TABLE IF NOT EXISTS metadata 
(title TEXT, 
author TEXT, 
subject TEXT, 
url TEXT, 
keywords TEXT, 
version TEXT, 
notes TEXT, 
abstract TEXT, 
mime TEXT, 
language TEXT, 
creationdate DATETIME, 
lastrevisiondate DATETIME, 
owner TEXT, 
copyright TEXT, 

FULLTEXT idx_title (title), 
FULLTEXT idx_author (author), 
FULLTEXT idx_subject (subject), 
FULLTEXT idx_url (url), 
FULLTEXT idx_keywords (keywords), 
FULLTEXT idx_version (version), 
FULLTEXT idx_notes (notes), 
FULLTEXT idx_abstract (abstract), 
FULLTEXT idx_mime (mime), 
FULLTEXT idx_language (language),
FULLTEXT idx_owner (owner), 
FULLTEXT idx_copyright (copyright)) 

TYPE=MYISAM;

=cut

######################################################################
######################################################################
my @Metadata_Table_Description = 
    (
     { name => 'title',     type=>'TEXT'},
     { name => 'author',    type=>'TEXT'},
     { name => 'subject',   type=>'TEXT'},
     { name => 'url',       type=>'TEXT', restrictions => 'NOT NULL' },
     { name => 'keywords',  type=>'TEXT'},
     { name => 'version',   type=>'TEXT'},
     { name => 'notes',     type=>'TEXT'},
     { name => 'abstract',  type=>'TEXT'},
     { name => 'mime',      type=>'TEXT'},
     { name => 'language',  type=>'TEXT'},
     { name => 'creationdate',     type=>'DATETIME'},
     { name => 'lastrevisiondate', type=>'DATETIME'},
     { name => 'owner',     type=>'TEXT'},
     { name => 'copyright', type=>'TEXT'}, 
      #--------------------------------------------------
     { name => 'dependencies',   type=>'TEXT'},
     { name => 'modifyinguser',  type=>'TEXT'},
     { name => 'authorspace',    type=>'TEXT'},
     { name => 'lowestgradelevel',  type=>'INT'},
     { name => 'highestgradelevel', type=>'INT'},
     { name => 'standards',      type=>'TEXT'},
     { name => 'count',          type=>'INT'},
     { name => 'course',         type=>'INT'},
     { name => 'course_list',    type=>'TEXT'},
     { name => 'goto',           type=>'INT'},
     { name => 'goto_list',      type=>'TEXT'},
     { name => 'comefrom',       type=>'INT'},
     { name => 'comefrom_list',  type=>'TEXT'},
     { name => 'sequsage',       type=>'INT'},
     { name => 'sequsage_list',  type=>'TEXT'},
     { name => 'stdno',          type=>'INT'},
     { name => 'stdno_list',     type=>'TEXT'},
     { name => 'avetries',       type=>'FLOAT'},
     { name => 'avetries_list',  type=>'TEXT'},
     { name => 'difficulty',     type=>'FLOAT'},
     { name => 'difficulty_list',type=>'TEXT'},
     { name => 'clear',          type=>'FLOAT'},
     { name => 'technical',      type=>'FLOAT'},
     { name => 'correct',        type=>'FLOAT'},
     { name => 'helpful',        type=>'FLOAT'},
     { name => 'depth',          type=>'FLOAT'},
     { name => 'hostname',       type=> 'TEXT'},
     #--------------------------------------------------
     );

my @Fulltext_indicies = qw/
    title
    author
    subject
    url
    keywords
    version
    notes
    abstract
    mime
    language
    owner
    copyright/;

######################################################################
######################################################################

=pod

=item &describe_metadata_storage

Input: None

Returns: An array of hash references describing the columns and indicies
of the metadata table(s).

=cut

######################################################################
######################################################################
sub describe_metadata_storage { 
    return (\@Metadata_Table_Description,\@Fulltext_indicies);
}

######################################################################
######################################################################

=pod

=item create_metadata_storage()

Inputs: table name (optional): the name of the table.  Default is 'metadata'.

Returns: A perl string which, when executed by MySQL, will cause the
metadata storage to be initialized.

=cut

######################################################################
######################################################################
sub create_metadata_storage { 
    my ($tablename) = @_;
    $tablename = 'metadata' if (! defined($tablename));
    my $request = "CREATE TABLE IF NOT EXISTS ".$tablename." ";
    #
    # Process the columns  (this code is stolen from lonmysql.pm)
    my @Columns;
    my $col_des; # mysql column description
    foreach my $coldata (@Metadata_Table_Description) {
        my $column = $coldata->{'name'};
        $col_des = '';
        if (lc($coldata->{'type'}) =~ /(enum|set)/) { # 'enum' or 'set'
            $col_des.=$column." ".$coldata->{'type'}."('".
                join("', '",@{$coldata->{'values'}})."')";
        } else {
            $col_des.=$column." ".$coldata->{'type'};
            if (exists($coldata->{'size'})) {
                $col_des.="(".$coldata->{'size'}.")";
            }
        }
        # Modifiers
        if (exists($coldata->{'restrictions'})){
            $col_des.=" ".$coldata->{'restrictions'};
        }
        if (exists($coldata->{'default'})) {
            $col_des.=" DEFAULT '".$coldata->{'default'}."'";
        }
        $col_des.=' AUTO_INCREMENT' if (exists($coldata->{'auto_inc'}) &&
                                        ($coldata->{'auto_inc'} eq 'yes'));
        $col_des.=' PRIMARY KEY'    if (exists($coldata->{'primary_key'}) &&
                                        ($coldata->{'primary_key'} eq 'yes'));
    } continue {
        # skip blank items.
        push (@Columns,$col_des) if ($col_des ne '');
    }
    foreach my $colname (@Fulltext_indicies) {
        my $text = 'FULLTEXT idx_'.$colname.' ('.$colname.')';
        push (@Columns,$text);
    }
    $request .= "(".join(", ",@Columns).") TYPE=MyISAM";
    return $request;
}

######################################################################
######################################################################

=pod

=item store_metadata()

Inputs: database handle ($dbh), a table name, and a hash or hash reference 
containing the metadata for a single resource.

Returns: 1 on success, 0 on failure to store.

=cut

######################################################################
######################################################################
{
    ##
    ##  WARNING: The following cleverness may cause trouble in cases where
    ##  the dbi connection is dropped and recreated - a stale statement
    ##  handler may linger around and cause trouble.
    ##
    ##  In most scripts, this will work fine.  If the dbi is going to be
    ##  dropped and (possibly) later recreated, call &clear_sth.  Yes it
    ##  is annoying but $sth appearantly does not have a link back to the 
    ##  $dbh, so we can't check our validity.
    ##
    my $sth = undef;
    my $sth_table = undef;

sub create_statement_handler {
    my $dbh = shift();
    my $tablename = shift();
    $tablename = 'metadata' if (! defined($tablename));
    $sth_table = $tablename;
    my $request = 'INSERT INTO '.$tablename.' VALUES(';
    foreach (@Metadata_Table_Description) {
        $request .= '?,';
    }
    chop $request;
    $request.= ')';
    $sth = $dbh->prepare($request);
    return;
}

sub clear_sth { $sth=undef; $sth_table=undef;}

sub store_metadata {
    my $dbh = shift();
    my $tablename = shift();
    my $errors = '';
    if (! defined($sth) || 
        ( defined($tablename) && ($sth_table ne $tablename)) || 
        (! defined($tablename) && $sth_table ne 'metadata')) {
        &create_statement_handler($dbh,$tablename);
    }
    my $successcount = 0;
    while (my $mdata = shift()) {
        next if (ref($mdata) ne "HASH");
        my @MData;
        foreach my $field (@Metadata_Table_Description) {
            if (exists($mdata->{$field->{'name'}})) {
                if ($mdata->{$field->{'name'}} eq 'nan') {
                    push(@MData,'NULL');
                } else {
                    push(@MData,$mdata->{$field->{'name'}});
                }
            } else {
                push(@MData,undef);
            }
        }
        $sth->execute(@MData);
        if (! $sth->err) {
            $successcount++;
        } else {
            $errors = join(',',$errors,$sth->errstr);
        }
    }
    if (wantarray()) {
        return ($successcount,$errors);
    } else {
        return $successcount;
    }
}

}

######################################################################
######################################################################

=pod

=item lookup_metadata()

Inputs: database handle ($dbh) and a hash or hash reference containing 
metadata which will be used for a search.

Returns: scalar with error string on failure, array reference on success.
The array reference is the same one returned by $sth->fetchall_arrayref().

=cut

######################################################################
######################################################################
sub lookup_metadata {
    my ($dbh,$condition,$fetchparameter) = @_;
    my $error;
    my $returnvalue=[];
    my $request = 'SELECT * FROM metadata';
    if (defined($condition)) {
        $request .= ' WHERE '.$condition;
    }
    my $sth = $dbh->prepare($request);
    if ($sth->err) {
        $error = $sth->errstr;
    }
    if (! $error) {
        $sth->execute();
        if ($sth->err) {
            $error = $sth->errstr;
        } else {
            $returnvalue = $sth->fetchall_arrayref($fetchparameter);
            if ($sth->err) {
                $error = $sth->errstr;
            }
        }
    }
    return ($error,$returnvalue);
}

######################################################################
######################################################################

=pod

=item delete_metadata()



=cut

######################################################################
######################################################################
sub delete_metadata {}

######################################################################
######################################################################


######################################################################
######################################################################

=pod

=item &process_reseval_data 

Process a nohist_resevaldata hash into a more complex data structure.

Input: Hash reference containing reseval data

Returns: Hash with the following structure:

$hash{$url}->{'statistics'}->{$courseid}->{'avetries'}   = $value
$hash{$url}->{'statistics'}->{$courseid}->{'count'}      = $value
$hash{$url}->{'statistics'}->{$courseid}->{'difficulty'} = $value
$hash{$url}->{'statistics'}->{$courseid}->{'stdno'}      = $value
$hash{$url}->{'statistics'}->{$courseid}->{'timestamp'}  = $value

$hash{$url}->{'evaluation'}->{$username}->{'clear'}     = $value
$hash{$url}->{'evaluation'}->{$username}->{'comments'}  = $value
$hash{$url}->{'evaluation'}->{$username}->{'depth'}     = $value
$hash{$url}->{'evaluation'}->{$username}->{'technical'} = $value
$hash{$url}->{'evaluation'}->{$username}->{'helpful'}   = $value

$hash{$url}->{'course'}    = \@Courses
$hash{$url}->{'comefrom'}  = \@Resources
$hash{$url}->{'goto'}      = \@Resources
$hash{$url}->{'usage'}     = \@Resources

$hash{$url}->{'stats'}->{$courseid\_$section}->{$key} = $value

=cut

######################################################################
######################################################################
sub process_reseval_data {
    my ($evaldata) = @_;
    my %DynamicData;
    #
    # Process every stored element
    while (my ($storedkey,$value) = each(%{$evaldata})) {
        my ($source,$file,$type) = split('___',$storedkey);
        $source = &unescape($source);
        $file = &unescape($file);
        $value = &unescape($value);
         "    got ".$file."\n        ".$type." ".$source."\n";
        if ($type =~ /^(avetries|count|difficulty|stdno|timestamp)$/) {
            #
            # Statistics: $source is course id
            $DynamicData{$file}->{'statistics'}->{$source}->{$type}=$value;
        } elsif ($type =~ /^(clear|comments|depth|technical|helpful)$/){
            #
            # Evaluation $source is username, check if they evaluated it
            # more than once.  If so, pad the entry with a space.
            while(exists($DynamicData{$file}->{'evaluation'}->{$type}->{$source})) {
                $source .= ' ';
            }
            $DynamicData{$file}->{'evaluation'}->{$type}->{$source}=$value;
        } elsif ($type =~ /^(course|comefrom|goto|usage)$/) {
            #
            # Context $source is course id or resource
            push(@{$DynamicData{$file}->{$type}},&unescape($source));
        } elsif ($type eq 'stats') {
            #
            # Statistics storage...
            # $source is $cid\_$sec\_$stdno
            # $value is stat1=value&stat2=value&stat3=value,....
            #
            my ($cid,$sec,$stdno)=split('_',$source);
            my $crssec = $cid.'_'.$sec;
            my @Data = split('&',$value);
            my %Statistics;
            while (my ($key,$value) = split('=',pop(@Data))) {
                $Statistics{$key} = $value;
            }
            #
            # Only store the data if the number of students is greater
            # than the data already stored
            if (! exists($DynamicData{$file}->{'stats'}->{$crssec}) ||
                $DynamicData{$file}->{'stats'}->{$crssec}->{'stdno'}<$stdno){
                $DynamicData{$file}->{'stats'}->{$crssec}=\%Statistics;
            }
        }
    }
    return %DynamicData;
}


######################################################################
######################################################################

=pod

=item &process_dynamic_metadata

Inputs: $url: the url of the item to process
$DynamicData: hash reference for the results of &process_reseval_data

Returns: Hash containing the following keys:
    avetries, avetries_list, difficulty, difficulty_list, stdno, stdno_list,
    course, course_list, goto, goto_list, comefrom, comefrom_list,
    usage, clear, technical, correct, helpful, depth, comments

    Each of the return keys is associated with either a number or a string
    The *_list items are comma-seperated strings.  'comments' is a string
    containing generically marked-up comments.

=cut

######################################################################
######################################################################
sub process_dynamic_metadata {
    my ($url,$DynamicData) = @_;
    my %data;
    my $resdata = $DynamicData->{$url};
    #
    # Get the statistical data
    foreach my $type (qw/avetries difficulty stdno/) {
        my $count;
        my $sum;
        my @Values;
        #
        foreach my $coursedata (values(%{$resdata->{'statistics'}}),
                                values(%{$resdata->{'stats'}})) {
            if (ref($coursedata) eq 'HASH' && exists($coursedata->{$type})) {
                $count++;
                $sum += $coursedata->{$type};
                push(@Values,$coursedata->{$type});
            }
        }
        if ($count) {
            $data{$type} = $sum/$count;
            $data{$type.'_list'} = join(',',@Values);
        }
    }
    #
    # Get the context data
    foreach my $type (qw/course goto comefrom/) {
        if (defined($resdata->{$type}) && 
            ref($resdata->{$type}) eq 'ARRAY') {
            $data{$type} = scalar(@{$resdata->{$type}});
            $data{$type.'_list'} = join(',',@{$resdata->{$type}});
        }
    }
    if (defined($resdata->{'usage'}) && 
        ref($resdata->{'usage'}) eq 'ARRAY') {
        $data{'sequsage'} = scalar(@{$resdata->{'usage'}});
        $data{'sequsage_list'} = join(',',@{$resdata->{'usage'}});
    }
    #
    # Get the evaluation data
    foreach my $type (qw/clear technical correct helpful depth/) {
        my $count;
        my $sum;
        foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{$type}})){
            $sum += $resdata->{'evaluation'}->{$type}->{$evaluator};
            $count++;
        }
        if ($count > 0) {
            $data{$type}=$sum/$count;
        }
    }
    #
    # put together comments
    my $comments = '<div class="LCevalcomments">';
    foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{'comments'}})){
        $comments .= '<span class="author">'.$evaluator.'</span>'.
            '<span class="comment">'.
            $resdata->{'evaluation'}->{'comments'}->{$evaluator}.'</span>';
    }
    $comments .= '</div>';
    #
    return %data;
}

######################################################################
######################################################################
##
## The usual suspects, repeated here to reduce dependency hell
##
######################################################################
######################################################################
sub unescape {
    my $str=shift;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}

sub escape {
    my $str=shift;
    $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $str;
}


1;

__END__;

=pod

=back

=cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>