File:  [LON-CAPA] / loncom / metadata_database / parse_activity_log.pl
Revision 1.11: download - view: text, annotated - select for diffs
Wed Dec 22 20:42:39 2004 UTC (19 years, 4 months ago) by matthew
Branches: MAIN
CVS tags: version_1_3_X, version_1_3_3, version_1_3_2, version_1_3_1, version_1_3_0, HEAD
Change storage of POST values to escape them and join with '&'.  Also modified
error logging to use '&' instead of ':' as a delimiter.

#!/usr/bin/perl
#
# The LearningOnline Network
#
# $Id: parse_activity_log.pl,v 1.11 2004/12/22 20:42:39 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
#--------------------------------------------------------------------
#
# Exit codes
#   0     Everything is okay
#   1     Another copy is running on this course
#   2     Activity log does not exist
#   3     Unable to connect to database
#   4     Unable to create database tables
#   5     Unable to open log file
#   6     Unable to get lock on activity log
#

#
# Notes:
#
# Logging is done via the $logthis variable, which may be the result of 
# overcleverness.  log via $logthis->('logtext');  Those are parentheses,
# not curly braces.  If the -log command line parameter is set, the $logthis
# routine is set to a routine which writes to a file.  If the command line
# parameter is not set $logthis is set to &nothing, which does what you
# would expect.
#

use strict;
use DBI;
use lib '/home/httpd/lib/perl/Apache';
use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration();
use Apache::lonmysql();
use lonmysql();
use Time::HiRes();
use Getopt::Long();
use IO::File;
use File::Copy;
use Fcntl qw(:flock);

#
# Determine parameters
my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup,$log,$backup);
&Getopt::Long::GetOptions( "course=s"  => \$course,
                           "domain=s"  => \$domain,
                           "backup"    => \$backup,
                           "help"      => \$help,
                           "logfile=s" => \$file,
                           "timerun"   => \$time_run,
                           "nocleanup" => \$nocleanup,
                           "drop"      => \$drop,
                           "log"       => \$log);
if (! defined($course) || $help) {
    print<<USAGE;
parse_activity_log.pl

Process a lon-capa activity log into a database.
Parameters:
   course             Required
   domain             Optional
   backup             optional   if present, backup the activity log file
                                 before processing it
   drop               optional   if present, drop all course 
                                 specific activity log tables.
   file               optional   Specify the file to parse, including path
   time               optional   if present, print out timing data
   nocleanup          optional   if present, do not remove old files
   log                optional   if present, prepare log file of activity
Examples:
  $0 -course=123456abcdef -domain=msu
  $0 -course=123456abcdef -file=activity.log
USAGE
    exit;
}

##
## Set up timing code
my $time_this = \&nothing;
if ($time_run) {
    $time_this = \&time_action;
}
my $initial_time = Time::HiRes::time;

##
## Read in configuration parameters
##
my %perlvar = %{&LONCAPA::Configuration::read_conf('loncapa.conf')};

if (! defined($domain) || $domain eq '') {
    $domain = $perlvar{'lonDefDomain'};
}
&update_process_name($course.'@'.$domain);

##
## Set up logging code
my $logthis = \&nothing;
if ($log) {
    my $logfile = $perlvar{'lonDaemons'}.'/tmp/parse_activity_log.log.'.time;
    print STDERR "$0: logging to $logfile".$/;
    if (! open(LOGFILE,">$logfile")) {
        warn("Unable to open $logfile for writing.  Run aborted.");
        exit 5;
    } else {
        $logthis = \&log_to_file;
    }
}


##
## Determine filenames
##
my $sourcefilename;   # activity log data
my $newfilename;      # $sourcefilename will be renamed to this
my $gz_sql_filename;  # the gzipped mysql backup data file name.
my $error_filename;   # Errors in parsing the activity log will be written here
if ($file) {
    $sourcefilename = $file;
} else {
    $sourcefilename = &get_filename($course,$domain);
}
my $sql_filename = $sourcefilename;
$sql_filename =~ s|[^/]*$|activity.log.sql|;
$gz_sql_filename = $sql_filename.'.gz';
$error_filename = $sourcefilename;
$error_filename =~ s|[^/]*$|activity.log.errors|;
$logthis->('Beginning logging '.time);


#
# Wait for a lock on the lockfile to avoid collisions
my $lockfilename = $sourcefilename.'.lock';
open(LOCKFILE,'>'.$lockfilename);
if (!flock(LOCKFILE,LOCK_EX)) {
    warn("Unable to lock $lockfilename.  Aborting".$/);
    exit 6;
}

##
## There will only be a $newfilename file if a copy of this program is already
## running.
my $newfilename = $sourcefilename.'.processing';
if (-e $newfilename) {
    warn "$newfilename exists";
    $logthis->($newfilename.' exists, so I cannot work on it.');
    exit 2;
}

if (-e $sourcefilename) {
    $logthis->('renaming '.$sourcefilename.' to '.$newfilename);
    rename($sourcefilename,$newfilename);
    Copy($newfilename,$newfilename.'.'.time) if ($backup);
    $logthis->("renamed $sourcefilename to $newfilename");
} else {
    my $command = 'touch '.$newfilename;
    $logthis->($command);
    system($command);
    $logthis->('touch was completed');
}

close(LOCKFILE);

##
## Table definitions
##
my $prefix = $course.'_'.$domain.'_';
my $student_table = $prefix.'students';
my $student_table_def = 
{ id => $student_table,
  permanent => 'no',
  columns => [
              { name => 'student_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc => 'yes', },
              { name => 'student',
                type => 'VARCHAR(100) BINARY',
                restrictions => 'NOT NULL', },
              ],
      'PRIMARY KEY' => ['student_id',],
          };

my $res_table = $prefix.'resource';
my $res_table_def = 
{ id => $res_table,
  permanent => 'no',
  columns => [{ name => 'res_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'resource',
                type => 'MEDIUMTEXT',
                restrictions => 'NOT NULL'},
              ],
  'PRIMARY KEY' => ['res_id'],
};

#my $action_table = $prefix.'actions';
#my $action_table_def =
#{ id => $action_table,
#  permanent => 'no',
#  columns => [{ name => 'action_id',
#                type => 'MEDIUMINT UNSIGNED',
#                restrictions => 'NOT NULL',
#                auto_inc     => 'yes', },
#              { name => 'action',
#                type => 'VARCHAR(100)',
#                restrictions => 'NOT NULL'},
#              ],
#  'PRIMARY KEY' => ['action_id',], 
#};

my $machine_table = $prefix.'machine_table';
my $machine_table_def =
{ id => $machine_table,
  permanent => 'no',
  columns => [{ name => 'machine_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',
                auto_inc     => 'yes', },
              { name => 'machine',
                type => 'VARCHAR(100)',
                restrictions => 'NOT NULL'},
              ],
  'PRIMARY KEY' => ['machine_id',],
 };

my $activity_table = $prefix.'activity';
my $activity_table_def = 
{ id => $activity_table,
  permanent => 'no',
  columns => [
              { name => 'res_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',},
              { name => 'time',
                type => 'DATETIME',
                restrictions => 'NOT NULL',},
              { name => 'student_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',},
              { name => 'action',
                type => 'VARCHAR(10)',
                restrictions => 'NOT NULL',},
              { name => 'idx',                # This is here in case a student
                type => 'MEDIUMINT UNSIGNED', # has multiple submissions during
                restrictions => 'NOT NULL',   # one second.  It happens, trust
                auto_inc     => 'yes', },     # me.
              { name => 'machine_id',
                type => 'MEDIUMINT UNSIGNED',
                restrictions => 'NOT NULL',},
              { name => 'action_values',
                type => 'MEDIUMTEXT', },
              ], 
      'PRIMARY KEY' => ['time','student_id','res_id','idx'],
      'KEY' => [{columns => ['student_id']},
                {columns => ['time']},],
};

my @Activity_Table = ($activity_table_def);
my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def);
##
## End of table definitions
##

$logthis->('Connectiong to mysql');
&Apache::lonmysql::set_mysql_user_and_password('www',
                                               $perlvar{'lonSqlAccess'});
if (!&Apache::lonmysql::verify_sql_connection()) {
    warn "Unable to connect to MySQL database.";
    $logthis->("Unable to connect to MySQL database.");
    exit 3;
}
$logthis->('SQL connection is up');

if ($drop) { &drop_tables(); $logthis->('dropped tables'); }

if (-s $gz_sql_filename) {
    my $backup_modification_time = (stat($gz_sql_filename))[9];
    $logthis->($gz_sql_filename.' was last modified '.
               localtime($backup_modification_time).
               '('.$backup_modification_time.')');
    # Check for missing tables
    my @Current_Tables = &Apache::lonmysql::tables_in_db();
    $logthis->(join(',',@Current_Tables));
    my %Found;
    foreach my $tablename (@Current_Tables) {
        foreach my $table (@Activity_Table,@ID_Tables) {
            if ($tablename eq  $table->{'id'}) {
                $Found{$tablename}++;
            }
        }
    }
    $logthis->('Found tables '.join(',',keys(%Found)));
    my $missing_a_table = 0;
    foreach my $table (@Activity_Table,@ID_Tables) {    
        # Hmmm, should I dump the tables?
        if (! $Found{$table->{'id'}}) {
            $logthis->('Missing table '.$table->{'id'});
            $missing_a_table = 1;
            last;
        }
    }
    if ($missing_a_table) {
        my $table_modification_time = $backup_modification_time;
        # If the backup happened prior to the last table modification,
        foreach my $table (@Activity_Table,@ID_Tables) {    
            my %tabledata = &Apache::lonmysql::table_information($table->{'id'});
            next if (! scalar(keys(%tabledata))); # table does not exist
            if ($table_modification_time < $tabledata{'Update_time'}) {
                $table_modification_time = $tabledata{'Update_time'};
            }
        }
        $logthis->("Table modification time = ".$table_modification_time);
        if ($table_modification_time > $backup_modification_time) {
            # Save the current tables in case we need them another time.
            my $backup_name = $gz_sql_filename.'.'.time;
            $logthis->('Backing existing tables up in '.$backup_name);
            &backup_tables($backup_name);
        }
        $time_this->();
        &load_backup_tables($gz_sql_filename);
        $time_this->('load backup tables');
    }
}

##
## Ensure the tables we need exist
# create_tables does not complain if the tables already exist
$logthis->('creating tables');
if (! &create_tables()) {
    warn "Unable to create tables";
    $logthis->('Unable to create tables');
    exit 4;
}

##
## Read the ids used for various tables
$logthis->('reading id tables');
&read_id_tables();
$logthis->('finished reading id tables');

##
## Set up the errors file
my $error_fh = IO::File->new(">>$error_filename");

##
## Parse the course log
$logthis->('processing course log');
if (-s $newfilename) {
    my $result = &process_courselog($newfilename,$error_fh);
    if (! defined($result)) {
        # Something went wrong along the way...
        $logthis->('process_courselog returned undef');
        exit 5;
    } elsif ($result > 0) {
        $time_this->();
        $logthis->('process_courselog returned '.$result.' backing up tables');
        &backup_tables($gz_sql_filename);
        $time_this->('write backup tables');
    }
}
close($error_fh);

##
## Clean up the filesystem
&Apache::lonmysql::disconnect_from_db();
unlink($newfilename) if (-e $newfilename && ! $nocleanup);

##
## Print timing data
$logthis->('printing timing data');
if ($time_run) {
    my $elapsed_time = Time::HiRes::time - $initial_time;
    print "Overall time: ".$elapsed_time.$/;
    print &outputtimes();
    $logthis->("Overall time: ".$elapsed_time);
    $logthis->(&outputtimes());
}

if ($log) {
    close LOGFILE;
}

exit 0;   # Everything is okay, so end here before it gets worse.

########################################################
########################################################
##
##                 Process Course Log
##
########################################################
########################################################
#
# Returns the number of lines in the activity.log file that were processed.
sub process_courselog {
    my ($inputfile,$error_fh) = @_;
    if (! open(IN,$inputfile)) {
        warn "Unable to open '$inputfile' for reading";
        $logthis->("Unable to open '$inputfile' for reading");
        return undef;
    }
    my ($linecount,$insertcount);
    my $dbh = &Apache::lonmysql::get_dbh();
    #
    # Timing variables
    my @RowData;
    while (my $line=<IN>){
        # last if ($linecount > 1000);
        #
        # Bulk storage variables
        $time_this->();
        chomp($line);
        $linecount++;
        # print $linecount++.$/;
        my ($timestamp,$host,$log)=split(/\:/,$line,3);
        $time_this->('splitline');
        #
        # $log has the actual log entries; currently still escaped, and
        # %26(timestamp)%3a(url)%3a(user)%3a(domain)
        # then additionally
        # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
        # or
        # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
        #
        # get delimiter between timestamped entries to be &&&
        $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
        $log = &unescape($log);
        $time_this->('translate_and_unescape');
        # now go over all log entries 
        if (! defined($host)) { $host = 'unknown'; }
        my $machine_id = &get_id($machine_table,'machine',$host);
        my $prevchunk = 'none';
        foreach my $chunk (split(/\&\&\&/,$log)) {
            my $warningflag = '';
            $time_this->();
	    my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk);
            my $student = $uname.':'.$udom;
            if (! defined($res) || $res =~ /^\s*$/) {
                $res = '/adm/roles';
                $action = 'LOGIN';
            }
            if ($res =~ m|^/prtspool/|) {
                $res = '/prtspool/';
            }
            if (! defined($action) || $action eq '') {
                $action = 'VIEW';
            }
            if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) {
                $warningflag .= 'action';
                print $error_fh 'full log entry:'.$log.$/;
                print $error_fh 'error on chunk:'.$chunk.$/;
                $logthis->('(action) Unable to parse '.$/.$chunk.$/.
                         'got '.
                         'time = '.$time.$/.
                         'res  = '.$res.$/.
                         'uname= '.$uname.$/.
                         'udom = '.$udom.$/.
                         'action='.$action.$/.
                         '@values = '.join('&',@values));
                next; #skip it if we cannot understand what is happening.
            }
            if (! defined($student) || $student eq ':') {
                $student = 'unknown';
                $warningflag .= 'student';
            }
            if (! defined($res) || $res =~ /^\s*$/) {
                $res = 'unknown';
                $warningflag .= 'res';
            }
            if (! defined($action) || $action =~ /^\s*$/) {
                $action = 'unknown';
                $warningflag .= 'action';
            }
            if (! defined($time) || $time !~ /^\d+$/) {
                $time = 0;
                $warningflag .= 'time';
            }
            #
            $time_this->('split_and_error_check');
            my $student_id = &get_id($student_table,'student',$student);
            my $res_id     = &get_id($res_table,'resource',$res);
#            my $action_id  = &get_id($action_table,'action',$action);
            my $sql_time   = &Apache::lonmysql::sqltime($time);
            #
            if (! defined($student_id) || $student_id eq '') { 
                $warningflag.='student_id'; 
            }
            if (! defined($res_id) || $res_id eq '') { 
                $warningflag.='res_id'; 
            }
#            if (! defined($action_id) || $action_id eq '') { 
#                $warningflag.='action_id'; 
#            }
            if ($warningflag ne '') {
                print $error_fh 'full log entry:'.$log.$/;
                print $error_fh 'error on chunk:'.$chunk.$/;
                $logthis->('warningflag ('.$warningflag.') on chunk '.
                           $/.$chunk.$/.'prevchunk = '.$/.$prevchunk);
                $prevchunk .= $chunk;
                next; # skip this chunk
            }
            #
            my $store_values;
            if ($action eq 'POST') {
                $store_values = 
                    $dbh->quote(join('&',map { &escape($_); } @values));
            } else {
                $store_values = $dbh->quote(join('&',@values));
            }
            $time_this->('get_ids');
            #
            my $row = [$res_id,
                       qq{'$sql_time'},
                       $student_id,
                       "'".$action."'",
#                       $action_id,
                       qq{''},        # idx
                       $machine_id,
                       $store_values];
            push(@RowData,$row);
            $time_this->('push_row');
            $prevchunk = $chunk;
            #
        }
        $time_this->();
        if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) {
            my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
                                                            undef,
                                                            \@RowData);
            # $logthis->('result = '.$result);
            $time_this->('bulk_store_rows');
            if (! defined($result)) {
                my $error = &Apache::lonmysql::get_error();
                warn "Error occured during insert.".$error;
                $logthis->('error = '.$error);
            }
            undef(@RowData);
        }
    }
    if (@RowData) {
        $time_this->();
        $logthis->('storing '.$linecount);
        my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
                                                        undef,
                                                        \@RowData);
        $logthis->('result = '.$result);
        $time_this->('bulk_store_rows');
        if (! defined($result)) {
            my $error = &Apache::lonmysql::get_error();
            warn "Error occured during insert.".$error;
            $logthis->('error = '.$error);
        }
        undef(@RowData);
    }
    close IN;
#    print "Number of lines: ".$linecount.$/;
#    print "Number of inserts: ".$insertcount.$/;
    return $linecount;
}


##
## Somtimes, instead of doing something, doing nothing is appropriate.
sub nothing {
    return;
}

##
## Logging routine
##
sub log_to_file {
    my ($input)=@_;
    print LOGFILE $input.$/;
}

##
## Timing routines
##
{
    my %Timing;
    my $starttime;

sub time_action {
    my ($key) = @_;
    if (defined($key)) {
        $Timing{$key}+=Time::HiRes::time-$starttime;
        $Timing{'count_'.$key}++;
    }
    $starttime = Time::HiRes::time;
}

sub outputtimes {
    my $Str;
    if ($time_run) {
        $Str = "Timing Data:".$/;
        while (my($k,$v) = each(%Timing)) {
            next if ($k =~ /^count_/);
            my $count = $Timing{'count_'.$k};
            $Str .= 
                '  '.sprintf("%25.25s",$k).
                '  '.sprintf('% 8d',$count).
                '  '.sprintf('%12.5f',$v).$/;
        }
    }
    return $Str;
}

}


##
## Use mysqldump to store backups of the tables
##
sub backup_tables {
    my ($gz_sql_filename) = @_;
    my $command = qq{mysqldump --opt loncapa };
                             
    foreach my $table (@ID_Tables,@Activity_Table) {
        my $tablename = $table->{'id'};
        $command .= $tablename.' ';
    }
    $command .= '| gzip >'.$gz_sql_filename;
    $logthis->($command);
    system($command);
}

##
## Load in mysqldumped files
##
sub load_backup_tables {
    my ($gz_sql_filename) = @_;
    if (-s $gz_sql_filename) {
        $logthis->('loading data from gzipped sql file');
        my $command='gzip -dc '.$gz_sql_filename.' | mysql --database=loncapa';
        system($command);
        $logthis->('finished loading gzipped data');;
    } else {
        return undef;
    }
}

##
## 
##
sub update_process_name {
    my ($text) = @_;
    $0 = 'parse_activity_log.pl: '.$text;
}

sub get_filename {
    my ($course,$domain) = @_;
    my ($a,$b,$c,undef) = split('',$course,4);
    return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
}

sub create_tables {
    foreach my $table (@ID_Tables,@Activity_Table) {
        my $table_id = &Apache::lonmysql::create_table($table);
#        print STDERR "Unable to create table ".$table->{'id'}.$/;
#        print STDERR join($/,&Apache::lonmysql::build_table_creation_request($table)).$/;
        if (! defined($table_id)) {
            warn "Unable to create table ".$table->{'id'}.$/;
            warn join($/,&Apache::lonmysql::build_table_creation_request($table)).$/;
            return 0;
        }
    }
    return 1;
}

sub drop_tables {
    foreach my $table (@ID_Tables,@Activity_Table) {
        my $table_id = $table->{'id'};
        &Apache::lonmysql::drop_table($table_id);
    }
}

#################################################################
#################################################################
##
## Database item id code
##
#################################################################
#################################################################
{ # Scoping for ID lookup code
    my %IDs;

sub read_id_tables {
    foreach my $table (@ID_Tables) {
        my @Data = &Apache::lonmysql::get_rows($table->{'id'});
        my $count = 0;
        foreach my $row (@Data) {
            $IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
        }
    }
    return;
}

sub get_id {
    my ($table,$fieldname,$value) = @_;
    if (exists($IDs{$table}->{$value})) {
        return $IDs{$table}->{$value};
    } else {
        # insert into the table - if the item already exists, that is
        # okay.
        my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
        if (! defined($result)) {
            warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
        }
        # get the id
        my @Data = 
            &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
        if (@Data) {
            $IDs{$table}->{$value}=$Data[0]->[0];
            return $IDs{$table}->{$value};
        } else {
            $logthis->("Unable to retrieve id for $table $fieldname $value");
            return undef;
        }
    }
}

} # End of ID scoping


###############################################################
###############################################################
##
##   The usual suspects
##
###############################################################
###############################################################
sub escape {
    my $str=shift;
    $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
    return $str;
}

sub unescape {
    my $str=shift;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>