#!/usr/bin/perl # # The LearningOnline Network # # $Id: parse_activity_log.pl,v 1.13 2005/02/09 21:54:15 matthew Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # #-------------------------------------------------------------------- # # Exit codes # 0 Everything is okay # 1 Another copy is running on this course # 2 Activity log does not exist # 3 Unable to connect to database # 4 Unable to create database tables # 5 Unable to open log file # 6 Unable to get lock on activity log # # # Notes: # # Logging is done via the $logthis variable, which may be the result of # overcleverness. log via $logthis->('logtext'); Those are parentheses, # not curly braces. If the -log command line parameter is set, the $logthis # routine is set to a routine which writes to a file. If the command line # parameter is not set $logthis is set to ¬hing, which does what you # would expect. # use strict; use DBI; use lib '/home/httpd/lib/perl/Apache'; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration(); use Apache::lonmysql(); use lonmysql(); use Time::HiRes(); use Getopt::Long(); use IO::File; use File::Copy; use Fcntl qw(:flock); # # Determine parameters my ($help,$course,$domain,$drop_when_done,$srcfile,$logfile,$time_run,$nocleanup,$log,$backup); &Getopt::Long::GetOptions( "course=s" => \$course, "domain=s" => \$domain, "backup" => \$backup, "help" => \$help, "logfile=s" => \$logfile, "srcfile=s" => \$srcfile, "timerun" => \$time_run, "nocleanup" => \$nocleanup, "dropwhendone" => \$drop_when_done, "log" => \$log); if (! defined($course) || $help) { print<$logfile")) { warn("Unable to open $logfile for writing. Run aborted."); exit 5; } else { $logthis = \&log_to_file; } } ## ## Determine filenames ## my $sourcefilename; # activity log data my $newfilename; # $sourcefilename will be renamed to this my $gz_sql_filename; # the gzipped mysql backup data file name. my $error_filename; # Errors in parsing the activity log will be written here if ($srcfile) { $sourcefilename = $srcfile; } else { $sourcefilename = &get_filename($course,$domain); } my $sql_filename = $sourcefilename; $sql_filename =~ s|[^/]*$|activity.log.sql|; $gz_sql_filename = $sql_filename.'.gz'; $error_filename = $sourcefilename; $error_filename =~ s|[^/]*$|activity.log.errors|; $logthis->('Beginning logging '.time); # # Wait for a lock on the lockfile to avoid collisions my $lockfilename = $sourcefilename.'.lock'; open(LOCKFILE,'>'.$lockfilename); if (!flock(LOCKFILE,LOCK_EX)) { warn("Unable to lock $lockfilename. Aborting".$/); exit 6; } ## ## There will only be a $newfilename file if a copy of this program is already ## running. my $newfilename = $sourcefilename.'.processing'; if (-e $newfilename) { warn "$newfilename exists"; $logthis->($newfilename.' exists, so I cannot work on it.'); exit 2; } if (-e $sourcefilename) { $logthis->('renaming '.$sourcefilename.' to '.$newfilename); rename($sourcefilename,$newfilename); Copy($newfilename,$newfilename.'.'.time) if ($backup); $logthis->("renamed $sourcefilename to $newfilename"); } else { my $command = 'touch '.$newfilename; $logthis->($command); system($command); $logthis->('touch was completed'); } close(LOCKFILE); ## ## Table definitions ## my $prefix = $course.'_'.$domain.'_'; my $student_table = &Apache::lonmysql::fix_table_name($prefix.'students'); my $student_table_def = { id => $student_table, permanent => 'no', columns => [ { name => 'student_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL', auto_inc => 'yes', }, { name => 'student', type => 'VARCHAR(100) BINARY', restrictions => 'NOT NULL', }, ], 'PRIMARY KEY' => ['student_id',], }; my $res_table = &Apache::lonmysql::fix_table_name($prefix.'resource'); my $res_table_def = { id => $res_table, permanent => 'no', columns => [{ name => 'res_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL', auto_inc => 'yes', }, { name => 'resource', type => 'MEDIUMTEXT', restrictions => 'NOT NULL'}, ], 'PRIMARY KEY' => ['res_id'], }; #my $action_table = &Apache::lonmysql::fix_table_name($prefix.'actions'); #my $action_table_def = #{ id => $action_table, # permanent => 'no', # columns => [{ name => 'action_id', # type => 'MEDIUMINT UNSIGNED', # restrictions => 'NOT NULL', # auto_inc => 'yes', }, # { name => 'action', # type => 'VARCHAR(100)', # restrictions => 'NOT NULL'}, # ], # 'PRIMARY KEY' => ['action_id',], #}; my $machine_table = &Apache::lonmysql::fix_table_name($prefix.'machine_table'); my $machine_table_def = { id => $machine_table, permanent => 'no', columns => [{ name => 'machine_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL', auto_inc => 'yes', }, { name => 'machine', type => 'VARCHAR(100)', restrictions => 'NOT NULL'}, ], 'PRIMARY KEY' => ['machine_id',], }; my $activity_table = &Apache::lonmysql::fix_table_name($prefix.'activity'); my $activity_table_def = { id => $activity_table, permanent => 'no', columns => [ { name => 'res_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'time', type => 'DATETIME', restrictions => 'NOT NULL',}, { name => 'student_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'action', type => 'VARCHAR(10)', restrictions => 'NOT NULL',}, { name => 'idx', # This is here in case a student type => 'MEDIUMINT UNSIGNED', # has multiple submissions during restrictions => 'NOT NULL', # one second. It happens, trust auto_inc => 'yes', }, # me. { name => 'machine_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'action_values', type => 'MEDIUMTEXT', }, ], 'PRIMARY KEY' => ['time','student_id','res_id','idx'], 'KEY' => [{columns => ['student_id']}, {columns => ['time']},], }; my @Activity_Table = ($activity_table_def); my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def); ## ## End of table definitions ## $logthis->('Connectiong to mysql'); &Apache::lonmysql::set_mysql_user_and_password('www', $perlvar{'lonSqlAccess'}); if (!&Apache::lonmysql::verify_sql_connection()) { warn "Unable to connect to MySQL database."; $logthis->("Unable to connect to MySQL database."); exit 3; } $logthis->('SQL connection is up'); if (-s $gz_sql_filename) { my $backup_modification_time = (stat($gz_sql_filename))[9]; $logthis->($gz_sql_filename.' was last modified '. localtime($backup_modification_time). '('.$backup_modification_time.')'); # Check for missing tables my @Current_Tables = &Apache::lonmysql::tables_in_db(); $logthis->(join(',',@Current_Tables)); my %Found; foreach my $tablename (@Current_Tables) { foreach my $table (@Activity_Table,@ID_Tables) { if ($tablename eq $table->{'id'}) { $Found{$tablename}++; } } } $logthis->('Found tables '.join(',',keys(%Found))); my $missing_a_table = 0; foreach my $table (@Activity_Table,@ID_Tables) { # Hmmm, should I dump the tables? if (! $Found{$table->{'id'}}) { $logthis->('Missing table '.$table->{'id'}); $missing_a_table = 1; last; } } if ($missing_a_table) { my $table_modification_time = $backup_modification_time; # If the backup happened prior to the last table modification, foreach my $table (@Activity_Table,@ID_Tables) { my %tabledata = &Apache::lonmysql::table_information($table->{'id'}); next if (! scalar(keys(%tabledata))); # table does not exist if ($table_modification_time < $tabledata{'Update_time'}) { $table_modification_time = $tabledata{'Update_time'}; } } $logthis->("Table modification time = ".$table_modification_time); if ($table_modification_time > $backup_modification_time) { # Save the current tables in case we need them another time. my $backup_name = $gz_sql_filename.'.'.time; $logthis->('Backing existing tables up in '.$backup_name); &backup_tables($backup_name); } $time_this->(); &load_backup_tables($gz_sql_filename); $time_this->('load backup tables'); } } ## ## Ensure the tables we need exist # create_tables does not complain if the tables already exist $logthis->('creating tables'); if (! &create_tables()) { warn "Unable to create tables"; $logthis->('Unable to create tables'); exit 4; } ## ## Read the ids used for various tables $logthis->('reading id tables'); &read_id_tables(); $logthis->('finished reading id tables'); ## ## Set up the errors file my $error_fh = IO::File->new(">>$error_filename"); ## ## Parse the course log $logthis->('processing course log'); if (-s $newfilename) { my $result = &process_courselog($newfilename,$error_fh); if (! defined($result)) { # Something went wrong along the way... $logthis->('process_courselog returned undef'); exit 5; } elsif ($result > 0) { $time_this->(); $logthis->('process_courselog returned '.$result.' backing up tables'); &backup_tables($gz_sql_filename); $time_this->('write backup tables'); } if ($drop_when_done) { &drop_tables(); $logthis->('dropped tables'); } } close($error_fh); ## ## Clean up the filesystem &Apache::lonmysql::disconnect_from_db(); unlink($newfilename) if (-e $newfilename && ! $nocleanup); ## ## Print timing data $logthis->('printing timing data'); if ($time_run) { my $elapsed_time = Time::HiRes::time - $initial_time; print "Overall time: ".$elapsed_time.$/; print &outputtimes(); $logthis->("Overall time: ".$elapsed_time); $logthis->(&outputtimes()); } if ($log) { close LOGFILE; } foreach my $file ($lockfilename, $error_filename,$logfile) { if (-z $file) { unlink($file); } } exit 0; # Everything is okay, so end here before it gets worse. ######################################################## ######################################################## ## ## Process Course Log ## ######################################################## ######################################################## # # Returns the number of lines in the activity.log file that were processed. sub process_courselog { my ($inputfile,$error_fh) = @_; if (! open(IN,$inputfile)) { warn "Unable to open '$inputfile' for reading"; $logthis->("Unable to open '$inputfile' for reading"); return undef; } my ($linecount,$insertcount); my $dbh = &Apache::lonmysql::get_dbh(); # # Timing variables my @RowData; while (my $line=){ # last if ($linecount > 1000); # # Bulk storage variables $time_this->(); chomp($line); $linecount++; # print $linecount++.$/; my ($timestamp,$host,$log)=split(/\:/,$line,3); $time_this->('splitline'); # # $log has the actual log entries; currently still escaped, and # %26(timestamp)%3a(url)%3a(user)%3a(domain) # then additionally # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value) # or # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value) # # get delimiter between timestamped entries to be &&& $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g; $log = &unescape($log); $time_this->('translate_and_unescape'); # now go over all log entries if (! defined($host)) { $host = 'unknown'; } my $machine_id = &get_id($machine_table,'machine',$host); my $prevchunk = 'none'; foreach my $chunk (split(/\&\&\&/,$log)) { my $warningflag = ''; $time_this->(); my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk); my $student = $uname.':'.$udom; if (! defined($res) || $res =~ /^\s*$/) { $res = '/adm/roles'; $action = 'LOGIN'; } if ($res =~ m|^/prtspool/|) { $res = '/prtspool/'; } if (! defined($action) || $action eq '') { $action = 'VIEW'; } if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) { $warningflag .= 'action'; print $error_fh 'full log entry:'.$log.$/; print $error_fh 'error on chunk:'.$chunk.$/; $logthis->('(action) Unable to parse '.$/.$chunk.$/. 'got '. 'time = '.$time.$/. 'res = '.$res.$/. 'uname= '.$uname.$/. 'udom = '.$udom.$/. 'action='.$action.$/. '@values = '.join('&',@values)); next; #skip it if we cannot understand what is happening. } if (! defined($student) || $student eq ':') { $student = 'unknown'; $warningflag .= 'student'; } if (! defined($res) || $res =~ /^\s*$/) { $res = 'unknown'; $warningflag .= 'res'; } if (! defined($action) || $action =~ /^\s*$/) { $action = 'unknown'; $warningflag .= 'action'; } if (! defined($time) || $time !~ /^\d+$/) { $time = 0; $warningflag .= 'time'; } # $time_this->('split_and_error_check'); my $student_id = &get_id($student_table,'student',$student); my $res_id = &get_id($res_table,'resource',$res); # my $action_id = &get_id($action_table,'action',$action); my $sql_time = &Apache::lonmysql::sqltime($time); # if (! defined($student_id) || $student_id eq '') { $warningflag.='student_id'; } if (! defined($res_id) || $res_id eq '') { $warningflag.='res_id'; } # if (! defined($action_id) || $action_id eq '') { # $warningflag.='action_id'; # } if ($warningflag ne '') { print $error_fh 'full log entry:'.$log.$/; print $error_fh 'error on chunk:'.$chunk.$/; $logthis->('warningflag ('.$warningflag.') on chunk '. $/.$chunk.$/.'prevchunk = '.$/.$prevchunk); $prevchunk .= $chunk; next; # skip this chunk } # my $store_values; if ($action eq 'POST') { $store_values = $dbh->quote(join('&',map { &escape($_); } @values)); } else { $store_values = $dbh->quote(join('&',@values)); } $time_this->('get_ids'); # my $row = [$res_id, qq{'$sql_time'}, $student_id, "'".$action."'", # $action_id, qq{''}, # idx $machine_id, $store_values]; push(@RowData,$row); $time_this->('push_row'); $prevchunk = $chunk; # } $time_this->(); if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) { my $result = &Apache::lonmysql::bulk_store_rows($activity_table, undef, \@RowData); # $logthis->('result = '.$result); $time_this->('bulk_store_rows'); if (! defined($result)) { my $error = &Apache::lonmysql::get_error(); warn "Error occured during insert.".$error; $logthis->('error = '.$error); } undef(@RowData); } } if (@RowData) { $time_this->(); $logthis->('storing '.$linecount); my $result = &Apache::lonmysql::bulk_store_rows($activity_table, undef, \@RowData); $logthis->('result = '.$result); $time_this->('bulk_store_rows'); if (! defined($result)) { my $error = &Apache::lonmysql::get_error(); warn "Error occured during insert.".$error; $logthis->('error = '.$error); } undef(@RowData); } close IN; # print "Number of lines: ".$linecount.$/; # print "Number of inserts: ".$insertcount.$/; return $linecount; } ## ## Somtimes, instead of doing something, doing nothing is appropriate. sub nothing { return; } ## ## Logging routine ## sub log_to_file { my ($input)=@_; print LOGFILE $input.$/; } ## ## Timing routines ## { my %Timing; my $starttime; sub time_action { my ($key) = @_; if (defined($key)) { $Timing{$key}+=Time::HiRes::time-$starttime; $Timing{'count_'.$key}++; } $starttime = Time::HiRes::time; } sub outputtimes { my $Str; if ($time_run) { $Str = "Timing Data:".$/; while (my($k,$v) = each(%Timing)) { next if ($k =~ /^count_/); my $count = $Timing{'count_'.$k}; $Str .= ' '.sprintf("%25.25s",$k). ' '.sprintf('% 8d',$count). ' '.sprintf('%12.5f',$v).$/; } } return $Str; } } ## ## Use mysqldump to store backups of the tables ## sub backup_tables { my ($gz_sql_filename) = @_; my $command = qq{mysqldump --quote-names --opt loncapa }; foreach my $table (@ID_Tables,@Activity_Table) { my $tablename = $table->{'id'}; $tablename =~ s/\`//g; $command .= $tablename.' '; } $command .= '| gzip >'.$gz_sql_filename; $logthis->($command); system($command); } ## ## Load in mysqldumped files ## sub load_backup_tables { my ($gz_sql_filename) = @_; if (-s $gz_sql_filename) { $logthis->('loading data from gzipped sql file'); my $command='gzip -dc '.$gz_sql_filename.' | mysql --database=loncapa'; system($command); $logthis->('finished loading gzipped data');; } else { return undef; } } ## ## ## sub update_process_name { my ($text) = @_; $0 = 'parse_activity_log.pl: '.$text; } sub get_filename { my ($course,$domain) = @_; my ($a,$b,$c,undef) = split('',$course,4); return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log"; } sub create_tables { foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = &Apache::lonmysql::create_table($table); if (! defined($table_id)) { warn "Unable to create table ".$table->{'id'}.$/; $logthis->('Unable to create table '.$table->{'id'}); $logthis->(join($/,&Apache::lonmysql::build_table_creation_request($table))); return 0; } } return 1; } sub drop_tables { foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = $table->{'id'}; &Apache::lonmysql::drop_table($table_id); } } ################################################################# ################################################################# ## ## Database item id code ## ################################################################# ################################################################# { # Scoping for ID lookup code my %IDs; sub read_id_tables { foreach my $table (@ID_Tables) { my @Data = &Apache::lonmysql::get_rows($table->{'id'}); my $count = 0; foreach my $row (@Data) { $IDs{$table->{'id'}}->{$row->[1]} = $row->[0]; } } return; } sub get_id { my ($table,$fieldname,$value) = @_; if (exists($IDs{$table}->{$value})) { return $IDs{$table}->{$value}; } else { # insert into the table - if the item already exists, that is # okay. my $result = &Apache::lonmysql::store_row($table,[undef,$value]); if (! defined($result)) { warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error()); } # get the id my @Data = &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'}); if (@Data) { $IDs{$table}->{$value}=$Data[0]->[0]; return $IDs{$table}->{$value}; } else { $logthis->("Unable to retrieve id for $table $fieldname $value"); return undef; } } } } # End of ID scoping ############################################################### ############################################################### ## ## The usual suspects ## ############################################################### ############################################################### sub escape { my $str=shift; $str =~ s/(\W)/"%".unpack('H2',$1)/eg; return $str; } sub unescape { my $str=shift; $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; return $str; }