#!/usr/bin/perl # # The LearningOnline Network # # $Id: parse_activity_log.pl,v 1.3 2004/08/19 21:08:46 matthew Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # #-------------------------------------------------------------------- # # Exit codes # 0 Everything is okay # 1 Another copy is running on this course # 2 Activity log does not exist # 3 Unable to connect to database # 4 Unable to create database tables # 5 Unspecified error? # use strict; use DBI; use lib '/home/httpd/lib/perl/Apache'; use lonmysql(); use Time::HiRes(); use Getopt::Long(); use IO::File; # # Determine parameters my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup,$log); &Getopt::Long::GetOptions( "course=s" => \$course, "domain=s" => \$domain, "help" => \$help, "logfile=s" => \$file, "timerun" => \$time_run, "nocleanup" => \$nocleanup, "drop" => \$drop, "log" => \$log); if (! defined($course) || $help) { print<$logfile")) { die "Unable to open $logfile for writing. Run aborted."; } else { $logthis = \&log_to_file; } } ## ## Determine filenames ## my $sourcefilename; # activity log data my $newfilename; # $sourcefilename will be renamed to this my $sql_filename; # the mysql backup data file name. my $error_filename; # Errors in parsing the activity log will be written here if ($file) { $sourcefilename = $file; } else { $sourcefilename = &get_filename($course,$domain); } $sql_filename = $sourcefilename; $sql_filename =~ s|[^/]*$|activity.log.sql|; $error_filename = $sourcefilename; $error_filename =~ s|[^/]*$|activity.log.errors|; $logthis->('Beginning logging '.time); ## ## There will only be a $newfilename file if a copy of this program is already ## running. my $newfilename = $sourcefilename.'.processing'; if (-e $newfilename) { warn "$newfilename exists"; $logthis->($newfilename.' exists'); exit 2; } if (-e $sourcefilename) { $logthis->('renaming '.$sourcefilename.' to '.$newfilename); rename($sourcefilename,$newfilename); $logthis->("renamed $sourcefilename to $newfilename"); } else { my $command = 'touch '.$newfilename; $logthis->($command); system($command); $logthis->('touch was completed'); } ## ## Table definitions ## my $prefix = $course.'_'.$domain.'_'; my $student_table = $prefix.'students'; my $student_table_def = { id => $student_table, permanent => 'no', columns => [ { name => 'student_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL', auto_inc => 'yes', }, { name => 'student', type => 'VARCHAR(100) BINARY', restrictions => 'NOT NULL', }, ], 'PRIMARY KEY' => ['student_id',], }; my $res_table = $prefix.'resource'; my $res_table_def = { id => $res_table, permanent => 'no', columns => [{ name => 'res_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL', auto_inc => 'yes', }, { name => 'resource', type => 'MEDIUMTEXT', restrictions => 'NOT NULL'}, ], 'PRIMARY KEY' => ['res_id'], }; my $action_table = $prefix.'actions'; my $action_table_def = { id => $action_table, permanent => 'no', columns => [{ name => 'action_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL', auto_inc => 'yes', }, { name => 'action', type => 'VARCHAR(100)', restrictions => 'NOT NULL'}, ], 'PRIMARY KEY' => ['action_id',], }; my $machine_table = $prefix.'machine_table'; my $machine_table_def = { id => $machine_table, permanent => 'no', columns => [{ name => 'machine_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL', auto_inc => 'yes', }, { name => 'machine', type => 'VARCHAR(100)', restrictions => 'NOT NULL'}, ], 'PRIMARY KEY' => ['machine_id',], }; my $activity_table = $prefix.'activity'; my $activity_table_def = { id => $activity_table, permanent => 'no', columns => [ { name => 'res_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'time', type => 'DATETIME', restrictions => 'NOT NULL',}, { name => 'student_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'action_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'idx', # This is here in case a student type => 'MEDIUMINT UNSIGNED', # has multiple submissions during restrictions => 'NOT NULL', # one second. It happens, trust auto_inc => 'yes', }, # me. { name => 'machine_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'action_values', type => 'MEDIUMTEXT', }, ], 'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'], }; my @Activity_Table = ($activity_table_def); my @ID_Tables = ($student_table_def,$res_table_def, $action_table_def,$machine_table_def); ## ## End of table definitions ## # $logthis->('Connectiong to mysql'); &Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'}, $perlvar{'lonSqlAccess'}); if (!&Apache::lonmysql::verify_sql_connection()) { warn "Unable to connect to MySQL database."; $logthis->("Unable to connect to MySQL database."); exit 3; } $logthis->('SQL connection is up'); if ($drop) { &drop_tables(); $logthis->('dropped tables'); } if (-s $sql_filename) { # if ANY one of the tables does not exist, load the tables from the # backup. my @Current_Tables = &Apache::lonmysql::tables_in_db(); my %Found; foreach my $tablename (@Current_Tables) { foreach my $table (@Activity_Table,@ID_Tables) { if ($tablename eq $table->{'id'}) { $Found{$tablename}++; } } } foreach my $table (@Activity_Table,@ID_Tables) { if (! $Found{$table->{'id'}}) { $time_this->(); &load_backup_tables($sql_filename); $time_this->('load backup tables'); last; } } } ## ## Ensure the tables we need exist # create_tables does not complain if the tables already exist $logthis->('creating tables'); if (! &create_tables()) { warn "Unable to create tables"; $logthis->('Unable to create tables'); exit 4; } ## ## Read the ids used for various tables $logthis->('reading id tables'); &read_id_tables(); $logthis->('finished reading id tables'); ## ## Set up the errors file my $error_fh = IO::File->new(">>$error_filename"); ## ## Parse the course log $logthis->('processing course log'); if (-s $newfilename) { my $result = &process_courselog($newfilename,$error_fh); if (! defined($result)) { # Something went wrong along the way... $logthis->('process_courselog returned undef'); exit 5; } elsif ($result > 0) { $time_this->(); $logthis->('process_courselog returned '.$result.' backup up tables'); &backup_tables($sql_filename); $time_this->('write backup tables'); } } close($error_fh); ## ## Clean up the filesystem &Apache::lonmysql::disconnect_from_db(); unlink($newfilename) if (-e $newfilename && ! $nocleanup); ## ## Print timing data $logthis->('printing timing data'); if ($time_run) { my $elapsed_time = Time::HiRes::time - $initial_time; print "Overall time: ".$elapsed_time.$/; print &outputtimes(); $logthis->("Overall time: ".$elapsed_time); $logthis->(&outputtimes()); } if ($log) { close LOGFILE; } exit 0; # Everything is okay, so end here before it gets worse. ######################################################## ######################################################## ## ## Process Course Log ## ######################################################## ######################################################## # # Returns the number of lines in the activity.log file that were processed. sub process_courselog { my ($inputfile,$error_fh) = @_; if (! open(IN,$inputfile)) { warn "Unable to open '$inputfile' for reading"; $logthis->("Unable to open '$inputfile' for reading"); return undef; } my ($linecount,$insertcount); my $dbh = &Apache::lonmysql::get_dbh(); # # Timing variables my @RowData; while (my $line=){ # last if ($linecount > 1000); # # Bulk storage variables $time_this->(); chomp($line); $linecount++; # print $linecount++.$/; my ($timestamp,$host,$log)=split(/\:/,$line,3); $time_this->('splitline'); # # $log has the actual log entries; currently still escaped, and # %26(timestamp)%3a(url)%3a(user)%3a(domain) # then additionally # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value) # or # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value) # # get delimiter between timestamped entries to be &&& $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g; $log = &unescape($log); $time_this->('translate_and_unescape'); # now go over all log entries if (! defined($host)) { $host = 'unknown'; } my $machine_id = &get_id($machine_table,'machine',$host); my $prevchunk = 'none'; foreach my $chunk (split(/\&\&\&/,$log)) { my $warningflag = ''; $time_this->(); my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk); my $student = $uname.':'.$udom; if (! defined($res) || $res =~ /^\s*$/) { $res = '/adm/roles'; $action = 'LOGIN'; } if ($res =~ m|^/prtspool/|) { $res = '/prtspool/'; } if (! defined($action) || $action eq '') { $action = 'VIEW'; } if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) { $warningflag .= 'action'; print $error_fh 'full log entry:'.$log.$/; print $error_fh 'error on chunk:'.$chunk.$/; $logthis->('(action) Unable to parse '.$/.$chunk.$/. 'got '. 'time = '.$time.$/. 'res = '.$res.$/. 'uname= '.$uname.$/. 'udom = '.$udom.$/. 'action='.$action.$/. '@values = '.join(':',@values)); next; #skip it if we cannot understand what is happening. } if (! defined($student) || $student eq ':') { $student = 'unknown'; $warningflag .= 'student'; } if (! defined($res) || $res =~ /^\s*$/) { $res = 'unknown'; $warningflag .= 'res'; } if (! defined($action) || $action =~ /^\s*$/) { $action = 'unknown'; $warningflag .= 'action'; } if (! defined($time) || $time !~ /^\d+$/) { $time = 0; $warningflag .= 'time'; } # $time_this->('split_and_error_check'); my $student_id = &get_id($student_table,'student',$student); my $res_id = &get_id($res_table,'resource',$res); my $action_id = &get_id($action_table,'action',$action); my $sql_time = &Apache::lonmysql::sqltime($time); # if (! defined($student_id) || $student_id eq '') { $warningflag.='student_id'; } if (! defined($res_id) || $res_id eq '') { $warningflag.='res_id'; } if (! defined($action_id) || $action_id eq '') { $warningflag.='action_id'; } if ($warningflag ne '') { print $error_fh 'full log entry:'.$log.$/; print $error_fh 'error on chunk:'.$chunk.$/; $logthis->('warningflag ('.$warningflag.') on chunk '. $/.$chunk.$/.'prevchunk = '.$/.$prevchunk); $prevchunk .= $chunk; next; # skip this chunk } # my $values = $dbh->quote(join('',@values)); $time_this->('get_ids'); # my $row = [$res_id, qq{'$sql_time'}, $student_id, $action_id, qq{''}, # idx $machine_id, $values]; push(@RowData,$row); $time_this->('push_row'); $prevchunk = $chunk; # } $time_this->(); if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) { my $result = &Apache::lonmysql::bulk_store_rows($activity_table, undef, \@RowData); # $logthis->('result = '.$result); $time_this->('bulk_store_rows'); if (! defined($result)) { my $error = &Apache::lonmysql::get_error(); warn "Error occured during insert.".$error; $logthis->('error = '.$error); } undef(@RowData); } } if (@RowData) { $time_this->(); $logthis->('storing '.$linecount); my $result = &Apache::lonmysql::bulk_store_rows($activity_table, undef, \@RowData); $logthis->('result = '.$result); $time_this->('bulk_store_rows'); if (! defined($result)) { my $error = &Apache::lonmysql::get_error(); warn "Error occured during insert.".$error; $logthis->('error = '.$error); } undef(@RowData); } close IN; # print "Number of lines: ".$linecount.$/; # print "Number of inserts: ".$insertcount.$/; return $linecount; } ## ## Somtimes, instead of doing something, doing nothing is appropriate. sub nothing { return; } ## ## Logging routine ## sub log_to_file { my ($input)=@_; print LOGFILE $input.$/; } ## ## Timing routines ## { my %Timing; my $starttime; sub time_action { my ($key) = @_; if (defined($key)) { $Timing{$key}+=Time::HiRes::time-$starttime; $Timing{'count_'.$key}++; } $starttime = Time::HiRes::time; } sub outputtimes { my $Str; if ($time_run) { $Str = "Timing Data:".$/; while (my($k,$v) = each(%Timing)) { next if ($k =~ /^count_/); my $count = $Timing{'count_'.$k}; $Str .= ' '.sprintf("%25.25s",$k). ' '.sprintf('% 8d',$count). ' '.sprintf('%12.5f',$v).$/; } } return $Str; } } ## ## Use mysqldump to store backups of the tables ## sub backup_tables { my ($sql_filename) = @_; my $command = qq{mysqldump --opt loncapa }; foreach my $table (@ID_Tables,@Activity_Table) { my $tablename = $table->{'id'}; $command .= $tablename.' '; } $command .= '>'.$sql_filename; $logthis->($command); system($command); } ## ## Load in mysqldumped files ## sub load_backup_tables { my ($sql_filename) = @_; return undef if (! -e $sql_filename); # Check for .my.cnf my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa'; $logthis->('loading previously saved sql table'.$/.$command); system($command); $logthis->('finished loading old data'); } ## ## ## sub initialize_configuration { # Fake it for now: $perlvar{'lonSqlUser'} = 'www'; $perlvar{'lonSqlAccess'} = 'localhostkey'; $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers'; $perlvar{'lonDefDomain'} = '103'; } sub update_process_name { my ($text) = @_; $0 = 'parse_activity_log.pl: '.$text; } sub get_filename { my ($course,$domain) = @_; my ($a,$b,$c,undef) = split('',$course,4); return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log"; } sub create_tables { foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = &Apache::lonmysql::create_table($table); if (! defined($table_id)) { warn "Unable to create table ".$table->{'id'}.$/; warn &Apache::lonmysql::build_table_creation_request($table).$/; return 0; } } return 1; } sub drop_tables { foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = $table->{'id'}; &Apache::lonmysql::drop_table($table_id); } } ################################################################# ################################################################# ## ## Database item id code ## ################################################################# ################################################################# { # Scoping for ID lookup code my %IDs; sub read_id_tables { foreach my $table (@ID_Tables) { my @Data = &Apache::lonmysql::get_rows($table->{'id'}); my $count = 0; foreach my $row (@Data) { $IDs{$table->{'id'}}->{$row->[1]} = $row->[0]; } } return; } sub get_id { my ($table,$fieldname,$value) = @_; if (exists($IDs{$table}->{$value})) { return $IDs{$table}->{$value}; } else { # insert into the table - if the item already exists, that is # okay. my $result = &Apache::lonmysql::store_row($table,[undef,$value]); if (! defined($result)) { warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error()); } # get the id my @Data = &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'}); if (@Data) { $IDs{$table}->{$value}=$Data[0]->[0]; return $IDs{$table}->{$value}; } else { $logthis->("Unable to retrieve id for $table $fieldname $value"); return undef; } } } } # End of ID scoping ############################################################### ############################################################### ## ## The usual suspects ## ############################################################### ############################################################### sub escape { my $str=shift; $str =~ s/(\W)/"%".unpack('H2',$1)/eg; return $str; } sub unescape { my $str=shift; $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; return $str; }