--- loncom/metadata_database/parse_activity_log.pl 2004/08/11 18:37:23 1.1 +++ loncom/metadata_database/parse_activity_log.pl 2004/12/20 19:53:36 1.8 @@ -2,7 +2,7 @@ # # The LearningOnline Network # -# $Id: parse_activity_log.pl,v 1.1 2004/08/11 18:37:23 matthew Exp $ +# $Id: parse_activity_log.pl,v 1.8 2004/12/20 19:53:36 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -26,16 +26,7 @@ # # http://www.lon-capa.org/ # -############################################################################### -# -# Expects -# -# ../key/$class.key - key file $username:$keynumber -# ../rawdata/$class.log - log file -# ../rawdata/$class.seq - sequence file -# ../data writable -# ------------------------------------------------------------------ Course log - +#-------------------------------------------------------------------- # # Exit codes # 0 Everything is okay @@ -46,23 +37,41 @@ # 5 Unspecified error? # +# +# Notes: +# +# Logging is done via the $logthis variable, which may be the result of +# overcleverness. log via $logthis->('logtext'); Those are parentheses, +# not curly braces. If the -log command line parameter is set, the $logthis +# routine is set to a routine which writes to a file. If the command line +# parameter is not set $logthis is set to ¬hing, which does what you +# would expect. +# + use strict; use DBI; -use lib '/home/httpd/lib/perl/Apache'; +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration(); +use Apache::lonmysql(); use lonmysql(); use Time::HiRes(); use Getopt::Long(); +use IO::File; +use File::Copy; +use Fcntl qw(:flock); # # Determine parameters -my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup); +my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup,$log,$backup); &Getopt::Long::GetOptions( "course=s" => \$course, "domain=s" => \$domain, + "backup" => \$backup, "help" => \$help, "logfile=s" => \$file, "timerun" => \$time_run, "nocleanup" => \$nocleanup, - "drop" => \$drop); + "drop" => \$drop, + "log" => \$log); if (! defined($course) || $help) { print<$logfile")) { + die "Unable to open $logfile for writing. Run aborted."; + } else { + $logthis = \&log_to_file; + } +} + + +## ## Determine filenames ## my $sourcefilename; # activity log data my $newfilename; # $sourcefilename will be renamed to this -my $sql_filename; # the mysql backup data file name. +my $gz_sql_filename; # the gzipped mysql backup data file name. +my $error_filename; # Errors in parsing the activity log will be written here if ($file) { $sourcefilename = $file; } else { $sourcefilename = &get_filename($course,$domain); } -$sql_filename = $sourcefilename; -$sql_filename =~ s|[^/]*$|activitylog.sql|; +my $sql_filename = $sourcefilename; +$sql_filename =~ s|[^/]*$|activity.log.sql|; +$gz_sql_filename = $sql_filename.'.gz'; +$error_filename = $sourcefilename; +$error_filename =~ s|[^/]*$|activity.log.errors|; +$logthis->('Beginning logging '.time); + + +# +# Wait for a lock on the lockfile to avoid collisions +my $lockfilename = $sourcefilename.'.lock'; +open(LOCKFILE,'>'.$lockfilename); +flock(LOCKFILE,LOCK_EX) || die("Unable to lock $lockfilename. Aborting".$/); ## ## There will only be a $newfilename file if a copy of this program is already @@ -122,13 +159,24 @@ $sql_filename =~ s|[^/]*$|activitylog.sq my $newfilename = $sourcefilename.'.processing'; if (-e $newfilename) { warn "$newfilename exists"; + $logthis->($newfilename.' exists, so I cannot work on it.'); exit 2; } if (-e $sourcefilename) { + $logthis->('renaming '.$sourcefilename.' to '.$newfilename); rename($sourcefilename,$newfilename); + Copy($newfilename,$newfilename.'.'.time) if ($backup); + $logthis->("renamed $sourcefilename to $newfilename"); +} else { + my $command = 'touch '.$newfilename; + $logthis->($command); + system($command); + $logthis->('touch was completed'); } +close(LOCKFILE); + ## ## Table definitions ## @@ -164,20 +212,20 @@ my $res_table_def = 'PRIMARY KEY' => ['res_id'], }; -my $action_table = $prefix.'actions'; -my $action_table_def = -{ id => $action_table, - permanent => 'no', - columns => [{ name => 'action_id', - type => 'MEDIUMINT UNSIGNED', - restrictions => 'NOT NULL', - auto_inc => 'yes', }, - { name => 'action', - type => 'VARCHAR(100)', - restrictions => 'NOT NULL'}, - ], - 'PRIMARY KEY' => ['action_id',], -}; +#my $action_table = $prefix.'actions'; +#my $action_table_def = +#{ id => $action_table, +# permanent => 'no', +# columns => [{ name => 'action_id', +# type => 'MEDIUMINT UNSIGNED', +# restrictions => 'NOT NULL', +# auto_inc => 'yes', }, +# { name => 'action', +# type => 'VARCHAR(100)', +# restrictions => 'NOT NULL'}, +# ], +# 'PRIMARY KEY' => ['action_id',], +#}; my $machine_table = $prefix.'machine_table'; my $machine_table_def = @@ -206,94 +254,149 @@ my $activity_table_def = type => 'DATETIME', restrictions => 'NOT NULL',}, { name => 'student_id', - type => 'VARCHAR(100) BINARY', + type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, - { name => 'action_id', - type => 'VARCHAR(100) BINARY', + { name => 'action', + type => 'VARCHAR(10)', restrictions => 'NOT NULL',}, { name => 'idx', # This is here in case a student type => 'MEDIUMINT UNSIGNED', # has multiple submissions during restrictions => 'NOT NULL', # one second. It happens, trust auto_inc => 'yes', }, # me. { name => 'machine_id', - type => 'VARCHAR(100) BINARY', + type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'action_values', type => 'MEDIUMTEXT', }, ], - 'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'], + 'PRIMARY KEY' => ['time','student_id','res_id','idx'], + 'KEY' => [{columns => ['student_id']}, + {columns => ['time']},], }; -my @Activity_Tables = ($student_table_def,$res_table_def, - $action_table_def,$machine_table_def, - $activity_table_def); - +my @Activity_Table = ($activity_table_def); +my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def); ## ## End of table definitions ## -# -&Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'}, +$logthis->('Connectiong to mysql'); +&Apache::lonmysql::set_mysql_user_and_password('www', $perlvar{'lonSqlAccess'}); if (!&Apache::lonmysql::verify_sql_connection()) { warn "Unable to connect to MySQL database."; + $logthis->("Unable to connect to MySQL database."); exit 3; } +$logthis->('SQL connection is up'); -if ($drop) { &drop_tables(); } -if (-e $sql_filename) { - # if ANY one of the tables does not exist, load the tables from the - # backup. +if ($drop) { &drop_tables(); $logthis->('dropped tables'); } + +if (-s $gz_sql_filename) { + my $backup_modification_time = (stat($gz_sql_filename))[9]; + $logthis->($gz_sql_filename.' was last modified '. + localtime($backup_modification_time). + '('.$backup_modification_time.')'); + # Check for missing tables my @Current_Tables = &Apache::lonmysql::tables_in_db(); + $logthis->(join(',',@Current_Tables)); my %Found; foreach my $tablename (@Current_Tables) { - foreach my $table (@Activity_Tables) { + foreach my $table (@Activity_Table,@ID_Tables) { if ($tablename eq $table->{'id'}) { $Found{$tablename}++; } } } - foreach my $table (@Activity_Tables) { + $logthis->('Found tables '.join(',',keys(%Found))); + my $missing_a_table = 0; + foreach my $table (@Activity_Table,@ID_Tables) { + # Hmmm, should I dump the tables? if (! $Found{$table->{'id'}}) { - $time_this->(); - &load_backup_tables($sql_filename); - $time_this->('load backup tables'); + $logthis->('Missing table '.$table->{'id'}); + $missing_a_table = 1; last; } } + if ($missing_a_table) { + my $table_modification_time = $backup_modification_time; + # If the backup happened prior to the last table modification, + foreach my $table (@Activity_Table,@ID_Tables) { + my %tabledata = &Apache::lonmysql::table_information($table->{'id'}); + next if (! scalar(keys(%tabledata))); # table does not exist + if ($table_modification_time < $tabledata{'Update_time'}) { + $table_modification_time = $tabledata{'Update_time'}; + } + } + $logthis->("Table modification time = ".$table_modification_time); + if ($table_modification_time > $backup_modification_time) { + # Save the current tables in case we need them another time. + my $backup_name = $gz_sql_filename.'.'.time; + $logthis->('Backing existing tables up in '.$backup_name); + &backup_tables($backup_name); + } + $time_this->(); + &load_backup_tables($gz_sql_filename); + $time_this->('load backup tables'); + } } +## +## Ensure the tables we need exist # create_tables does not complain if the tables already exist +$logthis->('creating tables'); if (! &create_tables()) { warn "Unable to create tables"; + $logthis->('Unable to create tables'); exit 4; } +## +## Read the ids used for various tables +$logthis->('reading id tables'); &read_id_tables(); +$logthis->('finished reading id tables'); ## -## Do the main bit of work -if (-e $newfilename) { - my $result = &process_courselog($newfilename); +## Set up the errors file +my $error_fh = IO::File->new(">>$error_filename"); + +## +## Parse the course log +$logthis->('processing course log'); +if (-s $newfilename) { + my $result = &process_courselog($newfilename,$error_fh); if (! defined($result)) { # Something went wrong along the way... + $logthis->('process_courselog returned undef'); exit 5; } elsif ($result > 0) { $time_this->(); - &backup_tables($sql_filename); + $logthis->('process_courselog returned '.$result.' backing up tables'); + &backup_tables($gz_sql_filename); $time_this->('write backup tables'); } } +close($error_fh); ## ## Clean up the filesystem -## &Apache::lonmysql::disconnect_from_db(); -unlink($newfilename) if (! $nocleanup); +unlink($newfilename) if (-e $newfilename && ! $nocleanup); +## +## Print timing data +$logthis->('printing timing data'); if ($time_run) { - print "Overall time: ".(Time::HiRes::time - $initial_time).$/; + my $elapsed_time = Time::HiRes::time - $initial_time; + print "Overall time: ".$elapsed_time.$/; print &outputtimes(); + $logthis->("Overall time: ".$elapsed_time); + $logthis->(&outputtimes()); +} + +if ($log) { + close LOGFILE; } exit 0; # Everything is okay, so end here before it gets worse. @@ -308,9 +411,10 @@ exit 0; # Everything is okay, so end h # # Returns the number of lines in the activity.log file that were processed. sub process_courselog { - my ($inputfile) = @_; + my ($inputfile,$error_fh) = @_; if (! open(IN,$inputfile)) { warn "Unable to open '$inputfile' for reading"; + $logthis->("Unable to open '$inputfile' for reading"); return undef; } my ($linecount,$insertcount); @@ -341,62 +445,122 @@ sub process_courselog { $log = &unescape($log); $time_this->('translate_and_unescape'); # now go over all log entries + if (! defined($host)) { $host = 'unknown'; } my $machine_id = &get_id($machine_table,'machine',$host); - foreach (split(/\&\&\&/,$log)) { + my $prevchunk = 'none'; + foreach my $chunk (split(/\&\&\&/,$log)) { + my $warningflag = ''; $time_this->(); - my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_); + my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk); + my $student = $uname.':'.$udom; if (! defined($res) || $res =~ /^\s*$/) { $res = '/adm/roles'; - $action = 'log in'; + $action = 'LOGIN'; } if ($res =~ m|^/prtspool/|) { $res = '/prtspool/'; } if (! defined($action) || $action eq '') { - $action = 'view'; + $action = 'VIEW'; } - my $student = $uname.':'.$udom; + if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) { + $warningflag .= 'action'; + print $error_fh 'full log entry:'.$log.$/; + print $error_fh 'error on chunk:'.$chunk.$/; + $logthis->('(action) Unable to parse '.$/.$chunk.$/. + 'got '. + 'time = '.$time.$/. + 'res = '.$res.$/. + 'uname= '.$uname.$/. + 'udom = '.$udom.$/. + 'action='.$action.$/. + '@values = '.join(':',@values)); + next; #skip it if we cannot understand what is happening. + } + if (! defined($student) || $student eq ':') { + $student = 'unknown'; + $warningflag .= 'student'; + } + if (! defined($res) || $res =~ /^\s*$/) { + $res = 'unknown'; + $warningflag .= 'res'; + } + if (! defined($action) || $action =~ /^\s*$/) { + $action = 'unknown'; + $warningflag .= 'action'; + } + if (! defined($time) || $time !~ /^\d+$/) { + $time = 0; + $warningflag .= 'time'; + } + # $time_this->('split_and_error_check'); my $student_id = &get_id($student_table,'student',$student); - my $res_id = &get_id($res_table,'resource',$res); - my $action_id = &get_id($action_table,'action',$action); - my $sql_time = &Apache::lonmysql::sqltime($time); + my $res_id = &get_id($res_table,'resource',$res); +# my $action_id = &get_id($action_table,'action',$action); + my $sql_time = &Apache::lonmysql::sqltime($time); + # + if (! defined($student_id) || $student_id eq '') { + $warningflag.='student_id'; + } + if (! defined($res_id) || $res_id eq '') { + $warningflag.='res_id'; + } +# if (! defined($action_id) || $action_id eq '') { +# $warningflag.='action_id'; +# } + if ($warningflag ne '') { + print $error_fh 'full log entry:'.$log.$/; + print $error_fh 'error on chunk:'.$chunk.$/; + $logthis->('warningflag ('.$warningflag.') on chunk '. + $/.$chunk.$/.'prevchunk = '.$/.$prevchunk); + $prevchunk .= $chunk; + next; # skip this chunk + } + # my $values = $dbh->quote(join('',@values)); $time_this->('get_ids'); # my $row = [$res_id, qq{'$sql_time'}, $student_id, - $action_id, + "'".$action."'", +# $action_id, qq{''}, # idx $machine_id, $values]; push(@RowData,$row); $time_this->('push_row'); + $prevchunk = $chunk; # } $time_this->(); - if ($linecount % 100 == 0) { + if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) { my $result = &Apache::lonmysql::bulk_store_rows($activity_table, undef, \@RowData); + # $logthis->('result = '.$result); $time_this->('bulk_store_rows'); if (! defined($result)) { - warn "Error occured during insert.". - &Apache::lonmysql::get_error(); + my $error = &Apache::lonmysql::get_error(); + warn "Error occured during insert.".$error; + $logthis->('error = '.$error); } undef(@RowData); } } if (@RowData) { $time_this->(); + $logthis->('storing '.$linecount); my $result = &Apache::lonmysql::bulk_store_rows($activity_table, undef, \@RowData); + $logthis->('result = '.$result); $time_this->('bulk_store_rows'); if (! defined($result)) { - warn "Error occured during insert.". - &Apache::lonmysql::get_error(); + my $error = &Apache::lonmysql::get_error(); + warn "Error occured during insert.".$error; + $logthis->('error = '.$error); } undef(@RowData); } @@ -406,6 +570,21 @@ sub process_courselog { return $linecount; } + +## +## Somtimes, instead of doing something, doing nothing is appropriate. +sub nothing { + return; +} + +## +## Logging routine +## +sub log_to_file { + my ($input)=@_; + print LOGFILE $input.$/; +} + ## ## Timing routines ## @@ -413,10 +592,6 @@ sub process_courselog { my %Timing; my $starttime; -sub nothing { - return; -} - sub time_action { my ($key) = @_; if (defined($key)) { @@ -449,15 +624,15 @@ sub outputtimes { ## Use mysqldump to store backups of the tables ## sub backup_tables { - my ($sql_filename) = @_; + my ($gz_sql_filename) = @_; my $command = qq{mysqldump --opt loncapa }; - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $tablename = $table->{'id'}; $command .= $tablename.' '; } - $command .= '>'.$sql_filename; - warn $command.$/; + $command .= '| gzip >'.$gz_sql_filename; + $logthis->($command); system($command); } @@ -465,25 +640,20 @@ sub backup_tables { ## Load in mysqldumped files ## sub load_backup_tables { - my ($sql_filename) = @_; - return undef if (! -e $sql_filename); - # Check for .my.cnf - my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa'; - warn $command.$/; - system($command); + my ($gz_sql_filename) = @_; + if (-s $gz_sql_filename) { + $logthis->('loading data from gzipped sql file'); + my $command='gzip -dc '.$gz_sql_filename.' | mysql --database=loncapa'; + system($command); + $logthis->('finished loading gzipped data');; + } else { + return undef; + } } ## ## ## -sub initialize_configuration { - # Fake it for now: - $perlvar{'lonSqlUser'} = 'www'; - $perlvar{'lonSqlAccess'} = 'localhostkey'; - $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers'; - $perlvar{'lonDefDomain'} = '103'; -} - sub update_process_name { my ($text) = @_; $0 = 'parse_activity_log.pl: '.$text; @@ -496,11 +666,13 @@ sub get_filename { } sub create_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = &Apache::lonmysql::create_table($table); +# print STDERR "Unable to create table ".$table->{'id'}.$/; +# print STDERR join($/,&Apache::lonmysql::build_table_creation_request($table)).$/; if (! defined($table_id)) { warn "Unable to create table ".$table->{'id'}.$/; - warn &Apache::lonmysql::build_table_creation_request($table).$/; + warn join($/,&Apache::lonmysql::build_table_creation_request($table)).$/; return 0; } } @@ -508,7 +680,7 @@ sub create_tables { } sub drop_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = $table->{'id'}; &Apache::lonmysql::drop_table($table_id); } @@ -525,12 +697,14 @@ sub drop_tables { my %IDs; sub read_id_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables) { my @Data = &Apache::lonmysql::get_rows($table->{'id'}); + my $count = 0; foreach my $row (@Data) { $IDs{$table->{'id'}}->{$row->[1]} = $row->[0]; } } + return; } sub get_id { @@ -551,7 +725,7 @@ sub get_id { $IDs{$table}->{$value}=$Data[0]->[0]; return $IDs{$table}->{$value}; } else { - warn "Unable to retrieve id for $table $fieldname $value".$/; + $logthis->("Unable to retrieve id for $table $fieldname $value"); return undef; } } @@ -578,4 +752,3 @@ sub unescape { $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; return $str; } -