--- loncom/metadata_database/parse_activity_log.pl 2004/08/11 18:37:23 1.1 +++ loncom/metadata_database/parse_activity_log.pl 2004/12/16 15:03:29 1.6 @@ -2,7 +2,7 @@ # # The LearningOnline Network # -# $Id: parse_activity_log.pl,v 1.1 2004/08/11 18:37:23 matthew Exp $ +# $Id: parse_activity_log.pl,v 1.6 2004/12/16 15:03:29 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -26,16 +26,7 @@ # # http://www.lon-capa.org/ # -############################################################################### -# -# Expects -# -# ../key/$class.key - key file $username:$keynumber -# ../rawdata/$class.log - log file -# ../rawdata/$class.seq - sequence file -# ../data writable -# ------------------------------------------------------------------ Course log - +#-------------------------------------------------------------------- # # Exit codes # 0 Everything is okay @@ -52,17 +43,20 @@ use lib '/home/httpd/lib/perl/Apache'; use lonmysql(); use Time::HiRes(); use Getopt::Long(); - +use IO::File; +use File::Copy; # # Determine parameters -my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup); +my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup,$log,$backup); &Getopt::Long::GetOptions( "course=s" => \$course, "domain=s" => \$domain, + "backup" => \$backup, "help" => \$help, "logfile=s" => \$file, "timerun" => \$time_run, "nocleanup" => \$nocleanup, - "drop" => \$drop); + "drop" => \$drop, + "log" => \$log); if (! defined($course) || $help) { print<$logfile")) { + die "Unable to open $logfile for writing. Run aborted."; + } else { + $logthis = \&log_to_file; + } +} + + +## ## Determine filenames ## my $sourcefilename; # activity log data my $newfilename; # $sourcefilename will be renamed to this -my $sql_filename; # the mysql backup data file name. +my $gz_sql_filename; # the gzipped mysql backup data file name. +my $error_filename; # Errors in parsing the activity log will be written here if ($file) { $sourcefilename = $file; } else { $sourcefilename = &get_filename($course,$domain); } -$sql_filename = $sourcefilename; -$sql_filename =~ s|[^/]*$|activitylog.sql|; +my $sql_filename = $sourcefilename; +$sql_filename =~ s|[^/]*$|activity.log.sql|; +$gz_sql_filename = $sql_filename.'.gz'; +$error_filename = $sourcefilename; +$error_filename =~ s|[^/]*$|activity.log.errors|; +$logthis->('Beginning logging '.time); ## ## There will only be a $newfilename file if a copy of this program is already @@ -122,11 +137,20 @@ $sql_filename =~ s|[^/]*$|activitylog.sq my $newfilename = $sourcefilename.'.processing'; if (-e $newfilename) { warn "$newfilename exists"; + $logthis->($newfilename.' exists'); exit 2; } if (-e $sourcefilename) { + $logthis->('renaming '.$sourcefilename.' to '.$newfilename); rename($sourcefilename,$newfilename); + Copy($newfilename,$newfilename.'.'.time) if ($backup); + $logthis->("renamed $sourcefilename to $newfilename"); +} else { + my $command = 'touch '.$newfilename; + $logthis->($command); + system($command); + $logthis->('touch was completed'); } ## @@ -164,20 +188,20 @@ my $res_table_def = 'PRIMARY KEY' => ['res_id'], }; -my $action_table = $prefix.'actions'; -my $action_table_def = -{ id => $action_table, - permanent => 'no', - columns => [{ name => 'action_id', - type => 'MEDIUMINT UNSIGNED', - restrictions => 'NOT NULL', - auto_inc => 'yes', }, - { name => 'action', - type => 'VARCHAR(100)', - restrictions => 'NOT NULL'}, - ], - 'PRIMARY KEY' => ['action_id',], -}; +#my $action_table = $prefix.'actions'; +#my $action_table_def = +#{ id => $action_table, +# permanent => 'no', +# columns => [{ name => 'action_id', +# type => 'MEDIUMINT UNSIGNED', +# restrictions => 'NOT NULL', +# auto_inc => 'yes', }, +# { name => 'action', +# type => 'VARCHAR(100)', +# restrictions => 'NOT NULL'}, +# ], +# 'PRIMARY KEY' => ['action_id',], +#}; my $machine_table = $prefix.'machine_table'; my $machine_table_def = @@ -206,94 +230,129 @@ my $activity_table_def = type => 'DATETIME', restrictions => 'NOT NULL',}, { name => 'student_id', - type => 'VARCHAR(100) BINARY', + type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, - { name => 'action_id', - type => 'VARCHAR(100) BINARY', + { name => 'action', + type => 'VARCHAR(10)', restrictions => 'NOT NULL',}, { name => 'idx', # This is here in case a student type => 'MEDIUMINT UNSIGNED', # has multiple submissions during restrictions => 'NOT NULL', # one second. It happens, trust auto_inc => 'yes', }, # me. { name => 'machine_id', - type => 'VARCHAR(100) BINARY', + type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, { name => 'action_values', type => 'MEDIUMTEXT', }, ], - 'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'], + 'PRIMARY KEY' => ['time','student_id','res_id','idx'], + 'KEY' => [{columns => ['student_id']}, + {columns => ['time']},], }; -my @Activity_Tables = ($student_table_def,$res_table_def, - $action_table_def,$machine_table_def, - $activity_table_def); +my @Activity_Table = ($activity_table_def); +#my @ID_Tables = ($student_table_def,$res_table_def, +# $action_table_def,$machine_table_def); + +my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def); + ## ## End of table definitions ## # +$logthis->('Connectiong to mysql'); &Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'}, $perlvar{'lonSqlAccess'}); if (!&Apache::lonmysql::verify_sql_connection()) { warn "Unable to connect to MySQL database."; + $logthis->("Unable to connect to MySQL database."); exit 3; } -if ($drop) { &drop_tables(); } -if (-e $sql_filename) { +$logthis->('SQL connection is up'); + +if ($drop) { &drop_tables(); $logthis->('dropped tables'); } + +if (-s $gz_sql_filename) { # if ANY one of the tables does not exist, load the tables from the # backup. my @Current_Tables = &Apache::lonmysql::tables_in_db(); my %Found; foreach my $tablename (@Current_Tables) { - foreach my $table (@Activity_Tables) { + foreach my $table (@Activity_Table,@ID_Tables) { if ($tablename eq $table->{'id'}) { $Found{$tablename}++; } } } - foreach my $table (@Activity_Tables) { + foreach my $table (@Activity_Table,@ID_Tables) { if (! $Found{$table->{'id'}}) { $time_this->(); - &load_backup_tables($sql_filename); + &load_backup_tables($gz_sql_filename); $time_this->('load backup tables'); last; } } } +## +## Ensure the tables we need exist # create_tables does not complain if the tables already exist +$logthis->('creating tables'); if (! &create_tables()) { warn "Unable to create tables"; + $logthis->('Unable to create tables'); exit 4; } +## +## Read the ids used for various tables +$logthis->('reading id tables'); &read_id_tables(); +$logthis->('finished reading id tables'); ## -## Do the main bit of work -if (-e $newfilename) { - my $result = &process_courselog($newfilename); +## Set up the errors file +my $error_fh = IO::File->new(">>$error_filename"); + +## +## Parse the course log +$logthis->('processing course log'); +if (-s $newfilename) { + my $result = &process_courselog($newfilename,$error_fh); if (! defined($result)) { # Something went wrong along the way... + $logthis->('process_courselog returned undef'); exit 5; } elsif ($result > 0) { $time_this->(); - &backup_tables($sql_filename); + $logthis->('process_courselog returned '.$result.' backup up tables'); + &backup_tables($gz_sql_filename); $time_this->('write backup tables'); } } +close($error_fh); ## ## Clean up the filesystem -## &Apache::lonmysql::disconnect_from_db(); -unlink($newfilename) if (! $nocleanup); +unlink($newfilename) if (-e $newfilename && ! $nocleanup); +## +## Print timing data +$logthis->('printing timing data'); if ($time_run) { - print "Overall time: ".(Time::HiRes::time - $initial_time).$/; + my $elapsed_time = Time::HiRes::time - $initial_time; + print "Overall time: ".$elapsed_time.$/; print &outputtimes(); + $logthis->("Overall time: ".$elapsed_time); + $logthis->(&outputtimes()); +} + +if ($log) { + close LOGFILE; } exit 0; # Everything is okay, so end here before it gets worse. @@ -308,9 +367,10 @@ exit 0; # Everything is okay, so end h # # Returns the number of lines in the activity.log file that were processed. sub process_courselog { - my ($inputfile) = @_; + my ($inputfile,$error_fh) = @_; if (! open(IN,$inputfile)) { warn "Unable to open '$inputfile' for reading"; + $logthis->("Unable to open '$inputfile' for reading"); return undef; } my ($linecount,$insertcount); @@ -341,62 +401,122 @@ sub process_courselog { $log = &unescape($log); $time_this->('translate_and_unescape'); # now go over all log entries + if (! defined($host)) { $host = 'unknown'; } my $machine_id = &get_id($machine_table,'machine',$host); - foreach (split(/\&\&\&/,$log)) { + my $prevchunk = 'none'; + foreach my $chunk (split(/\&\&\&/,$log)) { + my $warningflag = ''; $time_this->(); - my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_); + my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk); + my $student = $uname.':'.$udom; if (! defined($res) || $res =~ /^\s*$/) { $res = '/adm/roles'; - $action = 'log in'; + $action = 'LOGIN'; } if ($res =~ m|^/prtspool/|) { $res = '/prtspool/'; } if (! defined($action) || $action eq '') { - $action = 'view'; + $action = 'VIEW'; } - my $student = $uname.':'.$udom; + if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) { + $warningflag .= 'action'; + print $error_fh 'full log entry:'.$log.$/; + print $error_fh 'error on chunk:'.$chunk.$/; + $logthis->('(action) Unable to parse '.$/.$chunk.$/. + 'got '. + 'time = '.$time.$/. + 'res = '.$res.$/. + 'uname= '.$uname.$/. + 'udom = '.$udom.$/. + 'action='.$action.$/. + '@values = '.join(':',@values)); + next; #skip it if we cannot understand what is happening. + } + if (! defined($student) || $student eq ':') { + $student = 'unknown'; + $warningflag .= 'student'; + } + if (! defined($res) || $res =~ /^\s*$/) { + $res = 'unknown'; + $warningflag .= 'res'; + } + if (! defined($action) || $action =~ /^\s*$/) { + $action = 'unknown'; + $warningflag .= 'action'; + } + if (! defined($time) || $time !~ /^\d+$/) { + $time = 0; + $warningflag .= 'time'; + } + # $time_this->('split_and_error_check'); my $student_id = &get_id($student_table,'student',$student); - my $res_id = &get_id($res_table,'resource',$res); - my $action_id = &get_id($action_table,'action',$action); - my $sql_time = &Apache::lonmysql::sqltime($time); + my $res_id = &get_id($res_table,'resource',$res); +# my $action_id = &get_id($action_table,'action',$action); + my $sql_time = &Apache::lonmysql::sqltime($time); + # + if (! defined($student_id) || $student_id eq '') { + $warningflag.='student_id'; + } + if (! defined($res_id) || $res_id eq '') { + $warningflag.='res_id'; + } +# if (! defined($action_id) || $action_id eq '') { +# $warningflag.='action_id'; +# } + if ($warningflag ne '') { + print $error_fh 'full log entry:'.$log.$/; + print $error_fh 'error on chunk:'.$chunk.$/; + $logthis->('warningflag ('.$warningflag.') on chunk '. + $/.$chunk.$/.'prevchunk = '.$/.$prevchunk); + $prevchunk .= $chunk; + next; # skip this chunk + } + # my $values = $dbh->quote(join('',@values)); $time_this->('get_ids'); # my $row = [$res_id, qq{'$sql_time'}, $student_id, - $action_id, + "'".$action."'", +# $action_id, qq{''}, # idx $machine_id, $values]; push(@RowData,$row); $time_this->('push_row'); + $prevchunk = $chunk; # } $time_this->(); - if ($linecount % 100 == 0) { + if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) { my $result = &Apache::lonmysql::bulk_store_rows($activity_table, undef, \@RowData); + # $logthis->('result = '.$result); $time_this->('bulk_store_rows'); if (! defined($result)) { - warn "Error occured during insert.". - &Apache::lonmysql::get_error(); + my $error = &Apache::lonmysql::get_error(); + warn "Error occured during insert.".$error; + $logthis->('error = '.$error); } undef(@RowData); } } if (@RowData) { $time_this->(); + $logthis->('storing '.$linecount); my $result = &Apache::lonmysql::bulk_store_rows($activity_table, undef, \@RowData); + $logthis->('result = '.$result); $time_this->('bulk_store_rows'); if (! defined($result)) { - warn "Error occured during insert.". - &Apache::lonmysql::get_error(); + my $error = &Apache::lonmysql::get_error(); + warn "Error occured during insert.".$error; + $logthis->('error = '.$error); } undef(@RowData); } @@ -406,6 +526,21 @@ sub process_courselog { return $linecount; } + +## +## Somtimes, instead of doing something, doing nothing is appropriate. +sub nothing { + return; +} + +## +## Logging routine +## +sub log_to_file { + my ($input)=@_; + print LOGFILE $input.$/; +} + ## ## Timing routines ## @@ -413,10 +548,6 @@ sub process_courselog { my %Timing; my $starttime; -sub nothing { - return; -} - sub time_action { my ($key) = @_; if (defined($key)) { @@ -449,15 +580,15 @@ sub outputtimes { ## Use mysqldump to store backups of the tables ## sub backup_tables { - my ($sql_filename) = @_; + my ($gz_sql_filename) = @_; my $command = qq{mysqldump --opt loncapa }; - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $tablename = $table->{'id'}; $command .= $tablename.' '; } - $command .= '>'.$sql_filename; - warn $command.$/; + $command .= '| gzip >'.$gz_sql_filename; + $logthis->($command); system($command); } @@ -465,12 +596,15 @@ sub backup_tables { ## Load in mysqldumped files ## sub load_backup_tables { - my ($sql_filename) = @_; - return undef if (! -e $sql_filename); - # Check for .my.cnf - my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa'; - warn $command.$/; - system($command); + my ($gz_sql_filename) = @_; + if (-s $gz_sql_filename) { + &logthis('loading data from gzipped sql file'); + my $command='gzip -dc activity.log.sql.gz | mysql --database=loncapa'; + system($command); + $logthis->('finished loading gzipped data');; + } else { + return undef; + } } ## @@ -496,11 +630,13 @@ sub get_filename { } sub create_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = &Apache::lonmysql::create_table($table); +# print STDERR "Unable to create table ".$table->{'id'}.$/; +# print STDERR join($/,&Apache::lonmysql::build_table_creation_request($table)).$/; if (! defined($table_id)) { warn "Unable to create table ".$table->{'id'}.$/; - warn &Apache::lonmysql::build_table_creation_request($table).$/; + warn join($/,&Apache::lonmysql::build_table_creation_request($table)).$/; return 0; } } @@ -508,7 +644,7 @@ sub create_tables { } sub drop_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = $table->{'id'}; &Apache::lonmysql::drop_table($table_id); } @@ -525,12 +661,14 @@ sub drop_tables { my %IDs; sub read_id_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables) { my @Data = &Apache::lonmysql::get_rows($table->{'id'}); + my $count = 0; foreach my $row (@Data) { $IDs{$table->{'id'}}->{$row->[1]} = $row->[0]; } } + return; } sub get_id { @@ -551,7 +689,7 @@ sub get_id { $IDs{$table}->{$value}=$Data[0]->[0]; return $IDs{$table}->{$value}; } else { - warn "Unable to retrieve id for $table $fieldname $value".$/; + $logthis->("Unable to retrieve id for $table $fieldname $value"); return undef; } } @@ -578,4 +716,3 @@ sub unescape { $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; return $str; } -