--- loncom/metadata_database/parse_activity_log.pl 2004/08/18 19:33:27 1.2 +++ loncom/metadata_database/parse_activity_log.pl 2004/12/20 19:53:36 1.8 @@ -2,7 +2,7 @@ # # The LearningOnline Network # -# $Id: parse_activity_log.pl,v 1.2 2004/08/18 19:33:27 matthew Exp $ +# $Id: parse_activity_log.pl,v 1.8 2004/12/20 19:53:36 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -26,16 +26,7 @@ # # http://www.lon-capa.org/ # -############################################################################### -# -# Expects -# -# ../key/$class.key - key file $username:$keynumber -# ../rawdata/$class.log - log file -# ../rawdata/$class.seq - sequence file -# ../data writable -# ------------------------------------------------------------------ Course log - +#-------------------------------------------------------------------- # # Exit codes # 0 Everything is okay @@ -46,18 +37,35 @@ # 5 Unspecified error? # +# +# Notes: +# +# Logging is done via the $logthis variable, which may be the result of +# overcleverness. log via $logthis->('logtext'); Those are parentheses, +# not curly braces. If the -log command line parameter is set, the $logthis +# routine is set to a routine which writes to a file. If the command line +# parameter is not set $logthis is set to &nothing, which does what you +# would expect. 
+# + use strict; use DBI; -use lib '/home/httpd/lib/perl/Apache'; +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration(); +use Apache::lonmysql(); use lonmysql(); use Time::HiRes(); use Getopt::Long(); +use IO::File; +use File::Copy; +use Fcntl qw(:flock); # # Determine parameters -my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup,$log); +my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup,$log,$backup); &Getopt::Long::GetOptions( "course=s" => \$course, "domain=s" => \$domain, + "backup" => \$backup, "help" => \$help, "logfile=s" => \$file, "timerun" => \$time_run, @@ -72,6 +80,8 @@ Process a lon-capa activity log into a d Parameters: course Required domain Optional + backup optional if present, backup the activity log file + before processing it drop optional if present, drop all course specific activity log tables. file optional Specify the file to parse, including path @@ -87,7 +97,6 @@ USAGE ## ## Set up timing code -## my $time_this = \¬hing; if ($time_run) { $time_this = \&time_action; @@ -95,11 +104,20 @@ if ($time_run) { my $initial_time = Time::HiRes::time; ## -## Set up logging code +## Read in configuration parameters +## +my %perlvar = %{&LONCAPA::Configuration::read_conf('loncapa.conf')}; + +if (! defined($domain) || $domain eq '') { + $domain = $perlvar{'lonDefDomain'}; +} +&update_process_name($course.'@'.$domain); + ## +## Set up logging code my $logthis = \¬hing; if ($log) { - my $logfile = "/tmp/parse_activity_log.log.".time; + my $logfile = $perlvar{'lonDaemons'}.'/tmp/parse_activity_log.log.'.time; print STDERR "$0: logging to $logfile".$/; if (! open(LOGFILE,">$logfile")) { die "Unable to open $logfile for writing. Run aborted."; @@ -107,29 +125,33 @@ if ($log) { $logthis = \&log_to_file; } } -## -## Read in configuration parameters -## -my %perlvar; -&initialize_configuration(); -if (! 
defined($domain) || $domain eq '') { - $domain = $perlvar{'lonDefDomain'}; -} -&update_process_name($course.'@'.$domain); + ## ## Determine filenames ## my $sourcefilename; # activity log data my $newfilename; # $sourcefilename will be renamed to this -my $sql_filename; # the mysql backup data file name. +my $gz_sql_filename; # the gzipped mysql backup data file name. +my $error_filename; # Errors in parsing the activity log will be written here if ($file) { $sourcefilename = $file; } else { $sourcefilename = &get_filename($course,$domain); } -$sql_filename = $sourcefilename; +my $sql_filename = $sourcefilename; $sql_filename =~ s|[^/]*$|activity.log.sql|; +$gz_sql_filename = $sql_filename.'.gz'; +$error_filename = $sourcefilename; +$error_filename =~ s|[^/]*$|activity.log.errors|; +$logthis->('Beginning logging '.time); + + +# +# Wait for a lock on the lockfile to avoid collisions +my $lockfilename = $sourcefilename.'.lock'; +open(LOCKFILE,'>'.$lockfilename); +flock(LOCKFILE,LOCK_EX) || die("Unable to lock $lockfilename. Aborting".$/); ## ## There will only be a $newfilename file if a copy of this program is already @@ -137,15 +159,24 @@ $sql_filename =~ s|[^/]*$|activity.log.s my $newfilename = $sourcefilename.'.processing'; if (-e $newfilename) { warn "$newfilename exists"; - $logthis->($newfilename.' exists'); + $logthis->($newfilename.' exists, so I cannot work on it.'); exit 2; } if (-e $sourcefilename) { + $logthis->('renaming '.$sourcefilename.' 
to '.$newfilename); rename($sourcefilename,$newfilename); + Copy($newfilename,$newfilename.'.'.time) if ($backup); $logthis->("renamed $sourcefilename to $newfilename"); +} else { + my $command = 'touch '.$newfilename; + $logthis->($command); + system($command); + $logthis->('touch was completed'); } +close(LOCKFILE); + ## ## Table definitions ## @@ -181,20 +212,20 @@ my $res_table_def = 'PRIMARY KEY' => ['res_id'], }; -my $action_table = $prefix.'actions'; -my $action_table_def = -{ id => $action_table, - permanent => 'no', - columns => [{ name => 'action_id', - type => 'MEDIUMINT UNSIGNED', - restrictions => 'NOT NULL', - auto_inc => 'yes', }, - { name => 'action', - type => 'VARCHAR(100)', - restrictions => 'NOT NULL'}, - ], - 'PRIMARY KEY' => ['action_id',], -}; +#my $action_table = $prefix.'actions'; +#my $action_table_def = +#{ id => $action_table, +# permanent => 'no', +# columns => [{ name => 'action_id', +# type => 'MEDIUMINT UNSIGNED', +# restrictions => 'NOT NULL', +# auto_inc => 'yes', }, +# { name => 'action', +# type => 'VARCHAR(100)', +# restrictions => 'NOT NULL'}, +# ], +# 'PRIMARY KEY' => ['action_id',], +#}; my $machine_table = $prefix.'machine_table'; my $machine_table_def = @@ -225,8 +256,8 @@ my $activity_table_def = { name => 'student_id', type => 'MEDIUMINT UNSIGNED', restrictions => 'NOT NULL',}, - { name => 'action_id', - type => 'MEDIUMINT UNSIGNED', + { name => 'action', + type => 'VARCHAR(10)', restrictions => 'NOT NULL',}, { name => 'idx', # This is here in case a student type => 'MEDIUMINT UNSIGNED', # has multiple submissions during @@ -238,83 +269,124 @@ my $activity_table_def = { name => 'action_values', type => 'MEDIUMTEXT', }, ], - 'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'], + 'PRIMARY KEY' => ['time','student_id','res_id','idx'], + 'KEY' => [{columns => ['student_id']}, + {columns => ['time']},], }; -my @Activity_Tables = ($student_table_def,$res_table_def, - $action_table_def,$machine_table_def, - 
$activity_table_def); - +my @Activity_Table = ($activity_table_def); +my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def); ## ## End of table definitions ## -# -&Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'}, +$logthis->('Connectiong to mysql'); +&Apache::lonmysql::set_mysql_user_and_password('www', $perlvar{'lonSqlAccess'}); if (!&Apache::lonmysql::verify_sql_connection()) { warn "Unable to connect to MySQL database."; $logthis->("Unable to connect to MySQL database."); exit 3; } +$logthis->('SQL connection is up'); if ($drop) { &drop_tables(); $logthis->('dropped tables'); } -if (-e $sql_filename) { - $logthis->('reading in from '.$sql_filename); - # if ANY one of the tables does not exist, load the tables from the - # backup. + +if (-s $gz_sql_filename) { + my $backup_modification_time = (stat($gz_sql_filename))[9]; + $logthis->($gz_sql_filename.' was last modified '. + localtime($backup_modification_time). + '('.$backup_modification_time.')'); + # Check for missing tables my @Current_Tables = &Apache::lonmysql::tables_in_db(); + $logthis->(join(',',@Current_Tables)); my %Found; foreach my $tablename (@Current_Tables) { - foreach my $table (@Activity_Tables) { + foreach my $table (@Activity_Table,@ID_Tables) { if ($tablename eq $table->{'id'}) { $Found{$tablename}++; } } } - foreach my $table (@Activity_Tables) { + $logthis->('Found tables '.join(',',keys(%Found))); + my $missing_a_table = 0; + foreach my $table (@Activity_Table,@ID_Tables) { + # Hmmm, should I dump the tables? if (! 
$Found{$table->{'id'}}) { - $time_this->(); - &load_backup_tables($sql_filename); - $time_this->('load backup tables'); + $logthis->('Missing table '.$table->{'id'}); + $missing_a_table = 1; last; } } + if ($missing_a_table) { + my $table_modification_time = $backup_modification_time; + # If the backup happened prior to the last table modification, + foreach my $table (@Activity_Table,@ID_Tables) { + my %tabledata = &Apache::lonmysql::table_information($table->{'id'}); + next if (! scalar(keys(%tabledata))); # table does not exist + if ($table_modification_time < $tabledata{'Update_time'}) { + $table_modification_time = $tabledata{'Update_time'}; + } + } + $logthis->("Table modification time = ".$table_modification_time); + if ($table_modification_time > $backup_modification_time) { + # Save the current tables in case we need them another time. + my $backup_name = $gz_sql_filename.'.'.time; + $logthis->('Backing existing tables up in '.$backup_name); + &backup_tables($backup_name); + } + $time_this->(); + &load_backup_tables($gz_sql_filename); + $time_this->('load backup tables'); + } } +## +## Ensure the tables we need exist # create_tables does not complain if the tables already exist +$logthis->('creating tables'); if (! &create_tables()) { warn "Unable to create tables"; $logthis->('Unable to create tables'); exit 4; } +## +## Read the ids used for various tables $logthis->('reading id tables'); &read_id_tables(); $logthis->('finished reading id tables'); ## -## Do the main bit of work -if (-e $newfilename) { - my $result = &process_courselog($newfilename); +## Set up the errors file +my $error_fh = IO::File->new(">>$error_filename"); + +## +## Parse the course log +$logthis->('processing course log'); +if (-s $newfilename) { + my $result = &process_courselog($newfilename,$error_fh); if (! defined($result)) { # Something went wrong along the way... 
$logthis->('process_courselog returned undef'); exit 5; } elsif ($result > 0) { $time_this->(); - $logthis->('process_courselog returned '.$result.' backup up tables'); - &backup_tables($sql_filename); + $logthis->('process_courselog returned '.$result.' backing up tables'); + &backup_tables($gz_sql_filename); $time_this->('write backup tables'); } } +close($error_fh); ## ## Clean up the filesystem -## &Apache::lonmysql::disconnect_from_db(); -unlink($newfilename) if (! $nocleanup); +unlink($newfilename) if (-e $newfilename && ! $nocleanup); +## +## Print timing data +$logthis->('printing timing data'); if ($time_run) { my $elapsed_time = Time::HiRes::time - $initial_time; print "Overall time: ".$elapsed_time.$/; @@ -339,7 +411,7 @@ exit 0; # Everything is okay, so end h # # Returns the number of lines in the activity.log file that were processed. sub process_courselog { - my ($inputfile) = @_; + my ($inputfile,$error_fh) = @_; if (! open(IN,$inputfile)) { warn "Unable to open '$inputfile' for reading"; $logthis->("Unable to open '$inputfile' for reading"); @@ -384,17 +456,26 @@ sub process_courselog { if (! defined($res) || $res =~ /^\s*$/) { $res = '/adm/roles'; $action = 'LOGIN'; -# $warningflag .= 'res'; } if ($res =~ m|^/prtspool/|) { $res = '/prtspool/'; } if (! defined($action) || $action eq '') { $action = 'VIEW'; -# $warningflag .= 'action'; } if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) { $warningflag .= 'action'; + print $error_fh 'full log entry:'.$log.$/; + print $error_fh 'error on chunk:'.$chunk.$/; + $logthis->('(action) Unable to parse '.$/.$chunk.$/. + 'got '. + 'time = '.$time.$/. + 'res = '.$res.$/. + 'uname= '.$uname.$/. + 'udom = '.$udom.$/. + 'action='.$action.$/. + '@values = '.join(':',@values)); + next; #skip it if we cannot understand what is happening. } if (! 
defined($student) || $student eq ':') { $student = 'unknown'; @@ -415,9 +496,9 @@ sub process_courselog { # $time_this->('split_and_error_check'); my $student_id = &get_id($student_table,'student',$student); - my $res_id = &get_id($res_table,'resource',$res); - my $action_id = &get_id($action_table,'action',$action); - my $sql_time = &Apache::lonmysql::sqltime($time); + my $res_id = &get_id($res_table,'resource',$res); +# my $action_id = &get_id($action_table,'action',$action); + my $sql_time = &Apache::lonmysql::sqltime($time); # if (! defined($student_id) || $student_id eq '') { $warningflag.='student_id'; @@ -425,10 +506,12 @@ sub process_courselog { if (! defined($res_id) || $res_id eq '') { $warningflag.='res_id'; } - if (! defined($action_id) || $action_id eq '') { - $warningflag.='action_id'; - } +# if (! defined($action_id) || $action_id eq '') { +# $warningflag.='action_id'; +# } if ($warningflag ne '') { + print $error_fh 'full log entry:'.$log.$/; + print $error_fh 'error on chunk:'.$chunk.$/; $logthis->('warningflag ('.$warningflag.') on chunk '. $/.$chunk.$/.'prevchunk = '.$/.$prevchunk); $prevchunk .= $chunk; @@ -441,7 +524,8 @@ sub process_courselog { my $row = [$res_id, qq{'$sql_time'}, $student_id, - $action_id, + "'".$action."'", +# $action_id, qq{''}, # idx $machine_id, $values]; @@ -540,14 +624,14 @@ sub outputtimes { ## Use mysqldump to store backups of the tables ## sub backup_tables { - my ($sql_filename) = @_; + my ($gz_sql_filename) = @_; my $command = qq{mysqldump --opt loncapa }; - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $tablename = $table->{'id'}; $command .= $tablename.' '; } - $command .= '>'.$sql_filename; + $command .= '| gzip >'.$gz_sql_filename; $logthis->($command); system($command); } @@ -556,25 +640,20 @@ sub backup_tables { ## Load in mysqldumped files ## sub load_backup_tables { - my ($sql_filename) = @_; - return undef if (! 
-e $sql_filename); - # Check for .my.cnf - my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa'; - $logthis->('loading previously saved sql table'.$/.$command); - system($command); + my ($gz_sql_filename) = @_; + if (-s $gz_sql_filename) { + $logthis->('loading data from gzipped sql file'); + my $command='gzip -dc '.$gz_sql_filename.' | mysql --database=loncapa'; + system($command); + $logthis->('finished loading gzipped data');; + } else { + return undef; + } } ## ## ## -sub initialize_configuration { - # Fake it for now: - $perlvar{'lonSqlUser'} = 'www'; - $perlvar{'lonSqlAccess'} = 'localhostkey'; - $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers'; - $perlvar{'lonDefDomain'} = '103'; -} - sub update_process_name { my ($text) = @_; $0 = 'parse_activity_log.pl: '.$text; @@ -587,11 +666,13 @@ sub get_filename { } sub create_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = &Apache::lonmysql::create_table($table); +# print STDERR "Unable to create table ".$table->{'id'}.$/; +# print STDERR join($/,&Apache::lonmysql::build_table_creation_request($table)).$/; if (! defined($table_id)) { warn "Unable to create table ".$table->{'id'}.$/; - warn &Apache::lonmysql::build_table_creation_request($table).$/; + warn join($/,&Apache::lonmysql::build_table_creation_request($table)).$/; return 0; } } @@ -599,7 +680,7 @@ sub create_tables { } sub drop_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables,@Activity_Table) { my $table_id = $table->{'id'}; &Apache::lonmysql::drop_table($table_id); } @@ -616,12 +697,14 @@ sub drop_tables { my %IDs; sub read_id_tables { - foreach my $table (@Activity_Tables) { + foreach my $table (@ID_Tables) { my @Data = &Apache::lonmysql::get_rows($table->{'id'}); + my $count = 0; foreach my $row (@Data) { $IDs{$table->{'id'}}->{$row->[1]} = $row->[0]; } } + return; } sub get_id {