--- loncom/metadata_database/parse_activity_log.pl 2005/02/09 21:54:15 1.13 +++ loncom/metadata_database/parse_activity_log.pl 2005/03/31 22:09:40 1.14 @@ -2,7 +2,7 @@ # # The LearningOnline Network # -# $Id: parse_activity_log.pl,v 1.13 2005/02/09 21:54:15 matthew Exp $ +# $Id: parse_activity_log.pl,v 1.14 2005/03/31 22:09:40 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -61,6 +61,7 @@ use Getopt::Long(); use IO::File; use File::Copy; use Fcntl qw(:flock); +use HTML::TokeParser; # # Determine parameters @@ -141,7 +142,6 @@ if ($log) { ## my $sourcefilename; # activity log data my $newfilename; # $sourcefilename will be renamed to this -my $gz_sql_filename; # the gzipped mysql backup data file name. my $error_filename; # Errors in parsing the activity log will be written here if ($srcfile) { $sourcefilename = $srcfile; @@ -150,12 +150,16 @@ if ($srcfile) { } my $sql_filename = $sourcefilename; $sql_filename =~ s|[^/]*$|activity.log.sql|; -$gz_sql_filename = $sql_filename.'.gz'; +my $gz_sql_filename = $sql_filename.'.gz'; +# +my $xml_filename = $sourcefilename; +$xml_filename =~ s|[^/]*$|activity.log.xml|; +my $gz_xml_filename = $xml_filename.'.gz'; +# $error_filename = $sourcefilename; $error_filename =~ s|[^/]*$|activity.log.errors|; $logthis->('Beginning logging '.time); - # # Wait for a lock on the lockfile to avoid collisions my $lockfilename = $sourcefilename.'.lock'; @@ -192,10 +196,9 @@ close(LOCKFILE); ## ## Table definitions ## -my $prefix = $course.'_'.$domain.'_'; -my $student_table = &Apache::lonmysql::fix_table_name($prefix.'students'); +my %tables = &table_names($course,$domain); my $student_table_def = -{ id => $student_table, +{ id => $tables{'student'}, permanent => 'no', columns => [ { name => 'student_id', @@ -209,9 +212,8 @@ my $student_table_def = 'PRIMARY KEY' => ['student_id',], }; -my $res_table = &Apache::lonmysql::fix_table_name($prefix.'resource'); my $res_table_def = -{ id => $res_table, +{ id => $tables{'res'}, permanent => 'no', columns => [{ name => 'res_id', type => 'MEDIUMINT UNSIGNED', @@ -224,7 +226,6 @@ my $res_table_def = 'PRIMARY KEY' => ['res_id'], }; -#my $action_table = &Apache::lonmysql::fix_table_name($prefix.'actions'); #my $action_table_def = #{ id => $action_table, # permanent => 'no', @@ -239,9 +240,8 @@ my $res_table_def = # 'PRIMARY KEY' => ['action_id',], #}; -my $machine_table = &Apache::lonmysql::fix_table_name($prefix.'machine_table'); my $machine_table_def = -{ id => $machine_table, +{ id => $tables{'machine'}, permanent => 'no', columns => [{ name => 'machine_id', type => 'MEDIUMINT UNSIGNED', @@ -254,9 +254,8 @@ my $machine_table_def = 'PRIMARY KEY' => ['machine_id',], }; -my $activity_table = &Apache::lonmysql::fix_table_name($prefix.'activity'); my $activity_table_def = -{ id => $activity_table, +{ id => $tables{'activity'}, permanent => 'no', columns => [ { name => 'res_id', @@ -288,9 +287,11 @@ my $activity_table_def = my @Activity_Table = ($activity_table_def); my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def); + ## ## End of table definitions ## +$logthis->('tables = '.join(',',keys(%tables))); $logthis->('Connectiong to mysql'); &Apache::lonmysql::set_mysql_user_and_password('www', @@ -302,53 +303,46 @@ if (!&Apache::lonmysql::verify_sql_conne } $logthis->('SQL connection is up'); -if (-s $gz_sql_filename) { +my $missing_table = &check_for_missing_tables(values(%tables)); +if (-s $gz_sql_filename && ! -s $gz_xml_filename) { my $backup_modification_time = (stat($gz_sql_filename))[9]; $logthis->($gz_sql_filename.' was last modified '. localtime($backup_modification_time). '('.$backup_modification_time.')'); - # Check for missing tables - my @Current_Tables = &Apache::lonmysql::tables_in_db(); - $logthis->(join(',',@Current_Tables)); - my %Found; - foreach my $tablename (@Current_Tables) { - foreach my $table (@Activity_Table,@ID_Tables) { - if ($tablename eq $table->{'id'}) { - $Found{$tablename}++; - } - } - } - $logthis->('Found tables '.join(',',keys(%Found))); - my $missing_a_table = 0; - foreach my $table (@Activity_Table,@ID_Tables) { - # Hmmm, should I dump the tables? - if (! $Found{$table->{'id'}}) { - $logthis->('Missing table '.$table->{'id'}); - $missing_a_table = 1; - last; + if ($missing_table) { + # If the backup happened prior to the last table modification, + # we need to save the tables. + if (&latest_table_modification_time() > $backup_modification_time) { + # Save the current tables in case we need them another time. + $logthis->('Backing existing tables up'); + &backup_tables_as_xml($gz_xml_filename.'.save_'.time,\%tables); } + $time_this->(); + &load_backup_sql_tables($gz_sql_filename); + &backup_tables_as_xml($gz_xml_filename,\%tables); + $time_this->('load backup tables'); } - if ($missing_a_table) { +} elsif (-s $gz_xml_filename) { + my $backup_modification_time = (stat($gz_xml_filename))[9]; + $logthis->($gz_xml_filename.' was last modified '. + localtime($backup_modification_time). + '('.$backup_modification_time.')'); + if ($missing_table) { my $table_modification_time = $backup_modification_time; # If the backup happened prior to the last table modification, - foreach my $table (@Activity_Table,@ID_Tables) { - my %tabledata = &Apache::lonmysql::table_information($table->{'id'}); - next if (! scalar(keys(%tabledata))); # table does not exist - if ($table_modification_time < $tabledata{'Update_time'}) { - $table_modification_time = $tabledata{'Update_time'}; - } - } - $logthis->("Table modification time = ".$table_modification_time); - if ($table_modification_time > $backup_modification_time) { + # we need to save the tables. + if (&latest_table_modification_time() > $backup_modification_time) { # Save the current tables in case we need them another time. - my $backup_name = $gz_sql_filename.'.'.time; - $logthis->('Backing existing tables up in '.$backup_name); - &backup_tables($backup_name); + $logthis->('Backing existing tables up'); + &backup_tables_as_xml($gz_xml_filename.'.save_'.time,\%tables); } $time_this->(); - &load_backup_tables($gz_sql_filename); + # We have to make our own tables for the xml format + &drop_tables(); + &create_tables(); + &load_backup_xml_tables($gz_xml_filename,\%tables); $time_this->('load backup tables'); - } + } } ## @@ -375,7 +369,7 @@ my $error_fh = IO::File->new(">>$error_f ## Parse the course log $logthis->('processing course log'); if (-s $newfilename) { - my $result = &process_courselog($newfilename,$error_fh); + my $result = &process_courselog($newfilename,$error_fh,\%tables); if (! defined($result)) { # Something went wrong along the way... $logthis->('process_courselog returned undef'); @@ -383,7 +377,7 @@ if (-s $newfilename) { } elsif ($result > 0) { $time_this->(); $logthis->('process_courselog returned '.$result.' backing up tables'); - &backup_tables($gz_sql_filename); + &backup_tables_as_xml($gz_xml_filename,\%tables); $time_this->('write backup tables'); } if ($drop_when_done) { &drop_tables(); $logthis->('dropped tables'); } @@ -416,11 +410,25 @@ foreach my $file ($lockfilename, $error_ } } - exit 0; # Everything is okay, so end here before it gets worse. ######################################################## ######################################################## +sub table_names { + my ($course,$domain) = @_; + my $prefix = $course.'_'.$domain.'_'; + # + my %tables = + ( student =>&Apache::lonmysql::fix_table_name($prefix.'students'), + res =>&Apache::lonmysql::fix_table_name($prefix.'resource'), + machine =>&Apache::lonmysql::fix_table_name($prefix.'machine_table'), + activity=>&Apache::lonmysql::fix_table_name($prefix.'activity'), + ); + return %tables; +} + +######################################################## +######################################################## ## ## Process Course Log ## @@ -429,7 +437,7 @@ exit 0; # Everything is okay, so end h # # Returns the number of lines in the activity.log file that were processed. sub process_courselog { - my ($inputfile,$error_fh) = @_; + my ($inputfile,$error_fh,$tables) = @_; if (! open(IN,$inputfile)) { warn "Unable to open '$inputfile' for reading"; $logthis->("Unable to open '$inputfile' for reading"); @@ -438,8 +446,7 @@ sub process_courselog { my ($linecount,$insertcount); my $dbh = &Apache::lonmysql::get_dbh(); # - # Timing variables - my @RowData; + &store_entry(); while (my $line=){ # last if ($linecount > 1000); # @@ -449,7 +456,6 @@ sub process_courselog { $linecount++; # print $linecount++.$/; my ($timestamp,$host,$log)=split(/\:/,$line,3); - $time_this->('splitline'); # # $log has the actual log entries; currently still escaped, and # %26(timestamp)%3a(url)%3a(user)%3a(domain) @@ -461,16 +467,12 @@ sub process_courselog { # get delimiter between timestamped entries to be &&& $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g; $log = &unescape($log); - $time_this->('translate_and_unescape'); # now go over all log entries if (! defined($host)) { $host = 'unknown'; } - my $machine_id = &get_id($machine_table,'machine',$host); my $prevchunk = 'none'; foreach my $chunk (split(/\&\&\&/,$log)) { my $warningflag = ''; - $time_this->(); my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk); - my $student = $uname.':'.$udom; if (! defined($res) || $res =~ /^\s*$/) { $res = '/adm/roles'; $action = 'LOGIN'; @@ -495,114 +497,44 @@ sub process_courselog { '@values = '.join('&',@values)); next; #skip it if we cannot understand what is happening. } - if (! defined($student) || $student eq ':') { - $student = 'unknown'; - $warningflag .= 'student'; - } - if (! defined($res) || $res =~ /^\s*$/) { - $res = 'unknown'; - $warningflag .= 'res'; - } - if (! defined($action) || $action =~ /^\s*$/) { - $action = 'unknown'; - $warningflag .= 'action'; - } - if (! defined($time) || $time !~ /^\d+$/) { - $time = 0; - $warningflag .= 'time'; - } - # - $time_this->('split_and_error_check'); - my $student_id = &get_id($student_table,'student',$student); - my $res_id = &get_id($res_table,'resource',$res); -# my $action_id = &get_id($action_table,'action',$action); - my $sql_time = &Apache::lonmysql::sqltime($time); - # - if (! defined($student_id) || $student_id eq '') { - $warningflag.='student_id'; - } - if (! defined($res_id) || $res_id eq '') { - $warningflag.='res_id'; - } -# if (! defined($action_id) || $action_id eq '') { -# $warningflag.='action_id'; -# } - if ($warningflag ne '') { - print $error_fh 'full log entry:'.$log.$/; - print $error_fh 'error on chunk:'.$chunk.$/; - $logthis->('warningflag ('.$warningflag.') on chunk '. - $/.$chunk.$/.'prevchunk = '.$/.$prevchunk); - $prevchunk .= $chunk; - next; # skip this chunk - } # - my $store_values; + my %data = (student => $uname.':'.$udom, + resource => $res, + machine => $host, + action => $action, + time => &Apache::lonmysql::sqltime($time)); if ($action eq 'POST') { - $store_values = + $data{'action_values'} = $dbh->quote(join('&',map { &escape($_); } @values)); } else { - $store_values = $dbh->quote(join('&',@values)); + $data{'action_values'} = $dbh->quote(join('&',@values)); } - $time_this->('get_ids'); - # - my $row = [$res_id, - qq{'$sql_time'}, - $student_id, - "'".$action."'", -# $action_id, - qq{''}, # idx - $machine_id, - $store_values]; - push(@RowData,$row); - $time_this->('push_row'); - $prevchunk = $chunk; - # - } - $time_this->(); - if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) { - my $result = &Apache::lonmysql::bulk_store_rows($activity_table, - undef, - \@RowData); - # $logthis->('result = '.$result); - $time_this->('bulk_store_rows'); - if (! defined($result)) { - my $error = &Apache::lonmysql::get_error(); - warn "Error occured during insert.".$error; - $logthis->('error = '.$error); + my $error = &store_entry($dbh,$tables,\%data); + if ($error) { + $logthis->('error store_entry:'.$error." on %data"); } - undef(@RowData); + $prevchunk = $chunk; } } - if (@RowData) { - $time_this->(); - $logthis->('storing '.$linecount); - my $result = &Apache::lonmysql::bulk_store_rows($activity_table, - undef, - \@RowData); - $logthis->('result = '.$result); - $time_this->('bulk_store_rows'); - if (! defined($result)) { - my $error = &Apache::lonmysql::get_error(); - warn "Error occured during insert.".$error; - $logthis->('error = '.$error); - } - undef(@RowData); + my $result = &store_entry($dbh,$tables); + if (! defined($result)) { + my $error = &Apache::lonmysql::get_error(); + warn "Error occured during insert.".$error; + $logthis->('error = '.$error); } close IN; -# print "Number of lines: ".$linecount.$/; -# print "Number of inserts: ".$insertcount.$/; return $linecount; } ## -## Somtimes, instead of doing something, doing nothing is appropriate. +## default value for $logthis and $time_this sub nothing { return; } ## -## Logging routine +## Logging routine (look for $log) ## sub log_to_file { my ($input)=@_; @@ -643,11 +575,47 @@ sub outputtimes { } +sub latest_table_modification_time { + my $latest_time; + foreach my $table (@Activity_Table,@ID_Tables) { + my %tabledata = &Apache::lonmysql::table_information($table->{'id'}); + next if (! scalar(keys(%tabledata))); # table does not exist + if (! defined($latest_time) || + $latest_time < $tabledata{'Update_time'}) { + $latest_time = $tabledata{'Update_time'}; + } + } + return $latest_time; +} + +sub check_for_missing_tables { + my @wanted_tables = @_; + # Check for missing tables + my @Current_Tables = &Apache::lonmysql::tables_in_db(); + my %Found; + foreach my $tablename (@Current_Tables) { + foreach my $table (@wanted_tables) { + if ($tablename eq $table) { + $Found{$tablename}++; + } + } + } + $logthis->('Found tables '.join(',',keys(%Found))); + my $missing_a_table = 0; + foreach my $table (@wanted_tables) { + if (! $Found{$table}) { + $logthis->('Missing table '.$table); + $missing_a_table = 1; + last; + } + } + return $missing_a_table; +} ## ## Use mysqldump to store backups of the tables ## -sub backup_tables { +sub backup_tables_as_sql { my ($gz_sql_filename) = @_; my $command = qq{mysqldump --quote-names --opt loncapa }; foreach my $table (@ID_Tables,@Activity_Table) { @@ -663,7 +631,7 @@ sub backup_tables { ## ## Load in mysqldumped files ## -sub load_backup_tables { +sub load_backup_sql_tables { my ($gz_sql_filename) = @_; if (-s $gz_sql_filename) { $logthis->('loading data from gzipped sql file'); @@ -756,6 +724,173 @@ sub get_id { } # End of ID scoping +############################################################### +############################################################### +## +## Save as XML +## +############################################################### +############################################################### +sub backup_tables_as_xml { + my ($filename,$tables) = @_; + open(XMLFILE,"|gzip - > $filename") || return ('error:unable to write '.$filename); + my $query = qq{ + SELECT B.resource, + A.time, + A.idx, + C.student, + A.action, + E.machine, + A.action_values + FROM $tables->{'activity'} AS A + LEFT JOIN $tables->{'res'} AS B ON B.res_id=A.res_id + LEFT JOIN $tables->{'student'} AS C ON C.student_id=A.student_id + LEFT JOIN $tables->{'machine'} AS E ON E.machine_id=A.machine_id + ORDER BY A.time DESC + }; + $query =~ s/\s+/ /g; + my $dbh = &Apache::lonmysql::get_dbh(); + my $sth = $dbh->prepare($query); + if (! $sth->execute()) { + $logthis->(''. + 'WARNING: Could not retrieve from database:'. + $sth->errstr().''); + return undef; + } else { + my ($res,$sqltime,$idx,$student,$action,$machine,$action_values); + if ($sth->bind_columns(\$res,\$sqltime,\$idx,\$student,\$action, + \$machine,\$action_values)) { + + while ($sth->fetch) { + print XMLFILE ''. + qq{$res}. + qq{}. + qq{$idx}. + qq{$student}. + qq{$action}. + qq{$machine}. + qq{$action_values}. + ''.$/; + } + } else { + warn "Unable to bind to columns.\n"; + return undef; + } + } + close XMLFILE; + return; +} + +############################################################### +############################################################### +## +## load as xml +## +############################################################### +############################################################### +sub load_backup_xml_tables { + my ($filename,$tables) = @_; + my $xmlfh; + open($xmlfh,"cat $filename | gzip -d - |"); + if (! defined($xmlfh)) { + return ('error:unable to read '.$filename); + } + my $dbh = &Apache::lonmysql::get_dbh(); + my $parser = HTML::TokeParser->new($xmlfh); + $parser->xml_mode('1'); + &store_entry(); + my %data; + while (my $token = $parser->get_token()) { + if ($token->[0] eq 'S' && $token->[1] eq 'row') { + undef(%data); + } + foreach my $tag ('resource','time','idx', + 'student','action','machine','action_values') { + if ($token->[0] eq 'S' && $token->[1] eq $tag) { + my $text = $parser->get_text("/$tag"); + $data{$tag} = $text; + } + } + if ($token->[0] eq 'E' && $token->[1] eq 'row') { + $data{'action_values'} =qq{'$data{'action_values'}'}; + my $error = &store_entry($dbh,$tables,\%data); + } + } + &store_entry($dbh,$tables); + return; +} + + +####################################################################### +####################################################################### +## +## store_entry - accumulate data to be inserted into the database +## +## Pass no values in to clear accumulator +## Pass ($dbh,\%tables) to initiate storage of values +## Pass ($dbh,\%tables,\%data) to use normally +## +####################################################################### +####################################################################### + +{ + my @rows; + my $max_row_count = 100; + +sub store_entry { + if (! @_) { + undef(@rows); + return ''; + } + my ($dbh,$tables,$data) = @_; + return if (! defined($tables)); + if (defined($data)) { + my $error; + foreach my $field ('student','resource','action','time') { + if (! defined($data->{$field}) || $data->{$field} eq ':' || + $data->{$field}=~ /^\s*$/) { + $error.=$field.','; + } + } + if ($error) { $error=~s/,$//; return $error; } + # + my $student_id = &get_id($tables->{'student'},'student', + $data->{'student'}); + my $res_id = &get_id($tables->{'res'}, + 'resource',$data->{'resource'}); + my $machine_id = &get_id($tables->{'machine'}, + 'machine',$data->{'machine'}); + my $idx = $data->{'idx'}; if (! $idx) { $idx = "''"; } + # + push(@rows,[$res_id, + qq{'$data->{'time'}'}, + $student_id, + qq{'$data->{'action'}'}, + $idx, + $machine_id, + $data->{'action_values'}]); + } + if (defined($tables) && + ( (! defined($data) && scalar(@rows)) || scalar(@rows)>$max_row_count) + ){ + # Store the rows + my $result = + &Apache::lonmysql::bulk_store_rows($tables->{'activity'}, + undef, + \@rows); + if (! defined($result)) { + my $error = &Apache::lonmysql::get_error(); + warn "Error occured during insert.".$error; + return $error; + } + undef(@rows); + return $result if (! defined($data)); + } + + return ''; +} + +} # end of scope for &store_entry ############################################################### ###############################################################