--- loncom/metadata_database/parse_activity_log.pl 2005/07/11 05:16:19 1.15 +++ loncom/metadata_database/parse_activity_log.pl 2014/11/24 02:36:34 1.25 @@ -2,7 +2,7 @@ # # The LearningOnline Network # -# $Id: parse_activity_log.pl,v 1.15 2005/07/11 05:16:19 matthew Exp $ +# $Id: parse_activity_log.pl,v 1.25 2014/11/24 02:36:34 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -48,14 +48,11 @@ # parameter is not set $logthis is set to ¬hing, which does what you # would expect. # - use strict; use DBI; -use lib '/home/httpd/lib/perl/Apache'; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration(); use Apache::lonmysql(); -use lonmysql(); use Time::HiRes(); use Getopt::Long(); use IO::File; @@ -131,7 +128,7 @@ if ($log) { print STDERR "$0: logging to $logfile".$/; if (! open(LOGFILE,">$logfile")) { warn("Unable to open $logfile for writing. Run aborted."); - exit 5; + &clean_up_and_exit(5); } else { $logthis = \&log_to_file; } @@ -144,6 +141,7 @@ if ($log) { my $sourcefilename; # activity log data my $newfilename; # $sourcefilename will be renamed to this my $error_filename; # Errors in parsing the activity log will be written here +my $chunk_filename; # where we save data we are not going to write to db if ($srcfile) { $sourcefilename = $srcfile; } else { @@ -153,6 +151,8 @@ my $sql_filename = $sourcefilename; $sql_filename =~ s|[^/]*$|activity.log.sql|; my $gz_sql_filename = $sql_filename.'.gz'; # +$chunk_filename = $sourcefilename.".unprocessed_chunks"; +# my $xml_filename = $sourcefilename; my $gz_xml_filename = $xml_filename.'.gz'; if (defined($xmlfile)) { @@ -175,37 +175,26 @@ $logthis->('Beginning logging '.time); # # Wait for a lock on the lockfile to avoid collisions my $lockfilename = $sourcefilename.'.lock'; +$newfilename = $sourcefilename.'.processing'; if (! defined($xmlfile)) { -open(LOCKFILE,'>'.$lockfilename); -if (!flock(LOCKFILE,LOCK_EX)) { - warn("Unable to lock $lockfilename. Aborting".$/); - exit 6; -} + open(LOCKFILE,'>'.$lockfilename); + if (!flock(LOCKFILE,LOCK_EX|LOCK_NB)) { + warn("Unable to lock $lockfilename. Aborting".$/); + # don't call clean_up_and_exit another instance is running and + # we don't want to 'cleanup' their files + exit 6; + } -## -## There will only be a $newfilename file if a copy of this program is already -## running. -my $newfilename = $sourcefilename.'.processing'; -if (-e $newfilename) { - warn "$newfilename exists"; - $logthis->($newfilename.' exists, so I cannot work on it.'); - exit 2; -} - -if (-e $sourcefilename) { - $logthis->('renaming '.$sourcefilename.' to '.$newfilename); - rename($sourcefilename,$newfilename); - Copy($newfilename,$newfilename.'.'.time) if ($backup); - $logthis->("renamed $sourcefilename to $newfilename"); -} else { - my $command = 'touch '.$newfilename; - $logthis->($command); - system($command); - $logthis->('touch was completed'); + if (! -e $newfilename && -e $sourcefilename) { + $logthis->('renaming '.$sourcefilename.' to '.$newfilename); + rename($sourcefilename,$newfilename); + Copy($newfilename,$newfilename.'.'.time) if ($backup); + $logthis->("renamed $sourcefilename to $newfilename"); + } elsif (! -e $newfilename) { + utime(undef,undef,$newfilename); + } } -close(LOCKFILE); -} ## ## Table definitions ## @@ -312,10 +301,11 @@ $logthis->('Connectiong to mysql'); if (!&Apache::lonmysql::verify_sql_connection()) { warn "Unable to connect to MySQL database."; $logthis->("Unable to connect to MySQL database."); - exit 3; + &clean_up_and_exit(3); } $logthis->('SQL connection is up'); +&update_process_name($course.'@'.$domain." loading existing data"); my $missing_table = &check_for_missing_tables(values(%tables)); if (-s $gz_sql_filename && ! -s $gz_xml_filename) { my $backup_modification_time = (stat($gz_sql_filename))[9]; @@ -359,7 +349,7 @@ if (-s $gz_sql_filename && ! -s $gz_xml_ } if (defined($xmlfile)) { - exit(0); + &clean_up_and_exit(0); } ## @@ -369,7 +359,7 @@ $logthis->('creating tables'); if (! &create_tables()) { warn "Unable to create tables"; $logthis->('Unable to create tables'); - exit 4; + &clean_up_and_exit(4); } ## @@ -385,12 +375,14 @@ my $error_fh = IO::File->new(">>$error_f ## ## Parse the course log $logthis->('processing course log'); +&update_process_name($course.'@'.$domain." processing new data"); if (-s $newfilename) { my $result = &process_courselog($newfilename,$error_fh,\%tables); + &update_process_name($course.'@'.$domain." backing up new data"); if (! defined($result)) { # Something went wrong along the way... $logthis->('process_courselog returned undef'); - exit 5; + &clean_up_and_exit(5); } elsif ($result > 0) { $time_this->(); $logthis->('process_courselog returned '.$result.'.'.$/. @@ -418,17 +410,25 @@ if ($time_run) { $logthis->(&outputtimes()); } -if ($log) { - close LOGFILE; -} +&clean_up_and_exit(0); + +######################################################## +######################################################## -foreach my $file ($lockfilename, $error_filename,$logfile) { - if (defined($file) && -z $file) { - unlink($file); +sub clean_up_and_exit { + my ($exit_code) = @_; + # Close files + close(LOCKFILE); + close(LOGFILE); + # Remove zero length files + foreach my $file ($lockfilename, $error_filename,$logfile) { + if (defined($file) && -z $file) { + unlink($file); + } } -} -exit 0; # Everything is okay, so end here before it gets worse. + exit $exit_code; +} ######################################################## ######################################################## @@ -481,6 +481,8 @@ sub process_courselog { # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value) # or # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value) + # or + # %3aPUTSTORE%3a(name)%3d(value)%26(name)%3d(value) # # get delimiter between timestamped entries to be &&& $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g; @@ -489,8 +491,18 @@ sub process_courselog { if (! defined($host)) { $host = 'unknown'; } my $prevchunk = 'none'; foreach my $chunk (split(/\&\&\&/,$log)) { + if (length($chunk) > 20000) { + # avoid putting too much data into the database + # (usually an uploaded file or something similar) + if (! &savechunk(\$chunk,$timestamp,$host)) { + close(IN); + return undef; + } + next; + } my $warningflag = ''; my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk); + # if (! defined($res) || $res =~ /^\s*$/) { $res = '/adm/roles'; $action = 'LOGIN'; @@ -501,11 +513,15 @@ sub process_courselog { if (! defined($action) || $action eq '') { $action = 'VIEW'; } - if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) { + if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE|PUTSTORE)$/) { $warningflag .= 'action'; print $error_fh 'full log entry:'.$log.$/; - print $error_fh 'error on chunk:'.$chunk.$/; - $logthis->('(action) Unable to parse '.$/.$chunk.$/. + print $error_fh 'error on chunk (saving)'.$/; + if (! &savechunk(\$chunk,$timestamp,$host)) { + close(IN); + return undef; + } + $logthis->('(action) Unable to parse chunk'.$/. 'got '. 'time = '.$time.$/. 'res = '.$res.$/. @@ -542,6 +558,20 @@ sub process_courselog { } close IN; return $linecount; + ## + ## + sub savechunk { + my ($chunkref,$timestamp,$host) = @_; + my $chunk = &escape(${$chunkref}); + if (! open(CHUNKFILE,">>$chunk_filename") || + ! print CHUNKFILE $timestamp.':'.$host.':'.$chunk.$/) { + # abort + close(CHUNKFILE); + return 0; + } + close(CHUNKFILE); + return 1; + } } @@ -718,21 +748,20 @@ sub read_id_tables { sub get_id { my ($table,$fieldname,$value) = @_; - if (exists($IDs{$table}->{$value})) { + if (exists($IDs{$table}->{$value}) && $IDs{$table}->{$value} =~ /^\d+$/) { return $IDs{$table}->{$value}; } else { # insert into the table - if the item already exists, that is # okay. my $result = &Apache::lonmysql::store_row($table,[undef,$value]); if (! defined($result)) { - warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error()); + warn("Got error on id insert for $value\n". + &Apache::lonmysql::get_error()); } # get the id - my @Data = - &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'}); - if (@Data) { - $IDs{$table}->{$value}=$Data[0]->[0]; - return $IDs{$table}->{$value}; + my $id = &Apache::lonmysql::get_dbh()->{'mysql_insertid'}; + if (defined($id)) { + $IDs{$table}->{$value}=$id; } else { $logthis->("Unable to retrieve id for $table $fieldname $value"); return undef; @@ -812,6 +841,7 @@ sub backup_tables_as_xml { my %ids = (); sub load_backup_xml_tables { my ($filename,$tables) = @_; + my $dbh = &Apache::lonmysql::get_dbh(); my $xmlfh; open($xmlfh,"cat $filename | gzip -d - |"); if (! defined($xmlfh)) { @@ -839,7 +869,7 @@ sub load_backup_xml_tables { my $resource_id = &xml_get_id('resource',$resource); my $student_id = &xml_get_id('student',$student); my $machine_id = &xml_get_id('machine',$machine); - &xml_store_activity_row(map { defined($_)?qq{'$_'}:'' + &xml_store_activity_row(map { defined($_)?$dbh->quote($_):'' } ($resource_id, $time, $student_id, @@ -891,9 +921,10 @@ sub xml_store_activity_row { sub xml_store_id_table { my ($table,$tabledata) =@_; + my $dbh = &Apache::lonmysql::get_dbh(); if (! &Apache::lonmysql::bulk_store_rows ($tables{$table},2, - [map{[$tabledata->{$_},qq{"$_"}]} keys(%$tabledata)])) { + [map{[$tabledata->{$_},$dbh->quote($_)]} keys(%$tabledata)])) { $logthis->("Error:".&Apache::lonmysql::get_error()); warn "Error:".&Apache::lonmysql::get_error().$/; } @@ -914,9 +945,9 @@ sub xml_store_id_table { ####################################################################### { my @rows; - my $max_row_count = 100; sub store_entry { + my $max_row_count = 100; if (! @_) { undef(@rows); return '';