--- loncom/metadata_database/parse_activity_log.pl 2005/09/19 20:30:37 1.17 +++ loncom/metadata_database/parse_activity_log.pl 2006/04/08 07:10:10 1.24 @@ -2,7 +2,7 @@ # # The LearningOnline Network # -# $Id: parse_activity_log.pl,v 1.17 2005/09/19 20:30:37 albertel Exp $ +# $Id: parse_activity_log.pl,v 1.24 2006/04/08 07:10:10 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -48,14 +48,11 @@ # parameter is not set $logthis is set to &nothing, which does what you # would expect. # - use strict; use DBI; -use lib '/home/httpd/lib/perl/Apache'; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration(); use Apache::lonmysql(); -use lonmysql(); use Time::HiRes(); use Getopt::Long(); use IO::File; @@ -144,6 +141,7 @@ if ($log) { my $sourcefilename; # activity log data my $newfilename; # $sourcefilename will be renamed to this my $error_filename; # Errors in parsing the activity log will be written here +my $chunk_filename; # where we save data we are not going to write to db if ($srcfile) { $sourcefilename = $srcfile; } else { @@ -153,6 +151,8 @@ my $sql_filename = $sourcefilename; $sql_filename =~ s|[^/]*$|activity.log.sql|; my $gz_sql_filename = $sql_filename.'.gz'; # +$chunk_filename = $sourcefilename.".unprocessed_chunks"; +# my $xml_filename = $sourcefilename; my $gz_xml_filename = $xml_filename.'.gz'; if (defined($xmlfile)) { @@ -181,7 +181,7 @@ if (! defined($xmlfile)) { if (!flock(LOCKFILE,LOCK_EX|LOCK_NB)) { warn("Unable to lock $lockfilename. Aborting".$/); # don't call clean_up_and_exit another instance is running and - # we don't want to 'cleanup' there files + # we don't want to 'cleanup' their files exit 6; } @@ -305,6 +305,7 @@ if (!&Apache::lonmysql::verify_sql_conne } $logthis->('SQL connection is up'); +&update_process_name($course.'@'.$domain." loading existing data"); my $missing_table = &check_for_missing_tables(values(%tables)); if (-s $gz_sql_filename && ! 
-s $gz_xml_filename) { my $backup_modification_time = (stat($gz_sql_filename))[9]; @@ -374,8 +375,10 @@ my $error_fh = IO::File->new(">>$error_f ## ## Parse the course log $logthis->('processing course log'); +&update_process_name($course.'@'.$domain." processing new data"); if (-s $newfilename) { my $result = &process_courselog($newfilename,$error_fh,\%tables); + &update_process_name($course.'@'.$domain." backing up new data"); if (! defined($result)) { # Something went wrong along the way... $logthis->('process_courselog returned undef'); @@ -486,8 +489,18 @@ sub process_courselog { if (! defined($host)) { $host = 'unknown'; } my $prevchunk = 'none'; foreach my $chunk (split(/\&\&\&/,$log)) { + if (length($chunk) > 20000) { + # avoid putting too much data into the database + # (usually an uploaded file or something similar) + if (! &savechunk(\$chunk,$timestamp,$host)) { + close(IN); + return undef; + } + next; + } my $warningflag = ''; my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk); + # if (! defined($res) || $res =~ /^\s*$/) { $res = '/adm/roles'; $action = 'LOGIN'; @@ -501,8 +514,12 @@ sub process_courselog { if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) { $warningflag .= 'action'; print $error_fh 'full log entry:'.$log.$/; - print $error_fh 'error on chunk:'.$chunk.$/; - $logthis->('(action) Unable to parse '.$/.$chunk.$/. + print $error_fh 'error on chunk (saving)'.$/; + if (! &savechunk(\$chunk,$timestamp,$host)) { + close(IN); + return undef; + } + $logthis->('(action) Unable to parse chunk'.$/. 'got '. 'time = '.$time.$/. 'res = '.$res.$/. @@ -539,6 +556,20 @@ sub process_courselog { } close IN; return $linecount; + ## + ## + sub savechunk { + my ($chunkref,$timestamp,$host) = @_; + my $chunk = &escape(${$chunkref}); + if (! open(CHUNKFILE,">>$chunk_filename") || + ! 
print CHUNKFILE $timestamp.':'.$host.':'.$chunk.$/) { + # abort + close(CHUNKFILE); + return 0; + } + close(CHUNKFILE); + return 1; + } } @@ -912,9 +943,9 @@ sub xml_store_id_table { ####################################################################### { my @rows; - my $max_row_count = 100; sub store_entry { + my $max_row_count = 100; if (! @_) { undef(@rows); return '';