File:  [LON-CAPA] / loncom / metadata_database / parse_activity_log.pl
Revision 1.1: download - view: text, annotated - select for diffs
Wed Aug 11 18:37:23 2004 UTC (19 years, 10 months ago) by matthew
Branches: MAIN
CVS tags: HEAD
Moved from modules/matthew to main LONCAPA directory structure.

    1: #!/usr/bin/perl
    2: #
    3: # The LearningOnline Network
    4: #
    5: # $Id: parse_activity_log.pl,v 1.1 2004/08/11 18:37:23 matthew Exp $
    6: #
    7: # Copyright Michigan State University Board of Trustees
    8: #
    9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
   10: #
   11: # LON-CAPA is free software; you can redistribute it and/or modify
   12: # it under the terms of the GNU General Public License as published by
   13: # the Free Software Foundation; either version 2 of the License, or
   14: # (at your option) any later version.
   15: #
   16: # LON-CAPA is distributed in the hope that it will be useful,
   17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   19: # GNU General Public License for more details.
   20: #
   21: # You should have received a copy of the GNU General Public License
   22: # along with LON-CAPA; if not, write to the Free Software
   23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   24: #
   25: # /home/httpd/html/adm/gpl.txt
   26: #
   27: # http://www.lon-capa.org/
   28: #
   29: ###############################################################################
   30: #
   31: # Expects
   32: #
   33: # ../key/$class.key - key file $username:$keynumber
   34: # ../rawdata/$class.log - log file
   35: # ../rawdata/$class.seq - sequence file
   36: # ../data writable
   37: # ------------------------------------------------------------------ Course log
   38: 
   39: #
   40: # Exit codes
   41: #   0     Everything is okay
   42: #   1     Another copy is running on this course
   43: #   2     Activity log does not exist
   44: #   3     Unable to connect to database
   45: #   4     Unable to create database tables
   46: #   5     Unspecified error?
   47: #
   48: 
   49: use strict;
   50: use DBI;
   51: use lib '/home/httpd/lib/perl/Apache';
   52: use lonmysql();
   53: use Time::HiRes();
   54: use Getopt::Long();
   55: 
   56: #
   57: # Determine parameters
   58: my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup);
   59: &Getopt::Long::GetOptions( "course=s"  => \$course,
   60:                            "domain=s"  => \$domain,
   61:                            "help"      => \$help,
   62:                            "logfile=s" => \$file,
   63:                            "timerun"   => \$time_run,
   64:                            "nocleanup" => \$nocleanup,
   65:                            "drop"      => \$drop);
   66: if (! defined($course) || $help) {
   67:     print<<USAGE;
   68: parse_activity_log.pl
   69: 
   70: Process a lon-capa activity log into a database.
   71: Parameters:
   72:    course             Required
   73:    domain             Optional
   74:    drop               optional   if present, drop all course 
   75:                                  specific activity log tables.
   76:    file               optional   Specify the file to parse, including path
   77:    time               optional   if present, print out timing data
   78:    nocleanup          optional   if present, do not remove old files
   79: Examples:
   80:   $0 -course=123456abcdef -domain=msu
   81:   $0 -course=123456abcdef -file=activity.log
   82: USAGE
   83:     exit;
   84: }
   85: 
   86: ##
   87: ## Set up timing code
   88: ##
   89: my $time_this = \&nothing;
   90: if ($time_run) {
   91:     $time_this = \&time_action;
   92: }
   93: my $initial_time = Time::HiRes::time;
   94: 
   95: ##
   96: ## Read in configuration parameters
   97: ##
   98: my %perlvar;
   99: &initialize_configuration();
  100: if (! defined($domain) || $domain eq '') {
  101:     $domain = $perlvar{'lonDefDomain'};
  102: }
  103: &update_process_name($course.'@'.$domain);
  104: 
  105: ##
  106: ## Determine filenames
  107: ##
  108: my $sourcefilename;   # activity log data
  109: my $newfilename;      # $sourcefilename will be renamed to this
  110: my $sql_filename;  # the mysql backup data file name.
  111: if ($file) {
  112:     $sourcefilename = $file;
  113: } else {
  114:     $sourcefilename = &get_filename($course,$domain);
  115: }
  116: $sql_filename = $sourcefilename;
  117: $sql_filename =~ s|[^/]*$|activitylog.sql|;
  118: 
  119: ##
  120: ## There will only be a $newfilename file if a copy of this program is already
  121: ## running.
  122: my $newfilename = $sourcefilename.'.processing';
  123: if (-e $newfilename) {
  124:     warn "$newfilename exists";
  125:     exit 2;
  126: }
  127: 
  128: if (-e $sourcefilename) {
  129:     rename($sourcefilename,$newfilename);
  130: }
  131: 
  132: ##
  133: ## Table definitions
  134: ##
  135: my $prefix = $course.'_'.$domain.'_';
  136: my $student_table = $prefix.'students';
  137: my $student_table_def = 
  138: { id => $student_table,
  139:   permanent => 'no',
  140:   columns => [
  141:               { name => 'student_id',
  142:                 type => 'MEDIUMINT UNSIGNED',
  143:                 restrictions => 'NOT NULL',
  144:                 auto_inc => 'yes', },
  145:               { name => 'student',
  146:                 type => 'VARCHAR(100) BINARY',
  147:                 restrictions => 'NOT NULL', },
  148:               ],
  149:       'PRIMARY KEY' => ['student_id',],
  150:           };
  151: 
  152: my $res_table = $prefix.'resource';
  153: my $res_table_def = 
  154: { id => $res_table,
  155:   permanent => 'no',
  156:   columns => [{ name => 'res_id',
  157:                 type => 'MEDIUMINT UNSIGNED',
  158:                 restrictions => 'NOT NULL',
  159:                 auto_inc     => 'yes', },
  160:               { name => 'resource',
  161:                 type => 'MEDIUMTEXT',
  162:                 restrictions => 'NOT NULL'},
  163:               ],
  164:   'PRIMARY KEY' => ['res_id'],
  165: };
  166: 
  167: my $action_table = $prefix.'actions';
  168: my $action_table_def =
  169: { id => $action_table,
  170:   permanent => 'no',
  171:   columns => [{ name => 'action_id',
  172:                 type => 'MEDIUMINT UNSIGNED',
  173:                 restrictions => 'NOT NULL',
  174:                 auto_inc     => 'yes', },
  175:               { name => 'action',
  176:                 type => 'VARCHAR(100)',
  177:                 restrictions => 'NOT NULL'},
  178:               ],
  179:   'PRIMARY KEY' => ['action_id',], 
  180: };
  181: 
  182: my $machine_table = $prefix.'machine_table';
  183: my $machine_table_def =
  184: { id => $machine_table,
  185:   permanent => 'no',
  186:   columns => [{ name => 'machine_id',
  187:                 type => 'MEDIUMINT UNSIGNED',
  188:                 restrictions => 'NOT NULL',
  189:                 auto_inc     => 'yes', },
  190:               { name => 'machine',
  191:                 type => 'VARCHAR(100)',
  192:                 restrictions => 'NOT NULL'},
  193:               ],
  194:   'PRIMARY KEY' => ['machine_id',],
  195:  };
  196: 
  197: my $activity_table = $prefix.'activity';
  198: my $activity_table_def = 
  199: { id => $activity_table,
  200:   permanent => 'no',
  201:   columns => [
  202:               { name => 'res_id',
  203:                 type => 'MEDIUMINT UNSIGNED',
  204:                 restrictions => 'NOT NULL',},
  205:               { name => 'time',
  206:                 type => 'DATETIME',
  207:                 restrictions => 'NOT NULL',},
  208:               { name => 'student_id',
  209:                 type => 'VARCHAR(100) BINARY',
  210:                 restrictions => 'NOT NULL',},
  211:               { name => 'action_id',
  212:                 type => 'VARCHAR(100) BINARY',
  213:                 restrictions => 'NOT NULL',},
  214:               { name => 'idx',                # This is here in case a student
  215:                 type => 'MEDIUMINT UNSIGNED', # has multiple submissions during
  216:                 restrictions => 'NOT NULL',   # one second.  It happens, trust
  217:                 auto_inc     => 'yes', },     # me.
  218:               { name => 'machine_id',
  219:                 type => 'VARCHAR(100) BINARY',
  220:                 restrictions => 'NOT NULL',},
  221:               { name => 'action_values',
  222:                 type => 'MEDIUMTEXT', },
  223:               ], 
  224:       'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
  225: };
  226: my @Activity_Tables = ($student_table_def,$res_table_def,
  227:                        $action_table_def,$machine_table_def,
  228:                        $activity_table_def);
  229: 
  230: 
  231: ##
  232: ## End of table definitions
  233: ##
  234: 
  235: # 
  236: &Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'},
  237:                                                $perlvar{'lonSqlAccess'});
  238: if (!&Apache::lonmysql::verify_sql_connection()) {
  239:     warn "Unable to connect to MySQL database.";
  240:     exit 3;
  241: }
  242: 
  243: if ($drop) { &drop_tables(); }
  244: if (-e $sql_filename) {
  245:     # if ANY one of the tables does not exist, load the tables from the
  246:     # backup.
  247:     my @Current_Tables = &Apache::lonmysql::tables_in_db();
  248:     my %Found;
  249:     foreach my $tablename (@Current_Tables) {
  250:         foreach my $table (@Activity_Tables) {
  251:             if ($tablename eq  $table->{'id'}) {
  252:                 $Found{$tablename}++;
  253:             }
  254:         }
  255:     }
  256:     foreach my $table (@Activity_Tables) {    
  257:         if (! $Found{$table->{'id'}}) {
  258:             $time_this->();
  259:             &load_backup_tables($sql_filename);
  260:             $time_this->('load backup tables');
  261:             last;
  262:         }
  263:     }
  264: }
  265: 
  266: # create_tables does not complain if the tables already exist
  267: if (! &create_tables()) {
  268:     warn "Unable to create tables";
  269:     exit 4;
  270: }
  271: 
  272: &read_id_tables();
  273: 
  274: ##
  275: ## Do the main bit of work
  276: if (-e $newfilename) {
  277:     my $result = &process_courselog($newfilename);
  278:     if (! defined($result)) {
  279:         # Something went wrong along the way...
  280:         exit 5;
  281:     } elsif ($result > 0) {
  282:         $time_this->();
  283:         &backup_tables($sql_filename);
  284:         $time_this->('write backup tables');
  285:     }
  286: }
  287: 
  288: ##
  289: ## Clean up the filesystem
  290: ##
  291: &Apache::lonmysql::disconnect_from_db();
  292: unlink($newfilename) if (! $nocleanup);
  293: 
  294: if ($time_run) {
  295:     print "Overall time: ".(Time::HiRes::time - $initial_time).$/;
  296:     print &outputtimes();
  297: }
  298: 
  299: exit 0;   # Everything is okay, so end here before it gets worse.
  300: 
  301: ########################################################
  302: ########################################################
  303: ##
  304: ##                 Process Course Log
  305: ##
  306: ########################################################
  307: ########################################################
  308: #
  309: # Returns the number of lines in the activity.log file that were processed.
  310: sub process_courselog {
  311:     my ($inputfile) = @_;
  312:     if (! open(IN,$inputfile)) {
  313:         warn "Unable to open '$inputfile' for reading";
  314:         return undef;
  315:     }
  316:     my ($linecount,$insertcount);
  317:     my $dbh = &Apache::lonmysql::get_dbh();
  318:     #
  319:     # Timing variables
  320:     my @RowData;
  321:     while (my $line=<IN>){
  322:         # last if ($linecount > 1000);
  323:         #
  324:         # Bulk storage variables
  325:         $time_this->();
  326:         chomp($line);
  327:         $linecount++;
  328:         # print $linecount++.$/;
  329:         my ($timestamp,$host,$log)=split(/\:/,$line,3);
  330:         $time_this->('splitline');
  331:         #
  332:         # $log has the actual log entries; currently still escaped, and
  333:         # %26(timestamp)%3a(url)%3a(user)%3a(domain)
  334:         # then additionally
  335:         # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
  336:         # or
  337:         # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
  338:         #
  339:         # get delimiter between timestamped entries to be &&&
  340:         $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
  341:         $log = &unescape($log);
  342:         $time_this->('translate_and_unescape');
  343:         # now go over all log entries 
  344:         my $machine_id = &get_id($machine_table,'machine',$host);
  345:         foreach (split(/\&\&\&/,$log)) {
  346:             $time_this->();
  347: 	    my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_);
  348:             if (! defined($res) || $res =~ /^\s*$/) {
  349:                 $res = '/adm/roles';
  350:                 $action = 'log in';
  351:             }
  352:             if ($res =~ m|^/prtspool/|) {
  353:                 $res = '/prtspool/';
  354:             }
  355:             if (! defined($action) || $action eq '') {
  356:                 $action = 'view';
  357:             }
  358:             my $student = $uname.':'.$udom;
  359:             $time_this->('split_and_error_check');
  360:             my $student_id = &get_id($student_table,'student',$student);
  361:             my $res_id = &get_id($res_table,'resource',$res);
  362:             my $action_id = &get_id($action_table,'action',$action);
  363:             my $sql_time = &Apache::lonmysql::sqltime($time);
  364:             my $values = $dbh->quote(join('',@values));
  365:             $time_this->('get_ids');
  366:             #
  367:             my $row = [$res_id,
  368:                        qq{'$sql_time'},
  369:                        $student_id,
  370:                        $action_id,
  371:                        qq{''},        # idx
  372:                        $machine_id,
  373:                        $values];
  374:             push(@RowData,$row);
  375:             $time_this->('push_row');
  376:             #
  377:         }
  378:         $time_this->();
  379:         if ($linecount % 100 == 0) {
  380:             my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
  381:                                                             undef,
  382:                                                             \@RowData);
  383:             $time_this->('bulk_store_rows');
  384:             if (! defined($result)) {
  385:                 warn "Error occured during insert.".
  386:                     &Apache::lonmysql::get_error();
  387:             }
  388:             undef(@RowData);
  389:         }
  390:     }
  391:     if (@RowData) {
  392:         $time_this->();
  393:         my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
  394:                                                         undef,
  395:                                                         \@RowData);
  396:         $time_this->('bulk_store_rows');
  397:         if (! defined($result)) {
  398:             warn "Error occured during insert.".
  399:                 &Apache::lonmysql::get_error();
  400:         }
  401:         undef(@RowData);
  402:     }
  403:     close IN;
  404: #    print "Number of lines: ".$linecount.$/;
  405: #    print "Number of inserts: ".$insertcount.$/;
  406:     return $linecount;
  407: }
  408: 
  409: ##
  410: ## Timing routines
  411: ##
  412: {
  413:     my %Timing;
  414:     my $starttime;
  415: 
  416: sub nothing {
  417:     return;
  418: }
  419: 
  420: sub time_action {
  421:     my ($key) = @_;
  422:     if (defined($key)) {
  423:         $Timing{$key}+=Time::HiRes::time-$starttime;
  424:         $Timing{'count_'.$key}++;
  425:     }
  426:     $starttime = Time::HiRes::time;
  427: }
  428: 
  429: sub outputtimes {
  430:     my $Str;
  431:     if ($time_run) {
  432:         $Str = "Timing Data:".$/;
  433:         while (my($k,$v) = each(%Timing)) {
  434:             next if ($k =~ /^count_/);
  435:             my $count = $Timing{'count_'.$k};
  436:             $Str .= 
  437:                 '  '.sprintf("%25.25s",$k).
  438:                 '  '.sprintf('% 8d',$count).
  439:                 '  '.sprintf('%12.5f',$v).$/;
  440:         }
  441:     }
  442:     return $Str;
  443: }
  444: 
  445: }
  446: 
  447: 
  448: ##
  449: ## Use mysqldump to store backups of the tables
  450: ##
  451: sub backup_tables {
  452:     my ($sql_filename) = @_;
  453:     my $command = qq{mysqldump --opt loncapa };
  454:                              
  455:     foreach my $table (@Activity_Tables) {
  456:         my $tablename = $table->{'id'};
  457:         $command .= $tablename.' ';
  458:     }
  459:     $command .= '>'.$sql_filename;
  460:     warn $command.$/;
  461:     system($command);
  462: }
  463: 
  464: ##
  465: ## Load in mysqldumped files
  466: ##
  467: sub load_backup_tables {
  468:     my ($sql_filename) = @_;
  469:     return undef if (! -e $sql_filename);
  470:     # Check for .my.cnf
  471:     my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa';
  472:     warn $command.$/;
  473:     system($command);
  474: }
  475: 
  476: ##
  477: ## 
  478: ##
  479: sub initialize_configuration {
  480:     # Fake it for now:
  481:     $perlvar{'lonSqlUser'} = 'www';
  482:     $perlvar{'lonSqlAccess'} = 'localhostkey';
  483:     $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers';
  484:     $perlvar{'lonDefDomain'} = '103';
  485: }
  486: 
  487: sub update_process_name {
  488:     my ($text) = @_;
  489:     $0 = 'parse_activity_log.pl: '.$text;
  490: }
  491: 
  492: sub get_filename {
  493:     my ($course,$domain) = @_;
  494:     my ($a,$b,$c,undef) = split('',$course,4);
  495:     return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
  496: }
  497: 
  498: sub create_tables {
  499:     foreach my $table (@Activity_Tables) {
  500:         my $table_id = &Apache::lonmysql::create_table($table);
  501:         if (! defined($table_id)) {
  502:             warn "Unable to create table ".$table->{'id'}.$/;
  503:             warn &Apache::lonmysql::build_table_creation_request($table).$/;
  504:             return 0;
  505:         }
  506:     }
  507:     return 1;
  508: }
  509: 
  510: sub drop_tables {
  511:     foreach my $table (@Activity_Tables) {
  512:         my $table_id = $table->{'id'};
  513:         &Apache::lonmysql::drop_table($table_id);
  514:     }
  515: }
  516: 
  517: #################################################################
  518: #################################################################
  519: ##
  520: ## Database item id code
  521: ##
  522: #################################################################
  523: #################################################################
  524: { # Scoping for ID lookup code
  525:     my %IDs;
  526: 
  527: sub read_id_tables {
  528:     foreach my $table (@Activity_Tables) {
  529:         my @Data = &Apache::lonmysql::get_rows($table->{'id'});
  530:         foreach my $row (@Data) {
  531:             $IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
  532:         }
  533:     }
  534: }
  535: 
  536: sub get_id {
  537:     my ($table,$fieldname,$value) = @_;
  538:     if (exists($IDs{$table}->{$value})) {
  539:         return $IDs{$table}->{$value};
  540:     } else {
  541:         # insert into the table - if the item already exists, that is
  542:         # okay.
  543:         my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
  544:         if (! defined($result)) {
  545:             warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
  546:         }
  547:         # get the id
  548:         my @Data = 
  549:             &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
  550:         if (@Data) {
  551:             $IDs{$table}->{$value}=$Data[0]->[0];
  552:             return $IDs{$table}->{$value};
  553:         } else {
  554:             warn "Unable to retrieve id for $table $fieldname $value".$/;
  555:             return undef;
  556:         }
  557:     }
  558: }
  559: 
  560: } # End of ID scoping
  561: 
  562: 
  563: ###############################################################
  564: ###############################################################
  565: ##
  566: ##   The usual suspects
  567: ##
  568: ###############################################################
  569: ###############################################################
  570: sub escape {
  571:     my $str=shift;
  572:     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
  573:     return $str;
  574: }
  575: 
  576: sub unescape {
  577:     my $str=shift;
  578:     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
  579:     return $str;
  580: }
  581: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>