File:  [LON-CAPA] / loncom / debugging_tools / activity_to_accesscount.pl
Revision 1.2: download - view: text, annotated - select for diffs
Fri Nov 14 19:41:38 2003 UTC (20 years, 6 months ago) by matthew
Branches: MAIN
CVS tags: HEAD
Now updates a target db with access count data.  Keeps track of when a source
activity.log file was parsed (in the target db file) and skips entries already
recorded.  Given how large the activity.log files get, this script can take a
long time to run.

    1: #!/usr/bin/perl -w
    2: #
    3: use strict;
    4: use GDBM_File;
    5: 
    6: sub unescape {
    7:     my $str=shift;
    8:     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    9:     return $str;
   10: }
   11: 
   12: my %resourceaccess;
   13: 
   14: sub main {
   15:     my $file=$ARGV[0];
   16:     my ($path) = ($file =~ m:(.*)/activity\.log$:);
   17:     my $target = $path.'/nohist_accesscount.db';
   18:     print STDERR "source: $file\ntarget: $target\n";
   19:     my %accessDB;
   20:     my $accesstime = 0;
   21:     my $starttime = time;
   22:     if (-e $target) {
   23:         if (! tie(%accessDB,'GDBM_File',$target,&GDBM_READER,0640)) {
   24:             warn "Unable to tie to $target";
   25:             return;
   26:         }
   27:         #
   28:         if (exists($accessDB{'tabulated '.$file})) {
   29:             $accesstime = $accessDB{'tabulated '.$file};
   30:         }
   31:         untie(%accessDB);
   32:     }
   33:     #
   34:     my $line;
   35:     open FILEID,'<'.$file;
   36:     my @allaccess;
   37:     print STDERR "Access by resource after $accesstime\n\n";
   38:     my $numlines = 0;
   39:     while ($line=<FILEID>) {
   40:         $numlines++;
   41:         if (int($numlines / 1000)*1000 == $numlines) {
   42:             if (int($numlines / 10000)*10000 == $numlines) {
   43:                 print STDERR '*';
   44:             } else {
   45:                 print STDERR '.';
   46:             }
   47:             if (int($numlines / 50000)*50000 == $numlines) {
   48:                 print STDERR $/;
   49:             }
   50:         }
   51:         next if ($line eq '' || $line !~ /:/);
   52:         chomp($line);
   53:         my ($time,$machine,$what)=split(':',$line);
   54: 	$what=&unescape($what);
   55:         my @accesses = split(/(\d{10}):/,$what);
   56:         shift(@accesses);
   57: 	while (@accesses) {
   58:             my $date = shift(@accesses);
   59:             next if ($date =~ /\D/ || $date < $accesstime);
   60:             my $access = shift(@accesses);
   61:             next if (! defined($access) || $access eq '' || 
   62:                      ! defined($date)   || $date   eq '');
   63:             $access =~ s/(\&$|^:)//g;
   64:             my ($resource,$who,$domain,$post,@posts)=split(':',$access);
   65: 	    if (!$resource || $resource eq '') {
   66:                 next; 
   67:             }
   68:             $resource = &unescape($resource);
   69:             if ($resource !~ m:(.*)/(.*)/: || $resource =~ m:/prtspool/:) {
   70:                 next;
   71:             }
   72:             if ($resource =~ /___\d+___/) {
   73:                 (undef,$resource) = split(/___\d+___/,$resource);
   74:             }
   75:             next if ($resource =~ m:^/(res/adm|adm)/:);
   76:             $resource =~ s:^/?res/::;
   77:             $resourceaccess{$resource}++;            
   78: 	}
   79:     }
   80:     print STDERR 'done.  Updating '.$target.$/;
   81:     if (! tie(%accessDB,'GDBM_File',$target,&GDBM_WRCREAT,0640)) {
   82:         warn "Unable to open $target to store data".$/;
   83:         return;
   84:     }
   85:     #
   86:     while (my ($resource,$count) = each(%resourceaccess)) {
   87:         if (exists($accessDB{$resource})) {
   88:             $accessDB{$resource}+=$count;
   89:         } else {
   90:             $accessDB{$resource} = $count;
   91:         }
   92:         print sprintf("%10.0f",$count).':'.$resource."\n";
   93:     }
   94:     $accessDB{'tabulated '.$file} = $starttime;
   95:     untie(%accessDB);
   96: }
   97: 
   98: main;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>