File:  [LON-CAPA] / loncom / debugging_tools / activity_to_accesscount.pl
Revision 1.2: download - view: text, annotated - select for diffs
Fri Nov 14 19:41:38 2003 UTC (20 years, 5 months ago) by matthew
Branches: MAIN
CVS tags: HEAD
Now updates a target db with access count data.  Keeps track of when a source
activity.log file was parsed (in the target db file) and skips entries already
recorded.  Given how large the activity.log files get, this script can take a
long time to run.

#!/usr/bin/perl -w
#
use strict;
use GDBM_File;

sub unescape {
    my $str=shift;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
    return $str;
}

my %resourceaccess;

sub main {
    my $file=$ARGV[0];
    my ($path) = ($file =~ m:(.*)/activity\.log$:);
    my $target = $path.'/nohist_accesscount.db';
    print STDERR "source: $file\ntarget: $target\n";
    my %accessDB;
    my $accesstime = 0;
    my $starttime = time;
    if (-e $target) {
        if (! tie(%accessDB,'GDBM_File',$target,&GDBM_READER,0640)) {
            warn "Unable to tie to $target";
            return;
        }
        #
        if (exists($accessDB{'tabulated '.$file})) {
            $accesstime = $accessDB{'tabulated '.$file};
        }
        untie(%accessDB);
    }
    #
    my $line;
    open FILEID,'<'.$file;
    my @allaccess;
    print STDERR "Access by resource after $accesstime\n\n";
    my $numlines = 0;
    while ($line=<FILEID>) {
        $numlines++;
        if (int($numlines / 1000)*1000 == $numlines) {
            if (int($numlines / 10000)*10000 == $numlines) {
                print STDERR '*';
            } else {
                print STDERR '.';
            }
            if (int($numlines / 50000)*50000 == $numlines) {
                print STDERR $/;
            }
        }
        next if ($line eq '' || $line !~ /:/);
        chomp($line);
        my ($time,$machine,$what)=split(':',$line);
	$what=&unescape($what);
        my @accesses = split(/(\d{10}):/,$what);
        shift(@accesses);
	while (@accesses) {
            my $date = shift(@accesses);
            next if ($date =~ /\D/ || $date < $accesstime);
            my $access = shift(@accesses);
            next if (! defined($access) || $access eq '' || 
                     ! defined($date)   || $date   eq '');
            $access =~ s/(\&$|^:)//g;
            my ($resource,$who,$domain,$post,@posts)=split(':',$access);
	    if (!$resource || $resource eq '') {
                next; 
            }
            $resource = &unescape($resource);
            if ($resource !~ m:(.*)/(.*)/: || $resource =~ m:/prtspool/:) {
                next;
            }
            if ($resource =~ /___\d+___/) {
                (undef,$resource) = split(/___\d+___/,$resource);
            }
            next if ($resource =~ m:^/(res/adm|adm)/:);
            $resource =~ s:^/?res/::;
            $resourceaccess{$resource}++;            
	}
    }
    print STDERR 'done.  Updating '.$target.$/;
    if (! tie(%accessDB,'GDBM_File',$target,&GDBM_WRCREAT,0640)) {
        warn "Unable to open $target to store data".$/;
        return;
    }
    #
    while (my ($resource,$count) = each(%resourceaccess)) {
        if (exists($accessDB{$resource})) {
            $accessDB{$resource}+=$count;
        } else {
            $accessDB{$resource} = $count;
        }
        print sprintf("%10.0f",$count).':'.$resource."\n";
    }
    $accessDB{'tabulated '.$file} = $starttime;
    untie(%accessDB);
}

main;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>