--- loncom/metadata_database/searchcat.pl 2003/02/03 13:42:16 1.29 +++ loncom/metadata_database/searchcat.pl 2003/02/03 17:01:55 1.30 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.29 2003/02/03 13:42:16 albertel Exp $ +# $Id: searchcat.pl,v 1.30 2003/02/03 17:01:55 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -374,13 +374,22 @@ sub escape ($) B - evaluate and store dynamic metadata. -Dynamic metadata is stored in a nohist_resevaldata GDBM database. -The only thing that this subroutine really makes happen is adjusting -a 'count' value inside the F as well -as updating F with information from -F. +Returns the dynamic metadata for an author, which will later be added to the +MySQL database (not yet implemented). + +The vast majority of entries in F, which contains +the dynamic metadata for an author's resources, are "count", which make +the file really large and evaluation really slow. + +While computing the current value of all dynamic metadata +for later insertion into the MySQL metadata cache (not yet implemented), +this routine also simply adds up all "count" type fields and replaces them by +one new field with the to-date count. + +Only after successful completion of working with one author, copy new file to +original file. Copy to tmp-"new"-db-file was necessary since db-file size +would not shrink after "delete" of key. -It may need optmization, but since it gets called once a week. . . =over 4 Parameters: @@ -399,16 +408,14 @@ Returns: =cut -sub build_on_the_fly_dynamic_metadata ($) - { - # some elements in here maybe non-obvious +sub build_on_the_fly_dynamic_metadata { # Need to compute the user's directory. - my $url = &declutter(shift(@_)); - $url =~ s/\.meta$//; - my %returnhash = (); - my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!); - my $user_directory = &construct_path_to_user_directory($adomain,$aauthor); + my $url=&declutter(shift); + $url=~s/\.meta$//; + my %returnhash=(); + my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); + my $user_directory=&construct_path_to_user_directory($adomain,$aauthor); # Attempt a GDBM database instantiation inside users directory and proceed. if ((tie(%evaldata,'GDBM_File', @@ -416,14 +423,13 @@ sub build_on_the_fly_dynamic_metadata ($ '/nohist_resevaldata.db',&GDBM_READER(),0640)) && (tie(%newevaldata,'GDBM_File', $user_directory. - '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) - { + '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { # For different variables, track the running sum and counts. - my %sum = (); - my %cnt = (); + my %sum=(); + my %cnt=(); # Define computed items as a sum (add) or an average (avg) or a raw - # count (cnt) or 'app'? + # count (cnt) or append (app)? my %listitems=('count' => 'add', 'course' => 'add', 'avetries' => 'avg', @@ -439,93 +445,70 @@ sub build_on_the_fly_dynamic_metadata ($ ); # Untaint the url and use as part of a regular expression. - my $regexp = $url; - $regexp =~ s/(\W)/\\$1/g; - $regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs - - # Check existing nohist database for this url. - # this is modfying the 'count' entries - # and copying all othe entries over - foreach (keys %evaldata) - { - my $key = &unescape($_); - if ($key =~ /$regexp/) # If url-based entry exists. - { - my $ctype = $1; # Set to specific category type. + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; #' emacs + + # Check existing database for this author. + # this is modifying the 'count' entries + # and copying all other entries over + + foreach (keys %evaldata) { + my $key=&unescape($_); + if ($key=~/$regexp/) { # If url-based entry exists. + my $ctype=$1; # Set to specific category type. # Do an increment for this category type. - if (defined($cnt{$ctype})) - { + if (defined($cnt{$ctype})) { $cnt{$ctype}++; - } - else - { - $cnt{$ctype} = 1; - } - unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN? - { + } else { + $cnt{$ctype}=1; + } + unless ($listitems{$ctype} eq 'app') { # append comments # Increment the sum based on the evaluated data in the db. - if (defined($sum{$ctype})) - { - $sum{$ctype} += $evaldata{$_}; - } - else - { - $sum{$ctype} = $evaldata{$_}; - } - } - else # 'app' mode, means to use '
' as a separator - { - if (defined($sum{$ctype})) - { - if ($evaldata{$_}) - { - $sum{$ctype} .= '
'.$evaldata{$_}; - } - } - else - { - $sum{$ctype} = ''.$evaldata{$_}; - } - } - if ($ctype ne 'count') - { + if (defined($sum{$ctype})) { + $sum{$ctype}+=$evaldata{$_}; + } else { + $sum{$ctype}=$evaldata{$_}; + } + } else { # 'app' mode, means to use '
' as a separator + if (defined($sum{$ctype})) { + if ($evaldata{$_}) { + $sum{$ctype}.='
'.$evaldata{$_}; + } + } else { + $sum{$ctype}=''.$evaldata{$_}; + } + } + if ($ctype ne 'count') { # this is copying all data except 'count' attributes - $newevaldata{$_} = $evaldata{$_}; - } - } - } - - # the only other time this loop is useful is for the 'count' hash - # element - foreach (keys %cnt) - { - if ($listitems{$_} eq 'avg') - { - $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; - } - elsif ($listitems{$_} eq 'cnt') - { - $returnhash{$_} = $cnt{$_}; - } - else - { - $returnhash{$_} = $sum{$_}; - } - } - - # seems to be doing something useful - if ($returnhash{'count'}) - { - my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey} = $returnhash{'count'}; - } + $newevaldata{$_}=$evaldata{$_}; + } + } + } + + # these values will be returned (currently still unused) + foreach (keys %cnt) { + if ($listitems{$_} eq 'avg') { + $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; + } elsif ($listitems{$_} eq 'cnt') { + $returnhash{$_}=$cnt{$_}; + } else { + $returnhash{$_}=$sum{$_}; + } + } + + # generate new count key in resevaldata, insert sum + if ($returnhash{'count'}) { + my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; + $newevaldata{$newkey}=$returnhash{'count'}; + } untie(%evaldata); # Close/release the original nohist database. untie(%newevaldata); # Close/release the new nohist database. - } - return(%returnhash); - } + } + return %returnhash; +} =pod