--- loncom/metadata_database/searchcat.pl 2004/04/08 15:57:32 1.55 +++ loncom/metadata_database/searchcat.pl 2004/04/09 22:04:53 1.56 @@ -2,7 +2,7 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.55 2004/04/08 15:57:32 matthew Exp $ +# $Id: searchcat.pl,v 1.56 2004/04/09 22:04:53 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -71,12 +71,51 @@ use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; use LONCAPA::lonmetadata; +use Getopt::Long; use IO::File; use HTML::TokeParser; use GDBM_File; use POSIX qw(strftime mktime); + use File::Find; +# +# Set up configuration options +my ($simulate,$oneuser,$help,$verbose,$logfile,$debug); +GetOptions ( + 'help' => \$help, + 'simulate' => \$simulate, + 'only=s' => \$oneuser, + 'verbose=s' => \$verbose, + 'debug' => \$debug, + ); + +if ($help) { + print <<"ENDHELP"; +$0 +Rebuild and update the LON-CAPA metadata database. +Options: + -help Print this help + -simulate Do not modify the database. + -only=user Only compute for the given user. Implies -simulate + -verbose=val Sets logging level, val must be a number + -debug Turns on debugging output +ENDHELP + exit 0; +} + +if (! defined($debug)) { + $debug = 0; +} + +if (! defined($verbose)) { + $verbose = 0; +} + +if (defined($oneuser)) { + $simulate=1; +} + ## ## Use variables for table names so we can test this routine a little easier my $oldname = 'metadata'; @@ -104,13 +143,18 @@ if ($wwwid!=$<) { # # Let people know we are running open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log'); -print LOG '==== Searchcat Run '.localtime()."====\n"; +&log(0,'==== Searchcat Run '.localtime()."===="); +if ($debug) { + &log(0,'simulating') if ($simulate); + &log(0,'only processing user '.$oneuser) if ($oneuser); + &log(0,'verbosity level = '.$verbose); +} # # Connect to database my $dbh; if (! ($dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'}, { RaiseError =>0,PrintError=>0}))) { - print LOG "Cannot connect to database!\n"; + &log(0,"Cannot connect to database!"); die "MySQL Error: Cannot connect to database!\n"; } # This can return an error and still be okay, so we do not bother checking. @@ -122,24 +166,29 @@ my $request = &LONCAPA::lonmetadata::cre $dbh->do($request); if ($dbh->err) { $dbh->disconnect(); - print LOG "\nMySQL Error Create: ".$dbh->errstr."\n"; + &log(0,"MySQL Error Create: ".$dbh->errstr); die $dbh->errstr; } # # find out which users we need to examine -opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}"); +my $dom = $perlvar{'lonDefDomain'}; +opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$dom"); my @homeusers = grep { - &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_"); + &ishome("$perlvar{'lonDocRoot'}/res/$dom/$_"); } grep { !/^\.\.?$/; } readdir(RESOURCES); closedir RESOURCES; # +if ($oneuser) { + @homeusers=($oneuser); +} +# # Loop through the users foreach my $user (@homeusers) { - print LOG "=== User: ".$user."\n"; - my $prodir=&propath($perlvar{'lonDefDomain'},$user); + &log(0,"=== User: ".$user); + &process_dynamic_metadata($user,$dom); # # Use File::Find to get the files we need to read/modify find( @@ -152,18 +201,23 @@ foreach my $user (@homeusers) { } # # Rename the table -$dbh->do('DROP TABLE IF EXISTS '.$oldname); -if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { - print LOG "MySQL Error Rename: ".$dbh->errstr."\n"; - die $dbh->errstr; +if (! $simulate) { + $dbh->do('DROP TABLE IF EXISTS '.$oldname); + if (! $dbh->do('RENAME TABLE '.$newname.' TO '.$oldname)) { + &log(0,"MySQL Error Rename: ".$dbh->errstr); + die $dbh->errstr; + } else { + &log(1,"MySQL table rename successful."); + } } + if (! $dbh->disconnect) { - print LOG "MySQL Error Disconnect: ".$dbh->errstr."\n"; + &log(0,"MySQL Error Disconnect: ".$dbh->errstr); die $dbh->errstr; } ## ## Finished! -print LOG "==== Searchcat completed ".localtime()." ====\n"; +&log(0,"==== Searchcat completed ".localtime()." ===="); close(LOG); &write_type_count(); @@ -171,6 +225,23 @@ close(LOG); exit 0; +## +## Status logging routine. Inputs: $level, $message +## +## $level 0 should be used for normal output and error messages +## +## $message does not need to end with \n. In the case of errors +## the message should contain as much information as possible to +## help in diagnosing the problem. +## +sub log { + my ($level,$message)=@_; + $level = 0 if (! defined($level)); + if ($verbose >= $level) { + print LOG $message.$/; + } +} + ######################################################## ######################################################## ### ### @@ -205,10 +276,12 @@ sub only_meta_files { sub print_filename { my ($file) = $_; my $fullfilename = $File::Find::name; - if (-d $file) { - print LOG " Got directory ".$fullfilename."\n"; - } else { - print LOG " Got file ".$fullfilename."\n"; + if ($debug) { + if (-d $file) { + &log(5," Got directory ".$fullfilename); + } else { + &log(5," Got file ".$fullfilename); + } } $_=$file; } @@ -217,16 +290,18 @@ sub log_metadata { my ($file) = $_; my $fullfilename = $File::Find::name; return if (-d $fullfilename); # No need to do anything here for directories - print LOG $fullfilename."\n"; - my $ref=&metadata($fullfilename); - if (! defined($ref)) { - print LOG " No data\n"; - return; - } - while (my($key,$value) = each(%$ref)) { - print LOG " ".$key." => ".$value."\n"; + if ($debug) { + &log(6,$fullfilename); + my $ref=&metadata($fullfilename); + if (! defined($ref)) { + &log(6," No data"); + return; + } + while (my($key,$value) = each(%$ref)) { + &log(6," ".$key." => ".$value); + } + &count_copyright($ref->{'copyright'}); } - &count_copyright($ref->{'copyright'}); $_=$file; } @@ -237,26 +312,26 @@ sub log_metadata { ## Only input is the filename in $_. sub process_meta_file { my ($file) = $_; - my $filename = $File::Find::name; + my $filename = $File::Find::name; # full filename return if (-d $filename); # No need to do anything here for directories # - print LOG $filename."\n"; + &log(3,$filename) if ($debug); # my $ref=&metadata($filename); # # $url is the original file url, not the metadata file my $url='/res/'.&declutter($filename); $url=~s/\.meta$//; - print LOG " ".$url."\n"; + &log(3," ".$url) if ($debug); # # Ignore some files based on their metadata if ($ref->{'obsolete'}) { - print LOG "obsolete\n"; + &log(3,"obsolete") if ($debug); return; } &count_copyright($ref->{'copyright'}); if ($ref->{'copyright'} eq 'private') { - print LOG "private\n"; + &log(3,"private") if ($debug); return; } # @@ -264,8 +339,10 @@ sub process_meta_file { my %dyn; if ($url=~ m:/default$:) { $url=~ s:/default$:/:; + &log(3,"Skipping dynamic data") if ($debug); } else { - # %dyn=&dynamicmeta($url); + &log(3,"Retrieving dynamic data") if ($debug); + %dyn=&get_dynamic_metadata($url); &count_type($url); } # @@ -276,17 +353,17 @@ sub process_meta_file { %dyn, 'url'=>$url, 'version'=>'current'); - my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, - \%Data); - if ($err) { - print LOG "\nMySQL Error Insert: ".$err."\n"; - die $err; - } - if ($count < 1) { - print LOG "Unable to insert record into MySQL database for $url\n"; - die "Unable to insert record into MySQl database for $url"; - } else { - print LOG "Count = ".$count."\n"; + if (! $simulate) { + my ($count,$err) = &LONCAPA::lonmetadata::store_metadata($dbh,$newname, + \%Data); + if ($err) { + &log(0,"MySQL Error Insert: ".$err); + die $err; + } + if ($count < 1) { + &log(0,"Unable to insert record into MySQL database for $url"); + die "Unable to insert record into MySQl database for $url"; + } } # # Reset $_ before leaving @@ -366,122 +443,184 @@ sub getfile { ### ### ######################################################## ######################################################## -sub dynamicmeta { - my $url = &declutter(shift()); - $url =~ s/\.meta$//; - my %data = ('count' => 0, - 'course' => 0, - 'course_list' => '', - 'avetries' => 'NULL', - 'avetries_list' => '', - 'stdno' => 0, - 'stdno_list' => '', - 'usage' => 0, - 'usage_list' => '', - 'goto' => 0, - 'goto_list' => '', - 'comefrom' => 0, - 'comefrom_list' => '', - 'difficulty' => 'NULL', - 'difficulty_list' => '', - 'sequsage' => '0', - 'sequsage_list' => '', - 'clear' => 'NULL', - 'technical' => 'NULL', - 'correct' => 'NULL', - 'helpful' => 'NULL', - 'depth' => 'NULL', - 'comments' => '', - ); - my ($dom,$auth)=($url=~/^(\w+)\/(\w+)\//); - my $prodir=&propath($dom,$auth); +## +## Dynamic metadata description +## +## Field Type +##----------------------------------------------------------- +## count integer +## course integer +## course_list comma seperated list of course ids +## avetries real +## avetries_list comma seperated list of real numbers +## stdno real +## stdno_list comma seperated list of real numbers +## usage integer +## usage_list comma seperated list of resources +## goto scalar +## goto_list comma seperated list of resources +## comefrom scalar +## comefrom_list comma seperated list of resources +## difficulty real +## difficulty_list comma seperated list of real numbers +## sequsage scalar +## sequsage_list comma seperated list of resources +## clear real +## technical real +## correct real +## helpful real +## depth real +## comments html of all the comments made +## +{ + +my %DynamicData; +my %Counts; + +sub process_dynamic_metadata { + my ($user,$dom) = @_; + undef(%DynamicData); + undef(%Counts); # - # Get metadata except counts + my $prodir = &propath($dom,$user); + # + # Read in the dynamic metadata my %evaldata; if (! tie(%evaldata,'GDBM_File', $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) { - return (undef); + return 0; } - my %sum=(); - my %count=(); - my %concat=(); - my %listitems=( - 'course' => 'add', - 'goto' => 'add', - 'comefrom' => 'add', - 'avetries' => 'average', - 'stdno' => 'add', - 'difficulty' => 'average', - 'clear' => 'average', - 'technical' => 'average', - 'helpful' => 'average', - 'correct' => 'average', - 'depth' => 'average', - 'comments' => 'append', - 'usage' => 'count' - ); - # - my $regexp=$url; - $regexp=~s/(\W)/\\$1/g; - $regexp='___'.$regexp.'___([a-z]+)$'; - while (my ($esckey,$value)=each %evaldata) { - my $key=&unescape($esckey); - if ($key=~/$regexp/) { - my ($item,$purl,$cat)=split(/___/,$key); - $count{$cat}++; - if ($listitems{$cat} ne 'append') { - if (defined($sum{$cat})) { - $sum{$cat}+=&unescape($value); - $concat{$cat}.=','.$item; - } else { - $sum{$cat}=&unescape($value); - $concat{$cat}=$item; - } - } else { - if (defined($sum{$cat})) { - if ($evaldata{$esckey}=~/\w/) { - $sum{$cat}.='
'.&unescape($evaldata{$esckey}); - } - } else { - $sum{$cat}=''.&unescape($evaldata{$esckey}); - } + # + # Process every stored element + while (my ($storedkey,$value) = each(%evaldata)) { + my ($source,$file,$type) = split('___',$storedkey); + $source = &unescape($source); + $file = &unescape($file); + $value = &unescape($value); + " got ".$file."\n ".$type." ".$source."\n"; + if ($type =~ /^(avetries|count|difficulty|stdno|timestamp)$/) { + # + # Statistics: $source is course id + $DynamicData{$file}->{'statistics'}->{$source}->{$type}=$value; + } elsif ($type =~ /^(clear|comments|depth|technical|helpful)$/){ + # + # Evaluation $source is username, check if they evaluated it + # more than once. If so, pad the entry with a space. + while(exists($DynamicData{$file}->{'evaluation'}->{$type}->{$source})) { + $source .= ' '; } + $DynamicData{$file}->{'evaluation'}->{$type}->{$source}=$value; + } elsif ($type =~ /^(course|comefrom|goto|usage)$/) { + # + # Context $source is course id or resource + push(@{$DynamicData{$file}->{$type}},&unescape($source)); + } else { + &log(0," ".$user."@".$dom.":Process metadata: Unable to decode ".$type); } } untie(%evaldata); - # transfer gathered data to returnhash, calculate averages where applicable - my %returnhash; - while (my $cat=each(%count)) { - if ($count{$cat} eq 'nan') { next; } - if ($sum{$cat} eq 'nan') { next; } - if ($listitems{$cat} eq 'average') { - if ($count{$cat}) { - $returnhash{$cat}=int(($sum{$cat}/$count{$cat})*100.0+0.5)/100.0; - } else { - $returnhash{$cat}='NULL'; + # + # Read in the access count data + &log(7,'Reading access count data') if ($debug); + my %countdata; + if (! tie(%countdata,'GDBM_File', + $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { + return 0; + } + while (my ($key,$count) = each(%countdata)) { + next if ($key !~ /^$dom/); + $key = &unescape($key); + &log(8,' Count '.$key.' = '.$count) if ($debug); + $Counts{$key}=$count; + } + untie(%countdata); + if ($debug) { + &log(7,scalar(keys(%Counts)). + " Counts read for ".$user."@".$dom); + &log(7,scalar(keys(%DynamicData)). + " Dynamic metadata read for ".$user."@".$dom); + } + # + return 1; +} + +sub get_dynamic_metadata { + my ($url) = @_; + $url =~ s:^/res/::; + if (! exists($DynamicData{$url})) { + &log(7,' No dynamic data for '.$url) if ($debug); + return (); + } + my %data; + my $resdata = $DynamicData{$url}; + # + # Get the statistical data + foreach my $type (qw/avetries difficulty stdno/) { + my $count; + my $sum; + my @Values; + foreach my $coursedata (values(%{$resdata->{'statistics'}})) { + if (ref($coursedata) eq 'HASH' && exists($coursedata->{$type})) { + $count++; + $sum += $coursedata->{$type}; + push(@Values,$coursedata->{$type}); } - } elsif ($listitems{$cat} eq 'count') { - $returnhash{$cat}=$count{$cat}; - } else { - $returnhash{$cat}=$sum{$cat}; } - $returnhash{$cat.'_list'}=$concat{$cat}; + if ($count) { + $data{$type} = $sum/$count; + $data{$type.'_list'} = join(',',@Values); + } + } + # find the count + $data{'count'} = $Counts{$url}; + # + # Get the context data + foreach my $type (qw/course goto comefrom/) { + if (defined($resdata->{$type}) && + ref($resdata->{$type}) eq 'ARRAY') { + $data{$type} = scalar(@{$resdata->{$type}}); + $data{$type.'_list'} = join(',',@{$resdata->{$type}}); + } + } + if (defined($resdata->{'usage'}) && + ref($resdata->{'usage'}) eq 'ARRAY') { + $data{'sequsage'} = scalar(@{$resdata->{'usage'}}); + $data{'sequsage_list'} = join(',',@{$resdata->{'usage'}}); + } + # + # Get the evaluation data + foreach my $type (qw/clear technical correct helpful depth/) { + my $count; + my $sum; + foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{$type}})){ + $sum += $resdata->{'evaluation'}->{$type}->{$evaluator}; + $count++; + } + if ($count > 0) { + $data{$type}=$sum/$count; + } } # - # get count - if (tie(my %evaldata,'GDBM_File', - $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) { - my $escurl=&escape($url); - if (! exists($evaldata{$escurl})) { - $returnhash{'count'}=0; - } else { - $returnhash{'count'}=$evaldata{$escurl}; - } - untie %evaldata; + # put together comments + my $comments = '
'; + foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{'comments'}})){ + $comments .= $evaluator.':'. + $resdata->{'evaluation'}->{'comments'}->{$evaluator}.'
'; + } + $comments .= '
'; + # + # Log the dynamic metadata + if ($debug) { + while (my($k,$v)=each(%data)) { + &log(8," ".$k." => ".$v); + } } - return %returnhash; + # + return %data; } +} # End of %DynamicData and %Counts scope + ######################################################## ######################################################## ### ### @@ -593,8 +732,10 @@ sub sqltime { $TimeData[5]+=1900; $mysqltime = sprintf('%04d-%02d-%02d %02d:%02d:%02d', @TimeData[5,4,3,2,1,0]); + } elsif (! defined($time) || $time == 0) { + $mysqltime = 0; } else { - print LOG " Unable to decode time ".$time."\n"; + &log(0," sqltime:Unable to decode time ".$time); $mysqltime = 0; } return $mysqltime;