# The LearningOnline Network with CAPA # Search Catalog # # $Id: lonsearchcat.pm,v 1.126 2002/06/24 15:09:52 matthew Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # # YEAR=2001 # 3/8, 3/12, 3/13, 3/14, 3/15, 3/19 Scott Harrison # 3/20, 3/21, 3/22, 3/26, 3/27, 4/2, 8/15, 8/24, 8/25 Scott Harrison # 10/12,10/14,10/15,10/16,11/28,11/29,12/10,12/12,12/16 Scott Harrison # YEAR=2002 # 1/17 Scott Harrison # 6/17 Matthew Hall # ############################################################################### ############################################################################### =pod =head1 NAME lonsearchcat =head1 SYNOPSIS Search interface to LON-CAPAs digital library =head1 DESCRIPTION This module enables searching for a distributed browseable catalog. This is part of the LearningOnline Network with CAPA project described at http://www.lon-capa.org. lonsearchcat presents the user with an interface to search the LON-CAPA digital library. lonsearchcat also initiates the execution of a search by sending the search parameters to LON-CAPA servers. The progress of search (on a server basis) is displayed to the user in a seperate window. =head1 Internals =over 4 =cut ############################################################################### ############################################################################### ## ## ## ORGANIZATION OF THIS PERL MODULE ## ## ## ## 1. Modules used by this module ## ## 2. Choices for different output views (detailed, summary, xml, etc) ## ## 3. BEGIN block (to be run once after compilation) ## ## 4. Handling routine called via Apache and mod_perl ## ## 5. Other subroutines ## ## ## ############################################################################### package Apache::lonsearchcat; # ------------------------------------------------- modules used by this module use strict; use Apache::Constants qw(:common); use Apache::lonnet(); use Apache::File(); use CGI qw(:standard); use Text::Query; use GDBM_File; use Apache::loncommon(); # ---------------------------------------- variables used throughout the module ###################################################################### ###################################################################### =pod =item Global variables =over 4 =item %hostdomains matches host name to host domain =item %hostips matches host name to host ip =item %hitcount stores number of hits per host =item $closebutton button that closes the search window =item $importbutton button to take the selecte results and go to group sorting =item %hash The ubiquitous database hash =item $diropendb The full path to the (temporary) search database file. This is set and used in &handler() and is also used in &output_results(). =back =cut ###################################################################### ###################################################################### # -- information holders my %hostdomains; # matches host name to host domain my %hostips; # matches host name to host ip my %hitcount; # stores number of hits per host # -- dynamically rendered interface components my $closebutton; # button that closes the search window my $importbutton; # button to take the selected results and go to group sorting # -- miscellaneous variables my $yourself; # allows for quickly limiting to oneself my %hash; # database hash # ------------------------------------------ choices for different output views # Detailed Citation View ---> sub detailed_citation_view # Summary View ---> sub summary_view # Fielded Format ---> sub fielded_format_view # XML/SGML ---> sub xml_sgml_view #------------------------------------------------------------- global variables my $diropendb = ""; my $domain = ""; # ----------------------------------------------------------------------- BEGIN =pod =item BEGIN block Load %hostdomains and %hostips with data from lonnet.pm. Only library servers are considered. =cut BEGIN { foreach (keys (%Apache::lonnet::libserv)) { $hostdomains{$_}=$Apache::lonnet::hostdom{$_}; $hostips{$_}=$Apache::lonnet::hostip{$_}; } } ###################################################################### ###################################################################### =pod =item &handler() - main handler invoked by httpd child =item Variables =over 4 =item $hidden holds 'hidden' html forms =item $scrout string that holds portions of the screen output =back =cut ###################################################################### ###################################################################### sub handler { my $r = shift; untie %hash; $r->content_type('text/html'); $r->send_http_header; return OK if $r->header_only; my $domain = $r->dir_config('lonDefDomain'); $diropendb= "/home/httpd/perl/tmp/".&Apache::lonnet::escape($domain). "\_".&Apache::lonnet::escape($ENV{'user.name'})."_searchcat.db"; &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'}, ['catalogmode','launch','acts','mode','form','element', 'reqinterface']); ## ## Clear out old values from database ## if ($ENV{'form.launch'} eq '1') { if (tie(%hash,'GDBM_File',$diropendb,&GDBM_WRCREAT,0640)) { &start_fresh_session(); untie %hash; } else { $r->print('
Unable to tie hash to db '. 'file'); return OK; } } ## ## Produce some output, so people know it is working ## $r->print("\n"); $r->rflush; ## ## Configure dynamic components of interface ## my $hidden; # Holds 'hidden' html forms if ($ENV{'form.catalogmode'} eq 'interactive') { $hidden="". "\n"; $closebutton=""."\n"; } elsif ($ENV{'form.catalogmode'} eq 'groupsearch') { $hidden=<'.
''.$uctitle.': Basic search: '.$ENV{'form.basicexp'}.' Advanced search '.$query.' \n";
if ($ENV{'form.catalogmode'} eq 'interactive') {
my $titleesc=$Fields{'title'};
$titleesc=~s/\'/\\'/; # '
$compiledresult.=<
';
}
######################################################################
######################################################################
=pod
=item &advancedsearch()
Parse advanced search results.
=cut
######################################################################
######################################################################
sub advancedsearch {
my ($r,$envhash,$hidden)=@_;
my %ENV=%{$envhash};
my $fillflag=0;
# Clean up fields for safety
for my $field ('title','author','subject','keywords','url','version',
'creationdatestart_month','creationdatestart_day',
'creationdatestart_year','creationdateend_month',
'creationdateend_day','creationdateend_year',
'lastrevisiondatestart_month','lastrevisiondatestart_day',
'lastrevisiondatestart_year','lastrevisiondateend_month',
'lastrevisiondateend_day','lastrevisiondateend_year',
'notes','abstract','mime','language','owner',
'custommetadata','customshow') {
$ENV{"form.$field"}=~s/[^\w\/\s\(\)\=\-\"\']//g;
}
foreach ('mode','form','element') {
# is this required? Hmmm.
next unless (exists($ENV{"form.$_"}));
$ENV{"form.$_"}=&Apache::lonnet::unescape($ENV{"form.$_"});
$ENV{"form.$_"}=~s/[^\w\/\s\(\)\=\-\"\']//g;
}
# Check to see if enough information was filled in
for my $field ('title','author','subject','keywords','url','version',
'notes','abstract','mime','language','owner',
'custommetadata') {
if (&filled($ENV{"form.$field"})) {
$fillflag++;
}
}
unless ($fillflag) {
&output_blank_field_error($r);
return OK;
}
# Turn the form input into a SQL-based query
my $query='';
my @queries;
# Evaluate logical expression AND/OR/NOT phrase fields.
foreach my $field ('title','author','subject','notes','abstract','url',
'keywords','version','owner') {
if ($ENV{'form.'.$field}) {
push @queries,&build_SQL_query($field,$ENV{'form.'.$field});
}
}
# Evaluate option lists
if ($ENV{'form.language'} and $ENV{'form.language'} ne 'any') {
push @queries,"(language like \"$ENV{'form.language'}\")";
}
if ($ENV{'form.mime'} and $ENV{'form.mime'} ne 'any') {
push @queries,"(mime like \"$ENV{'form.mime'}\")";
}
if ($ENV{'form.copyright'} and $ENV{'form.copyright'} ne 'any') {
push @queries,"(copyright like \"$ENV{'form.copyright'}\")";
}
# Evaluate date windows
my $datequery=&build_date_queries(
$ENV{'form.creationdatestart_month'},
$ENV{'form.creationdatestart_day'},
$ENV{'form.creationdatestart_year'},
$ENV{'form.creationdateend_month'},
$ENV{'form.creationdateend_day'},
$ENV{'form.creationdateend_year'},
$ENV{'form.lastrevisiondatestart_month'},
$ENV{'form.lastrevisiondatestart_day'},
$ENV{'form.lastrevisiondatestart_year'},
$ENV{'form.lastrevisiondateend_month'},
$ENV{'form.lastrevisiondateend_day'},
$ENV{'form.lastrevisiondateend_year'},
);
# Test to see if date windows are legitimate
if ($datequery=~/^Incorrect/) {
&output_date_error($r,$datequery);
return OK;
}
elsif ($datequery) {
push @queries,$datequery;
}
# Process form information for custom metadata querying
my $customquery='';
if ($ENV{'form.custommetadata'}) {
$customquery=&build_custommetadata_query('custommetadata',
$ENV{'form.custommetadata'});
}
my $customshow='';
if ($ENV{'form.customshow'}) {
$customshow=$ENV{'form.customshow'};
$customshow=~s/[^\w\s]//g;
my @fields=split(/\s+/,$customshow);
$customshow=join(" ",@fields);
}
# Send query statements over the network to be processed by either the SQL
# database or a recursive scheme of 'grep'-like actions (for custom
# metadata).
if (@queries) {
$query=join(" AND ",@queries);
$query="select * from metadata where $query";
my $reply; # reply hash reference
unless ($customquery or $customshow) {
$reply=&Apache::lonnet::metadata_query($query);
}
else {
$reply=&Apache::lonnet::metadata_query($query,
$customquery,$customshow);
}
&output_results('Advanced',$r,$envhash,$customquery,$reply,$hidden);
return OK;
} elsif ($customquery) {
my $reply; # reply hash reference
$reply=&Apache::lonnet::metadata_query('',
$customquery,$customshow);
&output_results('Advanced',$r,$envhash,$customquery,$reply,$hidden);
return OK;
}
# should not get to this point
return 'Error. Should not have gone to this point.';
}
######################################################################
######################################################################
=pod
=item &basicsearch()
Parse basic search form.
=cut
######################################################################
######################################################################
sub basicsearch {
my ($r,$envhash,$hidden)=@_;
my %ENV=%{$envhash};
# Clean up fields for safety
for my $field ('basicexp') {
$ENV{"form.$field"}=~s/[^\w\s\(\)\-]//g;
}
foreach ('mode','form','element') {
# is this required? Hmmm.
next unless (exists($ENV{"form.$_"}));
$ENV{"form.$_"}=&Apache::lonnet::unescape($ENV{"form.$_"});
$ENV{"form.$_"}=~s/[^\w\/\s\(\)\=\-\"\']//g;
}
# Check to see if enough is filled in
unless (&filled($ENV{'form.basicexp'})) {
&output_blank_field_error($r);
return OK;
}
# Build SQL query string based on form page
my $query='';
my $concatarg=join('," ",',
('title', 'author', 'subject', 'notes', 'abstract',
'keywords'));
$concatarg='title' if $ENV{'form.titleonly'};
$query=&build_SQL_query('concat('.$concatarg.')',$ENV{'form.'.'basicexp'});
# Get reply (either a hash reference to filehandles or bad connection)
# &Apache::lonnet::logthis("metadata query started:".time);
my $reply=&Apache::lonnet::metadata_query('select * from metadata where '.$query);
# &Apache::lonnet::logthis("metadata query finished:".time);
# Output search results
&output_results('Basic',$r,$envhash,$query,$reply,$hidden);
return OK;
}
######################################################################
######################################################################
=pod
=item &build_SQL_query()
Builds a SQL query string from a logical expression with AND/OR keywords
using Text::Query and &recursive_SQL_query_builder()
=cut
######################################################################
######################################################################
sub build_SQL_query {
my ($field_name,$logic_statement)=@_;
my $q=new Text::Query('abc',
-parse => 'Text::Query::ParseAdvanced',
-build => 'Text::Query::Build');
$q->prepare($logic_statement);
my $matchexp=${$q}{'matchexp'}; chomp $matchexp;
my $sql_query=&recursive_SQL_query_build($field_name,$matchexp);
return $sql_query;
}
######################################################################
######################################################################
=pod
=item &build_custommetadata_query()
Constructs a custom metadata query using a rather heinous regular
expression.
=cut
######################################################################
######################################################################
sub build_custommetadata_query {
my ($field_name,$logic_statement)=@_;
my $q=new Text::Query('abc',
-parse => 'Text::Query::ParseAdvanced',
-build => 'Text::Query::BuildAdvancedString');
$q->prepare($logic_statement);
my $matchexp=${$q}{'-parse'}{'-build'}{'matchstring'};
# quick fix to change literal into xml tag-matching
# will eventually have to write a separate builder module
# wordone=wordtwo becomes\Search Catalog
CATALOGBEGIN
$r->print(<
Search Query
CATALOGCONTROLS
#
# Remind them what they searched for
#
if ($mode eq 'Basic') {
$r->print('Search Results
');
$r->rflush();
#
# make the pop-up window for status
#
$r->print(&make_popwin(%rhash));
$r->rflush();
##
## Prepare for the main loop below
##
my $servercount=0;
my $hitcountsum=0;
my $servernum=(keys %rhash);
my $serversleft=$servernum;
##
## Run until we run out of time or we run out of servers
##
while($serversleft && $timeremain) {
##
## %rhash has servers deleted from it as results come in
## (within the foreach loop below).
##
foreach my $rkey (sort keys %rhash) {
# &Apache::lonnet::logthis("Server $rkey:".time);
$servercount++;
$compiledresult='';
my $reply=$rhash{$rkey};
my @results;
if ($reply eq 'con_lost') {
&popwin_imgupdate($r,$rkey,"srvbad.gif");
$serversleft--;
delete $rhash{$rkey};
} else {
# must do since 'use strict' checks for tainting
$reply=~/^([\.\w]+)$/;
my $replyfile=$r->dir_config('lonDaemons').'/tmp/'.$1;
$reply=~/(.*?)\_/;
for (my $counter=0;$counter<2;$counter++) {
if (-e $replyfile && ! -e "$replyfile.end") {
&popwin_imgupdate($r,$rkey,"srvhalf.gif");
&popwin_js($r,'popwin.hc["'.$rkey.'"]='.
'"still transferring..."'.';');
}
# Are we finished transferring data?
if (-e "$replyfile.end") {
$serversleft--;
delete $rhash{$rkey};
if (-s $replyfile) {
&popwin_imgupdate($r,$rkey,"srvgood.gif");
my $fh;
unless ($fh=Apache::File->new($replyfile)){
# Is it really appropriate to die on this error?
$r->print('ERROR: file '.
$replyfile.' cannot be opened');
return OK;
}
@results=<$fh> if $fh;
$hitcount{$rkey}=@results+0;
&popwin_js($r,'popwin.hc["'.$rkey.'"]='.
$hitcount{$rkey}.';');
$hitcountsum+=$hitcount{$rkey};
&popwin_js($r,'popwin.document.forms.popremain.'.
'numhits.value='.$hitcountsum.';');
} else {
&popwin_imgupdate($r,$rkey,"srvempty.gif");
&popwin_js($r,'popwin.hc["'.$rkey.'"]=0;');
}
last;
} # end of if ( -e "$replyfile.end")
last unless $timeremain;
sleep 1; # wait for daemons to write files?
$timeremain--;
$elapsetime++;
&popwin_js($r,"popwin.document.popremain.".
"elapsetime.value=$elapsetime;");
}
&popwin_js($r,'popwin.document.whirly.'.
'src="/adm/lonIcons/lonanimend.gif";');
} # end of if ($reply eq 'con_lost') else statement
my %Fields = undef; # Holds the data to be sent to the various
# *_view routines.
my ($extrashow,$customfields,$customhash) = &handle_custom_fields(\@results);
my @customfields = @$customfields;
my %customhash = %$customhash;
untie %hash if (keys %hash);
#
if (! tie(%hash,'GDBM_File',$diropendb,&GDBM_WRCREAT,0640)) {
$r->print('Unable to tie hash to db '.
'file');
} else {
if ($ENV{'form.launch'} eq '1') {
&start_fresh_session();
}
foreach my $result (@results) {
next if $result=~/^custom\=/;
chomp $result;
next unless $result;
%Fields = &parse_raw_result($result,$rkey);
$Fields{'extrashow'}=$extrashow;
if ($extrashow) {
foreach my $field (@customfields) {
my $value='';
$value = $1 if ($customhash{$Fields{'url'}}=~/\<{$field}[^\>]*\>(.*?)\<\/{$field}[^\>]*\>/s);
$Fields{'extrashow'}=~s/\<\!\-\- $field \-\-\>/ $value/g;
}
}
if ($compiledresult or $servercount!=$servernum) {
$compiledresult.="
";
}
$compiledresult.="\n
END
}
if ($ENV{'form.catalogmode'} eq 'groupsearch') {
$fnum+=0;
$hash{"pre_${fnum}_link"}=$Fields{'url'};
$hash{"pre_${fnum}_title"}=$Fields{'title'};
$compiledresult.=<
END
#
#
$fnum++;
}
my $viewselect;
if ($mode eq 'Basic') {
$viewselect=$ENV{'form.basicviewselect'};
}
elsif ($mode eq 'Advanced') {
$viewselect=$ENV{'form.advancedviewselect'};
}
if ($viewselect eq 'Detailed Citation View') {
$compiledresult.=&detailed_citation_view
(%Fields, hostname => $rkey );
}
elsif ($viewselect eq 'Summary View') {
$compiledresult.=&summary_view
(%Fields, hostname => $rkey );
}
elsif ($viewselect eq 'Fielded Format') {
$compiledresult.=&fielded_format_view
(%Fields, hostname => $rkey );
}
elsif ($viewselect eq 'XML/SGML') {
$compiledresult.=&xml_sgml_view
(%Fields, hostname => $rkey );
}
}
untie %hash;
}
if ($compiledresult) {
$resultflag=1;
$r->print($compiledresult);
}
my $percent=sprintf('%3.0f',($servercount/$servernum*100));
} # End of foreach loop over servers remaining
} # End of big loop - while($serversleft && $timeremain)
unless ($resultflag) {
$r->print("\nThere were no results that matched your query\n");
}
# $r->print(''."\n"); $r->rflush();
$r->print("\n