--- loncom/interface/lonsearchcat.pm 2004/05/10 18:59:18 1.227 +++ loncom/interface/lonsearchcat.pm 2004/06/03 19:23:08 1.228 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # Search Catalog # -# $Id: lonsearchcat.pm,v 1.227 2004/05/10 18:59:18 matthew Exp $ +# $Id: lonsearchcat.pm,v 1.228 2004/06/03 19:23:08 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -75,6 +75,7 @@ use Apache::lonhtmlcommon; use Apache::lonlocal; use LONCAPA::lonmetadata(); use HTML::Entities(); +use Parse::RecDescent; ###################################################################### ###################################################################### @@ -337,6 +338,29 @@ END &course_search($r); } elsif(($ENV{'form.phase'} eq 'basic_search') || ($ENV{'form.phase'} eq 'adv_search')) { + # + # We are running a search, try to parse it + my ($query,$customquery,$customshow,$libraries) = + (undef,undef,undef,undef); + my $pretty_string; + if ($ENV{'form.phase'} eq 'basic_search') { + ($query,$pretty_string,$libraries) = + &parse_basic_search($r,$closebutton,$hidden_fields); + return OK if (! defined($query)); + &make_persistent({ basicexp => $ENV{'form.basicexp'}}, + $persistent_db_file); + } else { # Advanced search + ($query,$customquery,$customshow,$libraries,$pretty_string) + = &parse_advanced_search($r,$closebutton,$hidden_fields); + return OK if (! defined($query)); + } + &make_persistent({ query => $query, + customquery => $customquery, + customshow => $customshow, + libraries => $libraries, + pretty_string => $pretty_string }, + $persistent_db_file); + # # Set up table if (! defined(&create_results_table())) { my $errorstring=&Apache::lonmysql::get_error(); @@ -364,29 +388,12 @@ Unable to properly store search informat END return OK; } - # - # We are running a search - my ($query,$customquery,$customshow,$libraries) = - (undef,undef,undef,undef); - my $pretty_string; - if ($ENV{'form.phase'} eq 'basic_search') { - ($query,$pretty_string,$libraries) = - &parse_basic_search($r,$closebutton,$hidden_fields); - } else { # Advanced search - ($query,$customquery,$customshow,$libraries,$pretty_string) - = &parse_advanced_search($r,$closebutton,$hidden_fields); - return OK if (! defined($query)); - } - &make_persistent({ query => $query, - customquery => $customquery, - customshow => $customshow, - libraries => $libraries, - pretty_string => $pretty_string }, - $persistent_db_file); ## ## Print out the frames interface ## - &print_frames_interface($r); + if (defined($query)) { + &print_frames_interface($r); + } } return OK; } @@ -603,8 +610,11 @@ sub print_basic_search_form { } $scrout.=''. ''. ''. ''.$/; # -# $scrout .= ''.$/; $scrout .= '
'. - &Apache::lonhtmlcommon::textbox('basicexp', - $ENV{'form.basicexp'},50).'
'. + &Apache::lonhtmlcommon::textbox + ('basicexp', + &HTML::Entities::encode($ENV{'form.basicexp'},'<>&"'),50 + ). + '
'. ''.&searchhelp().''.'
'. ''.(' 'x3).$adv_search_link.''.'
'. @@ -613,12 +623,6 @@ sub print_basic_search_form { '
'. -# ''. -# $userelatedwords.(' 'x3). -# $onlysearchdomain.(' 'x2).$adv_search_link. -# ''. -# '
'. ''. ' contains ". - $searchphrase.""; + my ($error,$SQLQuery) = + &process_phrase_input($ENV{'form.'.$field}, + $ENV{'form.'.$field.'_related'},$field); + if (defined($error)) { + &output_unparsed_phrase_error($r,$closebutton,'phase=disp_adv', + $hidden_fields,$field); + return; + } else { + $pretty_search_string .= + $font.$field.': '.$ENV{'form.'.$field}; if ($ENV{'form.'.$field.'_related'}) { - my @New_Words; - ($searchphrase,@New_Words) = &related_version($searchphrase); - if (@New_Words) { - $pretty_search_string .= " with related words: ". - "@New_Words."; + my @Words = + &Apache::loncommon::get_related_words + ($ENV{'form.'.$field}); + if (@Words) { + $pretty_search_string.= ' with related words: '. + join(', ',@Words[0..4]); } else { - $pretty_search_string .= " with no related words."; + $pretty_search_string.= ' with related words.'; } } - $pretty_search_string .= "
\n"; - push @queries,&build_SQL_query($field,$searchphrase); + $pretty_search_string .= '
'; + push (@queries,$SQLQuery); } } # @@ -1282,7 +1293,8 @@ sub parse_advanced_search { } } if (defined($searchphrase)) { - push @queries,&build_SQL_query('mime',$searchphrase); + my ($error,$SQLsearch) = &process_phrase_input($searchphrase,0,'mime'); + push @queries,$SQLsearch; $pretty_search_string .=$font.'mime contains '. $searchphrase.'
'; } @@ -1402,11 +1414,11 @@ sub parse_advanced_search { $pretty_search_string .= $pretty_domains_string."
\n"; # if (@queries) { - $query="SELECT * FROM metadata WHERE ".join(" AND ",@queries); + $query="SELECT * FROM metadata WHERE (".join(") AND (",@queries).')'; } elsif ($customquery) { $query = ''; } -# &Apache::lonnet::logthis('query = '.$/.$query); + # &Apache::lonnet::logthis('query = '.$/.$query); return ($query,$customquery,$customshow,$libraries_to_query, $pretty_search_string); } @@ -1468,7 +1480,7 @@ sub parse_basic_search { # # Clean up fields for safety for my $field ('basicexp') { - $ENV{"form.$field"}=~s/[^\w\s\(\)\-]//g; + $ENV{"form.$field"}=~s/[^\w\s\'\"\!\(\)\-]//g; } foreach ('mode','form','element') { # is this required? Hmmm. @@ -1481,36 +1493,28 @@ sub parse_basic_search { # # Check to see if enough of a query is filled in my $search_string = $ENV{'form.basicexp'}; - $search_string =~ s/(not\s*$|^\s*(and|or)|)//gi; if (! &filled($search_string)) { &output_blank_field_error($r,$closebutton,'phase=disp_basic'); return OK; } - my $pretty_search_string=''; + my $pretty_search_string=$search_string; my @Queries; - my $concatarg=join(',', - ('title', 'author', 'subject', 'notes', 'abstract', - 'keywords')); - foreach my $search (&process_phrase_input($search_string)){ - if ($ENV{'form.related'}) { - $pretty_search_string .= ' and
' if ($pretty_search_string ne ''); - $pretty_search_string .= ''.$search.''; - my @New_Words; - ($search,@New_Words) = &related_version($search); - next if (! $search); - if (@New_Words) { - $pretty_search_string .= - " with related words: @New_Words"; - } - } else { - $pretty_search_string .= ' and ' if ($pretty_search_string ne ''); - $pretty_search_string .= ''.$search.''; - } - # - # Build SQL query string based on form page - push(@Queries, - &build_SQL_query('concat_ws(" ",'.$concatarg.')',$search)); + my $searchfield = 'concat_ws(" ",'.join(',', + ('title','author','subject', + 'notes','abstract','keywords') + ).')'; + my ($error,$SQLQuery) = &process_phrase_input($search_string, + $ENV{'form.related'}, + $searchfield); + if ($error) { + &output_unparsed_phrase_error($r,$closebutton,'phase=disp_basic', + '','basicexp'); + return; } + push(@Queries,$SQLQuery); + #foreach my $q (@Queries) { + # &Apache::lonnet::logthis(' '.$q); + #} my $final_query = 'SELECT * FROM metadata WHERE '.join(" AND ",@Queries); # if (defined($pretty_domains_string) && $pretty_domains_string ne '') { @@ -1518,40 +1522,189 @@ sub parse_basic_search { } $pretty_search_string .= "
\n"; $pretty_search_string =~ s:^
and ::; -# &Apache::lonnet::logthis($final_query); + # &Apache::lonnet::logthis($final_query); return ($final_query,$pretty_search_string, $libraries_to_query); } + +############################################################### +############################################################### + +my @Phrases; + +sub concat { + my ($item) = @_; + my $results = ''; + foreach (@$item) { + if (ref($_) eq 'ARRAY') { + $results .= join(' ',@$_); + } + } + return $results; +} + sub process_phrase_input { - my ($phrase)=@_; - my @Phrases; - # &Apache::lonnet::logthis('phrase = :'.$phrase.':'); - my $in_quotes = 0; - my @Words = split(/\s+/,$phrase); - foreach my $word (@Words) { - $word =~ s/(\w+)\"(\w+)/$1$2/g; - if ($in_quotes) { - if ($word =~ s/(\")$//) { - $in_quotes = 0; + my ($phrase,$related,$field)=@_; + #&Apache::lonnet::logthis('phrase = :'.$phrase.':'); + my $grammar = <<'ENDGRAMMAR'; + searchphrase: + expression /^\Z/ { + # &Apache::lonsearchcat::print_item(\@item,0); + [@item]; + } + expression: + phrase(s) { + [@item]; + } + phrase: + orword { + [@item]; + } + | andword { + [@item]; + } + | minusword { + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + | word { + unshift(@::Phrases,$item[1]); + [@item]; + } + # + orword: + word 'OR' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | word 'or' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | minusword 'OR' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + | minusword 'or' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + andword: + word phrase { + unshift(@::Phrases,'AND'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | minusword phrase { + unshift(@::Phrases,'AND'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + # + minusword: + '-' word { + [$item[2],'NOT']; + } + word: + "'" term(s) "'" { + &Apache::lonsearchcat::concat(\@item); + } + | '"' term(s) '"' { + &Apache::lonsearchcat::concat(\@item); + } + | term { + $item[1]; + } + term: + /[\w\Q:!@#$%^&*()+_=|{}<>,.;\\\/?\E]+/ { + $item[1]; + } +ENDGRAMMAR + # + # The end result of parsing the phrase with the grammar is an array + # @::Phrases. + # $phrase = "gene splicing" or cat -> "gene splicing","OR","cat" + # $phrase = "genetic engineering" -dna -> + # "genetic engineering","AND","NOT","dna" + # $phrase = cat or dog -poodle -> "cat","OR","dog","AND","NOT","poodle" + undef(@::Phrases); + my $p = new Parse::RecDescent($grammar); + if (! defined($p->searchphrase($phrase))) { + &Apache::lonnet::logthis('lonsearchcat:unable to process:'.$phrase); + return 'Unable to process phrase '.$phrase; + } + # + # Go through the phrases and make sense of them. + # Apply modifiers NOT OR and AND to the phrases. + my @NewPhrases; + while(@::Phrases) { + my $phrase = shift(@::Phrases); + # &Apache::lonnet::logthis('phrase = '.$phrase); + my $phrasedata; + if ($phrase =~ /^(NOT|OR|AND)$/) { + if ($phrase eq 'OR') { + $phrasedata->{'or'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); + } elsif ($phrase eq 'AND') { + $phrasedata->{'and'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); } - if ($Phrases[-1] ne '') { - $Phrases[-1] .= ' '; + if ($phrase eq 'NOT') { + $phrasedata->{'negate'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); } - $Phrases[-1] .= $word; + } + $phrasedata->{'phrase'} = $phrase; + if ($related) { + my @NewWords; + (undef,@NewWords) = &related_version($phrasedata->{'phrase'}); + $phrasedata->{'related_words'} = \@NewWords; + } + push(@NewPhrases,$phrasedata); + } + # + # Actually build the sql query from the phrases + my $SQLQuery; + foreach my $phrase (@NewPhrases) { + my $query; + if ($phrase->{'negate'}) { + $query .= $field.' NOT LIKE "%'.$phrase->{'phrase'}.'%"'; } else { - if ($word =~ s/^(\")//) { - $in_quotes=1; + $query .= $field.' LIKE "%'.$phrase->{'phrase'}.'%"'; + } + foreach my $related (@{$phrase->{'related_words'}}) { + if ($phrase->{'negate'}) { + $query .= ' AND '.$field.' NOT LIKE "%'.$related.'%"'; + } else { + $query .= ' OR '.$field.' LIKE "%'.$related.'%"'; + } + } + if ($SQLQuery) { + if ($phrase->{'or'}) { + $SQLQuery .= ' OR ('.$query.')'; + } else { + $SQLQuery .= ' AND ('.$query.')'; } - push(@Phrases,$word); + } else { + $SQLQuery = '('.$query.')'; } } # - #foreach my $p (@Phrases) { - # &Apache::lonnet::logthis(' subphrase = '.$p); - #} + # &Apache::lonnet::logthis("SQLQuery = $SQLQuery"); # - return @Phrases; + return undef,$SQLQuery; } ###################################################################### @@ -1581,30 +1734,6 @@ sub related_version { return $result,sort(@Words); } -###################################################################### -###################################################################### - -=pod - -=item &build_SQL_query() - -Builds a SQL query string from a logical expression with AND/OR keywords -using Text::Query and &recursive_SQL_query_builder() - -=cut - -###################################################################### -###################################################################### -sub build_SQL_query { - my ($field_name,$logic_statement)=@_; - my $q=new Text::Query('abc', - -parse => 'Text::Query::ParseAdvanced', - -build => 'Text::Query::Build'); - $q->prepare($logic_statement); - my $matchexp=${$q}{'matchexp'}; chomp $matchexp; - my $sql_query=&recursive_SQL_query_build($field_name,$matchexp); - return $sql_query; -} ###################################################################### ###################################################################### @@ -1639,47 +1768,6 @@ sub build_custommetadata_query { return $matchexp; } -###################################################################### -###################################################################### - -=pod - -=item &recursive_SQL_query_build() - -Recursively constructs an SQL query. Takes as input $dkey and $pattern. - -=cut - -###################################################################### -###################################################################### -sub recursive_SQL_query_build { - my ($dkey,$pattern)=@_; - my @matches=($pattern=~/(\[[^\]|\[]*\])/g); - return $pattern unless @matches; - foreach my $match (@matches) { - $match=~/\[ (\w+)\s(.*) \]/; - my ($key,$value)=($1,$2); - my $replacement=''; - if ($key eq 'literal') { - $replacement="($dkey LIKE \"\%$value\%\")"; - } elsif (lc($key) eq 'not') { - $value=~s/LIKE/NOT LIKE/; -# $replacement="($dkey not like $value)"; - $replacement="$value"; - } elsif ($key eq 'and') { - $value=~/(.*[\"|\)]) ([|\(|\^].*)/; - $replacement="($1 AND $2)"; - } elsif ($key eq 'or') { - $value=~/(.*[\"|\)]) ([|\(|\^].*)/; - $replacement="($1 OR $2)"; - } - substr($pattern, - index($pattern,$match), - length($match), - $replacement); - } - &recursive_SQL_query_build($dkey,$pattern); -} ###################################################################### ###################################################################### @@ -2254,9 +2342,10 @@ END # # Loop through the servers we have contacted but do not # have results from yet, looking for results. - while (my ($server,$status) = each(%Server_status)) { + foreach my $server (keys(%Server_status)) { last if ($connection->aborted()); &update_seconds($r); + my $status = $Server_status{$server}; if ($status eq 'con_lost') { delete ($Server_status{$server}); next; @@ -3210,6 +3299,51 @@ sub filled { =pod +=item &output_unparsed_phrase_error() + +=cut + +###################################################################### +###################################################################### +sub output_unparsed_phrase_error { + my ($r,$closebutton,$parms,$hidden_fields,$field)=@_; + my $errorstring; + if ($field eq 'basicexp') { + $errorstring = &mt('Unable to understand the search phrase [_1]. Please modify your search.',$ENV{'form.basicexp'}); + } else { + $errorstring = &mt('Unable to understand the search phrase [_1]:[_2].',$field,$ENV{'form.'.$field}); + } + my $bodytag = &Apache::loncommon::bodytag('Search'); + my $heading = &mt('Unparsed Field'); + my $revise = &mt('Revise search request'); + # make query information persistent to allow for subsequent revision + $r->print(< + +The LearningOnline Network with CAPA + +$bodytag +
+$hidden_fields +$closebutton +
+

$heading

+

+$errorstring +

+

+$revise +

+ + +ENDPAGE +} + +###################################################################### +###################################################################### + +=pod + =item &output_blank_field_error() Output a complete page that indicates the user has not filled in enough @@ -3227,33 +3361,31 @@ $parms is extra information to include i ###################################################################### sub output_blank_field_error { my ($r,$closebutton,$parms,$hidden_fields)=@_; - my $bodytag=&Apache::loncommon::bodytag(undef,undef,undef,1); - # make query information persistent to allow for subsequent revision - $r->print(<print(< The LearningOnline Network with CAPA -BEGINNING - $r->print(< $bodytag - -

Search Catalog

$hidden_fields -Revise search request  $closebutton
-

Unactionable search query.

+

$heading

+

+$errormsg +

-You did not fill in enough information for the search to be started. -You need to fill in relevant fields on the search page in order -for a query to be processed. +$revise 

-RESULTS +ENDPAGE + return; } ######################################################################