loncom/interface/loncommon.pm - diff

Return to loncommon.pm CVS log

Up to [LON-CAPA] / loncom / interface

Diff for /loncom/interface/loncommon.pm between versions 1.45 and 1.46

-version 1.45, 2002/07/09 17:15:58
+version 1.46, 2002/07/12 14:36:16
  Line 79  package Apache::loncommon;
  use strict;
  use Apache::lonnet();
+ use GDBM_File;
  use POSIX qw(strftime);
  use Apache::Constants qw(:common);
  use Apache::lonmsg();
  my $readit;
+ =pod
+ =item Global Variables
+ =over 4
+ =cut
  # ----------------------------------------------- Filetypes/Languages/Copyright
  my %language;
  my %cprtag;
  my %fe; my %fd;
  my %category_extensions;
- # -------------------------------------------------------------- Thesaurus data
+ # ---------------------------------------------- Thesaurus variables
- my @therelated;
- my @theword;
+ =pod
- my @thecount;
- my %theindex;
+ =item %Keywords
- my $thetotalcount;
- my $thefuzzy=2;
+ A hash used by &keyword to determine if a word is considered a keyword.
- my $thethreshold=0.1/$thefuzzy;
- my $theavecount;
+ =item $thesaurus_db_file
+ Scalar containing the full path to the thesaurus database.
+ =cut
+ my %Keywords;
+ my $thesaurus_db_file;
+ =pod
+ =back
+ =cut
  # ----------------------------------------------------------------------- BEGIN
- Line 114  thesaurus.tab, and filecategories.tab.
+ Line 136  thesaurus.tab, and filecategories.tab.
  # ----------------------------------------------------------------------- BEGIN
  BEGIN {
+     # Variable initialization
+     $thesaurus_db_file = $Apache::lonnet::perlvar{'lonTabDir'}."/thesaurus.db";
+     #
      unless ($readit) {
  # ------------------------------------------------------------------- languages
      {
- Line 171  BEGIN {
+ Line 195  BEGIN {
  	    }
  	}
      }
- # -------------------------------------------------------------- Thesaurus data
-     {
- 	my $fh=Apache::File->new($Apache::lonnet::perlvar{'lonTabDir'}.
- 	       '/thesaurus.dat');
- 	if ($fh) {
-             while (<$fh>) {
-                my ($tword,$tindex,$tcount,$trelated)=split(/\@/,$_);
-                $theindex{$tword}=$tindex;
-                $theword[$tindex]=$tword;
-                $thecount[$tindex]=$tcount;
-                $thetotalcount+=$tcount;
-                $therelated[$tindex]=$trelated;
- 	   }
-         }
-         $theavecount=$thetotalcount/$#thecount;
-     }
      &Apache::lonnet::logthis(
-               "<font color=yellow>INFO: Read file types and thesaurus</font>");
+               "<font color=yellow>INFO: Read file types</font>");
      $readit=1;
- }
+     }  # end of unless($readit)
  }
  # ============================================================= END BEGIN BLOCK
- Line 490  sub csv_translate {
+ Line 498  sub csv_translate {
  }
  ###############################################################
- ###############################################################
  ##        Home server <option> list generating code          ##
  ###############################################################
  #-------------------------------------------
- Line 805  END
+ Line 811  END
  ##   End Authentication changing form generation functions   ##
  ###############################################################
+ ###############################################################
+ ##                Thesaurus Functions                        ##
+ ###############################################################
+ =pod
- # ---------------------------------------------------------- Is this a keyword?
+ =item initialize_keywords
- sub keyword {
+ Initializes the package variable %Keywords if it is empty.  Uses the
-     my $newword=shift;
+ package variable $thesaurus_db_file.
-     $newword=~s/\W//g;
-     $newword=~tr/A-Z/a-z/;
+ =cut
-     my $tindex=$theindex{$newword};
-     if ($tindex) {
+ ###################################################
-         if ($thecount[$tindex]>$theavecount) {
-            return 1;
+ sub initialize_keywords {
-         }
+     return 1 if (scalar keys(%Keywords));
+     # If we are here, %Keywords is empty, so fill it up
+     #   Make sure the file we need exists...
+     if (! -e $thesaurus_db_file) {
+         &Apache::lonnet::logthis("Attempt to access $thesaurus_db_file".
+                                  " failed because it does not exist");
+         return 0;
+     }
+     #   Set up the hash as a database
+     my %thesaurus_db;
+     if (! tie(%thesaurus_db,'GDBM_File',
+               $thesaurus_db_file,&GDBM_READER,0640)){
+         &Apache::lonnet::logthis("Could not tie \%thesaurus_db to ".
+                                  $thesaurus_db_file);
+         return 0;
+     }
+     #  Get the average number of appearances of a word.
+     my $avecount = $thesaurus_db{'average.count'};
+     #  Put keywords (those that appear > average) into %Keywords
+     while (my ($word,$data)=each (%thesaurus_db)) {
+         my ($count,undef) = split /:/,$data;
+         $Keywords{$word}++ if ($count > $avecount);
+     }
+     untie %thesaurus_db;
+     # Remove special values from %Keywords.
+     foreach ('total.count','average.count') {
+         delete($Keywords{$_}) if (exists($Keywords{$_}));
      }
-     return 0;
+     return 1;
+ }
+ ###################################################
+ =pod
+ =item keyword($word)
+ Returns true if $word is a keyword.  A keyword is a word that appears more
+ than the average number of times in the thesaurus database.  Calls
+ &initialize_keywords
+ =cut
+ ###################################################
+ sub keyword {
+     return if (!&initialize_keywords());
+     my $word=lc(shift());
+     $word=~s/\W//g;
+     return exists($Keywords{$word});
  }
+ ###################################################
+ #         Old code, to be removed soon            #
+ ###################################################
  # -------------------------------------------------------- Return related words
+ #sub related {
+ #    my $newword=shift;
+ #    $newword=~s/\W//g;
+ #    $newword=~tr/A-Z/a-z/;
+ #    my $tindex=$theindex{$newword};
+ #    if ($tindex) {
+ #        my %found=();
+ #        foreach (split(/\,/,$therelated[$tindex])) {
+ ## - Related word found
+ #            my ($ridx,$rcount)=split(/\:/,$_);
+ ## - Direct relation index
+ #            my $directrel=$rcount/$thecount[$tindex];
+ #            if ($directrel>$thethreshold) {
+ #               foreach (split(/\,/,$therelated[$ridx])) {
+ #                  my ($rridx,$rrcount)=split(/\:/,$_);
+ #                  if ($rridx==$tindex) {
+ ## - Determine reverse relation index
+ #                     my $revrel=$rrcount/$thecount[$ridx];
+ ## - Calculate full index
+ #                     $found{$ridx}=$directrel*$revrel;
+ #                     if ($found{$ridx}>$thethreshold) {
+ #                        foreach (split(/\,/,$therelated[$ridx])) {
+ #                            my ($rrridx,$rrrcount)=split(/\:/,$_);
+ #                            unless ($found{$rrridx}) {
+ #                               my $revrevrel=$rrrcount/$thecount[$ridx];
+ #                               if (
+ #                          $directrel*$revrel*$revrevrel>$thethreshold
+ #                               ) {
+ #                                  $found{$rrridx}=
+ #                                       $directrel*$revrel*$revrevrel;
+ #                               }
+ #                            }
+ #                        }
+ #                     }
+ #                  }
+ #               }
+ #            }
+ #        }
+ #    }
+ #    return ();
+ #}
- sub related {
+ ###############################################################
-     my $newword=shift;
-     $newword=~s/\W//g;
+ =pod
-     $newword=~tr/A-Z/a-z/;
-     my $tindex=$theindex{$newword};
+ =item get_related_words
-     if ($tindex) {
-         my %found=();
+ Look up a word in the thesaurus.  Takes a scalar arguement and returns
-         foreach (split(/\,/,$therelated[$tindex])) {
+ an array of words.  If the keyword is not in the thesaurus, an empty array
- # - Related word found
+ will be returned.  The order of the words returned is determined by the
-             my ($ridx,$rcount)=split(/\:/,$_);
+ database which holds them.
- # - Direct relation index
-             my $directrel=$rcount/$thecount[$tindex];
+ Uses global $thesaurus_db_file.
-             if ($directrel>$thethreshold) {
-                foreach (split(/\,/,$therelated[$ridx])) {
+ =cut
-                   my ($rridx,$rrcount)=split(/\:/,$_);
-                   if ($rridx==$tindex) {
+ ###############################################################
- # - Determine reverse relation index
-                      my $revrel=$rrcount/$thecount[$ridx];
+ sub get_related_words {
- # - Calculate full index
+     my $keyword = shift;
-                      $found{$ridx}=$directrel*$revrel;
+     my %thesaurus_db;
-                      if ($found{$ridx}>$thethreshold) {
+     if (! -e $thesaurus_db_file) {
-                         foreach (split(/\,/,$therelated[$ridx])) {
+         &Apache::lonnet::logthis("Attempt to access $thesaurus_db_file ".
-                             my ($rrridx,$rrrcount)=split(/\:/,$_);
+                                  "failed because the file does not exist");
-                             unless ($found{$rrridx}) {
+         return ();
-                                my $revrevrel=$rrrcount/$thecount[$ridx];
+     }
-                                if (
+     if (! tie(%thesaurus_db,'GDBM_File',
-                           $directrel*$revrel*$revrevrel>$thethreshold
+               $thesaurus_db_file,&GDBM_READER,0640)){
-                                ) {
+         return ();
-                                   $found{$rrridx}=
+     }
-                                        $directrel*$revrel*$revrevrel;
+     my @Words=();
-                                }
+     if (exists($thesaurus_db{$keyword})) {
-                             }
+         $_ = $thesaurus_db{$keyword};
-                         }
+         (undef,@Words) = split/:/;  # The first element is the number of times
-                      }
+                                     # the word appears.  We do not need it now.
-                   }
+         for (my $i=0;$i<=$#Words;$i++) {
-                }
+             ($Words[$i],undef)= split/\,/,$Words[$i];
-             }
          }
      }
-     return ();
+     untie %thesaurus_db;
+     return @Words;
  }
+ ###############################################################
+ ##              End Thesaurus Functions                      ##
+ ###############################################################
  # ---------------------------------------------------------------- Language IDs
  sub languageids {
      return sort(keys(%language));

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.45
changed lines
	Added in v.1.46