--- loncom/interface/loncommon.pm 2001/12/21 17:06:56 1.19 +++ loncom/interface/loncommon.pm 2001/12/25 21:02:35 1.20 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # a pile of common routines # -# $Id: loncommon.pm,v 1.19 2001/12/21 17:06:56 harris41 Exp $ +# $Id: loncommon.pm,v 1.20 2001/12/25 21:02:35 www Exp $ # # Copyright Michigan State University Board of Trustees # @@ -29,6 +29,8 @@ # 2/13-12/7 Guy Albertelli # 12/11,12/12,12/17 Scott Harrison # 12/21 Gerd Kortemeyer +# 12/21 Scott Harrison +# 12/25 Gerd Kortemeyer # Makes a table out of the previous attempts # Inputs result_from_symbread, user, domain, course_id @@ -41,12 +43,22 @@ use POSIX qw(strftime); use Apache::Constants qw(:common); use Apache::lonmsg(); - +# ----------------------------------------------- Filetypes/Languages/Copyright my %language; my %cprtag; my %fe; my %fd; my %fc; +# -------------------------------------------------------------- Thesaurus data +my @therelated=(); +my @theword=(); +my @thecount=(); +my %theindex=(); +my $thetotalcount=0; +my $thefuzzy=2; +my $thethreshold=0.1/$thefuzzy; +my $theavecount; + # ----------------------------------------------------------------------- BEGIN BEGIN { # ------------------------------------------------------------------- languages @@ -104,6 +116,82 @@ BEGIN { } } } +# -------------------------------------------------------------- Thesaurus data + { + my $fh=Apache::File->new($Apache::lonnet::perlvar{'lonTabDir'}. + '/thesaurus.dat'); + if ($fh) { + while (<$fh>) { + my ($tword,$tindex,$tcount,$trelated)=split(/\@/,$_); + $theindex{$tword}=$tindex; + $theword[$tindex]=$tword; + $thecount[$tindex]=$tcount; + $thetotalcount+=$tcount; + $therelated[$tindex]=$trelated; + } + } + $theavecount=$thetotalcount/$#thecount; + } +} +# ============================================================= END BEGIN BLOCK + + +# ---------------------------------------------------------- Is this a keyword? + +sub keyword { + my $newword=shift; + $newword=~s/\W//g; + $newword=~tr/A-Z/a-z/; + my $tindex=$theindex{$newword}; + if ($tindex) { + if ($thecount[$tindex]>$theavecount) { + return 1; + } + } + return 0; +} +# -------------------------------------------------------- Return related words + +sub related { + my $newword=shift; + $newword=~s/\W//g; + $newword=~tr/A-Z/a-z/; + my $tindex=$theindex{$newword}; + if ($tindex) { + my %found=(); + foreach (split(/\,/,$therelated[$tindex])) { +# - Related word found + my ($ridx,$rcount)=split(/\:/,$_); +# - Direct relation index + my $directrel=$rcount/$thecount[$tindex]; + if ($directrel>$thethreshold) { + foreach (split(/\,/,$therelated[$ridx])) { + my ($rridx,$rrcount)=split(/\:/,$_); + if ($rridx==$tindex) { +# - Determine reverse relation index + my $revrel=$rrcount/$thecount[$ridx]; +# - Calculate full index + $found{$ridx}=$directrel*$revrel; + if ($found{$ridx}>$thethreshold) { + foreach (split(/\,/,$therelated[$ridx])) { + my ($rrridx,$rrrcount)=split(/\:/,$_); + unless ($found{$rrridx}) { + my $revrevrel=$rrrcount/$thecount[$ridx]; + if ( + $directrel*$revrel*$revrevrel>$thethreshold + ) { + $found{$rrridx}= + $directrel*$revrel*$revrevrel; + } + } + } + } + } + } + } + } + } + return (); } # ---------------------------------------------------------------- Language IDs