Annotation of loncom/localize/localize/checksimilar_1file.pl, revision 1.2

1.1       wenzelju    1: #!/usr/bin/perl
                      2: # The LearningOnline Network with CAPA
1.2     ! bisitz      3: # $Id: checksimilar_1file.pl,v 1.1 2010/03/09 15:16:26 wenzelju Exp $
1.1       wenzelju    4: 
                      5: use strict;
                      6: use warnings;
                      7: 
                      8: ####
                      9: #### Checks, if there are similar keys in the inputfile (for example de.pm)
                     10: ####
                     11: 
1.2     ! bisitz     12: ####--------Configuration--------####
        !            13: # Include check for similar phrases -> set to 1
        !            14: my $inclphrases = 0;
1.1       wenzelju   15: 
                     16: 
                     17: ####--------Subroutines--------####
                     18: 
                     19: 
                     20: 
                     21: sub read {
                     22:     # Read file into memory
                     23:     my $fn = shift;
                     24:     open(IN,$fn) or die;
                     25:     my %filecontent = ();
                     26:     my $contents = join('',<IN>);
                     27:     close(IN);
                     28:     # Build hash with hash from file
                     29:     my %Lexicon=();
                     30:     eval($contents.'; %filecontent=%Lexicon;');
                     31: 
                     32:     return %filecontent;
                     33: }
                     34: 
1.2     ! bisitz     35: sub similarities {
1.1       wenzelju   36:    my $text = shift;
1.2     ! bisitz     37:    $text =~ s/\[_\d\]//g; # translation parameters
        !            38:    $text =~ s/[.,\_\-?!: \/]//g; # punctuation
        !            39:    if ($inclphrases) {
        !            40:        $text =~ s/course/X002X/gi;
        !            41:        $text =~ s/community/X002X/gi;
        !            42:        $text =~ s/communities/X001X/gi;    
        !            43:        $text =~ s/member/X003X/gi;
        !            44:        $text =~ s/student/X003X/gi;
        !            45:        $text =~ s/students/X003X/gi;
        !            46:    }
1.1       wenzelju   47:    return $text;
                     48: }
                     49: 
                     50: 
                     51: 
                     52: 
                     53: ####--------Main programm--------####
                     54: 
                     55: 
                     56: my $file = $ARGV[0];
                     57: my %lang=&read($file);
                     58: my $count = 0;
                     59: #Copy hash for comparision
                     60: my %lang2=%lang;
                     61: my %sim;
                     62: 
                     63: #For each key in the hash compare it with each other key in the hash except itself
                     64: while( my ($kOUT, $vOUT) = each %lang ) {
                     65: 
                     66:    #Delete the current key, so that it does not find itself
                     67:    #(revert this action later, see below) 
                     68:    delete $lang2{$kOUT};
                     69:    my $temp = $kOUT;
                     70:    $temp = &similarities($temp);
                     71:    
                     72:    while( my ($kIN, $vIN) = each %lang2 ) {
                     73:       my $temp2 = $kIN;
                     74:       $temp2 = &similarities($temp2);
                     75:          #Print key, if it has similarity to another key and if it has not been checked already
                     76:          if(lc($temp) eq lc($temp2) && !($sim{$kOUT})){
                     77:             print ('###'.$kOUT."###".$kIN."###\n");
                     78:             #Remeber key as already checked
                     79:             $sim{$kIN} = $kOUT; 
                     80:             $count++;
                     81:          }
                     82:    }
                     83: 
                     84:    $lang2{$kOUT}=$vOUT;
                     85: }
                     86: print("Finished. ".$count." similar keys found.\n");
                     87: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>