Annotation of loncom/localize/localize/checksimilar_1file.pl, revision 1.2
1.1 wenzelju 1: #!/usr/bin/perl
2: # The LearningOnline Network with CAPA
1.2 ! bisitz 3: # $Id: checksimilar_1file.pl,v 1.1 2010/03/09 15:16:26 wenzelju Exp $
1.1 wenzelju 4:
5: use strict;
6: use warnings;
7:
8: ####
9: #### Checks, if there are similar keys in the inputfile (for example de.pm)
10: ####
11:
1.2 ! bisitz 12: ####--------Configuration--------####
! 13: # Include check for similar phrases -> set to 1
! 14: my $inclphrases = 0;
1.1 wenzelju 15:
16:
17: ####--------Subroutines--------####
18:
19:
20:
21: sub read {
22: # Read file into memory
23: my $fn = shift;
24: open(IN,$fn) or die;
25: my %filecontent = ();
26: my $contents = join('',<IN>);
27: close(IN);
28: # Build hash with hash from file
29: my %Lexicon=();
30: eval($contents.'; %filecontent=%Lexicon;');
31:
32: return %filecontent;
33: }
34:
1.2 ! bisitz 35: sub similarities {
1.1 wenzelju 36: my $text = shift;
1.2 ! bisitz 37: $text =~ s/\[_\d\]//g; # translation parameters
! 38: $text =~ s/[.,\_\-?!: \/]//g; # punctuation
! 39: if ($inclphrases) {
! 40: $text =~ s/course/X002X/gi;
! 41: $text =~ s/community/X002X/gi;
! 42: $text =~ s/communities/X001X/gi;
! 43: $text =~ s/member/X003X/gi;
! 44: $text =~ s/student/X003X/gi;
! 45: $text =~ s/students/X003X/gi;
! 46: }
1.1 wenzelju 47: return $text;
48: }
49:
50:
51:
52:
53: ####--------Main programm--------####
54:
55:
56: my $file = $ARGV[0];
57: my %lang=&read($file);
58: my $count = 0;
59: #Copy hash for comparision
60: my %lang2=%lang;
61: my %sim;
62:
63: #For each key in the hash compare it with each other key in the hash except itself
64: while( my ($kOUT, $vOUT) = each %lang ) {
65:
66: #Delete the current key, so that it does not find itself
67: #(revert this action later, see below)
68: delete $lang2{$kOUT};
69: my $temp = $kOUT;
70: $temp = &similarities($temp);
71:
72: while( my ($kIN, $vIN) = each %lang2 ) {
73: my $temp2 = $kIN;
74: $temp2 = &similarities($temp2);
75: #Print key, if it has similarity to another key and if it has not been checked already
76: if(lc($temp) eq lc($temp2) && !($sim{$kOUT})){
77: print ('###'.$kOUT."###".$kIN."###\n");
78: #Remeber key as already checked
79: $sim{$kIN} = $kOUT;
80: $count++;
81: }
82: }
83:
84: $lang2{$kOUT}=$vOUT;
85: }
86: print("Finished. ".$count." similar keys found.\n");
87:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>