#!/usr/bin/perl
# The LearningOnline Network with CAPA
# $Id: checksimilar_1file.pl,v 1.2 2013/01/21 15:29:30 bisitz Exp $

use strict;
use warnings;

####
#### Checks whether there are similar keys in the input file (for example
#### de.pm), i.e. keys that become identical once case, punctuation and
#### translation parameters such as [_1] are ignored.
####
#### Usage: checksimilar_1file.pl <lexicon file>
####

####--------Configuration--------####

# Include check for similar phrases -> set to 1
my $inclphrases = 0;

####--------Subroutines--------####

# Read a lexicon file and return the contents of its %Lexicon hash as a
# key/value list.
#
# The file is evaluated as Perl code; it is expected to assign to an
# unqualified %Lexicon, which resolves to the lexical declared below.
#
# Parameter: $fn - path of the file to read.
# Returns:   the key/value pairs found in %Lexicon (empty if none were set).
# Dies if the file cannot be opened; warns if evaluating the file fails.
sub read_lexicon {
    my ($fn) = @_;

    # Three-argument open with a lexical handle: the file name can no longer
    # be misinterpreted as an open mode.
    open(my $in, '<', $fn) or die "Cannot open '$fn': $!";
    my $contents = do { local $/; <$in> };    # slurp the whole file
    close($in);
    $contents = '' unless defined $contents;

    # The evaluated code sees this lexical and fills it.
    my %Lexicon = ();

    # NOTE(review): string eval executes the file as code - only run this
    # script on trusted files.
    eval($contents);
    warn "Problem while evaluating '$fn': $@" if $@;

    return %Lexicon;
}

# Reduce a key to the form used for the similarity comparison.
#
# Removes translation parameters ([_1], [_12], ...) and the punctuation
# characters listed below. If $inclphrases is set, some related phrases are
# additionally replaced by common placeholder tokens.
#
# Parameter: $text - the key to normalize.
# Returns:   the normalized text (case is left untouched).
sub similarities {
    my ($text) = @_;

    $text =~ s/\[_\d+\]//g;          # translation parameters
    $text =~ s/[.,\_\-?!: \/]//g;    # punctuation

    if ($inclphrases) {
        # The substitutions are applied in exactly this order.
        $text =~ s/course/X002X/gi;
        $text =~ s/community/X002X/gi;
        # NOTE(review): maps to X001X, not X002X - confirm this is intended.
        $text =~ s/communities/X001X/gi;
        $text =~ s/member/X003X/gi;
        $text =~ s/student/X003X/gi;
        # Cannot match any more: every "student" was already replaced above.
        $text =~ s/students/X003X/gi;
    }

    return $text;
}

####--------Main program--------####

my $file = $ARGV[0];
die "Usage: $0 <lexicon file>\n" unless defined $file && length $file;

my %lang = read_lexicon($file);

# Normalize every key exactly once and collect the keys that share the same
# case-insensitive normalized form. Sorting makes the output reproducible.
my %group;
for my $key (sort keys %lang) {
    push @{ $group{ lc(similarities($key)) } }, $key;
}

# Within each group, report the first key together with every other key of
# the group. Keys already reported as a partner are not reported again.
my $count = 0;
for my $norm (sort keys %group) {
    my ($first, @others) = @{ $group{$norm} };
    for my $other (@others) {
        print '###'.$first.'###'.$other."###\n";
        $count++;
    }
}

print "Finished. ".$count." similar keys found.\n";