#!/usr/bin/perl
# The LearningOnline Network with CAPA
# $Id: checksimilar_1file.pl,v 1.1 2010/03/09 15:16:26 wenzelju Exp $

use strict;
use warnings;

####
#### Checks whether the input file (for example de.pm) contains keys that
#### are similar to each other, i.e. keys that become equal once letter
#### case and the characters . , _ - ? ! : are ignored.
####
#### Usage: checksimilar_1file.pl <lexicon file>
####

####--------Subroutines--------####

# Read a lexicon file and return the contents of its %Lexicon hash.
#
#   $fn     - path of a Perl source file that assigns to %Lexicon
#   returns - the key/value pairs of %Lexicon as a flat list
#   dies    - if the file cannot be opened or its code fails to evaluate
#
# NOTE: the file is executed with a string eval, so only run this script
# on trusted files.
sub read_lexicon {
    my $fn = shift;

    # Read file into memory
    open(my $in, '<', $fn) or die "Cannot open '$fn' for reading: $!\n";
    my $contents = join('', <$in>);
    close($in);

    # The string eval sees the lexicals declared here: the file's
    # assignment to %Lexicon fills the hash below, and the appended
    # statement copies it into %filecontent. The "\n" keeps a final
    # comment line without a newline from swallowing the appended code.
    my %filecontent = ();
    my %Lexicon     = ();
    eval($contents . "\n" . '; %filecontent=%Lexicon;');
    die "Cannot evaluate '$fn': $@" if $@;

    # __END__ / __DATA__ in the file stops the parser before the appended
    # copy statement is reached; take the result from %Lexicon directly.
    %filecontent = %Lexicon unless %filecontent;

    return %filecontent;
}

# Return a copy of $text with the characters . , _ - ? ! : removed.
sub similarities {
    my $text = shift;
    $text =~ tr/.,_?!:\-//d;
    return $text;
}

# Find keys that are equal after similarities() and lower-casing.
#
#   $lexicon - reference to a hash; only its keys are examined
#   returns  - list of [ $first, $other ] array refs. Keys of one group
#              are sorted; every further key is paired with the first
#              one, so a group of k similar keys yields k-1 pairs.
sub find_similar_keys {
    my ($lexicon) = @_;

    # Group the keys by their normalised form (one pass instead of
    # comparing every key with every other key).
    my %keys_for;
    for my $key (sort keys %{$lexicon}) {
        push @{ $keys_for{ lc(similarities($key)) } }, $key;
    }

    my @pairs;
    for my $normalized (sort keys %keys_for) {
        my ($first, @others) = @{ $keys_for{$normalized} };
        push @pairs, map { [ $first, $_ ] } @others;
    }
    return @pairs;
}

####--------Main program--------####

# Print every pair of similar keys in the given file, then a summary line.
sub main {
    my @args = @_;
    die "Usage: $0 <lexicon file>\n" unless @args;

    my %lang  = read_lexicon($args[0]);
    my @pairs = find_similar_keys(\%lang);

    for my $pair (@pairs) {
        print('###' . $pair->[0] . '###' . $pair->[1] . "###\n");
    }
    print('Finished. ' . scalar(@pairs) . " similar keys found.\n");
    return;
}

# Run only when executed as a script, not when loaded with require/do.
main(@ARGV) unless caller();

1;