--- loncom/localize/localize/checkduplicates.pl 2009/04/07 10:51:53 1.1 +++ loncom/localize/localize/checkduplicates.pl 2009/04/08 15:10:22 1.2 @@ -1,8 +1,9 @@ #!/usr/bin/perl # The LearningOnline Network with CAPA -# $Id: checkduplicates.pl,v 1.1 2009/04/07 10:51:53 bisitz Exp $ +# $Id: checkduplicates.pl,v 1.2 2009/04/08 15:10:22 bisitz Exp $ # 07.04.2009 Stefan Bisitz +# Optimization ideas by Stefan Droeschler use strict; use warnings; @@ -37,48 +38,31 @@ if ( $ARGV[0] =~ m/^\s*-h/ ) { # Start Analysis print "checkduplicates is searching for duplicates in $filename...\n"; - # Manually read all stored keys from translation file (inlcuding probable duplicates) -my @all_keys; +# and count key occurrences in a separate hash. +my %counter; my $line; open( FH, "<", $filename ) or die "$filename cannot be opened\n"; while ( !eof(FH) ) { $line = readline(FH); - next if $line=~/^\s*#/; + next if $line=~/^\s*#/; # ignore comments #$exprNP=~s/^["'](.*)["']$/$1/; # Remove " and ' at beginning and end - if ($line =~ m/ "(.*)"/) { # Find and save "..." key - push(@all_keys, $1); - } elsif ($line =~ m/ '(.*)'/) { # Find and save '...' key - push(@all_keys, $1); + if ($line =~ m/^\s+["'](.*)["']/) { # Find "..." or '...' key + $counter{$1}++; } } close(FH); - -# Read lexicon hash from translation file into hash -my %lexicon = &readlexicon($filename); - - -# Synch lexicon hash and Array of keys to find all doublettes -# Check for each key in the lexicon hash if this key occures more than one time in the hash file -# If found, print warning and count - +# Print all keys which occures more than one time my $dupl = 0; # total counter to count when a key occurred more than one time -my %found; # Hash to save keys which have already been found - -foreach my $lex_key (keys %lexicon) { - my $counter = 0; - foreach my $all_key (@all_keys) { - if ($all_key eq $lex_key) { - $counter++; - if ( ($counter > 1) && (!$found{$all_key}) ) { - $dupl++ if ($counter == 2); - $found{$all_key} = 1; - print 'Found duplicate key: '.$lex_key."\n"; - } - } +foreach my $count_key (keys %counter) { + my $count_value = $counter{$count_key}; + if ($count_value > 1) { + print 'Found '.$count_value.' times key: '.$count_key."\n"; + $dupl++; } } + if ($dupl == 0) { print "Be happy - No duplicates found.\n"; } else { @@ -86,38 +70,4 @@ if ($dupl == 0) { } # ---------------------------------------------------------------- -# Code taken from sync.pl -# in : $filename -# out: %lexicon - -sub readlexicon { - # Read translation file into memory - my $fn=shift; - open(IN,$fn) or die; - my %lexicon=(); - my $contents=join('',); - close(IN); - # Tidy up: remove header data - $contents=~s/package Apache\:[^\;]+//; - $contents=~s/use base[^\;]+//; - # Build hash with hash from file - my %Lexicon=(); - eval($contents.'; %lexicon=%Lexicon;'); - if ($@ ne "") { - print "\nAn error occurred during the attempt to retrieve the translation hash for the file '$fn'.\n" - ."Error: ".$@."\n"; - die; - } - # Remove entries which are not needed for synch - delete $lexicon{'_AUTO'}; - delete $lexicon{'char_encoding'}; - delete $lexicon{'language_code'}; - # Hash is expected not to be empty - if (!scalar(keys(%lexicon))) { - print "\nWarning: No translation phrases found in '$fn'.\n"; - } - return %lexicon; -} - -# ----------------------------------------------------------------