--- loncom/localize/localize/checkduplicates.pl	2009/04/07 10:51:53	1.1
+++ loncom/localize/localize/checkduplicates.pl	2009/04/08 15:10:22	1.2
@@ -1,8 +1,9 @@
 #!/usr/bin/perl
 # The LearningOnline Network with CAPA
-# $Id: checkduplicates.pl,v 1.1 2009/04/07 10:51:53 bisitz Exp $
+# $Id: checkduplicates.pl,v 1.2 2009/04/08 15:10:22 bisitz Exp $
 
 # 07.04.2009 Stefan Bisitz
+# Optimization ideas by Stefan Droeschler
 
 use strict;
 use warnings;
@@ -37,48 +38,31 @@ if ( $ARGV[0] =~ m/^\s*-h/ ) {
 # Start Analysis
 print "checkduplicates is searching for duplicates in $filename...\n";
 
-
 # Manually read all stored keys from translation file (inlcuding probable duplicates)
-my @all_keys;
+# and count key occurrences in a separate hash.
+my %counter;
 my $line;
 open( FH, "<", $filename ) or die "$filename cannot be opened\n";
 while ( !eof(FH) ) {
     $line = readline(FH);
-    next if $line=~/^\s*#/;
+    next if $line=~/^\s*#/; # ignore comments
     #$exprNP=~s/^["'](.*)["']$/$1/; # Remove " and ' at beginning and end
-    if ($line =~ m/   "(.*)"/) { # Find and save "..." key
-        push(@all_keys, $1);
-    } elsif ($line =~ m/   '(.*)'/) { # Find and save '...' key
-        push(@all_keys, $1);
+    if ($line =~ m/^\s+["'](.*)["']/) { # Find "..." or '...' key
+        $counter{$1}++;
     }
 }
 close(FH);
 
-
-# Read lexicon hash from translation file into hash
-my %lexicon = &readlexicon($filename);
-
-
-# Synch lexicon hash and Array of keys to find all doublettes
-# Check for each key in the lexicon hash if this key occures more than one time in the hash file
-# If found, print warning and count
-
+# Print all keys which occur more than once
 my $dupl = 0; # total counter to count when a key occurred more than one time
-my %found; # Hash to save keys which have already been found
-
-foreach my $lex_key (keys %lexicon) {
-    my $counter = 0;
-    foreach my $all_key (@all_keys) {
-        if ($all_key eq $lex_key) {
-            $counter++;
-            if ( ($counter > 1) && (!$found{$all_key}) ) {
-                $dupl++ if ($counter == 2);
-                $found{$all_key} = 1;
-                print 'Found duplicate key: '.$lex_key."\n";
-            }
-        }
+foreach my $count_key (keys %counter) {
+    my $count_value = $counter{$count_key};
+    if ($count_value > 1) {
+        print 'Found '.$count_value.' times key: '.$count_key."\n";
+        $dupl++;
     }
 }
+
 if ($dupl == 0) {
     print "Be happy - No duplicates found.\n";
 } else {
@@ -86,38 +70,4 @@ if ($dupl == 0) {
 }
 
 # ----------------------------------------------------------------
-# Code taken from sync.pl
-# in : $filename
-# out: %lexicon
-
-sub readlexicon {
-    # Read translation file into memory
-    my $fn=shift;
-    open(IN,$fn) or die;
-    my %lexicon=();
-    my $contents=join('',<IN>);
-    close(IN);
-    # Tidy up: remove header data
-    $contents=~s/package Apache\:[^\;]+//;
-    $contents=~s/use base[^\;]+//;
-    # Build hash with hash from file
-    my %Lexicon=();
-    eval($contents.'; %lexicon=%Lexicon;');
-    if ($@ ne "") {
-        print "\nAn error occurred during the attempt to retrieve the translation hash for the file '$fn'.\n"
-             ."Error: ".$@."\n";
-        die;
-    }
-    # Remove entries which are not needed for synch
-    delete $lexicon{'_AUTO'};
-    delete $lexicon{'char_encoding'};
-    delete $lexicon{'language_code'};
-    # Hash is expected not to be empty
-    if (!scalar(keys(%lexicon))) {
-        print "\nWarning: No translation phrases found in '$fn'.\n";
-    }
-    return %lexicon;
-}
-
-# ----------------------------------------------------------------