version 1.1, 2009/04/07 10:51:53
|
version 1.2, 2009/04/08 15:10:22
|
Line 3
|
Line 3
|
# $Id$ |
# $Id$ |
|
|
# 07.04.2009 Stefan Bisitz |
# 07.04.2009 Stefan Bisitz |
|
# Optimization ideas by Stefan Droeschler |
|
|
use strict; |
use strict; |
use warnings; |
use warnings; |
Line 37 if ( $ARGV[0] =~ m/^\s*-h/ ) {
|
Line 38 if ( $ARGV[0] =~ m/^\s*-h/ ) {
|
# Start Analysis |
# Start Analysis |
print "checkduplicates is searching for duplicates in $filename...\n"; |
print "checkduplicates is searching for duplicates in $filename...\n"; |
|
|
|
|
# Manually read all stored keys from translation file (including probable duplicates) |
# Manually read all stored keys from translation file (including probable duplicates) |
my @all_keys; |
# and count key occurrences in a separate hash. |
|
my %counter; |
my $line; |
my $line; |
open( FH, "<", $filename ) or die "$filename cannot be opened\n"; |
open( FH, "<", $filename ) or die "$filename cannot be opened\n"; |
while ( !eof(FH) ) { |
while ( !eof(FH) ) { |
$line = readline(FH); |
$line = readline(FH); |
next if $line=~/^\s*#/; |
next if $line=~/^\s*#/; # ignore comments |
#$exprNP=~s/^["'](.*)["']$/$1/; # Remove " and ' at beginning and end |
#$exprNP=~s/^["'](.*)["']$/$1/; # Remove " and ' at beginning and end |
if ($line =~ m/ "(.*)"/) { # Find and save "..." key |
if ($line =~ m/^\s+["'](.*)["']/) { # Find "..." or '...' key |
push(@all_keys, $1); |
$counter{$1}++; |
} elsif ($line =~ m/ '(.*)'/) { # Find and save '...' key |
|
push(@all_keys, $1); |
|
} |
} |
} |
} |
close(FH); |
close(FH); |
|
|
|
# Print all keys which occur more than once |
# Read lexicon hash from translation file into hash |
|
my %lexicon = &readlexicon($filename); |
|
|
|
|
|
# Sync lexicon hash and array of keys to find all duplicates |
|
# Check for each key in the lexicon hash whether this key occurs more than once in the hash file |
|
# If found, print warning and count |
|
|
|
my $dupl = 0; # total counter to count when a key occurred more than one time |
my $dupl = 0; # total counter to count when a key occurred more than one time |
my %found; # Hash to save keys which have already been found |
foreach my $count_key (keys %counter) { |
|
my $count_value = $counter{$count_key}; |
foreach my $lex_key (keys %lexicon) { |
if ($count_value > 1) { |
my $counter = 0; |
print 'Found '.$count_value.' times key: '.$count_key."\n"; |
foreach my $all_key (@all_keys) { |
$dupl++; |
if ($all_key eq $lex_key) { |
|
$counter++; |
|
if ( ($counter > 1) && (!$found{$all_key}) ) { |
|
$dupl++ if ($counter == 2); |
|
$found{$all_key} = 1; |
|
print 'Found duplicate key: '.$lex_key."\n"; |
|
} |
|
} |
|
} |
} |
} |
} |
|
|
if ($dupl == 0) { |
if ($dupl == 0) { |
print "Be happy - No duplicates found.\n"; |
print "Be happy - No duplicates found.\n"; |
} else { |
} else { |
Line 86 if ($dupl == 0) {
|
Line 70 if ($dupl == 0) {
|
} |
} |
|
|
# ---------------------------------------------------------------- |
# ---------------------------------------------------------------- |
# Code taken from sync.pl |
|
# in : $filename |
|
# out: %lexicon |
|
|
|
# Read a translation file and return the lexicon it defines.
#
# in : $fn      - path of the translation file
# out: %lexicon - the key/phrase pairs the file assigns to %Lexicon, minus the
#                 administrative entries '_AUTO', 'char_encoding' and
#                 'language_code'
#
# Dies (with a message naming the file) if the file cannot be opened or if
# its contents fail to evaluate. Prints a warning if no phrases remain.
sub readlexicon {
    my $fn = shift;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe, and the handle does not clash
    # with any global bareword handle used elsewhere in the script.
    open(my $in, '<', $fn)
        or die "Cannot open translation file '$fn': $!\n";
    my $contents = join('', <$in>);
    close($in);

    # Tidy up: remove header data (package declaration and base class), so
    # that the assignment in the file targets the lexical %Lexicon below.
    $contents =~ s/package Apache\:[^\;]+//;
    $contents =~ s/use base[^\;]+//;

    # Build hash with hash from file.
    # SECURITY NOTE: this executes the file's contents as Perl code. Only run
    # this on translation files from a trusted source.
    my %lexicon = ();
    my %Lexicon = ();
    eval($contents.'; %lexicon=%Lexicon;');
    if ($@ ne "") {
        print "\nAn error occurred during the attempt to retrieve the translation hash for the file '$fn'.\n"
             ."Error: ".$@."\n";
        die "Could not evaluate translation file '$fn'\n";
    }

    # Remove entries which are not needed for synch
    delete $lexicon{'_AUTO'};
    delete $lexicon{'char_encoding'};
    delete $lexicon{'language_code'};

    # Hash is expected not to be empty
    if (!scalar(keys(%lexicon))) {
        print "\nWarning: No translation phrases found in '$fn'.\n";
    }

    return %lexicon;
}
|
|
|
# ---------------------------------------------------------------- |
|
|
|