File:  [LON-CAPA] / loncom / localize / localize / checksimilar_1file.pl
Revision 1.1: download - view: text, annotated - select for diffs
Tue Mar 9 15:16:26 2010 UTC (14 years, 2 months ago) by wenzelju
Branches: MAIN
CVS tags: language_hyphenation_merge, language_hyphenation, PRINT_INCOMPLETE_base, PRINT_INCOMPLETE, HEAD, BZ4492-merge, BZ4492-feature_horizontal_radioresponse
Scripts to check for similar phrases in translationfiles.

#!/usr/bin/perl
# The LearningOnline Network with CAPA
# $Id: checksimilar_1file.pl,v 1.1 2010/03/09 15:16:26 wenzelju Exp $

use strict;
use warnings;

####
#### Checks whether there are similar keys in the input file (for example de.pm)
####



####--------Subroutines--------####



sub read {
    # Load a translation file (for example de.pm) and return the contents
    # of the %Lexicon hash that the file's code assigns.
    #
    # Parameter: $fn - path of the file to load.
    # Returns:   the key/value pairs of %Lexicon as a list.
    # Dies:      if no file name is given, the file cannot be opened, or
    #            its code fails to evaluate.
    #
    # NOTE: the file content is executed via string eval, so this must only
    # be used on trusted files.
    my $fn = shift;
    die "No input file given\n" unless defined $fn;

    # Read file into memory. Three-argument open with a lexical handle keeps
    # the file name from being interpreted as an open mode and avoids a
    # global bareword handle.
    open(my $in, '<', $fn) or die "Cannot open '$fn' for reading: $!\n";
    my $contents = join('', <$in>);
    close($in);

    # Build hash with hash from file
    my %filecontent = ();
    my %Lexicon = ();
    # The newline keeps the appended statement from being swallowed by a
    # trailing comment when the file does not end with a line break.
    eval($contents."\n".'; %filecontent=%Lexicon;');
    die "Cannot evaluate '$fn': $@" if $@;

    # Parsing stops at an __END__ marker, so the appended copy statement may
    # never have run; in that case take what was assigned to the lexical hash.
    %filecontent = %Lexicon unless %filecontent;

    return %filecontent;
}

sub similarities{
   # Normalise a phrase for comparison: strip every occurrence of the
   # punctuation characters . , _ - ? ! : and return what is left.
   my ($phrase) = @_;

   # tr///d deletes the listed characters in a single pass.
   $phrase =~ tr/.,_\-?!://d;

   return $phrase;
}




####--------Main program--------####


my $file = $ARGV[0];
# Fail early with a usage hint when no file name was given.
die "Usage: $0 <translationfile>\n" unless defined $file;

my %lang=&read($file);
my $count = 0;

# Maps each normalised phrase (punctuation stripped, lower-cased) to the
# first key that produced it.
my %first_key_for;

# Every key is normalised once and looked up in %first_key_for instead of
# being compared against every other key. Keys are visited in sorted order
# so that the report is reproducible between runs.
for my $key (sort keys %lang) {
   my $normalised = lc(&similarities($key));

   if (exists $first_key_for{$normalised}) {
      # Print the first key of the group together with the similar key.
      # exists() is used so that keys like '0' or '' are handled correctly.
      print('###'.$first_key_for{$normalised}."###".$key."###\n");
      $count++;
   }
   else {
      # Remember key as the representative of its group
      $first_key_for{$normalised} = $key;
   }
}
print("Finished. ".$count." similar keys found.\n");


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>