Diff for /loncom/localize/localize/checksimilar_2files.pl between versions 1.2 and 1.5

version 1.2, 2010/08/03 13:00:21 version 1.5, 2013/01/22 17:18:05
Line 35  sub read { Line 35  sub read {
     return %filecontent;      return %filecontent;
 }  }
   
 sub similarities{  sub similar_chars {
     my $text = shift;      my $text = shift;
     $text =~ s/[.,\_\-?!:]//g;      $text =~ s/\[_\d\]//g; # translation parameters
       $text =~ s/[.,\_\-?!: \/]//g; # punctuation
     return $text;      return $text;
 }  }
   
   
   
 sub CourseCommunity {  sub similar_phrases {
           
     my $text1 = shift;      my $text1 = shift;
     my $text2 = shift;      my $text2 = shift;
           
     $text1 =~ s/courses/X001X/gi;      my %phrases = (
     $text1 =~ s/communities/X001X/gi;              'courses'     => 1,
     $text1 =~ s/course/X002X/gi;          'communities' => 1,
     $text1 =~ s/community/X002X/gi;          'course'      => 2,
     $text2 =~ s/courses/X001X/gi;          'community'   => 2,
     $text2 =~ s/communities/X001X/gi;          'member'      => 3,
     $text2 =~ s/course/X002X/gi;          'student'     => 3,
     $text2 =~ s/community/X002X/gi;          'students'    => 3,
           'construction'=> 4,
           'authoring'   => 4,
       );
   
       foreach my $word (keys %phrases) {
           $text1 =~ s/$word/X$phrases{$word}X/gi;
           $text2 =~ s/$word/X$phrases{$word}X/gi;
       }
   
     if(lc($text1) eq lc($text2)) {      if (lc($text1) eq lc($text2)) {
         return 1;          return 1;
     }      }
           
Line 70  sub CourseCommunity { Line 79  sub CourseCommunity {
   
 my $file1 = $ARGV[0];  # Old language.pm  my $file1 = $ARGV[0];  # Old language.pm
 my $file2 = $ARGV[1];  # New Phrases  my $file2 = $ARGV[1];  # New Phrases
   
   print("Checking for similar expressions in phrases in $file1 and $file2...\n");
   
 my %langOLD = &read($file1); #Hash with old phrases  my %langOLD = &read($file1); #Hash with old phrases
 my %langNEW = &read($file2); #Hash with new phrases  my %langNEW = &read($file2); #Hash with new phrases
 my $dlm;   my $dlm; 
 my $count = 1; #Counter  my $count = 0;
   
 open(OUT,'>similarities.txt') or die;  
   
 # For each new phrase, check if there is already a similar one  # For each new phrase, check if there is already a similar one
 while( my ($kNEW, $vNEW) = each %langNEW ) {  while( my ($kNEW, $vNEW) = each %langNEW ) {
     my $temp1 = $kNEW;      my $temp1 = $kNEW;
     $temp1 = &similarities($temp1);      $temp1 = &similar_chars($temp1);
         
     while( my ($kOLD, $vOLD) = each %langOLD ) {      while( my ($kOLD, $vOLD) = each %langOLD ) {
         my $temp2 = $kOLD;          my $temp2 = $kOLD;
         $temp2 = &similarities($temp2);          $temp2 = &similar_chars($temp2);
   
         #Check for similar punctuation (case insensitive) or          #Check for similar punctuation (case insensitive) or
         #similarity related to Course/Community           #similarity related to similar phrases 
         if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){          if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) {
             #Find delimiter for key and value              #Find delimiter for key and value
             if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {              if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
                 print " (Warning: Both, ' and \", occur!)";                  print " (Warning: Both, ' and \", occur!)";
Line 98  while( my ($kNEW, $vNEW) = each %langNEW Line 108  while( my ($kNEW, $vNEW) = each %langNEW
     } else {      } else {
         $dlm = "'";          $dlm = "'";
     }      }
             print OUT (<<ENDNEW);              print (<<ENDNEW);
 #Old key: $kOLD  #   $kOLD #(Old key)
    $dlm$kNEW$dlm     $dlm$kNEW$dlm
 => $dlm$vOLD$dlm,  => $dlm$vOLD$dlm,
   

Removed from v.1.2  
changed lines
  Added in v.1.5


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>