Diff for /loncom/localize/localize/checksimilar_2files.pl between versions 1.3 and 1.5

version 1.3, 2013/01/07 15:13:26 | version 1.5, 2013/01/22 17:18:05
Line 35  sub read { Line 35  sub read {
     return %filecontent;      return %filecontent;
 }  }
   
 sub similarities{  sub similar_chars {
     my $text = shift;      my $text = shift;
     $text =~ s/[.,\_\-?!:]//g;      $text =~ s/\[_\d\]//g; # translation parameters
       $text =~ s/[.,\_\-?!: \/]//g; # punctuation
     return $text;      return $text;
 }  }
   
   
   
 sub CourseCommunity {  sub similar_phrases {
           
     my $text1 = shift;      my $text1 = shift;
     my $text2 = shift;      my $text2 = shift;
           
     $text1 =~ s/courses/X001X/gi;      my %phrases = (
     $text1 =~ s/communities/X001X/gi;              'courses'     => 1,
     $text1 =~ s/course/X002X/gi;          'communities' => 1,
     $text1 =~ s/community/X002X/gi;          'course'      => 2,
     $text2 =~ s/courses/X001X/gi;          'community'   => 2,
     $text2 =~ s/communities/X001X/gi;          'member'      => 3,
     $text2 =~ s/course/X002X/gi;          'student'     => 3,
     $text2 =~ s/community/X002X/gi;          'students'    => 3,
           'construction'=> 4,
           'authoring'   => 4,
       );
   
       foreach my $word (keys %phrases) {
           $text1 =~ s/$phrase/X$phrases{$word}X/gi;
           $text2 =~ s/$phrase/X$phrases{$word}X/gi;
       }
   
     if(lc($text1) eq lc($text2)) {      if (lc($text1) eq lc($text2)) {
         return 1;          return 1;
     }      }
           
Line 81  my $count = 0; Line 90  my $count = 0;
 # For each new phrase, check if there is already a similar one  # For each new phrase, check if there is already a similar one
 while( my ($kNEW, $vNEW) = each %langNEW ) {  while( my ($kNEW, $vNEW) = each %langNEW ) {
     my $temp1 = $kNEW;      my $temp1 = $kNEW;
     $temp1 = &similarities($temp1);      $temp1 = &similar_chars($temp1);
         
     while( my ($kOLD, $vOLD) = each %langOLD ) {      while( my ($kOLD, $vOLD) = each %langOLD ) {
         my $temp2 = $kOLD;          my $temp2 = $kOLD;
         $temp2 = &similarities($temp2);          $temp2 = &similar_chars($temp2);
   
         #Check for similar punctuation (case insensitive) or          #Check for similar punctuation (case insensitive) or
         #similarity related to Course/Community           #similarity related to similar phrases 
         if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){          if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) {
             #Find delimiter for key and value              #Find delimiter for key and value
             if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {              if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
                 print " (Warning: Both, ' and \", occur!)";                  print " (Warning: Both, ' and \", occur!)";

Legend: Removed from v.1.3 | changed lines | Added in v.1.5


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>