version 1.1, 2010/03/09 15:16:26
|
version 1.8, 2013/12/20 14:27:54
|
Line 20 use open ':utf8';
|
Line 20 use open ':utf8';
|
sub read { |
sub read { |
# Read file into memory |
# Read file into memory |
my $file = shift; |
my $file = shift; |
open(IN,$file) or die; |
open(IN,$file) or die "Error: Could not open file: $file\n"; |
my %filecontent = (); |
my %filecontent = (); |
my $contents = join('',<IN>); |
my $contents = join('',<IN>); |
close(IN); |
close(IN); |
Line 28 sub read {
|
Line 28 sub read {
|
my %Lexicon = (); |
my %Lexicon = (); |
eval($contents.'; %filecontent=%Lexicon;'); |
eval($contents.'; %filecontent=%Lexicon;'); |
if ($@ ne "") { |
if ($@ ne "") { |
print "\nAn error occurred during the attempt to retrieve the translation hash.\n" |
die "\nAn error occurred during the attempt to retrieve the translation hash.\n" |
."Error: ".$@."\n"; |
."Error: ".$@."\n"; |
die; |
|
} |
} |
return %filecontent; |
return %filecontent; |
} |
} |
|
|
sub similarities{ |
sub similar_chars { |
my $text = shift; |
my $text = shift; |
$text =~ s/[.,\_\-?!:]//g; |
$text =~ s/<\/*\w+ *\/*>//g; # HTML tags |
|
$text =~ s/\[_\d\]//g; # translation parameters |
|
$text =~ s/[.,\_\-?!: \/\(\)]//g; # punctuation |
return $text; |
return $text; |
} |
} |
|
|
|
|
|
|
sub CourseCommunity { |
sub similar_phrases { |
|
|
my $text1 = shift; |
my $text1 = shift; |
my $text2 = shift; |
my $text2 = shift; |
|
|
$text1 =~ s/courses//gi; |
my %phrases = ( |
$text1 =~ s/communities//gi; |
'courses' => 1, |
$text1 =~ s/course//gi; |
'communities' => 1, |
$text1 =~ s/community//gi; |
'course' => 2, |
$text2 =~ s/courses//gi; |
'community' => 2, |
$text2 =~ s/communities//gi; |
'member' => 3, |
$text2 =~ s/course//gi; |
'student' => 3, |
$text2 =~ s/community//gi; |
'students' => 3, |
|
'construction'=> 4, |
|
'authoring' => 4, |
|
); |
|
|
|
foreach my $word (keys %phrases) { |
|
$text1 =~ s/$word/X$phrases{$word}X/gi; |
|
$text2 =~ s/$word/X$phrases{$word}X/gi; |
|
} |
|
|
if(lc($text1) eq lc($text2)) { |
if (lc($text1) eq lc($text2)) { |
return 1; |
return 1; |
} |
} |
|
|
Line 68 sub CourseCommunity {
|
Line 77 sub CourseCommunity {
|
|
|
####--------Main Program--------#### |
####--------Main Program--------#### |
|
|
|
if (!$ARGV[0] or !$ARGV[1]) { |
|
die 'Error: Invalid files! Please specify two files which should be checked.'."\n"; |
|
} |
|
|
my $file1 = $ARGV[0]; # Old language.pm |
my $file1 = $ARGV[0]; # Old language.pm |
my $file2 = $ARGV[1]; # New Phrases |
my $file2 = $ARGV[1]; # New Phrases |
|
|
|
print("Checking for similar expressions in phrases in $file1 and $file2...\n"); |
|
|
my %langOLD = &read($file1); #Hash with old phrases |
my %langOLD = &read($file1); #Hash with old phrases |
my %langNEW = &read($file2); #Hash with new phrases |
my %langNEW = &read($file2); #Hash with new phrases |
my $dlm; |
my $dlm; |
my $count = 1; #Counter |
my $count = 0; |
|
|
open(OUT,'>similarities.txt') or die; |
|
|
|
# For each new phrase, check if there is already a similar one |
# For each new phrase, check if there is already a similar one |
while( my ($kNEW, $vNEW) = each %langNEW ) { |
while( my ($kNEW, $vNEW) = each %langNEW ) { |
my $temp1 = $kNEW; |
my $temp1 = $kNEW; |
$temp1 = &similarities($temp1); |
$temp1 = &similar_chars($temp1); |
|
|
while( my ($kOLD, $vOLD) = each %langOLD ) { |
while( my ($kOLD, $vOLD) = each %langOLD ) { |
my $temp2 = $kOLD; |
my $temp2 = $kOLD; |
$temp2 = &similarities($temp2); |
$temp2 = &similar_chars($temp2); |
|
|
#Check for similar punctuation (case insensitive) or |
#Check for similar punctuation (case insensitive) or |
#similarity related to Course/Community |
#similarity related to similar phrases |
if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){ |
if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) { |
#Find delimiter for key and value |
#Find delimiter for key and value |
if (($kNEW=~/\'/) & ($kNEW=~/\"/)) { |
if (($kNEW=~/\'/) & ($kNEW=~/\"/)) { |
print " (Warning: Both, ' and \", occur!)"; |
print " (Warning: Both, ' and \", occur!)"; |
Line 98 while( my ($kNEW, $vNEW) = each %langNEW
|
Line 112 while( my ($kNEW, $vNEW) = each %langNEW
|
} else { |
} else { |
$dlm = "'"; |
$dlm = "'"; |
} |
} |
print OUT (<<ENDNEW); |
print (<<ENDNEW); |
#Old key: $kOLD |
# $kOLD #(Old key) |
$dlm$kNEW$dlm |
$dlm$kNEW$dlm |
=> $dlm$vOLD$dlm, |
=> $dlm$vOLD$dlm, |
|
|