loncom/localize/localize/checksimilar_2files.pl - diff

Return to checksimilar_2files.pl CVS log

Up to [LON-CAPA] / loncom / localize / localize

Diff for /loncom/localize/localize/checksimilar_2files.pl between versions 1.2 and 1.5

version 1.2, 2010/08/03 13:00:21	version 1.5, 2013/01/22 17:18:05
Line 35 sub read {	Line 35 sub read {
return %filecontent;	return %filecontent;
}	}

sub similarities{	sub similar_chars {
my $text = shift;	my $text = shift;
$text =~ s/[.,\_\-?!:]//g;	$text =~ s/\[_\d\]//g; # translation parameters
	$text =~ s/[.,\_\-?!: \/]//g; # punctuation
return $text;	return $text;
}	}



sub CourseCommunity {	sub similar_phrases {

my $text1 = shift;	my $text1 = shift;
my $text2 = shift;	my $text2 = shift;

$text1 =~ s/courses/X001X/gi;	my %phrases = (
$text1 =~ s/communities/X001X/gi;	'courses' => 1,
$text1 =~ s/course/X002X/gi;	'communities' => 1,
$text1 =~ s/community/X002X/gi;	'course' => 2,
$text2 =~ s/courses/X001X/gi;	'community' => 2,
$text2 =~ s/communities/X001X/gi;	'member' => 3,
$text2 =~ s/course/X002X/gi;	'student' => 3,
$text2 =~ s/community/X002X/gi;	'students' => 3,
	'construction'=> 4,
	'authoring' => 4,
	);

	foreach my $word (keys %phrases) {
	$text1 =~ s/$phrase/X$phrases{$word}X/gi;
	$text2 =~ s/$phrase/X$phrases{$word}X/gi;
	}

if(lc($text1) eq lc($text2)) {	if (lc($text1) eq lc($text2)) {
return 1;	return 1;
}	}

Line 70 sub CourseCommunity {	Line 79 sub CourseCommunity {

my $file1 = $ARGV[0]; # Old language.pm	my $file1 = $ARGV[0]; # Old language.pm
my $file2 = $ARGV[1]; # New Phrases	my $file2 = $ARGV[1]; # New Phrases

	print("Checking for similar expressions in phrases in $file1 and $file2...\n");

my %langOLD = &read($file1); #Hash with old phrases	my %langOLD = &read($file1); #Hash with old phrases
my %langNEW = &read($file2); #Hash with new phrases	my %langNEW = &read($file2); #Hash with new phrases
my $dlm;	my $dlm;
my $count = 1; #Counter	my $count = 0;

open(OUT,'>similarities.txt') or die;

# For each new phrase, check if there is already a similar one	# For each new phrase, check if there is already a similar one
while( my ($kNEW, $vNEW) = each %langNEW ) {	while( my ($kNEW, $vNEW) = each %langNEW ) {
my $temp1 = $kNEW;	my $temp1 = $kNEW;
$temp1 = &similarities($temp1);	$temp1 = &similar_chars($temp1);

while( my ($kOLD, $vOLD) = each %langOLD ) {	while( my ($kOLD, $vOLD) = each %langOLD ) {
my $temp2 = $kOLD;	my $temp2 = $kOLD;
$temp2 = &similarities($temp2);	$temp2 = &similar_chars($temp2);

#Check for similar punctuation (case insensitive) or	#Check for similar punctuation (case insensitive) or
#similarity related to Course/Community	#similarity related to similar phrases
if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){	if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) {
#Find delimiter for key and value	#Find delimiter for key and value
if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {	if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
print " (Warning: Both, ' and \", occur!)";	print " (Warning: Both, ' and \", occur!)";
Line 98 while( my ($kNEW, $vNEW) = each %langNEW	Line 108 while( my ($kNEW, $vNEW) = each %langNEW
} else {	} else {
$dlm = "'";	$dlm = "'";
}	}
print OUT (<<ENDNEW);	print (<<ENDNEW);
#Old key: $kOLD	# $kOLD #(Old key)
$dlm$kNEW$dlm	$dlm$kNEW$dlm
=> $dlm$vOLD$dlm,	=> $dlm$vOLD$dlm,

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.2
changed lines
	Added in v.1.5