loncom/localize/localize/checksimilar_2files.pl - diff

Return to checksimilar_2files.pl CVS log

Up to [LON-CAPA] / loncom / localize / localize

Diff for /loncom/localize/localize/checksimilar_2files.pl between versions 1.3 and 1.5

version 1.3, 2013/01/07 15:13:26	version 1.5, 2013/01/22 17:18:05
Line 35 sub read {	Line 35 sub read {
return %filecontent;	return %filecontent;
}	}

sub similarities{	sub similar_chars {
my $text = shift;	my $text = shift;
$text =~ s/[.,\_\-?!:]//g;	$text =~ s/\[_\d\]//g; # translation parameters
	$text =~ s/[.,\_\-?!: \/]//g; # punctuation
return $text;	return $text;
}	}



sub CourseCommunity {	sub similar_phrases {

my $text1 = shift;	my $text1 = shift;
my $text2 = shift;	my $text2 = shift;

$text1 =~ s/courses/X001X/gi;	my %phrases = (
$text1 =~ s/communities/X001X/gi;	'courses' => 1,
$text1 =~ s/course/X002X/gi;	'communities' => 1,
$text1 =~ s/community/X002X/gi;	'course' => 2,
$text2 =~ s/courses/X001X/gi;	'community' => 2,
$text2 =~ s/communities/X001X/gi;	'member' => 3,
$text2 =~ s/course/X002X/gi;	'student' => 3,
$text2 =~ s/community/X002X/gi;	'students' => 3,
	'construction'=> 4,
	'authoring' => 4,
	);

	foreach my $word (keys %phrases) {
	$text1 =~ s/$phrase/X$phrases{$word}X/gi;
	$text2 =~ s/$phrase/X$phrases{$word}X/gi;
	}

if(lc($text1) eq lc($text2)) {	if (lc($text1) eq lc($text2)) {
return 1;	return 1;
}	}

Line 81 my $count = 0;	Line 90 my $count = 0;
# For each new phrase, check if there is already a similar one	# For each new phrase, check if there is already a similar one
while( my ($kNEW, $vNEW) = each %langNEW ) {	while( my ($kNEW, $vNEW) = each %langNEW ) {
my $temp1 = $kNEW;	my $temp1 = $kNEW;
$temp1 = &similarities($temp1);	$temp1 = &similar_chars($temp1);

while( my ($kOLD, $vOLD) = each %langOLD ) {	while( my ($kOLD, $vOLD) = each %langOLD ) {
my $temp2 = $kOLD;	my $temp2 = $kOLD;
$temp2 = &similarities($temp2);	$temp2 = &similar_chars($temp2);

#Check for similar punctuation (case insensitive) or	#Check for similar punctuation (case insensitive) or
#similarity related to Course/Community	#similarity related to similar phrases
if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){	if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) {
#Find delimiter for key and value	#Find delimiter for key and value
if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {	if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
print " (Warning: Both, ' and \", occur!)";	print " (Warning: Both, ' and \", occur!)";

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.3
changed lines
	Added in v.1.5