loncom/localize/localize/checksimilar_2files.pl - diff

Return to checksimilar_2files.pl CVS log

Up to [LON-CAPA] / loncom / localize / localize

Diff for /loncom/localize/localize/checksimilar_2files.pl between versions 1.1 and 1.8

version 1.1, 2010/03/09 15:16:26	version 1.8, 2013/12/20 14:27:54
Line 20 use open ':utf8';	Line 20 use open ':utf8';
sub read {	sub read {
# Read file into memory	# Read file into memory
my $file = shift;	my $file = shift;
open(IN,$file) or die;	open(IN,$file) or die "Error: Could not open file: $file\n";
my %filecontent = ();	my %filecontent = ();
my $contents = join('',<IN>);	my $contents = join('',<IN>);
close(IN);	close(IN);
Line 28 sub read {	Line 28 sub read {
my %Lexicon = ();	my %Lexicon = ();
eval($contents.'; %filecontent=%Lexicon;');	eval($contents.'; %filecontent=%Lexicon;');
if ($@ ne "") {	if ($@ ne "") {
print "\nAn error occurred during the attempt to retrieve the translation hash.\n"	die "\nAn error occurred during the attempt to retrieve the translation hash.\n"
."Error: ".$@."\n";	."Error: ".$@."\n";
die;
}	}
return %filecontent;	return %filecontent;
}	}

sub similarities{	sub similar_chars {
my $text = shift;	my $text = shift;
$text =~ s/[.,\_\-?!:]//g;	$text =~ s/<\/\w+ \/*>//g; # HTML tags
	$text =~ s/\[_\d\]//g; # translation parameters
	$text =~ s/[.,\_\-?!: \/]//g; # punctuation
return $text;	return $text;
}	}



sub CourseCommunity {	sub similar_phrases {

my $text1 = shift;	my $text1 = shift;
my $text2 = shift;	my $text2 = shift;

$text1 =~ s/courses//gi;	my %phrases = (
$text1 =~ s/communities//gi;	'courses' => 1,
$text1 =~ s/course//gi;	'communities' => 1,
$text1 =~ s/community//gi;	'course' => 2,
$text2 =~ s/courses//gi;	'community' => 2,
$text2 =~ s/communities//gi;	'member' => 3,
$text2 =~ s/course//gi;	'student' => 3,
$text2 =~ s/community//gi;	'students' => 3,
	'construction'=> 4,
	'authoring' => 4,
	);

	foreach my $word (keys %phrases) {
	$text1 =~ s/$word/X$phrases{$word}X/gi;
	$text2 =~ s/$word/X$phrases{$word}X/gi;
	}

if(lc($text1) eq lc($text2)) {	if (lc($text1) eq lc($text2)) {
return 1;	return 1;
}	}

Line 68 sub CourseCommunity {	Line 77 sub CourseCommunity {

####--------Main Program--------####	####--------Main Program--------####

	if (!$ARGV[0] or !$ARGV[1]) {
	die 'Error: Invalid files! Please specify two files which should be checked.'."\n";
	}

my $file1 = $ARGV[0]; # Old language.pm	my $file1 = $ARGV[0]; # Old language.pm
my $file2 = $ARGV[1]; # New Phrases	my $file2 = $ARGV[1]; # New Phrases

	print("Checking for similar expressions in phrases in $file1 and $file2...\n");

my %langOLD = &read($file1); #Hash with old phrases	my %langOLD = &read($file1); #Hash with old phrases
my %langNEW = &read($file2); #Hash with new phrases	my %langNEW = &read($file2); #Hash with new phrases
my $dlm;	my $dlm;
my $count = 1; #Counter	my $count = 0;

open(OUT,'>similarities.txt') or die;

# For each new phrase, check if there is already a similar one	# For each new phrase, check if there is already a similar one
while( my ($kNEW, $vNEW) = each %langNEW ) {	while( my ($kNEW, $vNEW) = each %langNEW ) {
my $temp1 = $kNEW;	my $temp1 = $kNEW;
$temp1 = &similarities($temp1);	$temp1 = &similar_chars($temp1);

while( my ($kOLD, $vOLD) = each %langOLD ) {	while( my ($kOLD, $vOLD) = each %langOLD ) {
my $temp2 = $kOLD;	my $temp2 = $kOLD;
$temp2 = &similarities($temp2);	$temp2 = &similar_chars($temp2);

#Check for similar punctuation (case insensitive) or	#Check for similar punctuation (case insensitive) or
#similarity related to Course/Community	#similarity related to similar phrases
if(lc($temp1) eq lc($temp2) \|\| &CourseCommunity($temp1,$temp2)){	if (lc($temp1) eq lc($temp2) \|\| &similar_phrases($temp1,$temp2)) {
#Find delimiter for key and value	#Find delimiter for key and value
if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {	if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
print " (Warning: Both, ' and \", occur!)";	print " (Warning: Both, ' and \", occur!)";
Line 98 while( my ($kNEW, $vNEW) = each %langNEW	Line 112 while( my ($kNEW, $vNEW) = each %langNEW
} else {	} else {
$dlm = "'";	$dlm = "'";
}	}
print OUT (<<ENDNEW);	print (<<ENDNEW);
#Old key: $kOLD	# $kOLD #(Old key)
$dlm$kNEW$dlm	$dlm$kNEW$dlm
=> $dlm$vOLD$dlm,	=> $dlm$vOLD$dlm,

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.1
changed lines
	Added in v.1.8