File:  [LON-CAPA] / loncom / build / filecompare.pl
Revision 1.1: download - view: text, annotated - select for diffs
Fri Oct 26 00:18:54 2001 UTC (22 years, 6 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
Script for comparing age, md5sum, size, diffs, lines, and/or plain existence
(a super-diff-like summary command, if you will). It will be used especially
for configuration files. It can accept files or directories as arguments. -Scott

#!/usr/bin/perl

# Scott Harrison
# YEAR=2001
# 9/27, 10/24, 10/25

# Usage text. It is printed (followed by exit status 1) whenever the command
# line cannot be understood.
my $invocation=<<END;
# ------------------------------------------------------------------ Invocation
# filecompare.pl FILE1 FILE2
# or
# filecompare.pl DIR1 DIR2
#
# A list of space separated values (after the file/dir names)
# can restrict the comparison.
# These values can be: existence, age, md5sum, size, lines, and/or diffs.
#
# Flags (before file/dir names):
# -p show all files the same
# -n show all files different
# -a show all files (with comparisons)
# -q only show file names (based on first file/dir)
# -v verbose mode (default)
END
# No arguments at all means there is nothing to compare: show usage and stop.
if (not @ARGV) {
    print $invocation;
    exit 1;
}
# ----------------------------------------------------------------------- Notes
#
# What are all the different ways to compare two files and how to look
# at the differences?
#
# Ways of comparison:
#   existence similarity
#   age similarity (modification time)
#   md5sum similarity
#   size similarity (bytes)
#   line count difference
#   number of different lines
#
# Quantities of comparison:
#   existence (no,yes); other values become 'n/a'
#   age in seconds
#   md5sum ("same" or "different")
#   size similarity (byte difference)
#   line count difference (integer)
#   number of different lines (integer)
#   
# Text output of comparison:
#   existence VALUE
#   age VALUE
#   md5sum VALUE
#   size VALUE
#   lines VALUE
#   diffs VALUE
#
# Output of comparison:
#   exist
#   if md5sum not same, then different
#   if age not 0, then older/newer
#   if size not 0, then bigger/smaller
#   if lines not 0, then more lines of code/fewer lines of code
#   if diffs not 0, then subtracted lines/added lines/changed lines

# implementing from unix command line (assuming bash)
# md5sum, diff, wc -l

# ---------------------------------------------- Process command line arguments
# Flags (before file/dir names):
# -p show all files the same
# -n show all files different
# -a show all files (with comparisons)
# -q only show file names (based on first file/dir)
# -v verbose mode (default)
# Defaults: verbose output, every file listed.
my $verbose='1';
my $show='all';

# One action per recognised flag letter.  Only the first character after the
# dash is examined, so "-q" and "-quiet" select the same action.
my %flag_action=(
    'q' => sub { $verbose=0; },
    'v' => sub { $verbose=1; },
    'p' => sub { $show='same'; },
    'n' => sub { $show='different'; },
    'a' => sub { $show='all'; },
);

# Consume leading "-x" style arguments; stop at the first argument that does
# not look like a flag (it is the first file/dir name).
while (@ARGV and $ARGV[0]=~/^\-(\w)/) {
    my $flag=$1;
    shift @ARGV;
    my $action=$flag_action{$flag};
    unless ($action) {
        # Unknown flag: show usage and give up.
        print($invocation);
        exit(1);
    }
    $action->();
}
warn('Verbose: '.$verbose."\n");
warn('Show: '.$show."\n");

# FILE1 FILE2 or DIR1 DIR2
#
# Takes the two locations off the command line and builds @files, the list of
# things to compare:
#   - if location #1 is a plain file, the mode is 'files' and @files holds
#     just that path;
#   - if location #1 exists but is not a plain file, the mode stays
#     'directories' and @files holds every regular file found below it, as a
#     path relative to location #1;
#   - if location #1 does not exist, @files holds location #1 as given (mode
#     'directories').
my $loc1=shift @ARGV;
my $loc2=shift @ARGV;
my $dirmode='directories';
my @files;
# Test for defined/non-empty rather than truth so a path named "0" is accepted.
unless (defined $loc1 and length $loc1 and defined $loc2 and length $loc2) {
    print($invocation);
    exit(1);
}
if (-f $loc1) {
    $dirmode='files';
    @files=($loc1);
}
elsif (-e $loc1) {
    # List-form pipe open: find is executed directly, without a shell, so
    # spaces or shell metacharacters in the location cannot break the command.
    if (open(my $find,'-|','find',$loc1,'-type','f')) {
        @files=<$find>;
        close($find);
    }
    # Strip the location prefix to get relative paths.  The location is
    # matched literally (\Q..\E), and trailing slashes are dropped first so
    # that "DIR/" strips the same prefix as "DIR".
    (my $root=$loc1)=~s/\/+$//;
    foreach my $path (@files) {
        chomp $path;
        $path=~s/^\Q$root\E\/+//;
    }
}
else {
    @files=($loc1);
}
warn('Processing for mode: '.$dirmode."\n");
warn('Location #1: '.$loc1."\n");
warn('Location #2: '.$loc2."\n");

# A list of space separated values (after the file/dir names)
# can restrict the comparison.
# Names accepted after the two locations; anything else is a usage error.
my %is_comparison=map { ($_ => 1) }
    ('existence','md5sum','age','size','lines','diffs');

# %restrict ends up with one true entry per comparison named on the command
# line (empty when none were given).
my %restrict;
while (@ARGV) {
    my $r=shift @ARGV;
    unless ($is_comparison{$r}) {
        print($invocation);
        exit(1);
    }
    $restrict{$r}=1;
}
if (%restrict) {
    my $chosen=join(' ',keys %restrict);
    warn('Restricting comparison to: '.$chosen."\n");
}

# ----------------------------------------------------------- Output formatters
# %OUTPUT maps each comparison name to a sub that prints "NAME: VALUE" for the
# single value it is given (no trailing newline).  The subs print directly and
# return nothing, so a caller that prints their return value adds no output.
my %OUTPUT=map {
    my $label=$_;
    ($label => sub {
        # $_[0] is the value to report (a scalar element, not a slice).
        print "${label}: $_[0]";
        return;
    });
} ('existence','md5sum','age','size','lines','diffs');

# ---------------------------------------------------------- Measurement subs
# Runs an external command and returns its standard output as a list of lines.
# The list form of pipe open executes the command directly (no shell), so file
# names containing spaces or shell metacharacters are passed through intact.
# Returns an empty list if the command cannot be started.  The exit status is
# deliberately ignored: diff exits non-zero whenever the files differ.
my $command_output=sub {
    my @command=@_;
    open(my $pipe,'-|',@command) or return;
    my @output=<$pipe>;
    close($pipe);
    return @output;
};

# %MEASURE maps each comparison name to a sub taking ($file1,$file2) and
# returning a two-element list: the measurement for each file (for 'diffs',
# the counts of "<" and ">" lines in the diff output).
my %MEASURE=(
    # 'yes' or 'no' for each file.
    'existence' => sub {
        my ($file1,$file2)=@_;
        return map { (-e $_) ? 'yes' : 'no' } ($file1,$file2);
    },
    # Hex checksum printed by md5sum for each file ('' if none was produced).
    # Only the checksum is returned, not the file name md5sum prints after it.
    'md5sum' => sub {
        my ($file1,$file2)=@_;
        my @sums;
        foreach my $file ($file1,$file2) {
            my ($line)=$command_output->('md5sum',$file);
            # md5sum prefixes the line with "\" when it escapes the file name.
            my ($sum)=defined($line) ? $line=~/^\\?([0-9a-fA-F]+)/ : ();
            push @sums,(defined($sum) ? $sum : '');
        }
        return @sums;
    },
    # Modification time (stat field 9, mtime) of each file, in epoch seconds.
    'age' => sub {
        my ($file1,$file2)=@_;
        return map { (stat($_))[9] } ($file1,$file2);
    },
    # Size in bytes (stat field 7) of each file.
    'size' => sub {
        my ($file1,$file2)=@_;
        return map { (stat($_))[7] } ($file1,$file2);
    },
    # Line count of each file as reported by "wc -l" (0 if none was produced).
    'lines' => sub {
        my ($file1,$file2)=@_;
        my @counts;
        foreach my $file ($file1,$file2) {
            my ($line)=$command_output->('wc','-l',$file);
            my ($count)=defined($line) ? $line=~/^\s*(\d+)/ : ();
            push @counts,(defined($count) ? $count : 0);
        }
        return @counts;
    },
    # Number of diff output lines starting with "<" and with ">".
    # diff is run once and both counts are taken from the same output.
    'diffs' => sub {
        my ($file1,$file2)=@_;
        my @diff=$command_output->('diff',$file1,$file2);
        my $from_first=grep { /^</ } @diff;
        my $from_second=grep { /^>/ } @diff;
        return ($from_first,$from_second);
    },
);

# ------------------------------------------------ Compare and report each file
# For every entry in @files: measure both copies, decide from the -a/-n/-p
# setting ($show) and any restriction list (%restrict) whether the entry is to
# be listed, and if so print its name followed (in verbose mode) by one
# tab-separated "NAME: VALUE" column per comparison.
FLOP: foreach my $file (@files) {
    # Directory mode pairs the same relative path under both locations; file
    # mode compares the two locations themselves.
    my ($file1,$file2)=($loc1,$loc2);
    if ($dirmode eq 'directories') {
        $file1=$loc1.'/'.$file;
        $file2=$loc2.'/'.$file;
    }

    my ($existence1,$existence2)=&{$MEASURE{'existence'}}($file1,$file2);
    my $existence=$existence1.':'.$existence2;
    my ($md5sum,$age,$size,$lines,$diffs);
    if ($existence1 eq 'no' or $existence2 eq 'no') {
        # Nothing else can be measured when either side is missing.
        ($md5sum,$age,$size,$lines,$diffs)=('n/a') x 5;
    }
    else {
        my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2);
        # Keep only the checksum field: md5sum output may carry the file name
        # after the checksum (and a leading "\" when the name was escaped),
        # and the two file names always differ.
        foreach my $sum ($md5sum1,$md5sum2) {
            $sum='' unless defined $sum;
            $sum=~s/^\\//;
            $sum=~s/\s.*\z//s;
        }
        # Checksums are hex strings, so they must be compared with eq;
        # numeric == only compares their leading digits.
        if ($md5sum1 eq $md5sum2) {
            $md5sum='same';
            $size=0;
            $lines=0;
            $diffs=0;
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=&{$MEASURE{'size'}}($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=&{$MEASURE{'lines'}}($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=&{$MEASURE{'diffs'}}($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    # Per comparison: true when the two copies differ.  An 'n/a' value (one
    # side missing) counts as a difference rather than being compared
    # numerically against 0.
    my %differs=(
        'existence' => ($existence ne 'yes:yes'),
        'md5sum'    => ($md5sum ne 'same'),
        'age'       => ($age eq 'n/a' || $age!=0),
        'size'      => ($size eq 'n/a' || $size!=0),
        'lines'     => ($lines eq 'n/a' || $lines!=0),
        # $diffs is the number 0 when the checksums matched, 'A:B' otherwise.
        'diffs'     => ($diffs ne '0:0' && $diffs ne '0'),
    );

    # Only the restricted comparisons (or all six when no restriction was
    # given) take part in the same/different decision.
    my @ks=(keys %restrict);
    unless (@ks) {
        @ks=('existence','md5sum','age','size','lines','diffs');
    }
    my $mismatches=grep { $differs{$_} } @ks;

    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    elsif ($show eq 'different') {
        $showflag=1 if $mismatches>0;
    }
    elsif ($show eq 'same') {
        $showflag=1 if $mismatches==0;
    }
    # Honour -p/-n: entries that were not selected are not listed at all.
    next FLOP unless $showflag;

    print "$file";
    if ($verbose==1) {
        foreach my $column (['existence',$existence],
                            ['age',$age],
                            ['md5sum',$md5sum],
                            ['size',$size],
                            ['lines',$lines],
                            ['diffs',$diffs]) {
            my ($name,$value)=@{$column};
            print "\t";
            print &{$OUTPUT{$name}}($value);
        }
    }
    print "\n";
}




FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>