File:  [LON-CAPA] / loncom / build / filecompare.pl
Revision 1.1: download - view: text, annotated - select for diffs
Fri Oct 26 00:18:54 2001 UTC (22 years, 7 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
Script for comparing age, md5sum, size, diffs, lines, and/or plain existence
(a super-diff-like summary command, if you will). It will be used especially
for configuration files, and can accept files or directories as arguments. -Scott

    1: #!/usr/bin/perl
    2: 
    3: # Scott Harrison
    4: # YEAR=2001
    5: # 9/27, 10/24, 10/25
    6: 
    # Usage text, printed whenever the command line cannot be understood.
    # It is assembled one line per list element; the trailing empty element
    # makes the joined text end with a newline.
    my $invocation=join("\n",
        '# ------------------------------------------------------------------ Invocation',
        '# filecompare.pl FILE1 FILE2',
        '# or',
        '# filecompare.pl DIR1 DIR2',
        '#',
        '# A list of space separated values (after the file/dir names)',
        '# can restrict the comparison.',
        '# These values can be: existence, age, md5sum, size, lines, and/or diffs.',
        '#',
        '# Flags (before file/dir names):',
        '# -p show all files the same',
        '# -n show all files different',
        '# -a show all files (with comparisons)',
        '# -q only show file names (based on first file/dir)',
        '# -v verbose mode (default)',
        '',
    );
    # No arguments at all: show the usage text and stop with a failure status.
    if (!@ARGV) {
        print $invocation;
        exit 1;
    }
   28: # ----------------------------------------------------------------------- Notes
   29: #
   30: # What are all the different ways to compare two files and how to look
   31: # at the differences?
   32: #
   33: # Ways of comparison:
   34: #   existence similarity
   35: #   age similarity (modification time)
   36: #   md5sum similarity
   37: #   size similarity (bytes)
   38: #   line count difference
   39: #   number of different lines
   40: #
   41: # Quantities of comparison:
   42: #   existence (no,yes); other values become 'n/a'
   43: #   age in seconds
   44: #   md5sum ("same" or "different")
   45: #   size similarity (byte difference)
   46: #   line count difference (integer)
   47: #   number of different lines (integer)
   48: #   
   49: # Text output of comparison:
   50: #   existence VALUE
   51: #   age VALUE
   52: #   md5sum VALUE
   53: #   size VALUE
   54: #   lines VALUE
   55: #   diffs VALUE
   56: #
   57: # Output of comparison:
   58: #   exist
   59: #   if md5sum not same, then different
   60: #   if age not 0, then older/newer
   61: #   if size not 0, then bigger/smaller
   62: #   if lines not 0, then more lines of code/less lines of code
   63: #   if diffs not 0, then subtracted lines/added lines/changed lines
   64: 
   65: # implementing from unix command line (assuming bash)
   66: # md5sum, diff, wc -l
   67: 
   68: # ---------------------------------------------- Process command line arguments
   69: # Flags (before file/dir names):
   70: # -p show all files the same
   71: # -n show all files different
   72: # -a show all files (with comparisons)
   73: # -q only show file names (based on first file/dir)
   74: # -v verbose mode (default)
   # Defaults: verbose output, every file listed.
   my $verbose='1';
   my $show='all';
   # One handler per recognised flag letter; each one adjusts the settings above.
   my %flag_handler=(
       'q'=>sub { $verbose=0; },
       'v'=>sub { $verbose=1; },
       'p'=>sub { $show='same'; },
       'n'=>sub { $show='different'; },
       'a'=>sub { $show='all'; },
   );
   # Consume leading arguments for as long as they look like flags.  Only the
   # first character after the dash is significant.  An unknown flag letter
   # prints the usage text and exits with status 1.
   while (@ARGV and $ARGV[0]=~/^\-(\w)/) {
       my $letter=$1;
       shift @ARGV;
       my $handler=$flag_handler{$letter};
       unless ($handler) {
           print($invocation);
           exit(1);
       }
       $handler->();
   }
   # Report the effective settings on STDERR.
   warn('Verbose: '.$verbose."\n");
   warn('Show: '.$show."\n");
   97: 
   98: # FILE1 FILE2 or DIR1 DIR2
   # The two things to compare: FILE1 FILE2 or DIR1 DIR2.
   my $loc1=shift @ARGV;
   my $loc2=shift @ARGV;
   my $dirmode='directories';
   # In 'directories' mode @files holds paths relative to $loc1; in 'files'
   # mode it holds the single entry $loc1.
   my @files;
   # Test for defined/non-empty rather than truth so that a path named "0"
   # is accepted.
   unless (defined($loc1) and length($loc1) and
           defined($loc2) and length($loc2)) {
       print($invocation), exit(1);
   }
   if (-f $loc1 or not -e $loc1) {
       # A plain file, or something that does not exist at all: compare
       # $loc1 directly against $loc2.  (A missing $loc1 is then reported
       # through the existence measure instead of being looked up as
       # "$loc1/$loc1".)
       $dirmode='files';
       @files=($loc1);
   }
   else {
       # Walk the tree in-process instead of shelling out to find(1), so
       # that paths containing spaces or shell metacharacters are safe.
       require File::Find;
       File::Find::find(
           {
               'no_chdir'=>1,
               # Regular files only, like "find -type f" (symlinks skipped).
               'wanted'=>sub {
                   push(@files,$File::Find::name) if (-f $_ and not -l $_);
               },
           },
           $loc1);
       # Make every path relative to $loc1.  The prefix is matched literally
       # (\Q...\E) and any trailing slashes given on the command line are
       # ignored.
       (my $top=$loc1)=~s/\/+$//;
       for my $path (@files) {
           $path=~s/^\Q$top\E\/+//;
       }
   }
   warn('Processing for mode: '.$dirmode."\n");
   warn('Location #1: '.$loc1."\n");
   warn('Location #2: '.$loc2."\n");
  122: 
  123: # A list of space separated values (after the file/dir names)
  124: # can restrict the comparison.
  # Remaining arguments name the comparisons to restrict to.  %restrict maps
  # each requested comparison name to 1.
  my %restrict;
  my %known_comparison=map { ($_=>1) }
      ('existence','md5sum','age','size','lines','diffs');
  # splice empties @ARGV, just as shifting every element would.
  foreach my $word (splice(@ARGV)) {
      # Anything that is not a known comparison name is a usage error.
      unless ($known_comparison{$word}) {
          print($invocation), exit(1);
      }
      $restrict{$word}=1;
  }
  # Report the active restriction (if any) on STDERR.
  warn('Restricting comparison to: '.
       join(' ',keys %restrict)."\n") if (%restrict);
  144: 
  # Output routines, keyed by comparison name.  Each routine takes a single
  # value, prints "NAME: VALUE" to the currently selected filehandle and
  # returns nothing (an empty list).
  my %OUTPUT=map {
      my $label=$_;
      ($label=>sub {
          # $_[0] is the value to show (the original used the slice @_[0]).
          print $label.': '.$_[0];
          return;
      });
  } ('existence','md5sum','age','size','lines','diffs');
  183: 
  # Measurement routines, keyed by comparison name.  Each routine takes two
  # file paths and returns a two-element list: the measurement for the first
  # file and the measurement for the second.  No path is ever passed through
  # a shell.
  my %MEASURE=(
      # 'yes' or 'no' for each path.
      'existence'=>sub {
          my ($file1,$file2)=@_;
          return map { (-e $_) ? 'yes' : 'no' } ($file1,$file2);
      },
      # Hex MD5 digest of each file's contents (digest only, no file name,
      # so the two values can be compared directly).  An unreadable file
      # yields the empty string.
      'md5sum'=>sub {
          my ($file1,$file2)=@_;
          require Digest::MD5;
          my @digests;
          foreach my $path ($file1,$file2) {
              my $digest='';
              if (open(my $fh,'<',$path)) {
                  binmode($fh);
                  $digest=Digest::MD5->new->addfile($fh)->hexdigest;
                  close($fh);
              }
              else {
                  warn("Cannot read $path: $!\n");
              }
              push(@digests,$digest);
          }
          return @digests;
      },
      # Modification time of each file in epoch seconds (stat field 9;
      # field 10 is the inode change time).
      'age'=>sub {
          my ($file1,$file2)=@_;
          return map { (stat($_))[9] } ($file1,$file2);
      },
      # Size of each file in bytes (stat field 7).
      'size'=>sub {
          my ($file1,$file2)=@_;
          return map { (stat($_))[7] } ($file1,$file2);
      },
      # Number of newline characters in each file, i.e. what "wc -l" counts,
      # returned as a plain number.  An unreadable file counts as 0.
      'lines'=>sub {
          my ($file1,$file2)=@_;
          my @counts;
          foreach my $path ($file1,$file2) {
              my $count=0;
              if (open(my $fh,'<',$path)) {
                  binmode($fh);
                  my $chunk;
                  while (read($fh,$chunk,65536)) {
                      $count+=($chunk=~tr/\n//);
                  }
                  close($fh);
              }
              else {
                  warn("Cannot read $path: $!\n");
              }
              push(@counts,$count);
          }
          return @counts;
      },
      # Number of diff(1) output lines starting with '<' (only in the first
      # file) and with '>' (only in the second file).  diff is run once, in
      # list form so that no shell is involved.
      'diffs'=>sub {
          my ($file1,$file2)=@_;
          my ($removed,$added)=(0,0);
          if (open(my $diff,'-|','diff','--',$file1,$file2)) {
              while (my $line=<$diff>) {
                  $removed++ if ($line=~/^</);
                  $added++ if ($line=~/^>/);
              }
              # diff exits with status 1 when the files differ, so the
              # result of close is deliberately not treated as an error.
              close($diff);
          }
          else {
              warn("Cannot run diff: $!\n");
          }
          return ($removed,$added);
      },
  );
  236: 
  # Main loop: measure every file pair, decide whether it should be listed
  # (according to $show and %restrict) and print one line per listed file.
  FLOP: foreach my $file (@files) {
      # In 'directories' mode $file is relative to both locations; in
      # 'files' mode the two locations are themselves the files.
      my ($file1,$file2)=($loc1,$loc2);
      if ($dirmode eq 'directories') {
          $file1=$loc1.'/'.$file;
          $file2=$loc2.'/'.$file;
      }
      my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
      my $existence=$existence1.':'.$existence2;
      # Every other quantity is 'n/a' unless both files exist.
      my ($md5sum,$age,$size,$lines,$diffs)=('n/a') x 5;
      if ($existence1 ne 'no' and $existence2 ne 'no') {
          my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
          $age=$age1-$age2;
          my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
          # Compare only the digest itself (the first whitespace-separated
          # field), and compare it as a string: md5sum(1) style output also
          # carries the file name, and a numeric comparison of hex digests
          # is meaningless.
          my ($digest1)=split(' ',defined($md5sum1) ? $md5sum1 : '');
          my ($digest2)=split(' ',defined($md5sum2) ? $md5sum2 : '');
          $digest1='' unless defined($digest1);
          $digest2='' unless defined($digest2);
          if ($digest1 ne '' and $digest1 eq $digest2) {
              # Identical contents: the remaining measurements are known.
              $md5sum='same';
              $size=0;
              $lines=0;
              $diffs='0:0'; # same format as the "different" case below
          }
          else {
              $md5sum='different';
              my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
              $size=$size1-$size2;
              my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
              $lines=$lines1-$lines2;
              my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
              $diffs=$diffs1.':'.$diffs2;
          }
      }
      # For each comparison: does it show a difference?  A value of 'n/a'
      # (one of the files is missing) always counts as a difference.
      my %differs=(
          'existence'=>(($existence ne 'yes:yes') ? 1 : 0),
          'md5sum'=>(($md5sum ne 'same') ? 1 : 0),
          'age'=>(($age eq 'n/a' || $age!=0) ? 1 : 0),
          'size'=>(($size eq 'n/a' || $size!=0) ? 1 : 0),
          'lines'=>(($lines eq 'n/a' || $lines!=0) ? 1 : 0),
          'diffs'=>(($diffs ne '0:0') ? 1 : 0),
      );
      # Comparisons taken into account: the restricted set, or all of them.
      my @ks=(keys %restrict);
      unless (@ks) {
          @ks=('existence','md5sum','age','size','lines','diffs');
      }
      my $differences=scalar(grep { $differs{$_} } @ks);
      my $showflag=0;
      if ($show eq 'all') {
          $showflag=1;
      }
      elsif ($show eq 'different') {
          # -n: list the file if any considered comparison differs.
          $showflag=1 if ($differences>0);
      }
      elsif ($show eq 'same') {
          # -p: list the file only if no considered comparison differs.
          $showflag=1 if ($differences==0);
      }
      # Honour the -p/-n selection.
      next FLOP unless ($showflag);
      print "$file";
      if ($verbose==1) {
          my %value=(
              'existence'=>$existence,
              'age'=>$age,
              'md5sum'=>$md5sum,
              'size'=>$size,
              'lines'=>$lines,
              'diffs'=>$diffs,
          );
          foreach my $key ('existence','age','md5sum','size','lines','diffs') {
              print "\t";
              # The output routine prints "NAME: VALUE" itself and returns
              # nothing, so its result does not need to be printed.
              $OUTPUT{$key}->($value{$key});
          }
      }
      print "\n";
  }
  382: 
  383: 
  384: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>