#!/usr/bin/perl # The LearningOnline Network with CAPA # filecompare.pl - script used to help probe and compare file statistics # # $Id: filecompare.pl,v 1.1 2002/05/23 18:50:42 harris41 Exp $ # # Copyright Michigan State University Board of Trustees # # This file is part of the LearningOnline Network with CAPA (LON-CAPA). # # LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # # http://www.lon-capa.org/ # # YEAR=2001 # 9/27, 10/24, 10/25, 11/4 Scott Harrison # 11/14 Guy Albertelli # 11/16,11/17 Scott Harrison # 12/3,12/5 Scott Harrison # ### ############################################################################### ## ## ## ORGANIZATION OF THIS PERL SCRIPT ## ## ## ## 1. Invocation ## ## 2. Notes ## ## 3. Dependencies ## ## 4. Process command line arguments ## ## 5. Process file/dir location arguments ## ## 6. Process comparison restrictions ## ## 7. Define output and measure subroutines ## ## 8. Loop through files and calculate differences ## ## 9. Subroutines ## ## 10. POD (plain old documentation, CPAN style) ## ## ## ############################################################################### # ------------------------------------------------------------------ Invocation my $invocation=< 1; cvstime<0 --> 2 N=2: same as N=1 except without md5sum N=3: md5sum=same --> 1; age<0 --> 2 N=4: cvstime>0 --> 2 The third way to pass arguments is set by the -s flag. filecompare.pl -s SOURCE=[source] TARGET=[target] MODE=[mode] LOC1 LOC2 TARGET corresponds to the root path of LOC2. SOURCE corresponds to the root path of LOC1. MODE can either be file, directory, link, or fileglob. END unless (@ARGV) { print $invocation; exit 1; } # ----------------------------------------------------------------------- Notes # # What are all the different ways to compare two files and how to look # at the differences? # # Ways of comparison: # existence similarity # cvs time similarity (1st arg treated as CVS source; only for buildmode) # age similarity (modification time) # md5sum similarity # size similarity (bytes) # line count difference # number of different lines # # Quantities of comparison: # existence (no,yes); other values become 'n/a' # cvstime in seconds # age in seconds # md5sum ("same" or "different") # size similarity (byte difference) # line count difference (integer) # number of different lines (integer) # ---------------------------------------------------------------- Dependencies # implementing from unix command line (assuming bash) # md5sum, diff, wc -l # ---------------------------------------------- Process command line arguments # Flags (before file/dir names): # -p show all files the same # -n show all files different # -a show all files (with comparisons) # -q only show file names (based on first file/dir) # -v verbose mode (default) # -bN build/install mode (returns exitcode) # -s status checking mode for lpml my $verbose='1'; my $show='all'; my $buildmode=0; my $statusmode=0; ALOOP: while (@ARGV) { my $flag; if ($ARGV[0]=~/^\-(\w)/) { $flag=$1; if ($flag eq 'b') { $ARGV[0]=~/^\-\w(\d)/; $buildmode=$1; shift @ARGV; next ALOOP; } shift @ARGV; SWITCH: { $verbose=0, last SWITCH if $flag eq 'q'; $verbose=1, last SWITCH if $flag eq 'v'; $show='same', last SWITCH if $flag eq 'p'; $show='different', last SWITCH if $flag eq 'n'; $show='all', last SWITCH if $flag eq 'a'; $statusmode=1, last SWITCH if $flag eq 's'; print($invocation), exit(1); } } else { last; } } dowarn('Verbose: '.$verbose."\n"); dowarn('Show: '.$show."\n"); my @files; my $loc1; my $loc2; my $dirmode='directories'; # ----------------------------------------- If status checking mode for lpml my ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob); my ($source,$target); if ($statusmode==1) { ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob)=splice(@ARGV,0,5); $targetroot.='/' if $targetroot!~/\/$/; $sourceroot=~s/^SOURCE\=//; $targetroot=~s/^TARGET\=//; $source=$sourceroot.'/'.$sourceglob; $target=$targetroot.''.$targetglob; # print "SOURCE: $source\n"; # print "TARGET: $target\n"; if ($mode eq 'MODE=fileglob') { $loc1=$source;$loc1=~s/\/[^\/]*$// if length($loc1)>2; $loc2=$target;$loc2=~s/\/[^\/]*$// if length($loc2)>2; @files=map {s/^$loc1\///;$_} glob($source); $dirmode='directories'; } elsif ($mode eq 'MODE=file') { $loc1=$source; $loc2=$target; $dirmode='files'; @files=($loc1); } } else { # ----------------------------------------- Process file/dir location arguments # FILE1 FILE2 or DIR1 DIR2 $loc1=shift @ARGV; $loc2=shift @ARGV; unless ($loc1 and $loc2) { print "LOC1: $loc1\nLOC2: $loc2\n"; print($invocation), exit(1); } if (-f $loc1) { $dirmode='files'; @files=($loc1); } else { if (-e $loc1) { @files=`find $loc1 -type f`; } else { @files=($loc1); } map {chomp; s/^$loc1\///; $_} @files; } dowarn('Processing for mode: '.$dirmode."\n"); dowarn('Location #1: '.$loc1."\n"); dowarn('Location #2: '.$loc2."\n"); } # --------------------------------------------- Process comparison restrictions # A list of space separated values (after the file/dir names) # can restrict the comparison. my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0, 'lines'=>0,'diffs'=>0); my %restrict; while (@ARGV) { my $r=shift @ARGV; if ($rhash{$r}==0) {$restrict{$r}=1;} else {print($invocation), exit(1);} } if (%restrict) { dowarn('Restricting comparison to: '. join(' ',keys %restrict)."\n"); } # --------------------------------------- Define output and measure subroutines my %OUTPUT=( 'existence'=>( sub {print 'existence: '.@_[0]; return;}), 'md5sum'=>(sub {print 'md5sum: '.@_[0];return;}), 'cvstime'=>(sub {print 'cvstime: '.@_[0];return;}), 'age'=>(sub {print 'age: '.@_[0];return;}), 'size'=>(sub {print 'size: '.@_[0];return;}), 'lines'=>(sub {print 'lines: '.@_[0];return;}), 'diffs'=>(sub {print 'diffs: '.@_[0];return;}), ); my %MEASURE=( 'existence' => ( sub { my ($file1,$file2)=@_; my $rv1=(-e $file1)?'yes':'no'; my $rv2=(-e $file2)?'yes':'no'; return ($rv1,$rv2); } ), 'md5sum'=>( sub { my ($file1,$file2)=@_; my ($rv1)=split(/ /,`md5sum $file1`); chop $rv1; my ($rv2)=split(/ /,`md5sum $file2`); chop $rv2; return ($rv1,$rv2); } ), 'cvstime'=>( sub { my ($file1,$file2)=@_; my $rv1=&cvstime($file1); my @a=stat($file2); my $gmt=gmtime($a[9]); my $rv2=&utctime($gmt); return ($rv1,$rv2); } ), 'age'=>( sub { my ($file1,$file2)=@_; my @a=stat($file1); my $rv1=$a[9]; @a=stat($file2); my $rv2=$a[9]; return ($rv1,$rv2); } ), 'size'=>( sub { my ($file1,$file2)=@_; my @a=stat($file1); my $rv1=$a[7]; @a=stat($file2); my $rv2=$a[7]; return ($rv1,$rv2); } ), 'lines'=>( sub { my ($file1,$file2)=@_; my $rv1=`wc -l $file1`; chop $rv1; my $rv2=`wc -l $file2`; chop $rv2; return ($rv1,$rv2); } ), 'diffs'=>( sub { my ($file1,$file2)=@_; my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`; chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//; my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`; chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//; return ($rv1,$rv2); } ), ); FLOOP: foreach my $file (@files) { my $file1; my $file2; if ($dirmode eq 'directories') { $file1=$loc1.'/'.$file; $file2=$loc2.'/'.$file; } else { $file1=$loc1; $file2=$loc2; } my ($existence1,$existence2)=&{$MEASURE{'existence'}}($file1,$file2); my $existence=$existence1.':'.$existence2; my ($cvstime,$md5sum,$age,$size,$lines,$diffs); if ($existence1 eq 'no' or $existence2 eq 'no') { $md5sum='n/a'; $age='n/a'; $cvstime='n/a'; $size='n/a'; $lines='n/a'; $diffs='n/a'; } else { if ($buildmode) { my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2); $cvstime=$cvstime1-$cvstime2; } else { $cvstime='n/a'; } my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2); $age=$age1-$age2; my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2); if ($md5sum1 eq $md5sum2) { $md5sum='same'; $size=0; $lines=0; $diffs='0:0'; } elsif ($md5sum1 ne $md5sum2) { $md5sum='different'; my ($size1,$size2)=&{$MEASURE{'size'}}($file1,$file2); $size=$size1-$size2; my ($lines1,$lines2)=&{$MEASURE{'lines'}}($file1,$file2); $lines=$lines1-$lines2; my ($diffs1,$diffs2)=&{$MEASURE{'diffs'}}($file1,$file2); $diffs=$diffs1.':'.$diffs2; } } my $showflag=0; if ($show eq 'all') { $showflag=1; } if ($show eq 'different') { my @ks=(keys %restrict); unless (@ks) { @ks=('existence','cvstime','md5sum','age','size','lines','diffs'); } FLOOP2: for my $key (@ks) { if ($key eq 'existence') { if ($existence ne 'yes:yes') { $showflag=1; } } elsif ($key eq 'md5sum') { if ($md5sum ne 'same') { $showflag=1; } } elsif ($key eq 'cvstime' and $buildmode) { if ($cvstime!=0) { $showflag=1; } } elsif ($key eq 'age') { if ($age!=0) { $showflag=1; } } elsif ($key eq 'size') { if ($size!=0) { $showflag=1; } } elsif ($key eq 'lines') { if ($lines!=0) { $showflag=1; } } elsif ($key eq 'diffs') { if ($diffs ne '0:0') { $showflag=1; } } if ($showflag) { last FLOOP2; } } } elsif ($show eq 'same') { my @ks=(keys %restrict); unless (@ks) { @ks=('existence','md5sum','cvstime','age','size','lines','diffs'); } my $showcount=length(@ks); $showcount-- unless $buildmode; FLOOP3: for my $key (@ks) { if ($key eq 'existence') { if ($existence ne 'yes:yes') { $showcount--; } } elsif ($key eq 'md5sum') { if ($md5sum ne 'same') { $showcount--; } } elsif ($key eq 'cvstime' and $buildmode) { if ($cvstime!=0) { $showcount--; } } elsif ($key eq 'age') { if ($age!=0) { $showcount--; } } elsif ($key eq 'size') { if ($size!=0) { $showcount--; } } elsif ($key eq 'lines') { if ($lines!=0) { $showcount--; } } elsif ($key eq 'diffs') { if ($diffs ne '0:0') { $showcount--; } } } if ($showcount==0) { $showflag=1; } } if ($buildmode==1) { # -b1 if ($md5sum eq 'same') { exit(1); } elsif ($cvstime<0) { exit(2); } else { exit(0); } } elsif ($buildmode==2) { # -b2 if ($cvstime<0) { exit(2); } else { exit(0); } } elsif ($buildmode==3) { # -b3 if ($md5sum eq 'same') { exit(1); } elsif ($age<0) { exit(2); } else { exit(0); } } elsif ($buildmode==4) { # -b4 if ($existence=~/no$/) { exit(3); } elsif ($cvstime>0) { exit(2); } elsif ($existence=~/^no/) { exit(1); } else { exit(0); } } if ($showflag) { print "$file"; if ($verbose==1) { print "\t"; print &{$OUTPUT{'existence'}}($existence); print "\t"; print &{$OUTPUT{'cvstime'}}($cvstime); print "\t"; print &{$OUTPUT{'age'}}($age); print "\t"; print &{$OUTPUT{'md5sum'}}($md5sum); print "\t"; print &{$OUTPUT{'size'}}($size); print "\t"; print &{$OUTPUT{'lines'}}($lines); print "\t"; print &{$OUTPUT{'diffs'}}($diffs); } print "\n"; } } # ----------------------------------------------------------------- Subroutines sub cvstime { my ($f)=@_; my $path; my $file; if ($f=~/^(.*\/)(.*?)$/) { $f=~/^(.*\/)(.*?)$/; ($path,$file)=($1,$2); } else { $file=$f; $path=''; } my $cvstime; if ($buildmode!=3) { my $entry=`grep '^/$file/' ${path}CVS/Entries 2>/dev/null`; # or # die('*** WARNING *** cannot grep against '.${path}. # 'CVS/Entries for ' .$file . "\n"); if ($entry) { my @fields=split(/\//,$entry); $cvstime=`date -d '$fields[3] UTC' --utc +"%s"`; chomp $cvstime; } else { $cvstime='n/a'; } } else { $cvstime='n/a'; } return $cvstime; } sub utctime { my ($f)=@_; my $utctime=`date -d '$f UTC' --utc +"%s"`; chomp $utctime; return $utctime; } sub dowarn { my ($msg)=@_; warn($msg) unless $buildmode; } # ----------------------------------- POD (plain old documentation, CPAN style) =head1 NAME filecompare.pl - script used to help probe and compare file statistics =head1 SYNOPSIS filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ] or filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ] Restrictions: a list of space separated values (after the file/dir names) can restrict the comparison. These values can be: existence, cvstime, age, md5sum, size, lines, and/or diffs. Options (before file/dir names): -p show all files that have the same comparison -n show all files that have different comparisons -a show all files (with comparisons) -q only show file names (based on first file/dir) -v verbose mode (default) =head1 DESCRIPTION filecompare.pl can work in two modes: file comparison mode, or directory comparison mode. Comparisons can be a function of: * existence similarity * cvs time similarity (first argument treated as CVS source) * age similarity (modification time) * md5sum similarity * size similarity (bytes) * line count difference * number of different lines filecompare.pl integrates smoothly with the LPML installation language (linux packaging markup language). filecompare.pl is a tool that can be used for safe CVS source-to-target installations. =head1 README filecompare.pl integrates smoothly with the LPML installation language (linux packaging markup language). filecompare.pl is a tool that can be used for safe CVS source-to-target installations. The unique identifier is considered to be the file name(s) independent of the directory path. =head1 PREREQUISITES =head1 COREQUISITES =head1 OSNAMES linux =head1 SCRIPT CATEGORIES Packaging/Administrative =cut