Annotation of loncom/build/filecompare.pl, revision 1.4

1.1       harris41    1: #!/usr/bin/perl
                      2: 
1.4     ! harris41    3: # The LearningOnline Network with CAPA
        !             4: #
        !             5: # filecompare.pl - script used to help probe and compare file statistics
        !             6: #
1.1       harris41    7: # YEAR=2001
1.4     ! harris41    8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
        !             9: # 11/14 Guy Albertelli
        !            10: # 11/16 Scott Harrison
        !            11: #
        !            12: # $Id: pwchange,v 1.3 2001/11/14 13:19:36 albertel Exp $
        !            13: ###
1.1       harris41   14: 
1.4     ! harris41   15: # ------------------------------------------------------------------ Invocation
# Usage text printed whenever the command line cannot be understood.
my $invocation=<<"END";
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
END
# An empty command line gives nothing to compare: show the usage text and
# leave with a failure status.
if (!@ARGV) {
    print($invocation);
    exit(1);
}
                     37: # ----------------------------------------------------------------------- Notes
                     38: #
                     39: # What are all the different ways to compare two files and how to look
                     40: # at the differences?
                     41: #
                     42: # Ways of comparison:
                     43: #   existence similarity
1.2       harris41   44: #   cvs time similarity (first argument treated as CVS source)
1.1       harris41   45: #   age similarity (modification time)
                     46: #   md5sum similarity
                     47: #   size similarity (bytes)
                     48: #   line count difference
                     49: #   number of different lines
                     50: #
                     51: # Quantities of comparison:
                     52: #   existence (no,yes); other values become 'n/a'
1.2       harris41   53: #   cvstime in seconds
1.1       harris41   54: #   age in seconds
                     55: #   md5sum ("same" or "different")
                     56: #   size similarity (byte difference)
                     57: #   line count difference (integer)
                     58: #   number of different lines (integer)
                     59: #   
                     60: # Text output of comparison:
                     61: #   existence VALUE
1.2       harris41   62: #   cvstime VALUE
1.1       harris41   63: #   age VALUE
                     64: #   md5sum VALUE
                     65: #   size VALUE
                     66: #   lines VALUE
                     67: #   diffs VALUE
                     68: #
                     69: # Output of comparison:
                     70: #   exist
                     71: #   if md5sum not same, then different
1.2       harris41   72: #   if cvstime not 0, then older/newer
1.1       harris41   73: #   if age not 0, then older/newer
                     74: #   if size not 0, then bigger/smaller
                     75: #   if lines not 0, then more lines of code/less lines of code
                     76: #   if diffs not 0, then subtracted lines/added lines/changed lines
                     77: 
                     78: # implementing from unix command line (assuming bash)
                     79: # md5sum, diff, wc -l
                     80: 
                     81: # ---------------------------------------------- Process command line arguments
                     82: # Flags (before file/dir names):
                     83: # -p show all files the same
                     84: # -n show all files different
                     85: # -a show all files (with comparisons)
                     86: # -q only show file names (based on first file/dir)
                     87: # -v verbose mode (default)
1.2       harris41   88: # -b build/install mode (returns exitcode)
# Defaults: verbose output, every file shown, not acting as a build helper.
my $verbose='1';
my $show='all';
my $buildmode=0;
# One handler per recognized single-letter flag.
my %flag_handler=(
    'q'=>sub { $verbose=0; },
    'v'=>sub { $verbose=1; },
    'p'=>sub { $show='same'; },
    'n'=>sub { $show='different'; },
    'a'=>sub { $show='all'; },
    'b'=>sub { $buildmode=1; },
    'B'=>sub { $buildmode=2; },
    'g'=>sub { $buildmode=3; },
    'G'=>sub { $buildmode=4; },
);
# Consume leading arguments of the form "-x".  Only the first word character
# after the dash is looked at.  The first argument that does not look like a
# flag ends option processing and is left in @ARGV.
while (@ARGV and $ARGV[0]=~/^\-(\w)/) {
    my $letter=$1;
    shift @ARGV;
    my $handler=$flag_handler{$letter};
    if ($handler) {
        &$handler();
    }
    else {
        # Unknown flag: print the usage text and stop with status 1.
        print($invocation);
        exit(1);
    }
}
dowarn('Verbose: '.$verbose."\n");
dowarn('Show: '.$show."\n");
1.1       harris41  116: 
                    117: # FILE1 FILE2 or DIR1 DIR2
                    118: my $loc1=shift @ARGV;
                    119: my $loc2=shift @ARGV;
                    120: my $dirmode='directories';
                    121: my @files;
                    122: unless ($loc1 and $loc2) {
                    123:     print($invocation), exit(1);
                    124: }
                    125: if (-f $loc1) {
                    126:     $dirmode='files';
                    127:     @files=($loc1);
                    128: }
                    129: else {
                    130:     if (-e $loc1) {
                    131: 	@files=`find $loc1 -type f`;
                    132:     }
                    133:     else {
                    134: 	@files=($loc1);
                    135:     }
                    136:     map {chomp; s/^$loc1\///; $_} @files;
                    137: }
1.2       harris41  138: dowarn('Processing for mode: '.$dirmode."\n");
                    139: dowarn('Location #1: '.$loc1."\n");
                    140: dowarn('Location #2: '.$loc2."\n");
1.1       harris41  141: 
                    142: # A list of space separated values (after the file/dir names)
                    143: # can restrict the comparison.
                    144: my %restrict;
                    145: while (@ARGV) {
                    146:     my $r=shift @ARGV;
                    147:     if ($r eq 'existence' or
1.2       harris41  148: 	$r eq 'cvstime' or
1.1       harris41  149: 	$r eq 'md5sum' or
                    150: 	$r eq 'age' or
                    151: 	$r eq 'size' or
                    152: 	$r eq 'lines' or
                    153: 	$r eq 'diffs') {
                    154: 	$restrict{$r}=1;
                    155:     }
                    156:     else {
                    157: 	print($invocation), exit(1);
                    158:     }
                    159: }
                    160: if (%restrict) {
                    161:     warn('Restricting comparison to: '.
                    162: 	 join(' ',keys %restrict)."\n");
                    163: }
                    164: 
                    165: my %OUTPUT=(
1.4     ! harris41  166:          'existence'=>( sub {print 'existence: '.@_[0]; return;}),
        !           167: 	 'md5sum'=>(sub {print 'md5sum: '.@_[0];return;}),
        !           168:          'cvstime'=>(sub {print 'cvstime: '.@_[0];return;}),
        !           169:          'age'=>(sub {print 'age: '.@_[0];return;}),
        !           170:          'size'=>(sub {print 'size: '.@_[0];return;}),
        !           171:          'lines'=>(sub {print 'lines: '.@_[0];return;}),
        !           172:          'diffs'=>(sub {print 'diffs: '.@_[0];return;}),
1.1       harris41  173: );
                    174: 
                    175: my %MEASURE=(
1.4     ! harris41  176: 	 'existence' => ( sub { my ($file1,$file2)=@_;
1.1       harris41  177: 		        my $rv1=(-e $file1)?'yes':'no';
                    178: 			my $rv2=(-e $file2)?'yes':'no';
1.4     ! harris41  179: 			return ($rv1,$rv2); } ),
        !           180: 	 'md5sum'=>( sub { my ($file1,$file2)=@_;
1.3       albertel  181: 			my ($rv1)=split(/ /,`md5sum $file1`); chop $rv1;
                    182: 			my ($rv2)=split(/ /,`md5sum $file2`); chop $rv2;
1.4     ! harris41  183: 			return ($rv1,$rv2); } ),
        !           184: 	 'cvstime'=>( sub { my ($file1,$file2)=@_;
1.2       harris41  185: 			my $rv1=&cvstime($file1);
                    186: 			my @a=stat($file2); my $gmt=gmtime($a[9]);
                    187: 			my $rv2=&utctime($gmt);
1.4     ! harris41  188: 			return ($rv1,$rv2); } ),
        !           189:          'age'=>( sub {	my ($file1,$file2)=@_;
1.2       harris41  190: 			my @a=stat($file1); my $rv1=$a[9];
                    191: 			@a=stat($file2); my $rv2=$a[9];
1.4     ! harris41  192: 			return ($rv1,$rv2); } ),
        !           193:          'size'=>( sub { my ($file1,$file2)=@_;
1.1       harris41  194: 			my @a=stat($file1); my $rv1=$a[7];
                    195: 			@a=stat($file2); my $rv2=$a[7];
1.4     ! harris41  196: 			return ($rv1,$rv2); } ),
        !           197:          'lines'=>( sub { my ($file1,$file2)=@_;
1.1       harris41  198: 			my $rv1=`wc -l $file1`; chop $rv1;
                    199: 			my $rv2=`wc -l $file2`; chop $rv2;
1.4     ! harris41  200: 			return ($rv1,$rv2); } ),
        !           201:          'diffs'=>( sub { my ($file1,$file2)=@_;
1.1       harris41  202: 			my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`;
                    203: 			chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//;
                    204: 			my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`;
                    205: 			chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//;
1.4     ! harris41  206: 			return ($rv1,$rv2); } ),
1.1       harris41  207: );
                    208: 
                    209: FLOP: foreach my $file (@files) {
                    210:     my $file1;
                    211:     my $file2;
                    212:     if ($dirmode eq 'directories') {
                    213:         $file1=$loc1.'/'.$file;
                    214:         $file2=$loc2.'/'.$file;
                    215:     }
                    216:     else {
                    217:         $file1=$loc1;
                    218:         $file2=$loc2;
                    219:     }
                    220:     my ($existence1,$existence2)=&{$MEASURE{'existence'}}($file1,$file2);
                    221:     my $existence=$existence1.':'.$existence2;
1.2       harris41  222:     my ($cvstime,$md5sum,$age,$size,$lines,$diffs);
1.1       harris41  223:     if ($existence1 eq 'no' or $existence2 eq 'no') {
                    224:         $md5sum='n/a';
                    225:         $age='n/a';
1.2       harris41  226:         $cvstime='n/a';
1.1       harris41  227:         $size='n/a';
                    228:         $lines='n/a';
                    229:         $diffs='n/a';
                    230:     }
                    231:     else {
1.2       harris41  232:         my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2);
                    233:         $cvstime=$cvstime1-$cvstime2;
1.1       harris41  234:         my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2);
                    235:         $age=$age1-$age2;
                    236:         my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2);
1.3       albertel  237:         if ($md5sum1 eq $md5sum2) {
1.1       harris41  238:             $md5sum='same';
                    239:             $size=0;
                    240:             $lines=0;
                    241:             $diffs=0;
                    242: 	}
1.3       albertel  243:         elsif ($md5sum1 ne $md5sum2) {
1.1       harris41  244:             $md5sum='different';
                    245:             my ($size1,$size2)=&{$MEASURE{'size'}}($file1,$file2);
                    246:             $size=$size1-$size2;
                    247:             my ($lines1,$lines2)=&{$MEASURE{'lines'}}($file1,$file2);
                    248:             $lines=$lines1-$lines2;
                    249:             my ($diffs1,$diffs2)=&{$MEASURE{'diffs'}}($file1,$file2);
                    250:             $diffs=$diffs1.':'.$diffs2;
                    251:         }
                    252:     }
                    253:     my $showflag=0;
                    254:     if ($show eq 'all') {
                    255:         $showflag=1;
                    256:     }
                    257:     if ($show eq 'different') {
                    258:         my @ks=(keys %restrict);
                    259:         unless (@ks) {
1.2       harris41  260: 	    @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
1.1       harris41  261: 	}
                    262:         FLOP2: for my $key (@ks) {
                    263: 	    if ($key eq 'existence') {
                    264: 		if ($existence ne 'yes:yes') {
                    265: 		    $showflag=1;
                    266: 		}
                    267: 	    }
                    268: 	    elsif ($key eq 'md5sum') {
                    269: 		if ($md5sum ne 'same') {
                    270: 		    $showflag=1;
                    271: 		}
                    272: 	    }
1.2       harris41  273: 	    elsif ($key eq 'cvstime') {
                    274: 		if ($cvstime!=0) {
                    275: 		    $showflag=1;
                    276: 		}
                    277: 	    }
1.1       harris41  278: 	    elsif ($key eq 'age') {
                    279: 		if ($age!=0) {
                    280: 		    $showflag=1;
                    281: 		}
                    282: 	    }
                    283: 	    elsif ($key eq 'size') {
                    284: 		if ($size!=0) {
                    285: 		    $showflag=1;
                    286: 		}
                    287: 	    }
                    288: 	    elsif ($key eq 'lines') {
                    289: 		if ($lines!=0) {
                    290: 		    $showflag=1;
                    291: 		}
                    292: 	    }
                    293: 	    elsif ($key eq 'diffs') {
                    294: 		if ($diffs ne '0:0') {
                    295: 		    $showflag=1;
                    296: 		}
                    297: 	    }
                    298: 	    if ($showflag) {
                    299: 		last FLOP2;
                    300: 	    }
                    301:         }
                    302:     }
                    303:     elsif ($show eq 'same') {
                    304:         my @ks=(keys %restrict);
                    305:         unless (@ks) {
1.2       harris41  306: 	    @ks=('existence','md5sum','cvstime','age','size','lines','diffs');
1.1       harris41  307: 	}
                    308:         my $showcount=length(@ks);
                    309:         FLOP3: for my $key (@ks) {
                    310: 	    if ($key eq 'existence') {
                    311: 		if ($existence ne 'yes:yes') {
                    312: 		    $showcount--;
                    313: 		}
                    314: 	    }
                    315: 	    elsif ($key eq 'md5sum') {
                    316: 		if ($md5sum ne 'same') {
                    317: 		    $showcount--;
                    318: 		}
                    319: 	    }
1.2       harris41  320: 	    elsif ($key eq 'cvstime') {
                    321: 		if ($cvstime!=0) {
                    322: 		    $showcount--;
                    323: 		}
                    324: 	    }
1.1       harris41  325: 	    elsif ($key eq 'age') {
                    326: 		if ($age!=0) {
                    327: 		    $showcount--;
                    328: 		}
                    329: 	    }
                    330: 	    elsif ($key eq 'size') {
                    331: 		if ($size!=0) {
                    332: 		    $showcount--;
                    333: 		}
                    334: 	    }
                    335: 	    elsif ($key eq 'lines') {
                    336: 		if ($lines!=0) {
                    337: 		    $showcount--;
                    338: 		}
                    339: 	    }
                    340: 	    elsif ($key eq 'diffs') {
                    341: 		if ($diffs ne '0:0') {
                    342: 		    $showcount--;
                    343: 		}
                    344: 	    }
                    345:         }
                    346:         if ($showcount==0) {
                    347: 	    $showflag=1;
                    348: 	}
                    349:     }
1.2       harris41  350:     if ($buildmode==1) {
                    351:         if ($md5sum eq 'same') {
                    352: 	    exit(1);
                    353: 	}
                    354:         elsif ($cvstime<0) {
                    355: 	    exit(2);
                    356: 	}
                    357:         else {
                    358: 	    exit(0);
                    359: 	}
                    360:     }
                    361:     elsif ($buildmode==2) {
                    362:         if ($cvstime<0) {
                    363: 	    exit(2);
                    364: 	}
                    365:         else {
                    366: 	    exit(0);
                    367: 	}
                    368:     }
                    369:     elsif ($buildmode==3) {
                    370:         if ($md5sum eq 'same') {
                    371: 	    exit(1);
                    372: 	}
                    373:         elsif ($age<0) {
                    374: 	    exit(2);
                    375: 	}
                    376:         else {
                    377: 	    exit(0);
                    378: 	}
                    379:     }
                    380:     elsif ($buildmode==4) {
                    381:         if ($cvstime>0) {
                    382: 	    exit(2);
                    383: 	}
                    384:         else {
                    385: 	    exit(0);
                    386: 	}
                    387:     }
1.1       harris41  388:     print "$file";
                    389:     if ($verbose==1) {
                    390:         print "\t";
                    391: 	print &{$OUTPUT{'existence'}}($existence);
                    392:         print "\t";
1.2       harris41  393: 	print &{$OUTPUT{'cvstime'}}($cvstime);
                    394:         print "\t";
1.1       harris41  395: 	print &{$OUTPUT{'age'}}($age);
                    396:         print "\t";
                    397: 	print &{$OUTPUT{'md5sum'}}($md5sum);
                    398:         print "\t";
                    399: 	print &{$OUTPUT{'size'}}($size);
                    400:         print "\t";
                    401: 	print &{$OUTPUT{'lines'}}($lines);
                    402:         print "\t";
                    403: 	print &{$OUTPUT{'diffs'}}($diffs);
                    404:     }
                    405:     print "\n";
                    406: }
                    407: 
# cvstime(FILE)
# Look up FILE in the CVS/Entries file of its own directory and return the
# entry's timestamp field converted to seconds since the epoch by utctime.
# Returns 'n/a' without touching CVS/Entries when $buildmode is 3.
# Dies if CVS/Entries cannot be read or has no entry for FILE.
sub cvstime {
    my ($f)=@_;
    # Split into directory part (with trailing slash, possibly empty) and
    # bare file name.
    my ($path,$file)=('',$f);
    if ($f=~/^(.*\/)(.*?)$/) {
	($path,$file)=($1,$2);
    }
    if ($buildmode==3) {
	return 'n/a';
    }
    # File entries look like "/name/revision/timestamp/...".  The name is
    # compared exactly rather than used as a pattern, so characters such as
    # '.' in the file name cannot match a different entry.
    my @fields;
    my $found=0;
    if (open(my $entries,'<',$path.'CVS/Entries')) {
	while (my $line=<$entries>) {
	    my @candidate=split(/\//,$line);
	    next unless @candidate>1;
	    if ($candidate[0] eq '' and $candidate[1] eq $file) {
		@fields=@candidate;
		$found=1;
		last;
	    }
	}
	close($entries);
    }
    unless ($found) {
	die('*** ERROR *** cannot grep against '.$path.
	    'CVS/Entries for ' .$file . "\n");
    }
    my $stamp=defined($fields[3]) ? $fields[3] : '';
    return utctime($stamp);
}
1.1       harris41  432: 
# utctime(STRING)
# Convert a date/time string, interpreted as UTC, to seconds since the
# epoch.  Strings of the form "[Www] Mmm D HH:MM:SS YYYY" (what scalar
# gmtime produces) are converted in-process with Time::Local.  Anything
# else is handed to the external "date" command, whose output (an empty
# string on failure) is returned.
sub utctime {
    my ($f)=@_;
    my %month_index=('Jan'=>0,'Feb'=>1,'Mar'=>2,'Apr'=>3,'May'=>4,'Jun'=>5,
		     'Jul'=>6,'Aug'=>7,'Sep'=>8,'Oct'=>9,'Nov'=>10,'Dec'=>11);
    if (defined $f and
	$f=~/^\s*(?:[A-Za-z]{3}\s+)?([A-Za-z]{3})\s+(\d{1,2})\s+(\d{1,2}):(\d\d):(\d\d)\s+(\d{4})\s*$/
	and exists $month_index{$1}) {
	my ($mon,$mday,$hour,$min,$sec,$year)=
	    ($month_index{$1},$2,$3,$4,$5,$6);
	require Time::Local;
	local $@;
	# timegm croaks on out-of-range fields; fall through to "date" then.
	my $epoch=eval { Time::Local::timegm($sec,$min,$hour,$mday,$mon,$year) };
	return $epoch if defined $epoch;
    }
    my $utctime=`date -d '$f UTC' --utc +"%s"`;
    chomp $utctime;
    return $utctime;
}
1.1       harris41  439: 
# dowarn(MESSAGE)
# Emit MESSAGE with warn, but only while $buildmode is false; in any of the
# build modes the message is dropped.
sub dowarn {
    my $text=shift;
    if (!$buildmode) {
	warn($text);
    }
}
1.4     ! harris41  444: 
        !           445: =head1 NAME
        !           446: 
        !           447: filecompare.pl - script used to help probe and compare file statistics
        !           448: 
        !           449: =head1 SYNOPSIS
        !           450: 
        !           451: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
        !           452: 
        !           453: or
        !           454: 
        !           455: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
        !           456: 
        !           457: Restrictions: a list of space separated values (after the file/dir names)
        !           458: can restrict the comparison.
        !           459: These values can be: existence, cvstime, age, md5sum, size, lines,
        !           460: and/or diffs.
        !           461: 
        !           462: Options (before file/dir names):
        !           463: 
        !           464:  -p show all files that have the same comparison
        !           465: 
        !           466:  -n show all files that have different comparisons
        !           467: 
        !           468:  -a show all files (with comparisons)
        !           469: 
        !           470:  -q only show file names (based on first file/dir)
        !           471: 
        !           472:  -v verbose mode (default)
        !           473: 
        !           474: =head1 DESCRIPTION
        !           475: 
        !           476: filecompare.pl can work in two modes: file comparison mode, or directory
        !           477: comparison mode.
        !           478: 
        !           479: Comparisons can be a function of:
        !           480: * existence similarity
        !           481: * cvs time similarity (first argument treated as CVS source)
        !           482: * age similarity (modification time)
        !           483: * md5sum similarity
        !           484: * size similarity (bytes)
        !           485: * line count difference
        !           486: * number of different lines
        !           487: 
        !           488: filecompare.pl integrates smoothly with the LPML installation language
        !           489: (linux packaging markup language).  filecompare.pl is a tool that can
        !           490: be used for safe CVS source-to-target installations.
        !           491: 
        !           492: =head1 README
        !           493: 
        !           494: filecompare.pl integrates smoothly with the LPML installation language
        !           495: (linux packaging markup language).  filecompare.pl is a tool that can
        !           496: be used for safe CVS source-to-target installations.
        !           497: 
        !           498: The unique identifier is considered to be the file name(s) independent
        !           499: of the directory path.
        !           500: 
        !           501: =head1 PREREQUISITES
        !           502: 
        !           503: =head1 COREQUISITES
        !           504: 
        !           505: =head1 OSNAMES
        !           506: 
        !           507: linux
        !           508: 
        !           509: =head1 SCRIPT CATEGORIES
        !           510: 
        !           511: Packaging/Administrative
        !           512: 
        !           513: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>