Annotation of loncom/build/filecompare.pl, revision 1.5

1.1       harris41    1: #!/usr/bin/perl
                      2: 
1.5     ! harris41    3: # The LearningOnline Network with CAPA
1.4       harris41    4: #
                      5: # filecompare.pl - script used to help probe and compare file statistics
                      6: #
1.1       harris41    7: # YEAR=2001
1.4       harris41    8: # 9/27, 10/24, 10/25, 11/4 Scott Harrison
                      9: # 11/14 Guy Albertelli
                     10: # 11/16 Scott Harrison
                     11: #
1.5     ! harris41   12: # $Id: filecompare.pl,v 1.4 2001/11/16 20:06:08 harris41 Exp $
1.4       harris41   13: ###
1.1       harris41   14: 
1.5     ! harris41   15: ###############################################################################
        !            16: ##                                                                           ##
        !            17: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
        !            18: ##                                                                           ##
        !            19: ## 1. Invocation                                                             ##
        !            20: ## 2. Notes                                                                  ##
        !            21: ## 3. Dependencies                                                           ##
        !            22: ## 4. Process command line arguments                                         ##
        !            23: ## 5. Process file/dir location arguments                                    ##
        !            24: ## 6. Process comparison restrictions                                        ##
        !            25: ## 7. Define output and measure subroutines                                  ##
        !            26: ## 8. Loop through files and calculate differences                           ##
        !            27: ## 9. Subroutines                                                            ##
        !            28: ## 10. POD (plain old documentation, CPAN style)                             ##
        !            29: ##                                                                           ##
        !            30: ###############################################################################
        !            31: 
# ------------------------------------------------------------------ Invocation
# Usage text.  It is printed, followed by exit status 1, whenever the command
# line cannot be understood.  The -bN exit codes listed here mirror the
# build-mode checks made inside the main file loop.
my $invocation=<<END;
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
or
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]

Restrictions: a list of space separated values (after the file/dir names)
can restrict the comparison.
These values can be: existence, cvstime, age, md5sum, size, lines,
and/or diffs.

Options (before file/dir names):
-p show all files that have the same comparison
-n show all files that have different comparisons
-a show all files (with comparisons)
-q only show file names (based on first file/dir)
-v verbose mode (default)
-bN buildmode (controls exit code of this script; 0 unless...)
   N=1: md5sum=same --> 1; cvstime<0 --> 2
   N=2: same as N=1 except without md5sum
   N=3: md5sum=same --> 1; age<0 --> 2
   N=4: cvstime>0 --> 2
END

# Called with no arguments at all: show the usage text and fail.
unless (@ARGV) {
    print $invocation;
    exit 1;
}
1.5     ! harris41   59: 
1.1       harris41   60: # ----------------------------------------------------------------------- Notes
                     61: #
                     62: # What are all the different ways to compare two files and how to look
                     63: # at the differences?
                     64: #
                     65: # Ways of comparison:
                     66: #   existence similarity
1.2       harris41   67: #   cvs time similarity (first argument treated as CVS source)
1.1       harris41   68: #   age similarity (modification time)
                     69: #   md5sum similarity
                     70: #   size similarity (bytes)
                     71: #   line count difference
                     72: #   number of different lines
                     73: #
                     74: # Quantities of comparison:
                     75: #   existence (no,yes); other values become 'n/a'
1.2       harris41   76: #   cvstime in seconds
1.1       harris41   77: #   age in seconds
                     78: #   md5sum ("same" or "different")
                     79: #   size similarity (byte difference)
                     80: #   line count difference (integer)
                     81: #   number of different lines (integer)
                     82: 
1.5     ! harris41   83: # ---------------------------------------------------------------- Dependencies
1.1       harris41   84: # implementing from unix command line (assuming bash)
                     85: # md5sum, diff, wc -l
                     86: 
                     87: # ---------------------------------------------- Process command line arguments
                     88: # Flags (before file/dir names):
                     89: # -p show all files the same
                     90: # -n show all files different
                     91: # -a show all files (with comparisons)
                     92: # -q only show file names (based on first file/dir)
                     93: # -v verbose mode (default)
1.5     ! harris41   94: # -bN build/install mode (returns exitcode)
1.1       harris41   95: my $verbose='1';
                     96: my $show='all';
1.2       harris41   97: my $buildmode=0;
1.1       harris41   98: while (@ARGV) {
                     99:     my $flag;
                    100:     if ($ARGV[0]=~/^\-(\w)/) {
                    101: 	$flag=$1;
1.5     ! harris41  102: 	if ($flag eq 'b') {
        !           103: 	    $ARGV[0]=~/^\-\w(\d)/;
        !           104: 	    $buildmode=$1;
        !           105: 	}
1.1       harris41  106: 	shift @ARGV;
                    107:       SWITCH: {
                    108: 	  $verbose=0, last SWITCH if $flag eq 'q';
                    109: 	  $verbose=1, last SWITCH if $flag eq 'v';
                    110: 	  $show='same', last SWITCH if $flag eq 'p';
                    111: 	  $show='different', last SWITCH if $flag eq 'n';
                    112: 	  $show='all', last SWITCH if $flag eq 'a';
                    113: 	  print($invocation), exit(1);
                    114:       }
                    115:     }
                    116:     else {
                    117: 	last;
                    118:     }
                    119: }
1.2       harris41  120: dowarn('Verbose: '.$verbose."\n");
                    121: dowarn('Show: '.$show."\n");
1.1       harris41  122: 
# ----------------------------------------- Process file/dir location arguments
# FILE1 FILE2 or DIR1 DIR2
#
# @files ends up holding the names to iterate over: the single FILE1 in
# 'files' mode, or every regular file below DIR1 (as a path relative to DIR1)
# in 'directories' mode.
my $loc1=shift @ARGV;
my $loc2=shift @ARGV;
my $dirmode='directories';
my @files;
# Both locations are mandatory.  Test definedness and length so that a
# location literally named "0" is still accepted.
unless (defined($loc1) and length($loc1) and defined($loc2) and length($loc2)) {
    print($invocation);
    exit(1);
}
if (-f $loc1) {
    $dirmode='files';
    @files=($loc1);
}
else {
    if (-e $loc1) {
	# Single-quote the directory for the shell so that spaces and
	# metacharacters in its name reach find(1) unchanged.
	(my $quoted=$loc1)=~s/'/'\\''/g;
	@files=`find '$quoted' -type f`;
    }
    else {
	# Location #1 does not exist; keep it so that the main loop can
	# still report on it.
	@files=($loc1);
    }
    # Make every entry relative to location #1.  Trailing slashes on the
    # location are ignored and the prefix is matched literally (\Q...\E).
    (my $prefix=$loc1)=~s/\/+$//;
    foreach my $entry (@files) {
	chomp $entry;
	$entry=~s/^\Q$prefix\E\/+//;
    }
}
dowarn('Processing for mode: '.$dirmode."\n");
dowarn('Location #1: '.$loc1."\n");
dowarn('Location #2: '.$loc2."\n");
1.1       harris41  148: 
# --------------------------------------------- Process comparison restrictions
# A list of space separated values (after the file/dir names)
# can restrict the comparison.
#
# %rhash is the set of recognised restriction names (the values are unused);
# %restrict collects the names actually given on the command line.
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0,
	      'lines'=>0,'diffs'=>0);
my %restrict;
while (@ARGV) {
    my $r=shift @ARGV;
    # Test with exists: an unknown name yields undef, which would compare
    # numerically equal to 0 and be accepted by mistake.
    if (exists $rhash{$r}) {
	$restrict{$r}=1;
    }
    else {
	print($invocation);
	exit(1);
    }
}
if (%restrict) {
    dowarn('Restricting comparison to: '.
	 join(' ',keys %restrict)."\n");
}
                    164: 
# --------------------------------------- Define output and measure subroutines
# %OUTPUT maps each comparison name to a printer.  A printer writes
# "<name>: <value>" to the currently selected output handle and returns
# nothing, so wrapping the call in print() adds no extra text.
my %OUTPUT=map {
    my $label=$_;
    ($label => sub { print $label.': '.$_[0]; return; })
} qw(existence md5sum cvstime age size lines diffs);
                    175: 
                    176: my %MEASURE=(
1.4       harris41  177: 	 'existence' => ( sub { my ($file1,$file2)=@_;
1.1       harris41  178: 		        my $rv1=(-e $file1)?'yes':'no';
                    179: 			my $rv2=(-e $file2)?'yes':'no';
1.4       harris41  180: 			return ($rv1,$rv2); } ),
                    181: 	 'md5sum'=>( sub { my ($file1,$file2)=@_;
1.3       albertel  182: 			my ($rv1)=split(/ /,`md5sum $file1`); chop $rv1;
                    183: 			my ($rv2)=split(/ /,`md5sum $file2`); chop $rv2;
1.4       harris41  184: 			return ($rv1,$rv2); } ),
                    185: 	 'cvstime'=>( sub { my ($file1,$file2)=@_;
1.2       harris41  186: 			my $rv1=&cvstime($file1);
                    187: 			my @a=stat($file2); my $gmt=gmtime($a[9]);
                    188: 			my $rv2=&utctime($gmt);
1.4       harris41  189: 			return ($rv1,$rv2); } ),
                    190:          'age'=>( sub {	my ($file1,$file2)=@_;
1.2       harris41  191: 			my @a=stat($file1); my $rv1=$a[9];
                    192: 			@a=stat($file2); my $rv2=$a[9];
1.4       harris41  193: 			return ($rv1,$rv2); } ),
                    194:          'size'=>( sub { my ($file1,$file2)=@_;
1.1       harris41  195: 			my @a=stat($file1); my $rv1=$a[7];
                    196: 			@a=stat($file2); my $rv2=$a[7];
1.4       harris41  197: 			return ($rv1,$rv2); } ),
                    198:          'lines'=>( sub { my ($file1,$file2)=@_;
1.1       harris41  199: 			my $rv1=`wc -l $file1`; chop $rv1;
                    200: 			my $rv2=`wc -l $file2`; chop $rv2;
1.4       harris41  201: 			return ($rv1,$rv2); } ),
                    202:          'diffs'=>( sub { my ($file1,$file2)=@_;
1.1       harris41  203: 			my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`;
                    204: 			chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//;
                    205: 			my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`;
                    206: 			chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//;
1.4       harris41  207: 			return ($rv1,$rv2); } ),
1.1       harris41  208: );
                    209: 
# ---------------------------- Loop through files and calculate differences
# For each entry of @files: measure both files, reduce every measurement to
# one comparison value, act on the build mode (which exits the script with a
# status code), and otherwise print one report line if the -p/-n/-a setting
# selects this file.
FLOOP: foreach my $file (@files) {
    # In 'directories' mode $file is relative to both locations; in 'files'
    # mode the two locations are themselves the files to compare.
    my ($file1,$file2)=($loc1,$loc2);
    if ($dirmode eq 'directories') {
        $file1=$loc1.'/'.$file;
        $file2=$loc2.'/'.$file;
    }

    my ($existence1,$existence2)=$MEASURE{'existence'}->($file1,$file2);
    my $existence=$existence1.':'.$existence2;

    # Unless both files exist, nothing else can be measured and every other
    # value stays 'n/a'.
    my ($cvstime,$md5sum,$age,$size,$lines,$diffs)=('n/a') x 6;
    if ($existence1 ne 'no' and $existence2 ne 'no') {
        my ($cvstime1,$cvstime2)=$MEASURE{'cvstime'}->($file1,$file2);
        $cvstime=$cvstime1-$cvstime2;
        my ($age1,$age2)=$MEASURE{'age'}->($file1,$file2);
        $age=$age1-$age2;
        my ($md5sum1,$md5sum2)=$MEASURE{'md5sum'}->($file1,$file2);
        if ($md5sum1 eq $md5sum2) {
            # Identical checksums: skip the content measurements.
            ($md5sum,$size,$lines,$diffs)=('same',0,0,0);
        }
        else {
            $md5sum='different';
            my ($size1,$size2)=$MEASURE{'size'}->($file1,$file2);
            $size=$size1-$size2;
            my ($lines1,$lines2)=$MEASURE{'lines'}->($file1,$file2);
            $lines=$lines1-$lines2;
            my ($diffs1,$diffs2)=$MEASURE{'diffs'}->($file1,$file2);
            $diffs=$diffs1.':'.$diffs2;
        }
    }

    my %value=('existence'=>$existence,'cvstime'=>$cvstime,'age'=>$age,
               'md5sum'=>$md5sum,'size'=>$size,'lines'=>$lines,
               'diffs'=>$diffs);

    # Decide, per comparison, whether the two files differ.  'n/a' counts
    # as a difference for every comparison, because a missing file cannot
    # be confirmed to be the same.
    my %differs=('existence'=>($existence ne 'yes:yes'),
                 'md5sum'=>($md5sum ne 'same'),
                 'diffs'=>($diffs ne '0:0'));
    foreach my $key ('cvstime','age','size','lines') {
        $differs{$key}=($value{$key} eq 'n/a' or $value{$key}!=0);
    }

    # Only the restricted comparisons take part (all of them by default).
    my @ks=(keys %restrict);
    unless (@ks) {
        @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
    }
    my $mismatches=grep { $differs{$_} } @ks;

    my $showflag=0;
    if ($show eq 'all') {
        $showflag=1;
    }
    elsif ($show eq 'different') {
        # At least one selected comparison differs.
        $showflag=1 if $mismatches;
    }
    elsif ($show eq 'same') {
        # No selected comparison differs.
        $showflag=1 unless $mismatches;
    }

    # Build modes report through the exit status alone, so the script ends
    # at the first file examined.
    if ($buildmode==1) {
        exit(1) if $md5sum eq 'same';
        exit(2) if $cvstime<0;
        exit(0);
    }
    elsif ($buildmode==2) {
        exit(2) if $cvstime<0;
        exit(0);
    }
    elsif ($buildmode==3) {
        exit(1) if $md5sum eq 'same';
        exit(2) if $age<0;
        exit(0);
    }
    elsif ($buildmode==4) {
        exit(2) if $cvstime>0;
        exit(0);
    }

    # Honour -p / -n: skip files the chosen view does not select.
    next FLOOP unless $showflag;

    print "$file";
    if ($verbose==1) {
        # Each printer writes its own "name: value" text.
        foreach my $key ('existence','cvstime','age','md5sum','size',
                         'lines','diffs') {
            print "\t";
            $OUTPUT{$key}->($value{$key});
        }
    }
    print "\n";
}
                    408: 
# ----------------------------------------------------------------- Subroutines

# cvstime($f)
# Look up file $f in the CVS/Entries file of its own directory and return
# the timestamp recorded there, converted to epoch seconds by date(1).
# Returns the string 'n/a' when $buildmode is 3.
# Dies if CVS/Entries cannot be read or holds no entry for the file.
sub cvstime {
    my ($f)=@_;
    # Split into directory part (with trailing slash, possibly empty) and
    # bare file name.
    my ($path,$file)=('',$f);
    if ($f=~/^(.*\/)(.*?)$/) {
	($path,$file)=($1,$2);
    }
    return 'n/a' if $buildmode==3;

    # File entries are split on '/', so a matching line has an empty
    # first field, the name second and the timestamp fourth.  The name is
    # compared literally, so '.' in a file name cannot match another file.
    my @fields;
    if (open(my $entries,'<',$path.'CVS/Entries')) {
	while (my $line=<$entries>) {
	    my @candidate=split(/\//,$line);
	    next unless defined($candidate[1]);
	    if ($candidate[0] eq '' and $candidate[1] eq $file) {
		@fields=@candidate;
		last;
	    }
	}
	close($entries);
    }
    unless (@fields) {
	die('*** ERROR *** cannot grep against '.${path}.
	    'CVS/Entries for ' .$file . "\n");
    }

    # Escape single quotes so the timestamp stays one shell argument.
    my $stamp=defined($fields[3]) ? $fields[3] : '';
    $stamp=~s/'/'\\''/g;
    my $cvstime=`date -d '$stamp UTC' --utc +"%s"`;
    chomp $cvstime;
    return $cvstime;
}
1.1       harris41  435: 
# utctime($f)
# Hand the date string $f, with " UTC" appended, to date(1) and return the
# epoch seconds it prints, without the trailing newline.
sub utctime {
    my ($f)=@_;
    chomp(my $epoch=qx{date -d '$f UTC' --utc +"%s"});
    return $epoch;
}
1.1       harris41  442: 
# dowarn($msg)
# Send $msg to warn() as a progress message, but only while no build mode is
# active; in build mode the script stays silent.
sub dowarn {
    my ($text)=@_;
    $buildmode or warn($text);
}
1.5     ! harris41  447: 
        !           448: # ----------------------------------- POD (plain old documentation, CPAN style)
1.4       harris41  449: 
                    450: =head1 NAME
                    451: 
                    452: filecompare.pl - script used to help probe and compare file statistics
                    453: 
                    454: =head1 SYNOPSIS
                    455: 
                    456: filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ]
                    457: 
                    458: or
                    459: 
                    460: filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ]
                    461: 
                    462: Restrictions: a list of space separated values (after the file/dir names)
                    463: can restrict the comparison.
                    464: These values can be: existence, cvstime, age, md5sum, size, lines,
                    465: and/or diffs.
                    466: 
                    467: Options (before file/dir names):
                    468: 
                    469:  -p show all files that have the same comparison
                    470: 
                    471:  -n show all files that have different comparisons
                    472: 
                    473:  -a show all files (with comparisons)
                    474: 
                    475:  -q only show file names (based on first file/dir)
                    476: 
                    477:  -v verbose mode (default)
                    478: 
                    479: =head1 DESCRIPTION
                    480: 
                    481: filecompare.pl can work in two modes: file comparison mode, or directory
                    482: comparison mode.
                    483: 
                    484: Comparisons can be a function of:
                    485: * existence similarity
                    486: * cvs time similarity (first argument treated as CVS source)
                    487: * age similarity (modification time)
                    488: * md5sum similarity
                    489: * size similarity (bytes)
                    490: * line count difference
                    491: * number of different lines
                    492: 
                    493: filecompare.pl integrates smoothly with the LPML installation language
                    494: (linux packaging markup language).  filecompare.pl is a tool that can
                    495: be used for safe CVS source-to-target installations.
                    496: 
                    497: =head1 README
                    498: 
                    499: filecompare.pl integrates smoothly with the LPML installation language
                    500: (linux packaging markup language).  filecompare.pl is a tool that can
                    501: be used for safe CVS source-to-target installations.
                    502: 
                    503: The unique identifier is considered to be the file name(s) independent
                    504: of the directory path.
                    505: 
                    506: =head1 PREREQUISITES
                    507: 
                    508: =head1 COREQUISITES
                    509: 
                    510: =head1 OSNAMES
                    511: 
                    512: linux
                    513: 
                    514: =head1 SCRIPT CATEGORIES
                    515: 
                    516: Packaging/Administrative
                    517: 
                    518: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>