--- loncom/build/filecompare.pl 2001/11/14 13:19:36 1.3 +++ loncom/build/filecompare.pl 2002/04/27 21:12:18 1.12 @@ -1,31 +1,95 @@ #!/usr/bin/perl -# Scott Harrison +# The LearningOnline Network with CAPA +# filecompare.pl - script used to help probe and compare file statistics +# +# $Id: filecompare.pl,v 1.12 2002/04/27 21:12:18 harris41 Exp $ +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ +# # YEAR=2001 -# 9/27, 10/24, 10/25, 11/4 +# 9/27, 10/24, 10/25, 11/4 Scott Harrison +# 11/14 Guy Albertelli +# 11/16,11/17 Scott Harrison +# 12/3,12/5 Scott Harrison +# +### + +############################################################################### +## ## +## ORGANIZATION OF THIS PERL SCRIPT ## +## ## +## 1. Invocation ## +## 2. Notes ## +## 3. Dependencies ## +## 4. Process command line arguments ## +## 5. Process file/dir location arguments ## +## 6. Process comparison restrictions ## +## 7. Define output and measure subroutines ## +## 8. Loop through files and calculate differences ## +## 9. Subroutines ## +## 10. POD (plain old documentation, CPAN style) ## +## ## +############################################################################### -my $invocation=< 1; cvstime<0 --> 2 + N=2: same as N=1 except without md5sum + N=3: md5sum=same --> 1; age<0 --> 2 + N=4: cvstime>0 --> 2 + +The third way to pass arguments is set by the -s flag. +filecompare.pl -s SOURCE=[source] TARGET=[target] MODE=[mode] LOC1 LOC2 + +TARGET corresponds to the root path of LOC2. SOURCE corresponds to +the root path of LOC1. MODE can either be file, directory, link, or fileglob. + END unless (@ARGV) { print $invocation; exit 1; } + # ----------------------------------------------------------------------- Notes # # What are all the different ways to compare two files and how to look @@ -33,7 +97,7 @@ unless (@ARGV) { # # Ways of comparison: # existence similarity -# cvs time similarity (first argument treated as CVS source) +# cvs time similarity (1st arg treated as CVS source; only for buildmode) # age similarity (modification time) # md5sum similarity # size similarity (bytes) @@ -48,25 +112,8 @@ unless (@ARGV) { # size similarity (byte difference) # line count difference (integer) # number of different lines (integer) -# -# Text output of comparison: -# existence VALUE -# cvstime VALUE -# age VALUE -# md5sum VALUE -# size VALUE -# lines VALUE -# diffs VALUE -# -# Output of comparison: -# exist -# if md5sum not same, then different -# if cvstime not 0, then older/newer -# if age not 0, then older/newer -# if size not 0, then bigger/smaller -# if lines not 0, then more lines of code/less lines of code -# if diffs not 0, then subtracted lines/added lines/changed lines +# ---------------------------------------------------------------- Dependencies # implementing from unix command line (assuming bash) # md5sum, diff, wc -l @@ -77,14 +124,23 @@ unless (@ARGV) { # -a show all files (with comparisons) # -q only show file names (based on first file/dir) # -v verbose mode (default) -# -b build/install mode (returns exitcode) +# -bN build/install mode (returns exitcode) +# -s status checking mode for lpml + my $verbose='1'; my $show='all'; my $buildmode=0; -while (@ARGV) { +my $statusmode=0; +ALOOP: while (@ARGV) { my $flag; if ($ARGV[0]=~/^\-(\w)/) { $flag=$1; + if ($flag eq 'b') { + $ARGV[0]=~/^\-\w(\d)/; + $buildmode=$1; + shift @ARGV; + next ALOOP; + } shift @ARGV; SWITCH: { $verbose=0, last SWITCH if $flag eq 'q'; @@ -92,10 +148,7 @@ while (@ARGV) { $show='same', last SWITCH if $flag eq 'p'; $show='different', last SWITCH if $flag eq 'n'; $show='all', last SWITCH if $flag eq 'a'; - $buildmode=1, last SWITCH if $flag eq 'b'; - $buildmode=2, last SWITCH if $flag eq 'B'; - $buildmode=3, last SWITCH if $flag eq 'g'; - $buildmode=4, last SWITCH if $flag eq 'G'; + $statusmode=1, last SWITCH if $flag eq 's'; print($invocation), exit(1); } } @@ -106,12 +159,43 @@ while (@ARGV) { dowarn('Verbose: '.$verbose."\n"); dowarn('Show: '.$show."\n"); -# FILE1 FILE2 or DIR1 DIR2 -my $loc1=shift @ARGV; -my $loc2=shift @ARGV; -my $dirmode='directories'; my @files; +my $loc1; +my $loc2; +my $dirmode='directories'; +# ----------------------------------------- If status checking mode for lpml +my ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob); +my ($source,$target); +if ($statusmode==1) { + ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob)=splice(@ARGV,0,5); + $targetroot.='/' if $targetroot!~/\/$/; + $sourceroot=~s/^SOURCE\=//; + $targetroot=~s/^TARGET\=//; + $source=$sourceroot.'/'.$sourceglob; + $target=$targetroot.''.$targetglob; +# print "SOURCE: $source\n"; +# print "TARGET: $target\n"; + if ($mode eq 'MODE=fileglob') { + $loc1=$source;$loc1=~s/\/[^\/]*$// if length($loc1)>2; + $loc2=$target;$loc2=~s/\/[^\/]*$// if length($loc2)>2; + @files=map {s/^$loc1\///;$_} glob($source); + $dirmode='directories'; + } + elsif ($mode eq 'MODE=file') { + $loc1=$source; + $loc2=$target; + $dirmode='files'; + @files=($loc1); + } +} +else { + +# ----------------------------------------- Process file/dir location arguments +# FILE1 FILE2 or DIR1 DIR2 +$loc1=shift @ARGV; +$loc2=shift @ARGV; unless ($loc1 and $loc2) { + print "LOC1: $loc1\nLOC2: $loc2\n"; print($invocation), exit(1); } if (-f $loc1) { @@ -130,138 +214,69 @@ else { dowarn('Processing for mode: '.$dirmode."\n"); dowarn('Location #1: '.$loc1."\n"); dowarn('Location #2: '.$loc2."\n"); - +} +# --------------------------------------------- Process comparison restrictions # A list of space separated values (after the file/dir names) # can restrict the comparison. +my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0, + 'lines'=>0,'diffs'=>0); my %restrict; while (@ARGV) { my $r=shift @ARGV; - if ($r eq 'existence' or - $r eq 'cvstime' or - $r eq 'md5sum' or - $r eq 'age' or - $r eq 'size' or - $r eq 'lines' or - $r eq 'diffs') { - $restrict{$r}=1; - } - else { - print($invocation), exit(1); - } + if ($rhash{$r}==0) {$restrict{$r}=1;} + else {print($invocation), exit(1);} } if (%restrict) { - warn('Restricting comparison to: '. + dowarn('Restricting comparison to: '. join(' ',keys %restrict)."\n"); } +# --------------------------------------- Define output and measure subroutines my %OUTPUT=( - 'existence'=>( - sub { - print 'existence: '.@_[0]; - return; - } - ), - 'md5sum'=>( - sub { - print 'md5sum: '.@_[0]; - return; - } - ), - 'cvstime'=>( - sub { - print 'cvstime: '.@_[0]; - return; - } - ), - 'age'=>( - sub { - print 'age: '.@_[0]; - return; - } - ), - 'size'=>( - sub { - print 'size: '.@_[0]; - return; - } - ), - 'lines'=>( - sub { - print 'lines: '.@_[0]; - return; - } - ), - 'diffs'=>( - sub { - print 'diffs: '.@_[0]; - return; - } - ), + 'existence'=>( sub {print 'existence: '.@_[0]; return;}), + 'md5sum'=>(sub {print 'md5sum: '.@_[0];return;}), + 'cvstime'=>(sub {print 'cvstime: '.@_[0];return;}), + 'age'=>(sub {print 'age: '.@_[0];return;}), + 'size'=>(sub {print 'size: '.@_[0];return;}), + 'lines'=>(sub {print 'lines: '.@_[0];return;}), + 'diffs'=>(sub {print 'diffs: '.@_[0];return;}), ); my %MEASURE=( - 'existence' => ( - sub { - my ($file1,$file2)=@_; + 'existence' => ( sub { my ($file1,$file2)=@_; my $rv1=(-e $file1)?'yes':'no'; my $rv2=(-e $file2)?'yes':'no'; - return ($rv1,$rv2); - } - ), - 'md5sum'=>( - sub { - my ($file1,$file2)=@_; + return ($rv1,$rv2); } ), + 'md5sum'=>( sub { my ($file1,$file2)=@_; my ($rv1)=split(/ /,`md5sum $file1`); chop $rv1; my ($rv2)=split(/ /,`md5sum $file2`); chop $rv2; - return ($rv1,$rv2); - } - ), - 'cvstime'=>( - sub { - my ($file1,$file2)=@_; + return ($rv1,$rv2); } ), + 'cvstime'=>( sub { my ($file1,$file2)=@_; my $rv1=&cvstime($file1); my @a=stat($file2); my $gmt=gmtime($a[9]); my $rv2=&utctime($gmt); - return ($rv1,$rv2); - } - ), - 'age'=>( - sub { - my ($file1,$file2)=@_; + return ($rv1,$rv2); } ), + 'age'=>( sub { my ($file1,$file2)=@_; my @a=stat($file1); my $rv1=$a[9]; @a=stat($file2); my $rv2=$a[9]; - return ($rv1,$rv2); - } - ), - 'size'=>( - sub { - my ($file1,$file2)=@_; + return ($rv1,$rv2); } ), + 'size'=>( sub { my ($file1,$file2)=@_; my @a=stat($file1); my $rv1=$a[7]; @a=stat($file2); my $rv2=$a[7]; - return ($rv1,$rv2); - } - ), - 'lines'=>( - sub { - my ($file1,$file2)=@_; + return ($rv1,$rv2); } ), + 'lines'=>( sub { my ($file1,$file2)=@_; my $rv1=`wc -l $file1`; chop $rv1; my $rv2=`wc -l $file2`; chop $rv2; - return ($rv1,$rv2); - } - ), - 'diffs'=>( - sub { - my ($file1,$file2)=@_; + return ($rv1,$rv2); } ), + 'diffs'=>( sub { my ($file1,$file2)=@_; my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`; chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//; my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`; chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//; - return ($rv1,$rv2); - } - ), + return ($rv1,$rv2); } ), ); -FLOP: foreach my $file (@files) { +FLOOP: foreach my $file (@files) { my $file1; my $file2; if ($dirmode eq 'directories') { @@ -284,8 +299,13 @@ FLOP: foreach my $file (@files) { $diffs='n/a'; } else { - my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2); - $cvstime=$cvstime1-$cvstime2; + if ($buildmode) { + my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2); + $cvstime=$cvstime1-$cvstime2; + } + else { + $cvstime='n/a'; + } my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2); $age=$age1-$age2; my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2); @@ -293,7 +313,7 @@ FLOP: foreach my $file (@files) { $md5sum='same'; $size=0; $lines=0; - $diffs=0; + $diffs='0:0'; } elsif ($md5sum1 ne $md5sum2) { $md5sum='different'; @@ -314,7 +334,7 @@ FLOP: foreach my $file (@files) { unless (@ks) { @ks=('existence','cvstime','md5sum','age','size','lines','diffs'); } - FLOP2: for my $key (@ks) { + FLOOP2: for my $key (@ks) { if ($key eq 'existence') { if ($existence ne 'yes:yes') { $showflag=1; @@ -325,7 +345,7 @@ FLOP: foreach my $file (@files) { $showflag=1; } } - elsif ($key eq 'cvstime') { + elsif ($key eq 'cvstime' and $buildmode) { if ($cvstime!=0) { $showflag=1; } @@ -351,7 +371,7 @@ FLOP: foreach my $file (@files) { } } if ($showflag) { - last FLOP2; + last FLOOP2; } } } @@ -361,7 +381,8 @@ FLOP: foreach my $file (@files) { @ks=('existence','md5sum','cvstime','age','size','lines','diffs'); } my $showcount=length(@ks); - FLOP3: for my $key (@ks) { + $showcount-- unless $buildmode; + FLOOP3: for my $key (@ks) { if ($key eq 'existence') { if ($existence ne 'yes:yes') { $showcount--; @@ -372,7 +393,7 @@ FLOP: foreach my $file (@files) { $showcount--; } } - elsif ($key eq 'cvstime') { + elsif ($key eq 'cvstime' and $buildmode) { if ($cvstime!=0) { $showcount--; } @@ -433,33 +454,43 @@ FLOP: foreach my $file (@files) { } } elsif ($buildmode==4) { - if ($cvstime>0) { + if ($existence=~/no$/) { + exit(3); + } + elsif ($cvstime>0) { exit(2); } + elsif ($existence=~/^no/) { + exit(1); + } else { exit(0); } } - print "$file"; - if ($verbose==1) { - print "\t"; - print &{$OUTPUT{'existence'}}($existence); - print "\t"; - print &{$OUTPUT{'cvstime'}}($cvstime); - print "\t"; - print &{$OUTPUT{'age'}}($age); - print "\t"; - print &{$OUTPUT{'md5sum'}}($md5sum); - print "\t"; - print &{$OUTPUT{'size'}}($size); - print "\t"; - print &{$OUTPUT{'lines'}}($lines); - print "\t"; - print &{$OUTPUT{'diffs'}}($diffs); + if ($showflag) { + print "$file"; + if ($verbose==1) { + print "\t"; + print &{$OUTPUT{'existence'}}($existence); + print "\t"; + print &{$OUTPUT{'cvstime'}}($cvstime); + print "\t"; + print &{$OUTPUT{'age'}}($age); + print "\t"; + print &{$OUTPUT{'md5sum'}}($md5sum); + print "\t"; + print &{$OUTPUT{'size'}}($size); + print "\t"; + print &{$OUTPUT{'lines'}}($lines); + print "\t"; + print &{$OUTPUT{'diffs'}}($diffs); + } + print "\n"; } - print "\n"; } +# ----------------------------------------------------------------- Subroutines + sub cvstime { my ($f)=@_; my $path; my $file; @@ -472,12 +503,18 @@ sub cvstime { } my $cvstime; if ($buildmode!=3) { - my $entry=`grep '^/$file/' ${path}CVS/Entries` or - die('*** ERROR *** cannot grep against '.${path}. - 'CVS/Entries for ' .$file . "\n"); - my @fields=split(/\//,$entry); - $cvstime=`date -d '$fields[3] UTC' --utc +"%s"`; - chomp $cvstime; + my $entry=`grep '^/$file/' ${path}CVS/Entries 2>/dev/null`; +# or +# die('*** WARNING *** cannot grep against '.${path}. +# 'CVS/Entries for ' .$file . "\n"); + if ($entry) { + my @fields=split(/\//,$entry); + $cvstime=`date -d '$fields[3] UTC' --utc +"%s"`; + chomp $cvstime; + } + else { + $cvstime='n/a'; + } } else { $cvstime='n/a'; @@ -496,3 +533,75 @@ sub dowarn { my ($msg)=@_; warn($msg) unless $buildmode; } + +# ----------------------------------- POD (plain old documentation, CPAN style) + +=head1 NAME + +filecompare.pl - script used to help probe and compare file statistics + +=head1 SYNOPSIS + +filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ] + +or + +filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ] + +Restrictions: a list of space separated values (after the file/dir names) +can restrict the comparison. +These values can be: existence, cvstime, age, md5sum, size, lines, +and/or diffs. + +Options (before file/dir names): + + -p show all files that have the same comparison + + -n show all files that have different comparisons + + -a show all files (with comparisons) + + -q only show file names (based on first file/dir) + + -v verbose mode (default) + +=head1 DESCRIPTION + +filecompare.pl can work in two modes: file comparison mode, or directory +comparison mode. + +Comparisons can be a function of: +* existence similarity +* cvs time similarity (first argument treated as CVS source) +* age similarity (modification time) +* md5sum similarity +* size similarity (bytes) +* line count difference +* number of different lines + +filecompare.pl integrates smoothly with the LPML installation language +(linux packaging markup language). filecompare.pl is a tool that can +be used for safe CVS source-to-target installations. + +=head1 README + +filecompare.pl integrates smoothly with the LPML installation language +(linux packaging markup language). filecompare.pl is a tool that can +be used for safe CVS source-to-target installations. + +The unique identifier is considered to be the file name(s) independent +of the directory path. + +=head1 PREREQUISITES + +=head1 COREQUISITES + +=head1 OSNAMES + +linux + +=head1 SCRIPT CATEGORIES + +Packaging/Administrative + +=cut