#!/usr/local/bin/perl
eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}'
    if 0;    # not running under some shell

# cmp - compare two files
# VERSION 1.0
#
# Notes:
#
#   It's nice to see that, speedwise, this beats the pants off
#   GNU and IRIX system cmp in many tasks.  If compiled, this
#   would probably be faster in every case.  Go Perl !
#
# Compat:
#
#   GNU, Solaris, and IRIX cmp seem to give the EOF warning
#   whenever -l is given and file sizes are not equal.  That
#   doesn't match the BSD docs.  We follow the docs: once -l has
#   printed a difference, the EOF warning is suppressed.
#
# Todo:
#
#   support grouped single-letter opts
#
# Exit status: 0 when the compared data is identical, 1 otherwise.
# Fatal errors raised with die() would exit 255; the END block below
# rewrites that to 1.

use strict;
use warnings;
use Fcntl qw(SEEK_SET);

END {
    # Buffered write errors only surface at close.  Use warn rather than
    # die so the status fix-up below still runs.
    unless (close STDOUT) {
        warn "$0: can't close stdout: $!\n";
        $? ||= 1;
    }
    $? = 1 if $? == 255;    # from die
}

my $chunk_size = 10_000;    # how many bytes in a gulp
my $volume     = 1;         # 0 = silent (-s), 1 = default, 2 = list (-l)
my $file1;

# Does not yet support grouped opts.
while (@ARGV) {
    my $item = shift @ARGV;
    if ($item =~ /^--$/) {
        $file1 = shift @ARGV;
        last;
    }
    manual() if $item =~ /^-\?$/;    # exits
    if ($item =~ /^-l$/) {
        $volume = 2;
        next;
    }
    if ($item =~ /^-s$/) {
        $volume = 0;
        next;
    }
    usage() if $item =~ /^-./;       # exits
    $file1 = $item;
    last;
}

# After file1 there must be file2 and at most two skip offsets.
usage() unless @ARGV >= 1 && @ARGV <= 3;    # exits

my $file2 = shift @ARGV;
my $skip1 = @ARGV ? parse_offset(shift @ARGV) : 0;
my $skip2 = @ARGV ? parse_offset(shift @ARGV) : 0;

# Three-argument open with 'or': with '||' the die would bind to the file
# name and never run, and an unreadable file would compare as identical.
open my $fh1, '<', $file1 or die "$0: cannot open $file1: $!\n";
open my $fh2, '<', $file2 or die "$0: cannot open $file2: $!\n";
binmode $fh1;
binmode $fh2;

# Same underlying file, compared from the same offset: trivially identical.
# Device and inode must both match, and a zero or undefined inode (platforms
# where it is meaningless) never counts as a match.
{
    my ($dev1, $ino1) = stat $fh1;
    my ($dev2, $ino2) = stat $fh2;
    exit 0
        if $skip1 == $skip2
        && $ino1
        && $ino2
        && $ino1 == $ino2
        && $dev1 == $dev2;
}

for my $seek ([$fh1, $skip1, $file1], [$fh2, $skip2, $file2]) {
    my ($fh, $skip, $name) = @{$seek};
    next unless $skip;
    defined sysseek($fh, $skip, SEEK_SET)
        or die "$0: cannot seek in $name: $!\n";
}

my $bytes_read     = 0;    # bytes compared in earlier chunks
my $lines_read     = 0;    # newlines seen in file1 in earlier chunks
my $saw_difference = 0;    # set once -l has reported a differing byte

# Zero-length inputs need no special case: an empty chunk from one side is
# handled by the EOF test below, and not trusting the stat size keeps pipes
# and other special files (which report size 0) working.
while (1) {
    my $buffer1 = read_chunk($fh1, $file1, $chunk_size);
    my $buffer2 = read_chunk($fh2, $file2, $chunk_size);

    my $length1 = length $buffer1;
    my $length2 = length $buffer2;
    my $common  = $length1 < $length2 ? $length1 : $length2;

    # Only walk byte by byte when the overlapping part actually differs.
    if (substr($buffer1, 0, $common) ne substr($buffer2, 0, $common)) {
        for my $offset (0 .. $common - 1) {
            my $byte1 = substr $buffer1, $offset, 1;
            my $byte2 = substr $buffer2, $offset, 1;
            next if $byte1 eq $byte2;

            exit 1 if $volume == 0;

            my $report_bytes = $bytes_read + $offset + 1;
            if ($volume == 1) {
                my $prefix       = substr $buffer1, 0, $offset;
                my $newlines     = ($prefix =~ tr/\n//);
                my $report_lines = $lines_read + 1 + $newlines;
                print "$file1 $file2 differ: char $report_bytes, line $report_lines\n";
                exit 1;
            }

            # -l: list every differing byte and keep going.
            $saw_difference = 1;
            printf "%6d %3o %3o\n", $report_bytes, ord($byte1), ord($byte2);
        }
    }

    $lines_read += ($buffer1 =~ tr/\n//);
    $bytes_read += $common;

    # read_chunk only returns a short chunk at end of file, so unequal
    # lengths mean one input ended before the other.
    if ($length1 != $length2) {
        my $shorter = $length1 < $length2 ? $file1 : $file2;
        warn "$0: EOF on $shorter\n" if $volume and not $saw_difference;
        exit 1;
    }

    last if $length1 < $chunk_size;    # both inputs ended together
}

close $fh1;
close $fh2;

exit $saw_difference;

# read_chunk($fh, $name, $want)
#   Read up to $want bytes from $fh, retrying after short reads so that a
#   chunk shorter than $want is only ever returned at end of file.  Dies
#   with a message naming $name if the read fails.  Returns the bytes read
#   (the empty string at EOF).
sub read_chunk {
    my ($fh, $name, $want) = @_;

    my $buffer = '';
    while (length($buffer) < $want) {
        my $got = sysread($fh, $buffer, $want - length($buffer), length($buffer));
        die "$0: error reading $name: $!\n" unless defined $got;
        last if $got == 0;    # end of file
    }
    return $buffer;
}

# parse_offset($text)
#   Validate a skip argument and return it as a number.  Accepts decimal,
#   octal (leading 0) and hexadecimal (leading 0x, digits in either case).
#   Anything else ends the program through usage().
sub parse_offset {
    my ($text) = @_;

    usage()
        unless $text =~ /\A(?:0[xX][0-9A-Fa-f]+|0[0-7]*|[1-9][0-9]*)\z/;    # exits

    # oct() understands both the 0x and the leading-0 forms; no string eval
    # of command-line input is needed.
    return $text =~ /\A0/ ? oct($text) : $text + 0;
}

# usage()
#   Print the synopsis on standard error and terminate via die.
sub usage {
    die "usage: $0 [-l] [-s] file1 file2 [skip1 [skip2]]\n";
}

# manual()
#   Feed the POD that follows __END__ (available through the DATA handle)
#   to pod2text and exit.
sub manual {
    # I thought using inline docs like this was a natural extension
    # of perlishness, like better regular expressions, but tchrist
    # disapproves.
    # why is pod2text the only reasonable way to feed pod in
    # on STDIN ?  Would rather use "|perldoc -", as it already
    # knows about pager, etc.
    open my $man, '|-', 'pod2text'
        or die "$0: Cannot open pod2text converter\n";
    print {$man} <DATA>;

    # A failed exec of pod2text is only visible when the pipe is closed.
    close $man
        or die "$0: pod2text converter failed\n";
    exit;
}

__END__

=head1 NAME

cmp - compare two files

=head1 SYNOPSIS

cmp B<[-l]> B<[-s]> I<file1> I<file2> I<[skip1 [skip2]]>

=head1 DESCRIPTION

I<cmp> compares two files, byte-by-byte.  The result of the comparison
is always given by the exit status, and may be summarized on the
standard output according to the options given on the command line.

If the two compared files are identical, I<cmp> will exit with a zero
exit status.  If the compared files are not identical, I<cmp> will exit
with a status of 1.  Errors, such as a file that cannot be opened or
read, are reported on the standard error and also give a status of 1.

If no options are given, I<cmp> will return (on the standard output),
the byte number and line number where the first difference is
encountered.  If one file is an initial subsequence of the other, a
message will also be returned on the standard error indicating that
EOF was reached in the shorter of the two files.

I<skip1> and I<skip2> are optional byte offsets into I<file1> and
I<file2>, respectively, that determine where the file comparison will
begin.  Offsets may be given in decimal, octal, or hexadecimal form.
Indicate octal notation with a leading '0', and hexadecimal notation
with a leading '0x'.

=head2 OPTIONS

=over

=item -s

silent execution; indicate results only by exit status, suppressing
all output and warnings.

=item -l

list differences; return the byte number and the differing byte values
for each difference between the two files.  The byte number is given in
decimal, and the byte values are given in octal.

=item -?

display this manual, formatted by I<pod2text>, and exit.

=back

=head1 ENVIRONMENT

No environment variables affect the execution of I<cmp>.

=head1 BUGS

No known bugs.

=head1 AUTHOR

D Roland Walker I<E<lt>walker@pobox.comE<gt>>.

=head1 COPYRIGHT and LICENSE

This program is copyright (c) D Roland Walker 1999.

This program is free and open software.  You may use, modify,
distribute, and sell this program (and any modified variants) in any
way you wish, provided you do not restrict others from doing the same.