This program creates a recursive diff between two directory trees. The directory trees can be local or reachable by ssh. The diff lists added and removed files and for changed files, it lists the differences in diff style. This makes it convenient to review the differences between two machines that should be identical, or to find the steps that are needed to bring one directory tree to another.
After a hint by salva that Net::SSH2 comes included with Strawberry Perl, I was motivated to rip out some system-specific
ties to plink.exe and post this. Unfortunately, Net::SSH::Any doesn't seem to have a way to talk to the ssh agent for quick authentification, so this relies on Net::SSH2 instead of Net::SSH::Any.
#!perl -w
use strict;
use Algorithm::Diff;
use Getopt::Long;
use File::Find;
use File::stat;
use Net::SSH2;
=head1 NAME
diff-servers.pl
=head1 ABSTRACT
Generate a diff between two directory trees. The directory trees must
+be local or reachable
via ssh. Only the ssh2 protocol is supported.
=head1 SYNOPSIS
perl -w diff-servers.pl corion@production:/opt/mychat corion@staging
+:/tmp/deploy-0.025 --ignore .bak --ignore .gpg --ignore .pgp --ignore
+ random_seed
Quick diff, showing only added/missing files without comparing their c
+ontent
perl -w diff-servers.pl c:\mychat\old-versions\0.025 corion@staging:
+/tmp/deploy-0.025 -q
=head1 OPTIONS
=over 4
=item B<--ignore>
Regexp of directory entries to ignore
=item B<--no-mode>
Don't compare the file mode.
=item B<--no-owner>
Don't compare the file owner.
=item B<--quick>
Don't compare file contents
=item B<--verbose>
Be somewhat more verbose
=back
=head1 PREREQUISITES
Currently, the script expects C<find> and C<perl> to be available on t
+he remote
side. The dependency on C<find> could be eliminated by implementing th
+e functionality
in Perl. The dependency on C<perl> on the remote side could be elimina
+ted by using
the SFTP protocol for retrieving the directory tree, at an added compl
+exity.
=cut
GetOptions(
'verbose|v' => \my $verbose,
'ignore|i:s' => \my @ignore,
'no-owner|o' => \my $ignore_owner,
'no-mode|m' => \my $ignore_mode,
'quick|q' => \my $skip_contents,
);
use vars qw(%connections);
sub run_remote {
my( $server, $command )= @_;
my $user;
if( $server =~ /(.*)\@(.*)/ ) {
$user = $1;
$server = $2;
};
if( ! $connections{ $server }) {
my $ssh2 = Net::SSH2->new();
$ssh2->connect($server) or die "Couldn't connect to '$server':
+ $!";
if ($ssh2->auth( username => $user, interact => 1 )) {
$connections{ $server } = $ssh2;
} else {
die "No auth to $server.";
};
};
my $fh = $connections{ $server }->channel;
warn "[$command]" if $verbose;
$fh->exec($command) or die;
my @lines = map {s/\s+$//; $_ } <$fh>;
#warn "$server:[$_]" for @lines;
return @lines
}
sub get_local {
my( $file )= @_;
open my $fh, '<', $file
or warn "Couldn't read '$file': $!";
binmode $fh;
my @lines = map {s/\s+$//; $_ } <$fh>;
return @lines
}
sub split_serverpath {
my( $serverpath ) = @_;
if( $serverpath =~ /((?:\w+\@)[\w.]+):(.*)/ ) {
return ("$1","$2");
} else {
# Must be local
return (undef, $serverpath);
}
};
use Data::Dumper;
sub filelist {
my( $serverpath ) = @_;
my( $host, $dir ) = split_serverpath( $serverpath );
if( $host ) {
# Outputs a line per file
# mode user group type filename
my $uid_gid_file = q!perl -Mstrict -MFile::stat -nle 'next if
+/^\s*$/;my $s=stat($_);my($p,$u,$g,$t)=(0,q(-),q(-),q(f)); if($s and
+not -l) {$p=$s->mode;$u=(getpwuid($s->uid))[0];$g=(getgrgid($s->gid))
+[0] } else { $t=q(l)}; print sprintf qq(%08o %s %s %s %s), $p, $u,$g,
+$t,$_'!;
# Read all directory entries
my @remote_entries = map { my( $mode,$u,$g,$t,$name ) = split
+/ +/, $_, 6 ;
$name =~ s!^\Q$dir!!;
{ user => $u, group => $g, type =>
+$t, name => $name, mode => $mode };
} run_remote( $host, qq{find '$dir' -
+type f -o -type l| $uid_gid_file } );
return @remote_entries;
} else{
my @files;
find({ wanted => sub {
return if -d $_;
my $s = stat($_)
or warn "Couldn't stat [$_]: $!", return;
my $name = $_;
my $u='-';
my $g='-';
my $t='f';
my $mode = $s->mode;
$name =~ s!^\Q$dir!!;
push @files, { user => $u, group => $g, type => $t, name =
+> $name, mode => $mode };
}, no_chdir => 1 },
$dir );
#warn "local: $_" for @files;
return @files;
};
}
sub wanted_file {
my( $fileinfo )= @_;
my $file = $fileinfo->{name};
if( my @why = grep { $file =~ /\Q$_/ } @ignore ) {
#warn "Ignoring $file (@why)";
} else {
#warn "Allowing [$file] ...";
}
! grep { $file =~ /\Q$_/ } @ignore;
}
sub diff {
my( $name, $server1, $server2 )= @_;
my($host1, $path1) = split_serverpath( $server1 );
my($host2, $path2) = split_serverpath( $server2 );
my @left = $host1 ? run_remote( $host1, qq{cat '$path1$name'} ) :
+ get_local( "$server1$name" );
my @right = $host2 ? run_remote( $host2, qq{cat '$path2$name'} ) :
+ get_local( "$server2$name" );
my $diff = Algorithm::Diff->new( \@left, \@right );
$diff->Base( 1 ); # Return line numbers, not indices
my $has_diff;
while( $diff->Next() ) {
next if $diff->Same();
if( ! $has_diff ) {
$has_diff = 1;
print "$name\n";
};
my $sep = '';
if( ! $diff->Items(2) ) {
printf "%d,%dd%d\n",
$diff->Get(qw( Min1 Max1 Max2 ));
} elsif( ! $diff->Items(1) ) {
printf "%da%d,%d\n",
$diff->Get(qw( Max1 Min2 Max2 ));
} else {
$sep = "---\n";
printf "%d,%dc%d,%d\n",
$diff->Get(qw( Min1 Max1 Min2 Max2 ));
}
print "< $_\n" for $diff->Items(1);
print $sep;
print "> $_\n" for $diff->Items(2);
}
$has_diff
};
my( $server1, $server2 )= @ARGV;
#warn "Old: $server1";
#warn "New: $server2";
my %left_info = map { $_->{name} => $_ } grep { wanted_file($_) } file
+list( $server1 );
my %right_info = map { $_->{name} => $_ } grep { wanted_file($_) } fil
+elist( $server2 );
my @left_names = sort keys %left_info;
my @right_names = sort keys %right_info;
my $filediff = Algorithm::Diff->new( \@left_names, \@right_names );
my @samelist;
$filediff->Base( 1 ); # Return line numbers, not indices
while( $filediff->Next() ) {
if( $filediff->Same() ) {
# entry exists in both trees
push @samelist, $filediff->Items(1);
} else {
# Entries only on tree 2, but no symlink
my @new_items = grep { ! $right_info{ $_ }->{type} ne 'l' } $f
+ilediff->Items(2);
print "new: $_\n" for @new_items;
# Entries only on tree 1, but no symlink
my @old_items = grep { ! $left_info{ $_ }->{type} ne 'l' } $fi
+lediff->Items(1);
print "del: $_\n" for @old_items;
};
}
for my $same (@samelist) {
my $linfo = $left_info{ $same };
my $rinfo = $right_info{ $same };
#warn "File: $same";
#warn Dumper $linfo;
#warn Dumper $rinfo;
if( $linfo->{type} ne $rinfo->{type} ) {
print "$same: Link vs. file: $linfo->{type} => $rinfo->{type}\
+n";
};
next if $linfo->{type} eq 'l' or $rinfo->{type} eq 'l';
if( ! $ignore_owner ) {
if( $left_info{ $same }->{user} ne $right_info{ $same }->{
+user}
or $left_info{ $same }->{group} ne $right_info{ $same }->{
+group}
) {
print "$same: Ownership different: $left_info{ $same }->{use
+r}:$left_info{$same}->{group} ne $right_info{ $same }->{user}:$right_
+info{$same}->{group}\n";
};
};
if( ! $ignore_mode ) {
if( $left_info{ $same }->{mode} ne $right_info{ $same }->{m
+ode}
) {
print "$same: Mode different: $left_info{ $same }->{mode} $r
+ight_info{$same}->{mode}\n";
};
};
if( ! $skip_contents ) {
diff( $same, $server1, $server2 );
};
};
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.