http://qs321.pair.com?node_id=89853
Category: Win32 Stuff
Author/Contact Info bikeNomad, Ned Konz, perl@bike-nomad.com
Description: This is a backup script suitable for running from a scheduler or interactively. It backs up one or more directory trees to a removable disk (CD-RW, Zip, floppy) or to a directory, perhaps on a network. You can specify files to include or exclude, as well as the destination, handling of deleted files, and size quotas using a configuration file. It will prompt the user to insert a disk if necessary; otherwise, it's not interactive. Win32 only (had to make it for my girlfriend's computer).

update: added support for wildcards in includes and excludes.

#!/usr/bin/perl -w
# cdmirror.pl
# Mirror a directory structure to a CD-RW, Zip disk, floppy, or other
# removable media device. Written for Win32.
# Uses Windows calls to determine free space on disk.
# Checks for either a minimum amount free or a maximum backup size.
# Can also backup to a directory.
# Logs to a logfile.
# For both includes and excludes specs, wildcards (including in direct
+ory
# names) are OK, as are spaces. Don't use quotes, though.
# 
# By Ned Konz, perl@bike-nomad.com
# Version 1.1
# CHANGES
# 1.1: added wildcards
#
# Requires Win32::DriveInfo by Mike Blazer.
#
# Program argument[s] are the names of config files.
# You may use as many config files as you wish.
# Config file format is lines of text with single character
# flags at the beginning of the line. Order is unimportant.
# You may have comment lines that start with '#' signs.
# Meaning of config file lines:
#
# i <file or dir>               include <file or dir> (wildcards OK)
# x <file or dir>               exclude <file or dir> (wildcards OK)
# r <"del"|"keep"|"move">       whether to delete, keep, or move to se
+parate
#                               directory files that were deleted
# c <name>                      disk set name
# s <nn%|size[M|K|B]>           specify maximum size of backup set. If
+ given
#                               as a percentage, means this many perce
+nt
#                               free must be left on the backup device
+.
#                               Defaults to size in M; you can give K
#                               or B instead for Kbytes or bytes.
# d <dir>                       specify the directory to back up to
#                               or volume (like d:)
# l <filename>                  name of logfile (default=stderr)
#                               to log to stdout, use -

use strict;
use File::Find;
use File::Spec;
use File::Copy;
use File::DosGlob 'glob';
use File::Path;
use Win32;
use Win32::DriveInfo;

# Read config file
die "must supply readable config file as only command line argument\n"
    if (@ARGV != 1 or ! -r $ARGV[0]);

my @includes;
my @excludes;
my $deleteMode = 'keep';
my $setName = 'Backup Set';
my $minimumPercentFree = 0;
my $maximumSize = 0;
my $destination;
my $volume;
my $directory;
my $logName;

sub readConfig
{
    # TODO should this be in decimal instead?
    # What's the standard with Windows?
    my %sizeMultiplier = (
        'g' => 1024*1024*1024,
        'm' => 1024*1024,
        'k' => 1024,
        'b' => 1,
        '%' => 0.01
    );

    while (<>)
    {
        chomp;
        next if m{^\s*#};
        m{^\s*([ixlrcsd])\s+(.*)\s*$}i
            or die "bad command line format at line $. : $_\n";
        my $key = lc($1);
        my $value = $2;
        $value =~ tr#\\#/#; # because that's what File::Find uses

        if ($key eq 'i')
        {
            $value = "\"$value\"" if $value =~ / /;
            push(@includes, map { canonpath($_) } glob($value));
            next
        }
        if ($key eq 'x')
        {
            $value = "\"$value\"" if $value =~ / /;
            push(@excludes, map { canonpath($_) } glob($value));
            next
        }
        if ($key eq 'l') { $logName = $value; next }
        if ($key eq 'r')
        {
            $deleteMode = $value;
            die "Bad r option $value\n" if ($value !~ /^(del|keep|move
+)$/);
            next
        }
        if ($key eq 'c') { $setName = $value; next }
        if ($key eq 's')
        {
            $value =~ m{^(\d+)\s*([gmkb%]?)}i or die "Bad s (size) opt
+ion $value\n";
            my $size = $1 * $sizeMultiplier{ lc($2||'m') };
            if ($2 eq '%') { $minimumPercentFree = $size; }
            else { $maximumSize = $size; }
            next
        }
        if ($key eq 'd') { $destination = $value; next }
        die "Can't happen: bad key $key\n";
    }
    ($volume, $directory, undef) = File::Spec->splitpath($destination,
+ 1);
    $directory = '/' unless $directory;
}

sub displayConfig
{
    print STDERR "includes = ", join(', ', @includes), "\n";
    print STDERR "excludes = ", join(', ', @excludes), "\n";
    print STDERR "deleteMode = $deleteMode\n";
    print STDERR "setName = $setName\n";
    print STDERR "minimumPercentFree = $minimumPercentFree\n"
        if $minimumPercentFree  > 0;
    print STDERR "maximumSize = $maximumSize\n" 
        if $maximumSize > 0;
    print STDERR "destination = $destination\n";
    print STDERR "backup volume: $volume directory: $directory\n";
}

# Get free space
# returns (undef, undef) if no media in drive
sub getFreeSpace
{
    # $SectorsPerCluster, $BytesPerSector, $NumberOfFreeClusters,
    #   $TotalNumberOfClusters, 4=$FreeBytesAvailableToCaller,
    #   5=$TotalNumberOfBytes, $TotalNumberOfFreeBytes
    my @retval = Win32::DriveInfo::DriveSpace($volume);
    # TODO: how to fix this magic number? It's not in Errno.pm
    die "Can't get free space in $volume: ($^E)\n"
        if (!defined($retval[4]) and ($^E != 15));
    return @retval[ 5, 4 ];
}

# Note that File::Find uses '/' separators
# Note also that includes or excludes could be files or dirs
sub scanDirectories
{
    my $includes = shift;
    my $excludes = shift;
    my $relative = shift || 0;
    my %sizes;
    my %dirs;
    foreach my $topdir (@$includes)
    {
        my $isAFile = -f $topdir;
        my ($volume, $dir, $fn) = File::Spec->splitpath($topdir, !$isA
+File);
        if ($isAFile)
        {
            my @stat = stat _;
            $sizes{ "$dir$fn" } = [ @stat[7,9], $volume ];
            $dir =~ s#[\\/]$##;
            $dirs{ $dir }++;
            next;
        }
        my $volumeLength = length($volume);
        my $dirLength = length(catpath($volume, $dir, ''));
        File::Find::find( sub {
            my @stat = stat($File::Find::name);
            foreach my $exc (@$excludes)
            {
                # ignore case on this comparison.
                if (lc($exc) eq lc($File::Find::name))
                {
                    $File::Find::prune = -d _;
                    return;
                }
            }
            my $relativeDir = substr($File::Find::name,
                $relative ? $dirLength : $volumeLength);
            if (! -f _)
            {
                $dirs{ $relativeDir }++;
                return;
            }
            # save size and mtime
            $sizes{ $relativeDir } = [ @stat[7,9], $volume ];
        }, $topdir );
    }
    my @dirnames = sort(keys(%dirs));
    return (\%sizes, \@dirnames);
}

# Add commas to a number every three spaces.
sub comma
{
   my $num = shift;
   1 while $num =~ s/^(-?\d+)(\d{3})/$1,$2/;
   return $num;
}

# return with forward slashes
sub canonpath
{
    my $path = File::Spec->canonpath(shift);
    $path =~ tr#\\#/#;
    return $path;
}

# return with forward slashes
sub catpath
{ 
    canonpath(File::Spec->catpath(@_));
}

# Main program.
$SIG{__DIE__} = sub {
    print STDERR "@_";
    print STDERR "==== DIED ", scalar(localtime(time)), "\n";
    exit(2);
};

readConfig();

if (defined($logName))
{
    print "logging to $logName\n";
    open(STDERR, ">>$logName") or die "can't open $logName: $^E\n";
}

# This is all to catch the output from mkpath, which logs to stdout
select(STDERR);
$|++;
open(STDOUT, ">&STDERR");
select(STDOUT);
$|++;

print STDERR "==== START ", scalar(localtime(time)), "\n";
displayConfig();

# Try to get the backup disk inserted.
my ($totalBytes, $freeBytes);
while (!defined($totalBytes))
{
    ($totalBytes, $freeBytes) = getFreeSpace();
    if (!defined($totalBytes))
    {
        my $msg = <<EOM;
There is no disk in $volume!
Please put proper disk from $setName into $volume and hit Retry
or hit Cancel to abort the backup job.
EOM
        my $response = Win32::MsgBox($msg, 5|MB_ICONEXCLAMATION,
            'Backup disk not present');
        die "User chose to cancel\n" if ($response == 2);
    }
}

my $startTime = time();

# Scan media
mkpath([ $destination ], 1);

my $destDeletedDirectory = '';
my $destExclude = [];

if ($deleteMode eq 'move')
{
    $destDeletedDirectory = catpath($volume, $directory, 'deleted');
    print STDERR "Moving deleted files into directory $destDeletedDire
+ctory\n";
    $destExclude = [ $destDeletedDirectory ];
    mkpath($destExclude, 1);
}

my ($destFiles, $destDirs) = scanDirectories([$destination], $destExcl
+ude, 1);

# Scan source files
my ($sourceFiles, $sourceDirs) = scanDirectories(\@includes, \@exclude
+s, 0);

# Make dest dirs if needed
mkpath([ map { catpath($volume, $directory, $_) } @$sourceDirs ], 1);

# Deal with deleted files
while (my ($destName, $destData) = each(%$destFiles))
{
    next if exists($sourceFiles->{$destName});
    my $fullName = catpath($volume, $directory, $destName);
    if ($deleteMode eq 'keep')
    {
        print STDERR "Keeping deleted file $fullName\n";
    }
    elsif ($deleteMode eq 'del')
    {
        print STDERR "Removing deleted file $fullName\n";
        unlink( $fullName ) or warn "Can't remove $fullName: $^E\n";
    }
    elsif ($deleteMode eq 'move')
    {
        my $dest = canonpath("$destDeletedDirectory/$destName");
        my ($v, $d, undef) = File::Spec->splitpath($dest);
        mkpath([ "$v$d" ], 1);
        print STDERR "Moving deleted file $fullName to $dest\n";
        rename( $fullName, $dest )
            or warn "Can't move $fullName to $dest: $^E\n";
    }
}

($totalBytes, $freeBytes) = getFreeSpace();
my $usedBytes = $totalBytes - $freeBytes;

my $minimumFree = ($minimumPercentFree > 0)
    ? int($totalBytes * $minimumPercentFree)
    : 0;

print STDERR "Total bytes on $volume: ", comma($totalBytes), "\n";
print STDERR "Free bytes: ", comma($freeBytes), ", used bytes: ", comm
+a($usedBytes), "\n";
print STDERR "Maximum backup size: ", comma($maximumSize), "\n" if ($m
+aximumSize > 0);
print STDERR "Minimum amount free: ", comma($minimumFree), "\n" if ($m
+inimumFree > 0);

# Check against $minimumFree and $maximumSize
foreach my $srcName (sort(keys(%$sourceFiles)))
{
    my $srcData = $sourceFiles->{ $srcName };
    my $destData = $destFiles->{$srcName} || [0, 0, $volume];
    next if $destData->[0] == $srcData->[0]     # size
        && $destData->[1] == $srcData->[1];     # modtime
    # how much bigger will it make the backup?
    my $delta = $srcData->[0] - $destData->[0];
    $freeBytes -= $delta;
    $usedBytes += $delta;
}

if ($freeBytes < 0)
{
    my $overrun = -$freeBytes;
    die "Backup needs at least $overrun more bytes than remains on $vo
+lume\n";
}

if ($freeBytes < $minimumFree)
{
    my $overrun = $minimumFree - $freeBytes;
    die "Backup would leave too few bytes on $volume (by $overrun)\n";
}

if ($maximumSize > 0 && $usedBytes > $maximumSize)
{
    die "Backup would require at least $usedBytes on $volume,
but you asked for a maximum of $maximumSize";
}

my $bytesCopied = 0;

# Copy changed or new files, in name order
foreach my $srcName (sort(keys(%$sourceFiles)))
{
    my $srcData = $sourceFiles->{ $srcName };
    my $destData = $destFiles->{$srcName} || [0, 0, $volume];
    my $src = $srcData->[2] . $srcName;
    my $dest = catpath($volume, $directory, $srcName);
    next if $destData->[0] == $srcData->[0]     # size
        && $destData->[1] == $srcData->[1];     # modtime
    print STDERR $src, " => ", $dest, " ", $srcData->[0], "\n";
    copy($src, $dest) or warn "Copy $srcName failed: $^E\n";
    $bytesCopied += $srcData->[0];
}

my $endTime = time();
printf STDERR "copied %s bytes in %d seconds (%s per second)\n",
    comma($bytesCopied),
    $endTime-$startTime,
    comma(int($bytesCopied/($endTime-$startTime)))
        if ($endTime>$startTime);
print STDERR "==== END ", scalar(localtime($endTime)), "\n";

# vim:tw=76 ts=4 sw=4

A sample config file follows:

# Sample config file for cdmirror.pl
# Name of backup set
c DiskSet
# Include
i c:\windows\temp
# Exclude
x c:\windows\temp\Wcescomm.log
x c:\windows\temp\xx
# handling of deleted files (del|keep|move)
r del
# size (in this case, 5% minimum free)
s 5%
# destination disk
d e:
# Logfile (- means stdout)
l -