Category: | Text Processing |
Author/Contact Info | Tim Rayner tfrayner@yahoo.co.uk |
Description: | Here's a little script I wrote as an exercise. My aim was to implement a user-friendly way of substituting text across a heirarchy of files and directories. There is of course a simple way to do this without resorting to this script. However, the development of the script allowed me to add some nice features. Try 'perldoc findreplace.pl' for details.
I'd welcome any comments, particularly with regard to efficiency/performance and portability. |
#! /usr/bin/perl -w use strict; use warnings; use Getopt::Long; use File::Spec; use POSIX; =pod =head1 NAME findreplace.pl - a script to replace text in a heirarchy of files and directories =head1 SYNOPSIS B<findreplace.pl> S<[ B<-achmRF> ]> S<[ B<-d>[I<destdir>] ]> S<[ B<-e>[I<exclude>] ]> S<B<-f>[I<findstr>]> S<B<-r>[I<replacestr>]> I<files> =head1 DESCRIPTION This script is designed to take a list of files and/or directories and perform simple text substitutions in a global fashion. The processed files are then placed in a new directory, maintaining filesystem heirarchy. The script can be made to traverse directories recursively, optionally omitting named files from its processing. File and directory permissions can be maintained if so desired. The script will accept a wildcard file designation, or a list of files from I<stdin>. Raw octal ASCII codes can be included in substitutions using the B<-a> switch. The script will not overwrite write-protected files; nor will it override the current B<umask> default. =head1 OPTIONS =over 4 =item B<-h> Prints a short help text. =item B<-f> I<findstr> =item B<-r> I<replacestr> =item B<-d> I<destdir> The B<-f> switch designates the string to be found and replaced with the string specified using the B<-r> switch. The destination directory for altered files can be set using the B<-d> option. The default directory name is 'I<./changed>'. =item B<-R> Recurse down into subdirectories. =item B<-F> Force overwriting of existing files. Write-protected files will not be overwritten. If this switch is omitted the user will be asked whether to overwrite already existing files. =item B<-e> Exclude specific file and directory names. This option allows the user to pass a comma-delimited list of file and/or directory names to be excluded. =item B<-m> Maintain file and directory permissions. The current B<umask> value is the default. Note that mkdir() (and hence this script) apparently doesn't set suid bits and is unable to override B<umask>. Not that this is necessarily a bad thing. =item B<-c> Accept input from I<stdin>. =item B<-a> Allow the use of backslashed ASCII codes (e.g. \012, \015) in the I<findstr> and I<replacestr> substitution parameters. Note that codes passed from the command line must have their backslashes escaped, i.e. \\012, \\015. The codes must be in 3-digit octal notation. =back =head1 AUTHOR Tim Rayner (tfrayner@mac.com), 2001. =head1 BUGS The B<-c> I<stdin> input switch will accept a clean list of files (i.e. containing nothing but files or directories, correctly designated relative to the current working directory). Anything else will generate warnings but will attempt to soldier on regardless. =cut # SET GLOBALS $Getopt::Long::ignorecase=0; # case sensitive option matching # GET BASE DIRECTORY # we can't just look at $ARGV[0] because of command lines # specifying foo/*/* and stuff like that. sub getbase{ # takes a reference to an array of paths, ret +urns common path my $parray=shift; my $pathholder=$$parray[0]; my @pathholderdirs=File::Spec->splitdir($pathholder); foreach my $path (@$parray){ my @pathdirs=File::Spec->splitdir($path); my $i=$#pathholderdirs; while (@pathholderdirs[0 .. $i] ne @pathdirs[0 .. $i]){$i--;} $pathholder=File::Spec->catfile(@pathholderdirs[0 .. $i]); } return ($pathholder); } # GET TARGET DIRECTORY PERMISSIONS sub dirmode{ my $sourcedir=shift; my $permsflag=shift; my $mode; if ($permsflag){ $mode=(stat($sourcedir))[2]; }else{ $mode=0777-umask(); } return ($mode); } # CONVERT ASCII CODES TO CHARACTERS WITHIN STRINGS sub asciicode{ my $string=shift; while ($string=~ /\\(\d\d\d)/){ my $ascii=chr(oct($1)); $string=~ s/\\$1/$ascii/g; } return $string; } # PARSE COMMAND-LINE ARGUMENTS sub parseargs{ # Creates the main top-level hash used to store all the passed var +iables. # Note that recursing down into subdirectories needs a new hash fo +r each level # (see sub recursedir below). my %phash; $phash{clobber}=0; #defaults to no clobber $phash{changedir}="changed"; #default change directory &GetOptions("h|help" => \$phash{helptext}, "c|stdin" => \$phash{readstdin}, "m|maintain" => \$phash{keepperms}, "e|exclude=s" => \$phash{exclude}, "a|ascii" => \$phash{ascii}, "F|force" => \$phash{clobber}, "R|recurse" => \$phash{recurse}, "d|directory=s" => \$phash{changedir}, "f|find=s" => \$phash{find}, "r|replace=s" => \$phash{replace}); $phash{changedir}= File::Spec->rel2abs($phash{changedir}); if ($phash{helptext}){ die ("Usage: findreplace.pl [-h] [-c] [-m] [-R] [-F] [-d <destinat +ion directory>]". "\n\t[-e <exclude list>] [-a] -f <findstr> -r <replacestr> <f +iles>\n"); } unless ($phash{find} && $phash{replace}){ die("Insufficient arguments. Use -h for help summary.\a\n"); } # allow passing of ascii codes if ($phash{ascii}){ $phash{find}=&asciicode($phash{find}); $phash{replace}=&asciicode($phash{replace}); } # set up file array if (@ARGV){ foreach my $path(@ARGV){ $path=File::Spec->rel2abs($path); } $phash{filearrayref}=\@ARGV; } elsif ($phash{readstdin}){ my $i=0; foreach my $path(<STDIN>){ chomp($path); $path=File::Spec->rel2abs($path); $phash{filearrayref}[$i]=$path; $i++; } # reset STDIN to read from the terminal close(STDIN) or die ("STDIN error: $!.\a\n"); my $tty=POSIX::ctermid(); open(STDIN,"$tty") or die ("Can't read from terminal: $!.\a\n"); } else {die ("No input files specified.\a\n");} # set up excluded array if ($phash{exclude}){ my @temparray=split /,/, $phash{exclude}; # comma delimited. chang +e as required. $phash{excludearrayref}=\@temparray; } $phash{basedir}=&getbase($phash{filearrayref}); $phash{dirmode}=&dirmode($phash{basedir},$phash{keepperms}); my $pref=\%phash; return ($pref); } # CREATE TARGET DIRECTORY sub makenewdir{ my $changedir=shift; my $mode=shift; my $clobber=shift; my $safetooverwrite=0; until ($safetooverwrite){ if (-e $changedir && !$clobber){ print STDERR ("Directory \'$changedir\' aleady exists. Overwri +te?\n". "[Y(es)/N(o)/R(eselect destination)/C(lobber all duplica +tes)]". "\a\n"); my $answer = <STDIN>; chomp ($answer); $answer=lc($answer); SWITCH: { $answer eq 'y' && do {$safetooverwrite=1; last SWITCH;}; $answer eq 'r' && do {print STDERR ("Please input new destin +ation ". "directory name:\n"); $changedir = <STDIN>; chomp ($changedir); $changedir=File::Spec->rel2abs($changedir); last SWITCH;}; $answer eq 'c' && do {$clobber=1; $safetooverwrite=1; last SWITCH;}; die ("Script aborted by user.\n"); } } else {$safetooverwrite=1;} } unless (-e $changedir){ mkdir ($changedir,$mode) or die ("Error: mkdir: $!.\n"); } return ($changedir,$clobber); } # RECURSE INTO DIRECTORIES, IF -R OPTION USED sub recursedir { my $newindir = shift; my $pref = shift; my $callersubref = shift; opendir NEWDIR, $newindir; # avoid . and .. entries my @newfilearray = File::Spec->no_upwards(readdir NEWDIR); closedir NEWDIR; foreach my $entry (@newfilearray){ $entry=File::Spec->catfile($newindir,$entry); }; # create the new hash for the next recursion my %newphash = %$pref; $newphash{filearrayref}=\@newfilearray; $newindir = File::Spec->abs2rel($newindir,$$pref{basedir}); $newphash{changedir}=File::Spec->rel2abs($newindir,$$pref{destdir} +); $newphash{dirmode}=&dirmode((File::Spec->rel2abs($newindir,$$pref{ +basedir})), $$pref{keepperms}); my $newpref=\%newphash; # here we go again... ($$newpref{changedir},$$newpref{clobber})=&makenewdir($$newpref{ch +angedir}, $$newpref{dirmode}, $$newpref{clobber}); &{$callersubref} ($newpref); } # PROCESS FILES sub findreplace { my $pref=shift; my @filearray= @{$pref->{filearrayref}}; INFILELOOP: foreach my $infile (@filearray) { # handle excluded files here if ($$pref{exclude}){ my $filename=(File::Spec->splitpath($infile))[2]; my @excludearray=@{$pref->{excludearrayref}}; foreach my $exfile(@excludearray){ next INFILELOOP if ($filename eq $exfile) } } # directory handling, including recursion if (-d $infile){ if ($$pref{recurse}){ my $callersub = \&findreplace; &recursedir ($infile, $pref, $callersub); } next INFILELOOP; } # check output file doesn't already exist my $strippedname = (File::Spec->splitpath($infile))[2]; my $outfile = File::Spec->catfile($$pref{changedir},$strippedname) +; if (-e $outfile && !$$pref{clobber}){ print STDERR ("File \'$outfile\' aleady exists. ". "Overwrite? [Y(es)\/N(o)\/A(ll)]\n"); my $answer = <STDIN>; chomp ($answer); $answer=lc($answer); SWITCH: { $answer eq 'y' && do {last SWITCH;}; $answer eq 'a' && do {$$pref{clobber}=1; last SWITCH;}; next INFILELOOP; } } # actually find and replace stuff unless (open (INFILE, "<$infile")){ warn ("Error: $infile: $!. Skipping.\n"); next INFILELOOP; } unless (open (OUTFILE,">$outfile")){ warn ("Can't open output file \'$outfile\': $!. Skipping.\n"); next INFILELOOP; } foreach my $line (<INFILE>) { $line =~ s/$$pref{find}/$$pref{replace}/go; print OUTFILE ($line); } if ($$pref{keepperms}){ my $mode=(stat($infile))[2]; chmod $mode, $outfile; } close (OUTFILE) or die ("Error: $!.\a\n"); close (INFILE) or die ("Error: $!.\a\n"); } } # MAIN LOOP # $pref is reference to main parameter hash my $pref=&parseargs; # create initial destination directory ($$pref{changedir},$$pref{clobber})=&makenewdir($$pref{changedir}, $$pref{dirmode}, $$pref{clobber}); # Set base destination directory. Important to have as a constant for +recursedir(). $$pref{destdir}=$$pref{changedir}; # do the deed &findreplace ($pref); print ("Done.\n"); exit; |
Back to
Code Catacombs