diotalevi's grep

Category:	Utility Scripts
Author/Contact Info
Description:	This grep is much like everyone else's perl reimplementation of grep. It's only distinguishing features are automatically looking inside bzip2, gzip, zip, and tar files. It borrows the pretty formatting used by petdance in ack. This started life as an improved version of the grep that comes with the Solaris which isn't recursive.
#!/usr/bin/perl ## no critic VersionVar use strict; use warnings; use Getopt::Long 'GetOptions'; use autouse 'File::Find' => 'find'; use autouse 'Pod::Usage' => 'pod2usage'; use autouse 'Term::ANSIColor' => 'colored'; use autouse 'IPC::Open3' => 'open3'; $SIG{CHLD} = 'IGNORE'; use vars qw( $TextOnly ); main(); exit; sub main { # Fetch parameters. GetOptions( man => sub { pod2usage( -verbose => 2 ) }, help => sub { pod2usage( -verbose => 1 ) }, t => \$TextOnly, l => \my ($filename_only), w => \my ($word), i => \my ($ignore_case), Q => \my ($quotemeta), h => \my ($no_filename), n => \my ($line_no), R => \my ($no_recursive), v => \my ($invert_match), plain => \my ($no_ansicolor), 'name=s' => \my ($filename_rx), ) or pod2usage( -verbose => 0 ); my ( $match, @srcs ) = @ARGV; if ( not @srcs ) { @srcs = '.'; ## no critic Noisy } # Validate parameters. if ( not defined $match ) { pod2usage( -verbose => 0 ); } # Pre-process the pattern and then compile it. if ($quotemeta) { $match = quotemeta $match; } if ($word) { $match = "\\b$match\\b"; } if ($ignore_case) { $match = "(?i)$match"; } my $match_rx = qr/$match/; # Get a function which formats the output for whatever was # requested. All info is passed through the globals # $File::Find::rel_name, $., and $_. The input will contain # whatever linebreak is currently active so most things don't need # to add one. my $prev_file = ''; my $formatter = ( $line_no && $no_filename ? sub { "$.:" . shift } : $line_no ? sub { if ( $File::Find::name ne $prev_file ) { $prev_file = $File::Find::name; return ( ( $prev_file eq '' ? '' : "\n" ) . colored( $File::Find::name, 'bold green' ) . "\n +$.:" . shift ); } else { return "$.:" . shift; } } : $no_filename ? sub {shift} : $filename_only ? sub { if ( $File::Find::name ne $prev_file ) { $prev_file = $File::Find::name; return ( ( $prev_file eq '' ? '' : "\n" ) . colored( $File::Find::name, 'bold green' ) . "\n" ); } else { return; } } : sub { if ( $File::Find::name ne $prev_file ) { $prev_file = $File::Find::name; return ( ( $prev_file eq '' ? '' : "\n" ) . colored( $File::Find::name, 'bold green' ) . "\n +" . shift ); } else { return shift; } } ); my $grep_file_fn = sub { grep_file( ignore_rcs => 1, plain => $no_ansicolor, match_rx => $match_rx, filename_rx => $filename_rx, formatter => $formatter, invert_match => $invert_match, match_once => $filename_only ); }; # Here's the main loop. For each source directory/file, search it. for my $src (@srcs) { # Examine all files in $src. if ($no_recursive) { # Mimic the API of File::Find for grep_file(). # local $File::Find::dir = unimplemented ## no critic local $File::Find::name = $src; local $_ = $src; $grep_file_fn->(); } else { find( $grep_file_fn, $src ); } } return 0; } sub open_file_harder { my ($filename) = @_; return if not defined $filename; if ( my ($extension) = $filename =~ /(\.[^.]+)\z/mx ) { my @readers = ( [ qr/\.t(?:ar\.)?gz\z/ => qw( gzcat ), $filename ], [ qr/\.zip\z/, => qw( unzip -p ), $filename ], [ qr/\.Z\z/ => qw( zcat ), $filename ], [ qr/\.gz\z/ => qw( gzcat ), $filename ], [ qr/\.bz2\z/ => qw( bzcat ), $filename ], ); for my $reader (@readers) { my ( $pattern, @command ) = @{$reader}; if ( $extension =~ $pattern ) { open3( undef, my $fh, undef, @command ); return $fh; } } } open my $fh, '<', $filename or die "Couldn't open $filename: $!"; return $fh; } sub grep_file { my %p = @_; my $match_rx = $p{match_rx}; my $formatter = $p{formatter}; my $invert_match = $p{invert_match}; my $plain = $p{plain}; my $match_once = $p{match_once}; my $filename = $_; # Ignore CVS stuff. return if $File::Find::name =~ m{/CVS/?}; # If there is a pattern required of filenames, try that one # first. This requires no checks to the FS so I'm doing this # before the next stuff. return if defined $p{filename_rx} and not $filename =~ $p{filename_rx}; # Ignore non-existant files. return if not -f $filename; # Ignore non-text files if that's what was requested. return if $TextOnly and not -T _; eval { my $fh = open_file_harder($filename); LINE: while ( my $line = <$fh> ) { # If the line matches the pattern print it as a formatted # line. my $matched; if ($plain) { $matched = ( $line =~ /$match_rx/mx ); } else { $matched = ( $line =~ /$match_rx/mx ); $line =~ s/((?:$match_rx)+)/ colored( "$1", 'yellow on_b +lack' ) /gemx; } # Given Match then exclusive or is great here. # 0 1 # +---+---+ # Invert 0 \| \| X \| # 1 \| X \| \| if ( $matched xor $invert_match ) { print $formatter->($line); last LINE if $match_once; } } }; return 1; } __END__ =head1 NAME dgrep - A recursive grep that uses perl regular expressions. =head1 SYNOPSIS dgrep [options] [file ...] Options: -help Prints this help message -man Prints the manual -t Searches only `text' files -w Matches only "words" using \b...\b -i Case-insensitive matching -Q Ignore perl meta-characters -v Invert output, match lines that don't match the pattern -h Exclude filename from output -n Include line number in output -R Disable recursion, no directories. -plain Disable highliting of matched text -name EXPR Only open files matching this regular expression =head1 OPTIONS =over 4 =item B<-help> Prints a simple message on usage and then exits. =item B<-man> Prints the manual and then exits. =item B<-t> Only `text' files are searched. =item B<-w> When matching, the pattern is surrounded by perl\'s \b assertion. That is, the match must be on a "word" boundary, either starting or finishing. To perl, "word" is locale specific but generally means any alphanumeric character and underscore. =item B<-i> Match without regard to casing. This is affected by locale. =item B<-Q> Pattern is a literal string. All regex metacharacters will be escaped using the quotemeta() function. =item B<-v> Print only lines which do B<not> match the pattern. This is equivalent to grep\'s -v parameter. =item B<-h> Omit the filename from the output when a line is matched. This is semi-equivalent to grep\'s -h parameter. =item B<-n> Print the line number. =item B<-R> Do not recurse into any subdirectories. =item B<-plain> C<dgrep> automatically inserts ANSI escape codes to highlight matched text. Use the C<-plain> option to disable that. =item B<-name> EXPR C<dgrep> usually searches every file and directory, recursively. When C<-name EXPR> is used, only filenames matching this regular expression are searched. =back =head1 DESCRIPTION B<dgrep> is an "improved" version of the grep that comes with the Sun box. It is normally recursive, accepts perl regular expressions, and optionally prints the filename the match was found in. =cut

Back to Code Catacombs