comment on

#!/usr/bin/perl
## no critic VersionVar
use strict;
use warnings;

use Getopt::Long 'GetOptions';
use autouse 'File::Find'      => 'find';
use autouse 'Pod::Usage'      => 'pod2usage';
use autouse 'Term::ANSIColor' => 'colored';

use autouse 'IPC::Open3' => 'open3';
$SIG{CHLD} = 'IGNORE';

use vars qw( $TextOnly );

main();
exit;

sub main {

    # Fetch parameters.
    GetOptions(
        man  => sub { pod2usage( -verbose => 2 ) },
        help => sub { pod2usage( -verbose => 1 ) },

        t        => \$TextOnly,
        l        => \my ($filename_only),
        w        => \my ($word),
        i        => \my ($ignore_case),
        Q        => \my ($quotemeta),
        h        => \my ($no_filename),
        n        => \my ($line_no),
        R        => \my ($no_recursive),
        v        => \my ($invert_match),
        plain    => \my ($no_ansicolor),
        'name=s' => \my ($filename_rx),
        )
        or pod2usage( -verbose => 0 );
    my ( $match, @srcs ) = @ARGV;
    if ( not @srcs ) {
        @srcs = '.';    ## no critic Noisy
    }

    # Validate parameters.
    if ( not defined $match ) {
        pod2usage( -verbose => 0 );
    }

    # Pre-process the pattern and then compile it.
    if ($quotemeta) {
        $match = quotemeta $match;
    }
    if ($word) {
        $match = "\\b$match\\b";
    }
    if ($ignore_case) {
        $match = "(?i)$match";
    }
    my $match_rx = qr/$match/;

    # Get a function which formats the output for whatever was
    # requested. All info is passed through the globals
    # $File::Find::rel_name, $., and $_. The input will contain
    # whatever linebreak is currently active so most things don't need
    # to add one.
    my $prev_file = '';
    my $formatter = (
        $line_no && $no_filename
        ? sub { "$.:" . shift }
        : $line_no ? sub {
            if ( $File::Find::name ne $prev_file ) {
                $prev_file = $File::Find::name;
                return ( ( $prev_file eq '' ? '' : "\n" )
                    . colored( $File::Find::name, 'bold green' ) . "\n
+$.:"
                        . shift );
            }
            else {
                return "$.:" . shift;
            }
            }
        : $no_filename   ? sub {shift}
        : $filename_only ? sub {
            if ( $File::Find::name ne $prev_file ) {
                $prev_file = $File::Find::name;
                return ( ( $prev_file eq '' ? '' : "\n" )
                    . colored( $File::Find::name, 'bold green' )
                        . "\n" );
            }
            else {
                return;
            }
            }
        : sub {
            if ( $File::Find::name ne $prev_file ) {
                $prev_file = $File::Find::name;
                return ( ( $prev_file eq '' ? '' : "\n" )
                    . colored( $File::Find::name, 'bold green' ) . "\n
+"
                        . shift );
            }
            else {
                return shift;
            }
        }
    );

    my $grep_file_fn = sub {
        grep_file(
            ignore_rcs   => 1,
            plain        => $no_ansicolor,
            match_rx     => $match_rx,
            filename_rx  => $filename_rx,
            formatter    => $formatter,
            invert_match => $invert_match,
            match_once   => $filename_only
        );
    };

    # Here's the main loop. For each source directory/file, search it.
    for my $src (@srcs) {

        # Examine all files in $src.
        if ($no_recursive) {

            # Mimic the API of File::Find for grep_file().
            # local $File::Find::dir = unimplemented
            ## no critic
            local $File::Find::name = $src;
            local $_                = $src;

            $grep_file_fn->();
        }
        else {
            find( $grep_file_fn, $src );
        }
    }

    return 0;
}

sub open_file_harder {
    my ($filename) = @_;
    return if not defined $filename;

    if ( my ($extension) = $filename =~ /(\.[^.]+)\z/mx ) {
        my @readers = (
            [ qr/\.t(?:ar\.)?gz\z/ => qw( gzcat ),    $filename ],
            [ qr/\.zip\z/,         => qw( unzip -p ), $filename ],
            [ qr/\.Z\z/            => qw( zcat ),     $filename ],
            [ qr/\.gz\z/           => qw( gzcat ),    $filename ],
            [ qr/\.bz2\z/          => qw( bzcat ),    $filename ],
        );
        for my $reader (@readers) {
            my ( $pattern, @command ) = @{$reader};
            if ( $extension =~ $pattern ) {
                open3( undef, my $fh, undef, @command );
                return $fh;
            }
        }
    }

    open my $fh, '<', $filename
        or die "Couldn't open $filename: $!";
    return $fh;
}

sub grep_file {
    my %p            = @_;
    my $match_rx     = $p{match_rx};
    my $formatter    = $p{formatter};
    my $invert_match = $p{invert_match};
    my $plain        = $p{plain};
    my $match_once   = $p{match_once};

    my $filename = $_;

    # Ignore CVS stuff.
    return if $File::Find::name =~ m{/CVS/?};

    # If there is a pattern required of filenames, try that one
    # first. This requires no checks to the FS so I'm doing this
    # before the next stuff.
    return
        if defined $p{filename_rx}
        and not $filename =~ $p{filename_rx};

    # Ignore non-existant files.
    return if not -f $filename;

    # Ignore non-text files if that's what was requested.
    return if $TextOnly and not -T _;

    eval {
        my $fh = open_file_harder($filename);

    LINE: while ( my $line = <$fh> ) {

            # If the line matches the pattern print it as a formatted
            # line.
            my $matched;

            if ($plain) {
                $matched = ( $line =~ /$match_rx/mx );
            }
            else {
                $matched = ( $line =~ /$match_rx/mx );

                $line
                    =~ s/((?:$match_rx)+)/ colored( "$1", 'yellow on_b
+lack' ) /gemx;
            }

            # Given    Match         then exclusive or is great here.
            #            0   1
            #          +---+---+
            # Invert 0 |   | X |
            #        1 | X |   |
            if ( $matched xor $invert_match ) {
                print $formatter->($line);
                last LINE if $match_once;
            }
        }
    };

    return 1;
}

__END__

=head1 NAME

dgrep - A recursive grep that uses perl regular expressions.

=head1 SYNOPSIS

dgrep [options] [file ...]

 Options:
   -help      Prints this help message
   -man       Prints the manual

   -t         Searches only `text' files
   -w         Matches only "words" using \b...\b
   -i         Case-insensitive matching
   -Q         Ignore perl meta-characters

   -v         Invert output, match lines that don't match the pattern
   -h         Exclude filename from output
   -n         Include line number in output

   -R         Disable recursion, no directories.
   -plain     Disable highliting of matched text
   -name EXPR Only open files matching this regular expression

=head1 OPTIONS

=over 4

=item B<-help>

Prints a simple message on usage and then exits.

=item B<-man>

Prints the manual and then exits.

=item B<-t>

Only `text' files are searched.

=item B<-w>

When matching, the pattern is surrounded by perl\'s \b assertion. That
is, the match must be on a "word" boundary, either starting or
finishing. To perl, "word" is locale specific but generally means any
alphanumeric character and underscore.

=item B<-i>

Match without regard to casing. This is affected by locale.

=item B<-Q>

Pattern is a literal string. All regex metacharacters will be escaped
using the quotemeta() function.

=item B<-v>

Print only lines which do B<not> match the pattern. This is equivalent
to grep\'s -v parameter.

=item B<-h>

Omit the filename from the output when a line is matched. This is
semi-equivalent to grep\'s -h parameter.

=item B<-n>

Print the line number.

=item B<-R>

Do not recurse into any subdirectories.

=item B<-plain>

C<dgrep> automatically inserts ANSI escape codes to highlight matched
text. Use the C<-plain> option to disable that.

=item B<-name> EXPR

C<dgrep> usually searches every file and directory, recursively. When
C<-name EXPR> is used, only filenames matching this regular expression
are searched.

=back

=head1 DESCRIPTION

B<dgrep> is an "improved" version of the grep that comes with the Sun
box. It is normally recursive, accepts perl regular expressions, and
optionally prints the filename the match was found in.

=cut
[download]
In reply to diotalevi's grep by diotalevi
Are you posting in the right place? Check out Where do I post X? to know for sure.
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
Want more info? How to link or How to display code and escape characters are good places to start.

Welcome to the Monastery
	PerlMonks