Beefy Boxes and Bandwidth Generously Provided by pair Networks
go ahead... be a heretic
 
PerlMonks  

A little script that combines `head' and `tail' utilities

by cosimo (Hermit)
on Apr 29, 2004 at 12:28 UTC ( #349106=sourcecode: print w/replies, xml ) Need Help??
Category: Text processing
Author/Contact Info Cosimo Streppone cosimo@cpan.org
Description: This little utility extracts the "body" from some text given on STDIN. It does What You Mean(tm), given that you already know `head' and `tail' command line utilities. I wrote it to parse huge (>1Gb) PostgreSQL database dumps and to extract only single tables schema from them. Hope you will find it useful...
#!/usr/bin/perl

=head1 NAME

body

=head1 DESCRIPTION

Extracts the "body" from some text given on STDIN.

This little utility does What You Mean(tm), given that
you already know `head' and `tail' command line utilities.

I wrote it to parse B<huge> PostgreSQL database dumps
and to extract only single tables schema from them.

Hope you will find it useful...

=head1 SYNOPSIS

    body [OPTIONS]
    cat  [FILE] | body [OPTIONS]
    body [OPTIONS] < [FILE]

=head1 OPTIONS

    -s <n1|rx1> | --start <n1|rx1>  start extracting text from line <n1>, or from
                                    first regexp match of <rx1>. If not passed,
                                    takes `1' as default value
    -e <n2|rx2> | --end <n2|rx2>    finish extracting text on line <n2> or on
                                    first regexp match of <rx2>. Overrides -n or
                                    --count if they are present.
    -h          | --help            you are reading it now
    -n <m>      | --count <m>       extract max <m> lines of text from standard
                                    input file
    -verb       | --verbose         enables verbose messages for begin/end
    -vers       | --version         displays program version and exits

=head1 EXAMPLES

    $0 -s 50 -n 10 < ~/myfile
        Extracts 10 lines from 50th to 59th from ~/myfile

    $0 -n 50 < ~/myfile
        Same as `cat ~/myfile | head -50'

    cat ~/dump.sql | $0 -s 'CREATE TABLE "a"' -e 'CREATE TABLE "b"'
        Extracts create table statement for table "a" from a full
        database dump

=head1 AUTHOR

Cosimo Streppone, cosimo@cpan.org

=head1 LAST MODIFY

29/04/2004

=cut


use strict;
use warnings;
use constant ERROR => 255;
use constant OK    => ~ ERROR;
use Getopt::Long;

# Program version, printed by --version.
our $VERSION = '0.01';

# Package global (not a lexical) so that say() can consult it.
our $verbose = 0;

# Option values. All of them are pre-set to 0 so that the code further down
# always sees a defined value, whether or not the option was given.
my $lines    = 0;    # -n / --count
my $start    = 0;    # --start: line number or regular expression
my $end      = 0;    # --end:   line number or regular expression
my $help     = 0;    # --help
my $version  = 0;    # --version

# Getopt::Long accepts unique abbreviations by default, which is what makes
# the documented short forms (-s, -e, -h, -verb, -vers) work.
# GetOptions() returns false after reporting an unknown option or a bad
# value on STDERR; stop there instead of running with half-parsed options.
GetOptions(
    'n|count=i' => \$lines,
    'start=s'   => \$start,
    'end=s'     => \$end,
    'help'      => \$help,
    'verbose'   => \$verbose,
    'version'   => \$version,
) or exit ERROR;

# --help and --version are informational: act on them and leave at once.
if ($help) {
    help() and exit OK;
}
elsif ($version) {
    print $0, ' version ', $VERSION, "\n";
    exit OK;
}

# Re-open standard input; give up with an error status if that fails.
exit ERROR unless open(STDIN, '-');

# A boundary made only of digits is a line number; anything containing a
# non-digit character is used as a regular expression.
my $start_num = ( $start !~ /\D/ ) ? 1 : 0;
my $end_num   = ( $end   !~ /\D/ ) ? 1 : 0;

# Numeric boundaries as plain numbers. Input lines are counted from 1, so a
# missing or zero start means "first line". A zero $last_line means "no end
# line known (yet)": either extraction runs to end of input, or the end is
# computed later from --count once a regexp start has matched.
my $first_line = $start_num ? ( $start || 0 ) + 0 : 0;
$first_line = 1 if $start_num && $first_line < 1;
my $last_line = $end_num ? ( $end || 0 ) + 0 : 0;

# --count is honoured only when no explicit end was given, because --end is
# documented to override it.
my $use_count = ( $end_num && !$last_line && $lines > 0 ) ? 1 : 0;
$last_line = $first_line + $lines - 1 if $use_count && $start_num;

# Compile the user-supplied patterns once rather than on every input line.
my $start_rx = $start_num ? undef : qr/$start/;
my $end_rx   = $end_num   ? undef : qr/$end/;

# True once the first line of the body has been seen.
my $in_body = 0;

LINE:
while ( my $line = <STDIN> ) {

    if ( !$in_body ) {
        if ($start_num) {
            next LINE if $. < $first_line;
            say('found numeric start');
        }
        else {
            next LINE if $line !~ $start_rx;
            say('found regexp start');

            # With a regexp start the end line implied by --count is only
            # known now that the start line number is known.
            $last_line = $. + $lines - 1 if $use_count;
        }
        $in_body = 1;
    }

    print $line;

    # Both kinds of end are inclusive: the line that reaches the end line
    # number, or that matches the end pattern, is the last one printed.
    my $finished = $end_num
        ? ( $last_line && $. >= $last_line )
        : ( $line =~ $end_rx );

    if ($finished) {
        say('finished body');
        last LINE;
    }
}

exit( close(STDIN) ? OK : ERROR );

# Emit a diagnostic through warn(), but only when the --verbose flag
# ($verbose) is set; otherwise do nothing and return nothing.
sub say {
    if ($verbose) {
        return warn(@_);
    }
    return;
}

# Show this script's own POD by running perldoc on it.
# Always returns 1, whatever perldoc's outcome, so callers can chain on it.
sub help {
    my @viewer = ( 'perldoc', $0 );
    system(@viewer);
    return 1;
}
Replies are listed 'Best First'.
Re: A little script that combines `head' and `tail' utilities
by Aristotle (Chancellor) on Sep 17, 2004 at 07:24 UTC

    Or you could just use the toolbox.

    $ sed -n 10,20p somefile $ sed -n 40,/foo/p somefile $ sed -n /foo/,/bar/p somefile

    Although that's less fun I suppose. Fine, let's do it in Perl.

    $ perl -ne'print if 10 .. 20' somefile $ perl -ne'print if 40 .. /foo/' somefile $ perl -ne'print if /foo/ .. /bar/' somefile

    Makeshifts last the longest.

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://349106]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others chanting in the Monastery: (5)
As of 2020-08-12 12:36 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    Which rocket would you take to Mars?










    Results (65 votes). Check out past polls.

    Notices?