#!/usr/bin/perl
=head1 NAME
body
=head1 DESCRIPTION
Extracts the "body" from some text given on STDIN.
This little utility does What You Mean(tm), given that
you already know `head' and `tail' command line utilities.
I wrote it to parse B<huge> PostgreSQL database dumps
and to extract only single tables schema from them.
Hope you will find it useful...
=head1 SYNOPSIS
body [OPTIONS]
cat [FILE] | body [OPTIONS]
body [OPTIONS] < [FILE]
=head1 OPTIONS
-s <n1|rx1> | --start <n1|rx1> start extracting text from line <n1>, or from
the first regexp match of <rx1>. If not passed,
takes `1' as default value
-e <n2|rx2> | --end <n2|rx2> finish extracting text on line <n2> or on
the first regexp match of <rx2>. Overrides -n or
--count if they are present.
-h | --help you are reading it now
-n <m> | --count <m> extract max <m> lines of text from the standard
input file
-verb | --verbose enables verbose messages for begin/end
-vers | --version displays program version and exits
=head1 EXAMPLES
$0 -s 50 -n 10 < ~/myfile
Extracts 10 lines from 50th to 59th from ~/myfile
$0 -n 50 < ~/myfile
Same as `cat ~/myfile | head -50'
cat ~/dump.sql | $0 -s 'CREATE TABLE "a"' -e 'CREATE TABLE "b"'
Extracts the create table statement for table "a" from a full
database dump
=head1 AUTHOR
Cosimo Streppone, cosimo@cpan.org
=head1 LAST MODIFY
29/04/2004
=cut
use strict;
use warnings;
use constant ERROR => 255;
use constant OK => ~ ERROR;
use Getopt::Long;
our $VERSION = '0.01';
our $verbose = 0;    # package variable: read by say(), set through --verbose

# Option values. $start and $end deliberately stay undefined until the user
# supplies them, so "option not given" can be told apart from a real value.
my $lines   = 0;
my $start;
my $end;
my $help    = 0;
my $version = 0;

# Getopt::Long has already reported the offending option on STDERR when it
# returns false, so just stop with the error status.
GetOptions(
    'n|count=i' => \$lines,
    'start=s'   => \$start,
    'end=s'     => \$end,
    'help'      => \$help,
    'verbose'   => \$verbose,
    'version'   => \$version,
) or exit ERROR;

if ($help) {
    help() and exit OK;
}
elsif ($version) {
    print $0, ' version ', $VERSION, "\n";
    exit OK;
}

exit ERROR unless open(STDIN, '-');

# --- Normalise the start boundary -----------------------------------------
# A value made only of digits is a line number; anything else is a regexp.
$start = 1 unless defined $start && length $start;
my $start_num = ( $start !~ /\D/ ) ? 1 : 0;
$start = 1 if $start_num && $start < 1;    # lines are numbered from 1

# --- Normalise the end boundary -------------------------------------------
my $end_given = ( defined $end && length $end ) ? 1 : 0;
my $end_num   = ( !$end_given || $end !~ /\D/ ) ? 1 : 0;

# A numeric end of 0 cannot name a line; treat it as "not given" so that
# -n/--count (or end of input) decides where to stop.
$end_given = 0 if $end_given && $end_num && $end == 0;

# -n/--count only applies when no explicit end was given (-e overrides -n).
my $count_limited = ( !$end_given && $lines > 0 ) ? 1 : 0;

# Last line number to print (inclusive). Left undefined when there is no
# numeric limit, in which case extraction runs to a regexp end or to EOF.
my $last_line;
if ($end_given) {
    $last_line = $end if $end_num;
}
elsif ( $count_limited && $start_num ) {
    $last_line = $start + $lines - 1;
}

# Compile user-supplied patterns once instead of on every input line.
my $start_rx = $start_num ? undef : qr/$start/;
my $end_rx   = $end_num   ? undef : qr/$end/;

my $in_body = 0;

LINE:
while ( my $line = <STDIN> ) {

    # Past a numeric end line: nothing more can be printed, stop reading.
    last LINE if defined $last_line && $. > $last_line;

    if ( !$in_body ) {
        if ($start_num) {
            next LINE if $. < $start;
        }
        else {
            next LINE if $line !~ $start_rx;

            # With a regexp start the count is relative to the matching
            # line, so the numeric end is only known now.
            $last_line = $. + $lines - 1 if $count_limited;
        }
        $in_body = 1;
        say("body starts at line $.\n");
    }

    print $line;

    # The end line itself belongs to the body, so test after printing.
    my $finished_body =
          defined $end_rx    ? ( $line =~ $end_rx )
        : defined $last_line ? ( $. >= $last_line )
        :                      0;
    if ($finished_body) {
        say("body ends at line $.\n");
        last LINE;
    }
}

exit( close(STDIN) ? OK : ERROR );
# Emit the given message on STDERR via warn(), but only when the
# package variable $verbose is true; otherwise do nothing.
sub say {
    my @message = @_;
    if ($verbose) {
        return warn(@message);
    }
    return;
}
# Show this script's embedded POD by running perldoc on the script file.
# Always returns 1, whatever the outcome of the perldoc command.
sub help {
    my @command = ( 'perldoc', $0 );
    system(@command);
    return 1;
}
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.
|