use strict;
use Getopt::Long;
Getopt::Long::Configure('no_ignore_case');
Getopt::Long::Configure('no_auto_abbrev');
my $VERSION = "1.6";
$|=1;
main();
sub main {
# command line arg variables
my (@column, @nc, $sep, $u, $v);
my ($all_col, $column, $com, $full, $topic, $skip,
$string, $numeric, $out_sep, $out_fmt, $regex);
&GetOptions(
"a|all" => \$all_col, # output all columns
"c|compare=s@" => \@nc, # column comparisons
"e|exec=s" => \$com, # command to run
"f|printf=s" => \$out_fmt, # output format
"h|help:s" => \$topic, # get help on an optio
+n
"i|sep=s" => \$sep, # input column separat
+or
"k|skip=i" => \$skip, # skip specified num l
+ines
"l|full" => \$full, # print whole line
"N|numeric" => \$numeric, # force numeric compar
+ison
"o|outsep=s" => \$out_sep, # output record separa
+tor
"r|regex=s" => \$regex, # regex to use
"S|string" => \$string, # force string compari
+son
"s|column=s" => \$column, # selected columns
"u|unique=s" => \$u, # unique items in col
+s
"v|version" => \$v, # print version
);
# cannot do numeric and string comparisons at the same time
if($numeric && $string) { print "-N and -S are mutually exclusive!\n
+"; help(); exit; }
if(defined($topic)) { help($topic); exit; }
elsif($v) { help("version"); exit; }
# default value for column if none is specified
if(defined($column)) { @column = split(',', $column); }
elsif(defined($u)) { $column[0] = $u; }
else { $column[0] = 0; }
# default column separator
unless(defined($sep)) { $sep = '\s+'; }
# default output separator
unless(defined($out_sep)) { $out_sep = " "; }
# number of lines to skip at the beginning
while($skip && <>) { $skip--; }
# get a list of columns to do comparisons on and subroutines to do t
+hem
my ($comp_col, $comp_sub) = parse_comp($string, $numeric, @nc);
my (@selected, %uniq, $whole_line, $comp_fail);
############### MAIN LOOP ###############
while($whole_line = <>) {
if(defined($regex)) { @selected = ($whole_line =~ /$regex/); }
else { @selected = split($sep, $whole_line); }
if(defined($selected[0])) {
# check for uniqueness if requested
if(defined($u)) { ($uniq{$selected[$u]}) ? next : $uniq{$selecte
+d[$u]}++; }
# do column comparisons if there are any
if(defined($comp_col)) {
$comp_fail = 0;
for(0..$#{$comp_col}) {
unless(defined($selected[$comp_col->[$_]]) &&
&{$comp_sub->[$_]}($selected[$comp_col->[$_]])) {
$comp_fail = 1; last;
}
}
next if($comp_fail);
}
if(defined($com)) { execute($com, \@selected); }
elsif(defined($full)) { print $whole_line; }
else {
if(defined($all_col)) {
if(defined($out_fmt)) { printf "$out_fmt\n", @selected; }
else { print join($out_sep, @selected), "\n"; }
}
else {
if(defined($out_fmt)) { printf "$out_fmt\n", @selected[@colu
+mn]; }
else { print join($out_sep, @selected[@column]), "\n"; }
}
}
}
}
}
########### SUBROUTINES ##############
# execute a command
sub execute {
my ($com, $columns) = @_;
# substitute the value from the appropriate column for \0, \1, etc.
$com =~ s/\\(\d+)\;?/$columns->[$1]/g;
system("$com");
}
# generate numeric comparison routines
sub build_num_comp {
my ($cexp, $cval) = @_;
if ($cexp eq 'gt') { return sub { return ($_[0] > $cval); } }
elsif($cexp eq 'lt') { return sub { return ($_[0] < $cval); } }
elsif($cexp eq 'ge') { return sub { return ($_[0] >= $cval); } }
elsif($cexp eq 'le') { return sub { return ($_[0] <= $cval); } }
elsif($cexp eq 'eq') { return sub { return ($_[0] == $cval); } }
elsif($cexp eq 'ne') { return sub { return ($_[0] != $cval); } }
elsif($cexp eq 're') { return sub { return ($_[0] =~ /$cval/); } }
elsif($cexp eq 'nr') { return sub { return ($_[0] !~ /$cval/); } }
else { print "Invalid comparison, use ax -h for help.\n"; exit(1); }
}
# generate string comparison routines
sub build_str_comp {
my ($cexp, $cval) = @_;
if ($cexp eq 'gt') { return sub { return ($_[0] gt $cval); } }
elsif($cexp eq 'lt') { return sub { return ($_[0] lt $cval); } }
elsif($cexp eq 'ge') { return sub { return ($_[0] ge $cval); } }
elsif($cexp eq 'le') { return sub { return ($_[0] le $cval); } }
elsif($cexp eq 'eq') { return sub { return ($_[0] eq $cval); } }
elsif($cexp eq 'ne') { return sub { return ($_[0] ne $cval); } }
elsif($cexp eq 're') { return sub { return ($_[0] =~ /$cval/); } }
elsif($cexp eq 'nr') { return sub { return ($_[0] !~ /$cval/); } }
else { print "Invalid comparison, use ax -h for help.\n"; exit(1); }
}
# parse the column comparison input on the command line and return a l
+ist
# of columns to do comparisons on, and subroutines to do the compariso
+ns
sub parse_comp {
my ($string, $numeric, @nc) = @_;
my @comp_col = ();
my @comp_sub = ();
my $count = 0;
# check for column comparisons
my $nc = '';
foreach $nc (@nc) {
if($nc =~ /^\s*(\d+)\s*(\w\w)\s*(\d+)$/) {
$comp_col[$count] = $1; # column number
if($string) { $comp_sub[$count] = build_str_comp($2, $3); }
else { $comp_sub[$count] = build_num_comp($2, $3); }
$count++;
}
elsif($nc =~ /^\s*(\d+)\s*(\w\w)\s*(\S+)$/) {
$comp_col[$count] = $1; # column number
if($numeric) { $comp_sub[$count] = build_num_comp($2, $3); }
else { $comp_sub[$count] = build_str_comp($2, $3); }
$count++;
}
else { print "Invalid comparison, ax -h for help\n"; exit(1); }
}
if(defined($comp_col[0])) { return \@comp_col, \@comp_sub; }
else { return undef, undef; }
}
# print some help text
sub help {
my $topic = shift;
if($topic eq "l" || $topic eq "full") {
print "Usage: ax -f | --full\n";
print " Print the full line\n";
}
elsif($topic eq "s" || $topic eq "column") {
print "Usage: ax -${topic} <number>\n";
print " Select column <number> to be output.\n";
}
elsif($topic eq "k" || $topic eq "skip") {
print "Usage: ax -${topic} <number>\n";
print " Skip <number> lines before beginning processing.\n";
}
elsif($topic eq "i" || $topic eq "sep") {
print "Usage: ax -sep <string>\n";
print " Use <string> as separator when splitting columns.\n"
+;
}
elsif($topic eq "e" || $topic eq "exec") {
print "Usage: ax -${topic} \"command\"\n";
print " Execute a shell command for each line that matches.\
+n";
print " Substitute \\0, \\1, etc for column 0, 1, etc.\n";
print " Each column indicator may optionally be followed by
+a\n";
print " semicolon, to separated it from any digits that imme
+diately\n";
print " follow it.\n";
}
elsif($topic eq "r" || $topic eq "regex") {
print "Usage: ax -${topic} \"perl regex\"\n";
print " Provide a regular expression for parsing the columns
+,\n";
print " to replace the default whitespace-matching expressio
+n.\n";
}
elsif ($topic eq "v" || $topic eq "version") {
print " ax : text parser : version $VERSION\n";
print " by kirk baucom <kbaucom\@schizoid.com>\n";
}
elsif($topic eq "u" || $topic eq "unique") {
print "Usage: ax -${topic} <number>";
print " Skip lines with repeated values in column <number>\n
+";
}
elsif($topic eq "c" || $topic eq "compare") {
print "Usage: ax -${topic} '<colnum> <operator> <value>'\n";
print " Compare the value in column <colnum> with the value
+<value>\n";
print " using the operator <operator>, and skip lines that f
+ail.\n";
print " <operator> can take the values:\n\n";
print "gt (greater than)\nlt (less than)\neq (equal to)\nne (not e
+qual to)\n";
print "ge (greater or equal)\nle (less or equal)\nre (regular expr
+ession)\n";
print "nr (negated regular expression)\n";
}
elsif($topic eq "N" || $topic eq "numeric") {
print "Usage: ax -${topic} -s '<colnum> <operator> <value>'\n";
print " Force a numeric comparison when using the -cc option\
+n";
}
elsif($topic eq "S" || $topic eq "string") {
print "Usage: ax -${topic} -s '<colnum> <operator> <value>'\n";
print " Force a string comparison when using the -cc option\n
+";
}
elsif($topic eq "f" || $topic eq "printf") {
print "Usage: ax -${topic} '<printf format>'\n";
print " Supply a format suitable for printf to be used for ou
+tput\n";
}
elsif($topic eq "o" || $topic eq "outsep") {
print "Usage: ax -${topic} '<separator>'\n";
print " Supply an output record separator. Default is a singl
+e space.\n";
}
else { # general help
print <<END;
Usage: ax [-a | --all]
[-c | --compare "colnum gt|lt|ge|le|eq|ne|re|nr string"]
[-e | --exec "command"]
[-f | --printf <format>]
[-h | --help]
[-i | --sep <string>]
[-k | --skip <number>]
[-l | --full]
[-N | --numeric]
[-o | --outsep <string>]
[-r | --regex "regular expression"]
[-S | --string]
[-s | --column <number>]
[-u | --unique <number>]
[-v | --version]
Use ax -h <option> for more specific help (ie. ax -h f for help with
+ the
-f parameter).
END
}
}
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.