use strict;
use Getopt::Long;
Getopt::Long::Configure('no_ignore_case');
Getopt::Long::Configure('no_auto_abbrev');

my $VERSION = "1.6";

# unbuffered STDOUT, so our output is not reordered relative to the output
# of commands started through -e
$|=1;

main();

# Entry point.
#
# Parses the command line, optionally skips leading input lines, then reads
# the remaining input (files named in @ARGV, or STDIN) line by line.  Each
# line is broken into columns (by separator, or by the capture groups of a
# user supplied regex), filtered by the uniqueness and comparison options,
# and finally printed or handed to a shell command.
sub main {
    # command line arg variables
    my (@column, @nc, $sep, $u, $v);
    my ($all_col, $column, $com, $full, $topic, $skip, $string, $numeric,
        $out_sep, $out_fmt, $regex);

    my $options_ok = GetOptions(
        "a|all"        => \$all_col,  # output all columns
        "c|compare=s@" => \@nc,       # column comparisons
        "e|exec=s"     => \$com,      # command to run
        "f|printf=s"   => \$out_fmt,  # output format
        "h|help:s"     => \$topic,    # get help on an option
        "i|sep=s"      => \$sep,      # input column separator
        "k|skip=i"     => \$skip,     # skip specified num lines
        "l|full"       => \$full,     # print whole line
        "N|numeric"    => \$numeric,  # force numeric comparison
        "o|outsep=s"   => \$out_sep,  # output record separator
        "r|regex=s"    => \$regex,    # regex to use
        "S|string"     => \$string,   # force string comparison
        "s|column=s"   => \$column,   # selected columns
        "u|unique=s"   => \$u,        # unique items in col s
        "v|version"    => \$v,        # print version
    );

    # GetOptions has already reported the offending option on STDERR;
    # do not go on to process input with a half-understood command line
    unless ($options_ok) {
        help();
        exit(1);
    }

    # cannot do numeric and string comparisons at the same time
    if ($numeric && $string) {
        print "-N and -S are mutually exclusive!\n";
        help();
        exit;
    }

    if (defined($topic)) {
        help($topic);
        exit;
    }
    elsif ($v) {
        help("version");
        exit;
    }

    # default value for column if none is specified
    if (defined($column)) {
        @column = split(',', $column);
    }
    elsif (defined($u)) {
        $column[0] = $u;
    }
    else {
        $column[0] = 0;
    }

    # default column separator
    unless (defined($sep)) {
        $sep = '\s+';
    }

    # default output separator
    unless (defined($out_sep)) {
        $out_sep = " ";
    }

    # number of lines to skip at the beginning; the explicit count and the
    # defined() test keep a negative count from eating the whole input and
    # keep a line consisting of "0" from ending the skip early
    $skip = 0 unless defined($skip);
    while ($skip > 0) {
        last unless defined(my $skipped = <>);
        $skip--;
    }

    # get a list of columns to do comparisons on and subroutines to do them
    my ($comp_col, $comp_sub) = parse_comp($string, $numeric, @nc);

    my %uniq;

    ############### MAIN LOOP ###############
    LINE:
    while (defined(my $whole_line = <>)) {
        my @selected;
        if (defined($regex)) {
            # columns are the capture groups of the user's regex
            @selected = ($whole_line =~ /$regex/);
        }
        else {
            # split a chomped copy: with a non-whitespace separator the last
            # column would otherwise keep the line terminator.  $sep is passed
            # as a plain string so a separator of ' ' keeps split's awk mode.
            my $record = $whole_line;
            chomp($record);
            @selected = split($sep, $record);
        }

        # nothing matched / nothing to split
        next LINE unless defined($selected[0]);

        # check for uniqueness if requested
        if (defined($u)) {
            next LINE if $uniq{ $selected[$u] }++;
        }

        # do column comparisons if there are any; a missing column fails
        if (defined($comp_col)) {
            for my $i (0 .. $#{$comp_col}) {
                my $value = $selected[ $comp_col->[$i] ];
                next LINE
                    unless defined($value) && $comp_sub->[$i]->($value);
            }
        }

        if (defined($com)) {
            execute($com, \@selected);
        }
        elsif (defined($full)) {
            print $whole_line;
        }
        else {
            # either every column or only the selected ones
            my @output = defined($all_col) ? @selected : @selected[@column];
            if (defined($out_fmt)) {
                printf "$out_fmt\n", @output;
            }
            else {
                print join($out_sep, @output), "\n";
            }
        }
    }
}

########### SUBROUTINES ##############

# execute a command
#
#   $com     - command template; \0, \1, ... are replaced by column values,
#              each optionally followed by a ';' that is consumed
#   $columns - array reference holding the columns of the current line
#
# Returns the value of system() for the expanded command.
sub execute {
    my ($com, $columns) = @_;

    # substitute the value from the appropriate column for \0, \1, etc.
    # NOTE(review): column values come straight from the input and reach the
    # shell unescaped, so input containing shell metacharacters can run
    # arbitrary commands.  Left as is because quoting here would break
    # templates that already quote their placeholders; only use -e on
    # trusted input.
    $com =~ s/\\(\d+)\;?/$columns->[$1]/g;

    return system($com);
}

# shared by both comparison builders: 're' and 'nr' do not depend on the
# numeric/string distinction.  Returns a code reference, or nothing when
# $cexp is not a regex operator.
sub _build_regex_comp {
    my ($cexp, $cval) = @_;

    if ($cexp eq 're') {
        return sub { return ($_[0] =~ /$cval/); };
    }
    if ($cexp eq 'nr') {
        return sub { return ($_[0] !~ /$cval/); };
    }
    return;
}

# generate numeric comparison routines
#
#   $cexp - operator name: gt lt ge le eq ne re nr
#   $cval - value to compare each column value against
#
# Returns a code reference taking the column value and returning true when
# the comparison holds.  Prints a message and exits with status 1 when the
# operator is unknown.
sub build_num_comp {
    my ($cexp, $cval) = @_;

    my %comparison_for = (
        gt => sub { return ($_[0] >  $cval); },
        lt => sub { return ($_[0] <  $cval); },
        ge => sub { return ($_[0] >= $cval); },
        le => sub { return ($_[0] <= $cval); },
        eq => sub { return ($_[0] == $cval); },
        ne => sub { return ($_[0] != $cval); },
    );

    my $comparison = $comparison_for{$cexp}
                  || _build_regex_comp($cexp, $cval);
    unless ($comparison) {
        print "Invalid comparison, use ax -h for help.\n";
        exit(1);
    }
    return $comparison;
}

# generate string comparison routines
#
# Same contract as build_num_comp, but the ordering and equality operators
# compare as strings.
sub build_str_comp {
    my ($cexp, $cval) = @_;

    my %comparison_for = (
        gt => sub { return ($_[0] gt $cval); },
        lt => sub { return ($_[0] lt $cval); },
        ge => sub { return ($_[0] ge $cval); },
        le => sub { return ($_[0] le $cval); },
        eq => sub { return ($_[0] eq $cval); },
        ne => sub { return ($_[0] ne $cval); },
    );

    my $comparison = $comparison_for{$cexp}
                  || _build_regex_comp($cexp, $cval);
    unless ($comparison) {
        print "Invalid comparison, use ax -h for help.\n";
        exit(1);
    }
    return $comparison;
}

# parse the column comparison input on the command line and return a list
# of columns to do comparisons on, and subroutines to do the comparisons
#
#   $string  - true to force string comparisons (-S)
#   $numeric - true to force numeric comparisons (-N)
#   @nc      - comparison specs of the form "<column> <op> <value>"
#
# Without -S or -N the comparison is numeric when the value looks like a
# number (optionally signed, optionally with a decimal part) and a string
# comparison otherwise.
#
# Returns (\@columns, \@subs), or (undef, undef) when @nc is empty.  Prints
# a message and exits with status 1 on a malformed spec.
sub parse_comp {
    my ($string, $numeric, @nc) = @_;
    my @comp_col = ();
    my @comp_sub = ();

    # what counts as a number when neither -S nor -N is given; signed and
    # decimal values must not silently fall back to string comparison
    my $number_re = qr/^[-+]?(?:\d+\.?\d*|\.\d+)$/;

    # check for column comparisons
    for my $spec (@nc) {
        unless ($spec =~ /^\s*(\d+)\s*(\w\w)\s*(\S+)$/) {
            print "Invalid comparison, ax -h for help\n";
            exit(1);
        }
        my ($col, $op, $value) = ($1, $2, $3);

        my $compare_as_number;
        if ($string) {
            $compare_as_number = 0;
        }
        elsif ($numeric) {
            $compare_as_number = 1;
        }
        else {
            $compare_as_number = ($value =~ $number_re) ? 1 : 0;
        }

        push @comp_col, $col;    # column number
        push @comp_sub, $compare_as_number
            ? build_num_comp($op, $value)
            : build_str_comp($op, $value);
    }

    if (@comp_col) {
        return \@comp_col, \@comp_sub;
    }
    else {
        return undef, undef;
    }
}

# print some help text
sub help {
    my $topic = shift;
    if($topic eq "l" || $topic eq "full") {
        print "Usage: ax -f | --full\n";
        print " Print the full line\n";
    } elsif($topic eq "s" || $topic eq "column") {
        print "Usage: ax -${topic} \n";
        print " Select column to be output.\n";
    } elsif($topic eq "k" || $topic eq "skip") {
        print "Usage: ax -${topic} \n";
        print " Skip lines before beginning processing.\n";
    } elsif($topic eq "i" || $topic eq "sep") {
        print "Usage: ax -sep \n";
        print " Use as separator when splitting columns.\n";
    } elsif($topic eq "e" || $topic eq "exec") {
        print "Usage: ax -${topic} \"command\"\n";
        print " Execute a shell command for each line that matches.\n";
        print " Substitute \\0, \\1, etc for column 0, 1, etc.\n";
        print " Each 
column indicator may optionally be followed by a\n"; print " semicolon, to separated it from any digits that immediately\n"; print " follow it.\n"; } elsif($topic eq "r" || $topic eq "regex") { print "Usage: ax -${topic} \"perl regex\"\n"; print " Provide a regular expression for parsing the columns,\n"; print " to replace the default whitespace-matching expression.\n"; } elsif ($topic eq "v" || $topic eq "version") { print " ax : text parser : version $VERSION\n"; print " by kirk baucom \n"; } elsif($topic eq "u" || $topic eq "unique") { print "Usage: ax -${topic} "; print " Skip lines with repeated values in column \n"; } elsif($topic eq "c" || $topic eq "compare") { print "Usage: ax -${topic} ' '\n"; print " Compare the value in column with the value \n"; print " using the operator , and skip lines that fail.\n"; print " can take the values:\n\n"; print "gt (greater than)\nlt (less than)\neq (equal to)\nne (not equal to)\n"; print "ge (greater or equal)\nle (less or equal)\nre (regular expression)\n"; print "nr (negated regular expression)\n"; } elsif($topic eq "N" || $topic eq "numeric") { print "Usage: ax -${topic} -s ' '\n"; print " Force a numeric comparison when using the -cc option\n"; } elsif($topic eq "S" || $topic eq "string") { print "Usage: ax -${topic} -s ' '\n"; print " Force a string comparison when using the -cc option\n"; } elsif($topic eq "f" || $topic eq "printf") { print "Usage: ax -${topic} ''\n"; print " Supply a format suitable for printf to be used for output\n"; } elsif($topic eq "o" || $topic eq "outsep") { print "Usage: ax -${topic} ''\n"; print " Supply an output record separator. Default is a single space.\n"; } else { # general help print <] [-h | --help] [-i | --sep ] [-k | --skip ] [-l | --full] [-N | --numeric] [-o | --outsep ] [-r | --regex "regular expression"] [-S | --string] [-s | --column ] [-u | --unique ] [-v | --version] Use ax -h