http://qs321.pair.com?node_id=345353


in reply to Parsing Log Files

If I read your problem right, this code should work. I tested it on the data shown here and the output is what you see.

ladoix% cat 345343.pl
#!/usr/bin/perl
use strict;
use warnings;

# Column names (made up for this example), listed in the order in which
# the '*'-separated fields appear on each input line. The sample data
# has eight fields per record, so eight names are needed; with only seven
# names every label from the fourth field onwards was shifted by one.
my @column_names = qw( name code type activity date time bla1 bla2 );

# A lookup hash mapping each column name to its array index. It is
# derived from @column_names so the two can never drift out of step.
my %index_of;
@index_of{@column_names} = 0 .. $#column_names;

my @trans;

# Read all the data into an array of arrays
while ( my $line = <DATA> ) {
    chomp $line;            # keep the newline out of the last field
    next if $line eq '';    # ignore blank lines
    push @trans, [ split /\*/, $line ];    # Using split instead of regex
}

# Go through each of the column names and run countcolumn() on it
foreach my $column (@column_names) {
    countcolumn( \@trans, $column );
}

# countcolumn takes an array reference (one array ref per record) and a
# column name as arguments. It prints "Column: <name>", then one
# "<value> : <count>" line per distinct value found in that column,
# sorted as strings, then an empty line.
# Dies if the column name is not one of @column_names.
sub countcolumn {
    my ( $arrayref, $name ) = @_;

    # Translate the column name into its array index
    my $index = $index_of{$name};
    die "Unknown column name '$name'\n" unless defined $index;

    # Do the counting for the column
    my %counthash;
    foreach my $trans (@$arrayref) {
        my $value = $trans->[$index];
        next unless defined $value;    # record too short to have this column
        $counthash{$value}++;
    }

    print "Column: $name\n";

    # Print a sorted list
    print map { "$_ : $counthash{$_}\n" } sort keys %counthash;
    print "\n";
    return;
}

__DATA__
Company Name*345467*YW34567c*activitype*04/15/2004*11:34:10*123456789*1
Company Name Other*345468*YW34567c*activitype*04/15/2004*11:34:10*123456789*3
Company Name 1*345469*YW34567c*activitype*04/15/2004*11:34:10*123456789*1
Company Name 3*345468*YW34567c*activitype*04/15/2004*11:34:10*123456789*1
Company Name 2*345467*YW34567c*activitype*04/15/2004*11:34:10*123456789*1
Company Name 4*345469*YW34567c*activitype*04/15/2004*11:34:10*123456789*1
Company Name 2*345467*YW34567c*activitype*04/15/2004*11:34:10*123456789*1
Company Name 4*345467*YW34567c*activitype*04/16/2004*07:34:00*123456789*1
Company Name 1*345468*YW34567c*activitype*04/16/2004*09:30:00*123456789*1
Company Name 1*345469*YW34567c*activitype*04/16/2004*10:34:00*123456789*1
Company Name 2*345467*YW34567c*activitype*04/16/2004*11:37:00*123456789*1

#output
ladoix% perl 345343.pl
Column: name
Company Name : 1
Company Name 1 : 3
Company Name 2 : 3
Company Name 3 : 1
Company Name 4 : 2
Company Name Other : 1

Column: code
345467 : 5
345468 : 3
345469 : 3

Column: type
YW34567c : 11

Column: activity
activitype : 11

Column: date
04/15/2004 : 7
04/16/2004 : 4

Column: time
07:34:00 : 1
09:30:00 : 1
10:34:00 : 1
11:34:10 : 7
11:37:00 : 1

Column: bla1
123456789 : 11

Column: bla2
1 : 10
3 : 1

--
Damon Allen Davison