# # Statistics::Descriptive::ByColumns module # Compute statistics on columns of data. # # Idea based on a program by Benjamin Kuperman # Rewritten in module form by Diego Zamboni # # June 6, 2001. package Statistics::Descriptive::ByColumns; use strict; use Carp; use Statistics::Descriptive; use vars qw(%default_params); =head1 NAME ByColumns - Compute statistic values on columns of data =head1 SYNOPSIS $s=Statistics::Descriptive::ByColumns->new(type => 'Full', columns => 5); $s->read_file("values.dat"); $s->add_row(5, 10, 23, 81, 150); $s->add_line("35 12 7 3.2 81"); $s->add_to_row(1, 38, 25, 41, 12, 99); $s->print; @objs=$s->get_objects; $c1=$s->get_object(1); =head1 DESCRIPTION This module is a wrapper around C to make it easier to store and get statistics on diferent columns of data. Each column is considered as a separate data set, and its values are added to a different C object that can later be obtained and queried. =cut %default_params = ( type => 'Full', columns => 3, ); # Create a new object, with the specified (or default) parameters. sub new { my $what=shift; my $class=ref($what)||$what; my $self={@_}; foreach (keys %default_params) { $self->{$_} = $default_params{$_} unless exists($self->{$_}); } # Check parameters croak "Invalid type parameter '$self->{type}'" unless $self->{type} eq 'Full' || $self->{type} eq 'Sparse'; croak "Invalid number of columns: $self->{columns}" if $self->{columns}<1; # Create the objects $self->{objs}=[ map { ($self->{type} eq 'Full')? Statistics::Descriptive::Full->new(): Statistics::Descriptive::Sparse->new(); } 1 .. $self->{columns} ]; bless $self, $class; return $self; } # Add a row of values. sub add_row { my $self=shift; my @vals=@_; if (@vals != $self->{columns}) { carp "Invalid number of columns: got ".scalar(@vals). ", need $self->{columns}"; return; } foreach my $o (@{$self->{objs}}) { $o->add_data(shift @vals); } return $self; } # Split (on whitespace) and add lines of text. sub add_line { my $self=shift; my @lines=@_; foreach (@lines) { chomp; my @vals=split; $self->add_row(@vals) or carp "Invalid line, ignored: $_\n"; } return $self; } # Add data to a specific column sub add_to_column { my $self=shift; my $col=shift; my @vals=@_; if ($col<0 || $col>=$self->{columns}) { carp "Invalid column number $col. Valid range: 0-".($self->{columns}-1); return; } $self->{objs}->[$col]->add_data(@vals); return $self; } # Read data from a file. Argument is a filename or a handle. sub read_file { my $self=shift; my $fname=shift; my $h; if (!ref($fname)) { # It's a filename open IN, "<$fname" or do { carp "Error opening file '$fname': $!"; return }; $h=\*IN; } else { # It's a handle $h=$fname; } while (<$h>) { chomp; my @vals=split; $self->add_row(@vals) or carp "Invalid line $., ignored: $_\n"; } close IN; return $self; } # Get the Statistics::Descriptive objects sub get_objects { my $self=shift; return @{$self->{objs}}; } # Get a specific object sub get_object { my $self=shift; my $col=shift; if ($col<0 || $col>=$self->{columns}) { carp "Invalid column number $col. Valid range: 0-$self->{columns}"; return; } return $self->{objs}->[$col]; } # Print the currently accumulated values. # Can be passed a handle to print to and a header sub print { my $self=shift; my $h=shift || \*STDOUT; my $hdr=shift; my $c=0; my $f=($self->{type} eq 'Full'); if ($hdr) { print "#" x length $hdr; print "####\n"; print "# $hdr #\n"; print "#" x length $hdr; print "####\n"; } foreach my $o (@{$self->{objs}}) { print $h "Column $c:\n"; printf $h "\t Number of items: %d\n", $o->count(); printf $h "\t Mean: %f\n", $o->mean(); printf $h "\t Median: %f\n", $o->median() if $f; printf $h "\t Mode: %f\n", ($o->mode()||0) if $f; printf $h "\t variance: %f\n", $o->variance(); printf $h "\tstandard_deviation: %f\n", $o->standard_deviation(); printf $h "\t min: %f\n", $o->min(); printf $h "\t max: %f\n", $o->max(); printf $h "\t Middle 50%% mean: %f\n", $o->trimmed_mean(.25,.25) if $f; $c++; } } 1; __END__ #!perl -w # Usage: col-stats.pl numcols [file ...] use Statistics::Descriptive::ByColumns; my $n=shift @ARGV or die "Usage: $0 numcols [file ...]\n"; unless (@ARGV) { @ARGV=(\*STDIN); } foreach my $f (@ARGV) { my $s=Statistics::Descriptive::ByColumns->new(columns => $n); $s->read_file($f); $s->print(undef, ref($f)?undef:$f); }