http://qs321.pair.com?node_id=87640
Category: Math and Statistics
Author/Contact Info Diego Zamboni (zzamboni@perlmonk.org)
Description: This module is a wrapper around Statistics::Descriptive to make it easier to store and get statistics on diferent columns of data. Each column is considered as a separate data set, and its values are added to a different Statistics::Descriptive object that can later be obtained and queried.

Still missing proper POD documentation. See sample program after __END__. Sample data:

93 97 84 70 52 30 20 54 68 2 38 33 26 33 10 45 2 39 96 18 34 79 52 70 86 11 32 67 46 19
#
# Statistics::Descriptive::ByColumns module
# Compute statistics on columns of data.
#
# Idea based on a program by Benjamin Kuperman
# Rewritten in module form by Diego Zamboni
#
# June 6, 2001.

package Statistics::Descriptive::ByColumns;

use strict;
use Carp;
use Statistics::Descriptive;
use vars qw(%default_params);

=head1 NAME

ByColumns - Compute statistic values on columns of data

=head1 SYNOPSIS

  $s=Statistics::Descriptive::ByColumns->new(type => 'Full', columns =
+> 5);
  $s->read_file("values.dat");
  $s->add_row(5, 10, 23, 81, 150);
  $s->add_line("35 12 7 3.2 81");
  $s->add_to_row(1, 38, 25, 41, 12, 99);
  $s->print;
  @objs=$s->get_objects;
  $c1=$s->get_object(1);

=head1 DESCRIPTION

This module is a wrapper around C<Statistics::Descriptive> to make
it easier to store and get statistics on diferent columns of data.
Each column is considered as a separate data set, and its values
are added to a different C<Statistics::Descriptive> object that
can later be obtained and queried.

=cut

%default_params = (
                   type => 'Full',
                   columns => 3,
                  );

# Create a new object, with the specified (or default) parameters.
sub new {
  my $what=shift;
  my $class=ref($what)||$what;
  my $self={@_};
  foreach (keys %default_params) {
    $self->{$_} = $default_params{$_} unless exists($self->{$_});
  }
  # Check parameters
  croak "Invalid type parameter '$self->{type}'"
    unless $self->{type} eq 'Full' || $self->{type} eq 'Sparse';
  croak "Invalid number of columns: $self->{columns}"
    if $self->{columns}<1;
  # Create the objects
  $self->{objs}=[
                 map {
                   ($self->{type} eq 'Full')?
                     Statistics::Descriptive::Full->new():
                     Statistics::Descriptive::Sparse->new();
                 } 1 .. $self->{columns}
                ];
  bless $self, $class;
  return $self;
}

# Add a row of values.
sub add_row {
  my $self=shift;
  my @vals=@_;
  if (@vals != $self->{columns}) {
    carp "Invalid number of columns: got ".scalar(@vals).
      ", need $self->{columns}";
    return;
  }
  foreach my $o (@{$self->{objs}}) {
    $o->add_data(shift @vals);
  }
  return $self;
}

# Split (on whitespace) and add lines of text.
sub add_line {
  my $self=shift;
  my @lines=@_;
  foreach (@lines) {
    chomp;
    my @vals=split;
    $self->add_row(@vals)
      or carp "Invalid line, ignored: $_\n";
  }
  return $self;
}

# Add data to a specific column
sub add_to_column {
  my $self=shift;
  my $col=shift;
  my @vals=@_;
  if ($col<0 || $col>=$self->{columns}) {
    carp "Invalid column number $col. Valid range: 0-".($self->{column
+s}-1);
    return;
  }
  $self->{objs}->[$col]->add_data(@vals);
  return $self;
}

# Read data from a file. Argument is a filename or a handle.
sub read_file {
  my $self=shift;
  my $fname=shift;
  my $h;
  if (!ref($fname)) {
    # It's a filename
    open IN, "<$fname"
      or do { carp "Error opening file '$fname': $!"; return };
    $h=\*IN;
  }
  else {
    # It's a handle
    $h=$fname;
  }
  while (<$h>) {
    chomp;
    my @vals=split;
    $self->add_row(@vals)
      or carp "Invalid line $., ignored: $_\n";
  }
  close IN;
  return $self;
}

# Get the Statistics::Descriptive objects
sub get_objects {
  my $self=shift;
  return @{$self->{objs}};
}

# Get a specific object
sub get_object {
  my $self=shift;
  my $col=shift;
  if ($col<0 || $col>=$self->{columns}) {
    carp "Invalid column number $col. Valid range: 0-$self->{columns}"
+;
    return;
  }
  return $self->{objs}->[$col];
}

# Print the currently accumulated values.
# Can be passed a handle to print to and a header
sub print {
  my $self=shift;
  my $h=shift || \*STDOUT;
  my $hdr=shift;
  my $c=0;
  my $f=($self->{type} eq 'Full');

  if ($hdr) {
    print "#" x length $hdr;
    print "####\n";
    print "# $hdr #\n";
    print "#" x length $hdr;
    print "####\n";
  }
  foreach my $o (@{$self->{objs}}) {
    print $h "Column $c:\n";
    printf $h "\t   Number of items: %d\n", $o->count();
    printf $h "\t              Mean: %f\n", $o->mean();
    printf $h "\t            Median: %f\n", $o->median() if $f;
    printf $h "\t              Mode: %f\n", ($o->mode()||0) if $f;
    printf $h "\t          variance: %f\n", $o->variance();
    printf $h "\tstandard_deviation: %f\n", $o->standard_deviation();
    printf $h "\t               min: %f\n", $o->min();
    printf $h "\t               max: %f\n", $o->max();
    printf $h "\t   Middle 50%% mean: %f\n", $o->trimmed_mean(.25,.25)
+ if $f;
    $c++;
  }
}

1;

__END__

#!perl -w
# Usage: col-stats.pl numcols [file ...]

use Statistics::Descriptive::ByColumns;

my $n=shift @ARGV
  or die "Usage: $0 numcols [file ...]\n";

unless (@ARGV) {
        @ARGV=(\*STDIN);
}
foreach my $f (@ARGV) {
  my $s=Statistics::Descriptive::ByColumns->new(columns => $n);
  $s->read_file($f);
  $s->print(undef, ref($f)?undef:$f);
}