#
# Statistics::Descriptive::ByColumns module
# Compute statistics on columns of data.
#
# Idea based on a program by Benjamin Kuperman
# Rewritten in module form by Diego Zamboni
#
# June 6, 2001.
package Statistics::Descriptive::ByColumns;
use strict;
use Carp;
use Statistics::Descriptive;
use vars qw(%default_params);
=head1 NAME
ByColumns - Compute statistic values on columns of data
=head1 SYNOPSIS
$s=Statistics::Descriptive::ByColumns->new(type => 'Full', columns =
+> 5);
$s->read_file("values.dat");
$s->add_row(5, 10, 23, 81, 150);
$s->add_line("35 12 7 3.2 81");
$s->add_to_row(1, 38, 25, 41, 12, 99);
$s->print;
@objs=$s->get_objects;
$c1=$s->get_object(1);
=head1 DESCRIPTION
This module is a wrapper around C<Statistics::Descriptive> to make
it easier to store and get statistics on diferent columns of data.
Each column is considered as a separate data set, and its values
are added to a different C<Statistics::Descriptive> object that
can later be obtained and queried.
=cut
%default_params = (
type => 'Full',
columns => 3,
);
# Create a new object, with the specified (or default) parameters.
sub new {
my $what=shift;
my $class=ref($what)||$what;
my $self={@_};
foreach (keys %default_params) {
$self->{$_} = $default_params{$_} unless exists($self->{$_});
}
# Check parameters
croak "Invalid type parameter '$self->{type}'"
unless $self->{type} eq 'Full' || $self->{type} eq 'Sparse';
croak "Invalid number of columns: $self->{columns}"
if $self->{columns}<1;
# Create the objects
$self->{objs}=[
map {
($self->{type} eq 'Full')?
Statistics::Descriptive::Full->new():
Statistics::Descriptive::Sparse->new();
} 1 .. $self->{columns}
];
bless $self, $class;
return $self;
}
# Add a row of values.
sub add_row {
my $self=shift;
my @vals=@_;
if (@vals != $self->{columns}) {
carp "Invalid number of columns: got ".scalar(@vals).
", need $self->{columns}";
return;
}
foreach my $o (@{$self->{objs}}) {
$o->add_data(shift @vals);
}
return $self;
}
# Split (on whitespace) and add lines of text.
sub add_line {
my $self=shift;
my @lines=@_;
foreach (@lines) {
chomp;
my @vals=split;
$self->add_row(@vals)
or carp "Invalid line, ignored: $_\n";
}
return $self;
}
# Add data to a specific column
sub add_to_column {
my $self=shift;
my $col=shift;
my @vals=@_;
if ($col<0 || $col>=$self->{columns}) {
carp "Invalid column number $col. Valid range: 0-".($self->{column
+s}-1);
return;
}
$self->{objs}->[$col]->add_data(@vals);
return $self;
}
# Read data from a file. Argument is a filename or a handle.
sub read_file {
my $self=shift;
my $fname=shift;
my $h;
if (!ref($fname)) {
# It's a filename
open IN, "<$fname"
or do { carp "Error opening file '$fname': $!"; return };
$h=\*IN;
}
else {
# It's a handle
$h=$fname;
}
while (<$h>) {
chomp;
my @vals=split;
$self->add_row(@vals)
or carp "Invalid line $., ignored: $_\n";
}
close IN;
return $self;
}
# Get the Statistics::Descriptive objects
sub get_objects {
my $self=shift;
return @{$self->{objs}};
}
# Get a specific object
sub get_object {
my $self=shift;
my $col=shift;
if ($col<0 || $col>=$self->{columns}) {
carp "Invalid column number $col. Valid range: 0-$self->{columns}"
+;
return;
}
return $self->{objs}->[$col];
}
# Print the currently accumulated values.
# Can be passed a handle to print to and a header
sub print {
my $self=shift;
my $h=shift || \*STDOUT;
my $hdr=shift;
my $c=0;
my $f=($self->{type} eq 'Full');
if ($hdr) {
print "#" x length $hdr;
print "####\n";
print "# $hdr #\n";
print "#" x length $hdr;
print "####\n";
}
foreach my $o (@{$self->{objs}}) {
print $h "Column $c:\n";
printf $h "\t Number of items: %d\n", $o->count();
printf $h "\t Mean: %f\n", $o->mean();
printf $h "\t Median: %f\n", $o->median() if $f;
printf $h "\t Mode: %f\n", ($o->mode()||0) if $f;
printf $h "\t variance: %f\n", $o->variance();
printf $h "\tstandard_deviation: %f\n", $o->standard_deviation();
printf $h "\t min: %f\n", $o->min();
printf $h "\t max: %f\n", $o->max();
printf $h "\t Middle 50%% mean: %f\n", $o->trimmed_mean(.25,.25)
+ if $f;
$c++;
}
}
1;
__END__
#!perl -w
# Usage: col-stats.pl numcols [file ...]
use Statistics::Descriptive::ByColumns;
my $n=shift @ARGV
or die "Usage: $0 numcols [file ...]\n";
unless (@ARGV) {
@ARGV=(\*STDIN);
}
foreach my $f (@ARGV) {
my $s=Statistics::Descriptive::ByColumns->new(columns => $n);
$s->read_file($f);
$s->print(undef, ref($f)?undef:$f);
}
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.