# A colleague and I wrote this code to deal with a complex data file. I thought someone might find it useful.
use strict;
use warnings;
use FileHandle;
use List::Util qw(max min);
#use English;
#use Cwd;
#use File::Basename;
# Path of the datalog file to analyse. An optional first command-line
# argument overrides the built-in default path.
my $dlFile = defined $ARGV[0] ? $ARGV[0] : 'C:/arv/StatLog.V4_0.8.dl';

# Kept declared at file scope because the parsing code further down the
# file may still refer to these names.
my $recordNum;
my $key;

#Open input file
# Three-argument open with a lexical handle: the mode is never parsed out
# of the file name, and the error message names the file and the OS error.
open my $DL, '<', $dlFile
    or die "Can't open datalog file $dlFile: $!";
print "Datalog file name is $dlFile\n";
# Accumulators filled while the datalog is parsed.
#   %points      : $points{$index}{$seq}{$point_name} = [ one .mean value per record, ... ]
#   @point_names : unique point names ending in ".mean", in first-seen order
#   @seq_names   : unique "seq_name" values, in first-seen order
#   @indices     : unique "Ld_Idx" values, in first-seen order
my %points;
my $seq;      # most recently seen "seq_name" value
my $index;    # most recently seen "Ld_Idx" value
my %means;    # .mean values of the record currently being parsed, keyed by point name
my @point_names;
my @seq_names;
my @indices;
my %seen_point;
my %seen_seq;
my %seen_index;

# Skip the schema: everything up to and including the first line that
# starts with '}'.
while (my $schema_line = <$DL>) {
    last if $schema_line =~ /^\}/;
}

# Each record starts with a line that begins with a quoted run of digits
# (the record number); its fields follow, up to a line starting with '}'.
RECORD:
while (my $line = <$DL>) {
    next RECORD unless $line =~ /^"(\d+)/;
    my $record_num = $1;
    print "Record num is: $record_num\n";

    # Start every record with an empty set of means; otherwise values left
    # over from earlier records would be stored again for this record.
    %means = ();

    # NOTE(review): $seq and $index deliberately carry over from the previous
    # record when this record does not set them - confirm that this matches
    # the datalog format.
    FIELD:
    while (my $field_line = <$DL>) {
        chomp $field_line;

        next FIELD if $field_line =~ /^\{/;    # opening brace of the record body

        if ($field_line =~ /^\}/) {            # end of a datalog record
            if (defined $index && defined $seq) {
                #Organize the structure however you see fit
                foreach my $point_name (keys %means) {
                    push @{ $points{$index}{$seq}{$point_name} }, $means{$point_name};
                }
            }
            else {
                warn "Record $record_num: no Ld_Idx/seq_name seen yet, its .mean values were skipped\n";
            }
            last FIELD;
        }

        # Extract the quoted name and the quoted value:  <spaces>"name"<spaces>"value"
        # Lines that do not look like that are ignored instead of silently
        # re-processing the previous field.
        next FIELD unless $field_line =~ /^\s+"(.*?)"\s+"(.*?)"/;
        my ($name, $value) = ($1, $2);

        if ($name =~ /\.mean$/) {              # literal ".mean" suffix
            $means{$name} = $value;
            push @point_names, $name unless $seen_point{$name}++;    #get unique list of points
        }
        if ($name =~ /seq_name/) {
            $seq = $value;
            push @seq_names, $value unless $seen_seq{$value}++;      #get unique list of sequences
        }
        if ($name =~ /Ld_Idx/) {
            $index = $value;
            push @indices, $value unless $seen_index{$value}++;      #get unique list of indices
        }
    }
}
close $DL or die "Can't close $dlFile: $!\n";
#Let's look at the data structure
print "\n\n ===============\n";

# File-level scratch variables. They are no longer used by the report
# below but stay declared because code later in the file may still refer
# to them.
my $sum;
my $max;
my $min;
my $range;

# Print the range (max - min) of the collected .mean values for every
# point / sequence / index combination that actually has data.
# The report line is printed inside the innermost loop, so each line
# describes the index whose values were just examined.
foreach my $point (@point_names) {
    foreach my $seq (@seq_names) {
        foreach my $index (@indices) {
            # Step through the nested hashes with explicit checks so that
            # combinations without data are not autovivified.
            my $values = $points{$index}
                      && $points{$index}{$seq}
                      && $points{$index}{$seq}{$point};
            next unless $values && @{$values};

            # max()/min() work for any numeric values, including all-negative
            # or very large ones, unlike fixed start values.
            my $count  = scalar @{$values};
            my $spread = max(@{$values}) - min(@{$values});
            print "Range of $count elements for $point in $seq with index $index was $spread\n";
        }
    }
}
# Example lookup: the range for one specific point / sequence / index.
# $seq and $index are already declared at file scope earlier in the file,
# so they are assigned here rather than declared again with `my` (a second
# `my` in the same scope would mask the earlier declaration).
my $point = 'point1';
$seq   = 'rev32';
$index = 2;
my $range2 = range($point, $seq, $index);
# Print the result only when there is one.
print $range2 if defined $range2;
# range($point, $seq, $index [, $data])
#
# Computes the range (max - min) of the values stored for one
# point / sequence / index combination and prints a one-line summary of
# it to STDOUT.
#
# Parameters:
#   $point - point name (innermost hash key)
#   $seq   - sequence name (middle hash key)
#   $index - index value (outermost hash key)
#   $data  - optional hash reference laid out as
#            $data->{$index}{$seq}{$point} = [ values ];
#            defaults to the file-level %points
#
# Returns max - min of the stored values. When nothing is stored for the
# combination, a warning is written to STDERR and nothing is returned
# (undef in scalar context).
sub range
{
    my ($point, $seq, $index, $data) = @_;
    $data = \%points unless defined $data;

    # Step through the nested hashes with explicit checks so that a lookup
    # for an unknown combination does not autovivify entries.
    my $by_seq   = $data->{$index};
    my $by_point = $by_seq   ? $by_seq->{$seq}     : undef;
    my $values   = $by_point ? $by_point->{$point} : undef;

    unless ($values && @{$values}) {
        warn "No values recorded for $point in $seq with index $index\n";
        return;
    }

    # max()/min() need no start values, so all-negative or very large
    # inputs are handled correctly; all state is local to this call.
    my $range = max(@{$values}) - min(@{$values});
    print "Range of " . scalar(@{$values})
        . " elements for $point in $seq with index $index was $range\n";
    return $range;
}