#!/usr/bin/perl
use warnings;
use strict;
use autodie;
use List::Util;
# Script driver: the input path and the substring to search for are fixed here.
my $file = 'sequence.txt';
my $goal = 'GATC';
# process_file() opens $file and returns the array reference built by
# process_lines() for that file handle.
my $stats = process_file( $file, $goal );
# do something with stats ...
# ----------------------------------------
# Subroutines
#
# process_file( $file, $goal )
#
# Opens $file for reading, passes the handle and $goal on to
# process_lines(), closes the handle again and returns the result of
# process_lines() unchanged.
#
# No explicit error checks are made on open/close here; the file-level
# "use autodie" makes those built-ins throw on failure.
sub process_file {
    my ( $file, $goal ) = @_;

    open( my $fh, '<', $file );
    my $result = process_lines( $fh, $goal );
    close($fh);

    return $result;
}
# process_lines( $infh, $goal )
#
# Reads the handle $infh line by line, strips the trailing newline from
# each line and passes it, together with $goal, to process_one_line().
#
# Returns a reference to an array holding one entry per line read: the
# value returned by process_one_line(), or 0 when that value is false
# (including undef).
sub process_lines {
    my ( $infh, $goal ) = @_;
    my @stats;

    # Assignment, not subtraction: the line must actually be stored in
    # $line. Perl wraps this form of the condition in an implicit
    # defined() test, so a line consisting of "0" does not end the loop.
    while ( my $line = <$infh> ) {
        chomp $line;
        my $linestats = process_one_line( $line, $goal );
        push @stats, $linestats || 0;
    }

    return \@stats;
}
# process_one_line( $line, $goal )
#
# Collects the start offsets of every non-overlapping occurrence of $goal
# in $line (scanning left to right with index()) and passes them, along
# with the length of $goal, to calc_avg_distance().
#
# Returns the value of calc_avg_distance() for those offsets. Returns
# nothing (undef in scalar context) when $goal is undefined or empty.
sub process_one_line {
    my ( $line, $goal ) = @_;

    # index() reports a match for an empty needle at every position, so
    # the search loop below would never reach -1.
    return unless defined $goal and length $goal;

    my $goal_len = length $goal;
    my @occurences;
    my $offset = 0;

    SEEK:
    while (1) {
        my $idx = index( $line, $goal, $offset );
        last SEEK if $idx == -1;    # no more occurences
        push @occurences, $idx;

        # Continue searching behind the match just found; restarting at
        # $idx itself would return the same position again and again.
        $offset = $idx + $goal_len;
    }

    return calc_avg_distance( \@occurences, $goal_len );
}
# calc_avg_distance( \@occurences, $len )
#
# Computes the average gap between consecutive entries of @occurences,
# where each gap is ( next - previous ) - $len.
#
# Parameters:
#   $occurences - reference to an array of numeric positions
#   $len        - amount subtracted from every position difference
#
# Returns the arithmetic mean of the gaps. Returns nothing (undef in
# scalar context) when $occurences is false or holds fewer than two
# positions, since no gap exists in that case.
#
# The referenced array is only read, never modified.
sub calc_avg_distance {
    my ( $occurences, $len ) = @_;

    # A gap needs two endpoints; this also rules out a division by zero.
    return unless $occurences and @$occurences > 1;

    my $last_index = $#{$occurences};
    my $sum        = 0;

    # Walk by index rather than shifting values off the array, so that a
    # position of 0 cannot end the loop early and the caller's data stays
    # intact.
    for my $i ( 1 .. $last_index ) {
        $sum += ( $occurences->[$i] - $occurences->[ $i - 1 ] ) - $len;
    }

    # $last_index equals the number of gaps (element count minus one).
    return $sum / $last_index;
}
# As Occam said: "Entia non sunt multiplicanda praeter necessitatem."