Greetings all,
I would use a hash and a for loop...
#!/usr/bin/perl
use strict;
use warnings;
use Dumpvalue;

# Tally every overlapping 1-, 2- and 3-base substring of the DNA sequence
# stored after __DATA__, then dump the tally.

# The sequence is spread over several lines, so read all of DATA and remove
# the line breaks before counting; reading a single line would tally only
# the first row of bases.
my $DNAstr = join '', <DATA>;
$DNAstr =~ s/\s+//g;

my $count_of = count_kmers($DNAstr, 3);

my $d = Dumpvalue->new;
$d->dumpValues($count_of);

# count_kmers($sequence, $max_len)
#
# Counts overlapping substrings of $sequence for every length from 1 up to
# $max_len.
#
#   $sequence - string to scan
#   $max_len  - longest substring length to count
#
# Returns a hash reference mapping each substring to its number of
# occurrences. An empty $sequence yields an empty hash.
sub count_kmers {
    my ($sequence, $max_len) = @_;

    my %tally;
    my $total = length $sequence;

    for my $start (0 .. $total - 1) {
        for my $len (1 .. $max_len) {
            # Stop once the window would run past the end of the sequence,
            # so no partial substrings are counted.
            last if $start + $len > $total;
            $tally{ substr $sequence, $start, $len }++;
        }
    }

    return \%tally;
}
Here is the data I was using (one continuous sequence, wrapped here for display).
__DATA__
AGATAGCGCTGATCGAGAGCTATAGCGATCGATCGGATCGATCGGGATCTTAGCGAAAAGCTCGAT
TTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGATCGATCGCGCTTAGGAAATTCC
CCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGATCGCGCTAGCTCGATCGCGCGCT
AGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAGCTCGCTAGCTCGATCTTCTCGA
TCGCGGCTAGGAGAGCTCGAGCTTCGAGGCTGCGAGGATCGCGGAGAGGAGGATCGAGATCGGATA
GAGATCGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCT
GCTAGCTCGCTAGCTCGATCTTCTCGATCGCGGCTAGGAGAGCTCGAGCTTCGAGGCTGCGAGGAT
CGCGGAGAGGAGGATCGAGATCGGATCGAGAGCTATAGCGATCGATCGGATCGATCGGGATCTTAG
CGAAAAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGATCGATCGCG
CTTAGGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGATCGCGCTAGC
TCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAGCTCGCTAGC
TCGATCTTCTGATAGCGCTGATCGAGAGCTATAGCGATCGATCGGATCGATCGGGATCTTAGCGAA
AAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGATCGATCGCGCTTA
GGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGATCGCGCTAGCTCGA
TCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAGCTCGCTAGCTCGA
TCTTCTCGATCGCGGCTAGGAGAGCTCGAGCTTCGAGGCTGCGAGGATCGCGGAGAGGAGGATCGA
GATCGGATAGAGATCGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGC
TCGCTAGCTGCTAGCTCGCTAGCTCGATCTTCTCGATCGCGGCTAGGAGAGCTCGAGCTTCGAGGC
TGCGAGGATCGCGGAGAGGAGGATCGAGATCGGATCGAGAGCTATAGCGATCGATCGGATCGATCG
GGATCTTAGCGAAAAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGA
TCGATCGCGCTTAGGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGAT
CGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAG
CTCGCTAGCTCGATCTTCGATAGCGCTGATCGAGAGCTATAGCGATCGATCGGATCGATCGGGATC
TTAGCGAAAAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGATCGATCGATCGAT
CGCGCTTAGGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGGATAGAGATCGCGC
TAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTAGCTGCTAGCTCGC
TAGCTCGATCTTCTCGATCGCGGCTAGGAGAGCTCGAGCTTCGAGGCTGCGAGGATCGCGGAGAGG
AGGATCGAGATCGGATAGAGATCGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGC
GCGATAGCTCGCTAGCTGCTAGCTCGCTAGCTCGATCTTCTCGATCGCGGCTAGGAGAGCTCGAGC
TTCGAGGCTGCGAGGATCGCGGAGAGGAGGATCGAGATCGGATCGAGAGCTATAGCGATCGATCGG
ATCGATCGGGATCTTAGCGAAAAGCTCGATTTAGCTAGCTAAAAAAAAAATTTTTTGGGGGCGAGA
TCGATCGATCGATCGCGCTTAGGAAATTCCCCCGCGCGCGGCCCCCGAGATAGGGATAGGATAGGG
ATAGAGATCGCGCTAGCTCGATCGCGCGCTAGATTATATATATATTAGCGGCGCGATAGCTCGCTA
GCTGCTAGCTCGCTAGCTCGATCTTC
Running the script on that data outputs:
0 HASH(0x1824334)
'A' => 535
'AA' => 84
'AAA' => 66
'AAG' => 6
'AAT' => 12
'AG' => 223
'AGA' => 64
'AGC' => 105
'AGG' => 54
'AT' => 228
'ATA' => 81
'ATC' => 111
'ATT' => 36
'C' => 504
'CC' => 48
'CCC' => 36
'CCG' => 12
'CG' => 286
'CGA' => 124
'CGC' => 117
'CGG' => 45
'CT' => 169
'CTA' => 69
'CTC' => 54
'CTG' => 19
'CTT' => 27
'G' => 675
'GA' => 264
'GAA' => 12
'GAG' => 87
'GAT' => 165
'GC' => 270
'GCC' => 6
'GCG' => 117
'GCT' => 147
'GG' => 141
'GGA' => 72
'GGC' => 33
'GGG' => 36
'T' => 490
'TA' => 186
'TAA' => 6
'TAG' => 129
'TAT' => 51
'TC' => 186
'TCC' => 6
'TCG' => 157
'TCT' => 22
'TG' => 25
'TGA' => 4
'TGC' => 15
'TGG' => 6
'TT' => 93
'TTA' => 36
'TTC' => 21
'TTG' => 6
'TTT' => 30
Hope that helps
-InjunJoel
"I do not feel obliged to believe that the same God who endowed us with sense, reason and intellect has intended us to forego their use." -Galileo