#!/usr/bin/perl
use strict;
use warnings;
# Report groups of input strings that agree everywhere except at one position.
#
# Every line of the data file is indexed under each "wildcard" form of
# itself, i.e. a copy with exactly one character replaced by '*'. Lines
# that end up under the same wildcard key can differ only at the starred
# position (identical lines also share all of their keys, so duplicates
# in the input are grouped together as well).
my $data_file = 'DNA_strings.dat';
my %lines_for;    # wildcard key => array ref of the lines that produced it

open my $FH, '<', $data_file or die "Cannot open $data_file: $!";
while (my $line = <$FH>) {
    $line =~ s/\s+$//;    # drop the newline and any other trailing whitespace

    # An empty line yields the empty range 0 .. -1 and contributes nothing.
    for my $pos (0 .. length($line) - 1) {
        my $key = $line;
        substr($key, $pos, 1) = '*';
        push @{ $lines_for{$key} }, $line;
    }
}
close $FH;

# Keys are emitted in plain string order; members keep their input order.
for my $key (sort keys %lines_for) {
    my @group = @{ $lines_for{$key} };

    # Two members are enough to form a match. The earlier test compared the
    # last index ($#... > 1), which required three members and silently
    # dropped every pair.
    next if @group < 2;

    print "$key\t", join(",\n\t\t", @group), "\n";
}
####
$ cat DNA_strings.dat
CTGAG
CGAGT
ACGCT
TATAC
CTGAA
GGAGC
ATACA
AAAAA
ACAAA
AGAAA
AATAA
AAAGA
ACCAA
AGCAC
CCACG
GCCAT
AGCAA
GGCAT
GTTTG
$ perl DNA_cmp.pl
A*AAA: AAAAA, ACAAA, AGAAA
A*CAA: ACCAA, AGCAA
AA*AA: AAAAA, AATAA
AAA*A: AAAAA, AAAGA
AC*AA: ACAAA, ACCAA
AG*AA: AGAAA, AGCAA
AGCA*: AGCAC, AGCAA
CTGA*: CTGAG, CTGAA
G*CAT: GCCAT, GGCAT
##
##
$ perl gen_random_strings.pl 100000 20 20 ACGT >DNA_strings.dat
$ time perl DNA_cmp.pl
real 0m47.659s
user 0m46.395s
sys 0m1.252s