# XOR two equal-length strings byte by byte and print the decimal value of
# every resulting byte, run together as a single string.  A position where
# the two strings hold the same character yields 0; any other position
# yields a non-zero value.
my @xor_codes = unpack 'C*',
    'AGGCGGAAGCACCCAACAGCAACAG' ^ 'ACACGCAAAAACCGAAGAGAAAGCG';
print join '', @xor_codes;
0460040062000400400200420
####
# Count the positions at which the two strings differ: XOR them byte by
# byte and count the non-zero bytes of the result.  Assigning grep's result
# to a scalar makes it return a count; as in the one-liner form, that count
# is left in $_ and then printed.
$_ = grep { $_ } unpack 'C*',
    'AGGCGGAAGCACCCAACAGCAACAG' ^ 'ACACGCAAAAACCGAAGAGAAAGCG';
print;
10
##
##
#! perl -slw
use strict;
use bytes;

## Fuzzy-match a set of 25-character patterns against every 25-character
## window of every line of a sequence file.
##
## Usage:  script [-FUZZY=n] patterns_file sequences_file
##
##   patterns_file   one pattern per line ($ARGV[0])
##   sequences_file  one sequence per line ($ARGV[1])
##   -FUZZY=n        maximum number of mismatching positions that still
##                   counts as a match (parsed by the -s switch; default 10)
##
## For every match, a packed record (line number, offset, mismatch count)
## is appended to that pattern's entry in %fuz.

## Unbuffered output so the "\r" progress line is shown immediately.
$| = 1;

## Only apply the default when no value was supplied, so that -FUZZY=0
## (exact matches only) is honoured instead of being replaced by 10.
our $FUZZY;
$FUZZY = 10 unless defined $FUZZY;

open my $fuz_fh, '< :raw', $ARGV[ 0 ] or die "$ARGV[ 0 ] : $!";

## Pattern => packed match records (initially empty).
my %fuz;

## Loop on the success of the read, not on chomp's return value, so a final
## line that lacks a trailing newline is still loaded.
while( defined( my $pattern = <$fuz_fh> ) ) {
    chomp $pattern;
    $fuz{ $pattern } = '';
}
close $fuz_fh;

print "Loaded ${ \scalar keys %fuz } 25-ers";

open my $seq_fh, '< :raw', $ARGV[ 1 ] or die "$ARGV[ 1 ] : $!";

## The set of keys never changes below (only the values are appended to),
## so build the list once rather than once per offset.
my @patterns = keys %fuz;

while( my $seq = <$seq_fh> ) {
    chomp $seq;

    ## Lines shorter than 25 characters give an empty range and are skipped.
    for my $offset ( 0 .. length( $seq ) - 25 ) {
        printf "\rProcessing sequence %5d offset %05d", $., $offset;

        ## The window is the same for every pattern; extract it once.
        my $window = substr( $seq, $offset, 25 );

        for my $fuz ( @patterns ) {
            ## String XOR gives a zero byte wherever the two strings agree,
            ## so the number of non-zero bytes is the number of mismatches.
            my $m = grep $_, unpack 'C*', $fuz ^ $window;

            if( $m <= $FUZZY ) {
                ## This stores the lineno/offset/fuzziness where each 25-er
                ## matched in a compact form for further process; sorting etc.
                ## NOTE: 'n' is an unsigned 16-bit field, so line numbers or
                ## offsets above 65535 do not fit in this record format.
                $fuz{ $fuz } .= pack 'nnn', $., $offset, $m;

                ## Or just print out the data to a file.
                # print "\nMatched '$fuz' -v- '",
                # $window,
                # "' in line: $. @ $offset with fuzziness: ", $m;
            }
        }
    }
}
close $seq_fh;