## Snippet 1: XOR two equal-length strings byte by byte and print the numeric
## value of every resulting byte. Positions where the strings agree give 0;
## positions where they differ give a non-zero value.
print join '', unpack 'C*',
    'AGGCGGAAGCACCCAACAGCAACAG' ^ 'ACACGCAAAAACCGAAGAGAAAGCG';
## Output: 0460040062000400400200420

####

## Snippet 2: grep in scalar context counts the non-zero bytes of the XOR,
## i.e. the number of positions at which the two strings differ.
print $_ = grep $_, unpack 'C*',
    'AGGCGGAAGCACCCAACAGCAACAG' ^ 'ACACGCAAAAACCGAAGAGAAAGCG';
## Output: 10

####

#! perl -slw
## NOTE(review): the '####' lines appear to separate independent snippets.
## The switches on the #! line above only take effect when this snippet is
## saved as the start of its own file: -s lets -FUZZY=n on the command line
## set $FUZZY, -l appends a newline to every print, -w enables warnings.
##
## Usage: script [-FUZZY=n] probe_file sequence_file
##
## Slides a 25-character window along every line of sequence_file and compares
## it against every line ("25-er") of probe_file, recording each comparison
## that differs in at most $FUZZY positions.
use strict;
use bytes;

$| = 1;                 ## unbuffered output so the "\r" progress line updates
our $FUZZY ||= 10;      ## maximum number of differing positions accepted

my $WIDTH = 25;         ## window length compared (the "25-ers" in the output)

@ARGV >= 2
    or die "usage: $0 [-FUZZY=n] probe_file sequence_file\n";

## Load the probes, one per line, as the keys of %fuz. Each value accumulates
## the packed match records for that probe.
open my $fuz_fh, '< :raw', $ARGV[ 0 ] or die "$ARGV[ 0 ] : $!";
my %fuz;
## Loop on definedness of the line read, not on chomp's return value, so that
## a final line without a trailing newline is still loaded and undef is never
## chomped at end of file.
while( defined( my $line = <$fuz_fh> ) ) {
    chomp $line;
    $fuz{ $line } = '';
}
close $fuz_fh;

print "Loaded ${ \scalar keys %fuz } 25-ers";

open my $seq_fh, '< :raw', $ARGV[ 1 ] or die "$ARGV[ 1 ] : $!";
while( defined( my $seq = <$seq_fh> ) ) {
    chomp $seq;

    ## A line shorter than $WIDTH yields an empty range and is skipped.
    for my $offset ( 0 .. length( $seq ) - $WIDTH ) {
        printf "\rProcessing sequence %5d offset %05d", $., $offset;

        ## The window is the same for every probe, so extract it only once.
        my $window = substr( $seq, $offset, $WIDTH );

        for my $probe ( keys %fuz ) {
            ## Number of non-zero bytes in the XOR == number of mismatches.
            my $m = grep $_, unpack 'C*', $probe ^ $window;
            next if $m > $FUZZY;

            ## This stores the lineno/offset/fuzziness where each 25-er
            ## matched in a compact form for further process; sorting etc.
            ## Each 'n' field is a 16-bit big-endian unsigned value.
            ## NOTE(review): line numbers or offsets above 65535 will not
            ## fit in these fields - confirm the inputs stay below that.
            $fuz{ $probe } .= pack 'nnn', $., $offset, $m;

            ## Or just print out the data to a file.
            # print "\nMatched '$probe' -v- '",
            #     $window,
            #     "' in line: $. @ $offset with fuzziness: ", $m;
        }
    }
}
close $seq_fh;