#! perl -slw
## Fuzzy-match a set of 25-character strings ("25-ers") against every
## 25-character window of every line in a sequence file.
##
## Usage:  perl -s SCRIPT [-FUZZY=n] 25ERS_FILE SEQUENCE_FILE >RESULTS
##
## The switches on the #! line are significant and must stay on line 1:
##   -s  turns a -FUZZY=n command-line switch into $main::FUZZY
##   -l  makes every print append a newline
##   -w  enables warnings
use strict;
use bytes;

$| = 1;

## Maximum number of mismatching characters still reported as a match.
## Tested with defined() rather than ||= so that -FUZZY=0 (exact matches
## only) is honoured instead of silently becoming 2.
our $FUZZY;
$FUZZY = 2 unless defined $FUZZY;

die "usage: $0 [-FUZZY=n] 25-ers-file sequence-file\n" unless @ARGV >= 2;

## Load the 25-ers, one per line, as hash keys (this also de-duplicates them).
## The values start empty; they are only used by the optional packed-record
## line further down.
## Bareword handles are kept on purpose: warn/die messages include the
## handle name ("<SEQ> line N").
open FUZ, '<', $ARGV[ 0 ] or die "$ARGV[ 0 ] : $!";
my %fuz;
while( <FUZ> ) {
    chomp;
    $fuz{ $_ } = '';
}
close FUZ;
warn "Loaded ${ \scalar keys %fuz } 25-ers";

open SEQ, '< :raw', $ARGV[ 1 ] or die "$ARGV[ 1 ] : $!";

## The key list never changes while scanning, so build it once instead of
## once per offset.
my @fuzKeys    = keys %fuz;
my $totalLen   = 0;
my $fuzzyComps = 0;

while( my $seq = <SEQ> ) {
    chomp $seq;
    $totalLen += length $seq;

    ## Slide a 25-character window along the sequence. For sequences
    ## shorter than 25 characters the range is empty and nothing is compared.
    for my $offset ( 0 .. length( $seq ) - 25 ) {
        ## Reference to the window, so it is dereferenced rather than
        ## re-extracted for each comparison.
        my $ssref = \substr( $seq, $offset, 25 );
        printf STDERR "\rProcessing sequence %5d offset %05d", $., $offset;

        for my $fuz ( @fuzKeys ) {
            $fuzzyComps++;

            ## XORing two strings yields "\0" wherever the characters are
            ## equal; tr counts those, so $m is the number of mismatches.
            ## NOTE(review): assumes every key is exactly 25 characters long;
            ## keys of another length are not rejected -- confirm input.
            my $m = 25 - ( $fuz ^ $$ssref ) =~ tr[\0][\0];
            if( $m <= $FUZZY ) {
                ## This stores the lineno/offset/fuzziness where each 25-er matched
                ## in a compact form for further process; sorting etc.
                # $fuz{ $fuz } .= pack 'nnn', $., $offset, $m;

                ## Or just print out the data to a file.
                print "Matched '$fuz' -v- '", $$ssref, "' in line: $. @ $offset with fuzziness: ", $m;
            }
        }
    }
}

warn "\n\nProcessed $. sequences";
## Guard against an empty sequence file ($. == 0), which would otherwise die
## with "Illegal division by zero".
warn "Average length: ", ( $. ? $totalLen / $. : 0 );
warn "Total fuzzy comparisons: ", $fuzzyComps;
close SEQ;

__END__

Supplementary material that accompanied the script, kept verbatim
(perl ignores everything after __END__).

1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| | | | | | | | | | | | |?|X|X|X|X|X|X|X|X|X|X|X|X|?| | | | | | | | | | | | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

####

[ 6:53:31.34] P:\test>406836.pl 406836.25s.1000 406836.seq.1000 >406836.results
Loaded 1000 25-ers at P:\test\406836.pl line 17.
Processing sequence 1000 offset 01238
Processed 1000 sequences at P:\test\406836.pl line 48, line 1000.
Average length: 1016.119 at P:\test\406836.pl line 49, line 1000.
Total fuzzy comparisons: 992119000 at P:\test\406836.pl line 50, line ...
[ 7:22:03.34] P:\test>

####

timethis 10000, q[@m = ('acgt'x250 ) =~ m[(acgtacgtacgtacgtacgtacgta)]g ];
timethis 10000: 1 wallclock secs ( 1.16 usr + 0.00 sys = 1.16 CPU) @ 8643.04/s (n=10000)