1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| | | | | | | | | | | | |?|X|X|X|X|X|X|X|X|X|X|X|X|?| | | | | | | | | | | | |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
####
[ 6:53:31.34] P:\test>406836.pl 406836.25s.1000 406836.seq.1000 >406836.results
Loaded 1000 25-ers at P:\test\406836.pl line 17.
Processing sequence 1000 offset 01238
Processed 1000 sequences at P:\test\406836.pl line 48, <SEQ> line 1000.
Average length: 1016.119 at P:\test\406836.pl line 49, <SEQ> line 1000.
Total fuzzy comparisons: 992119000 at P:\test\406836.pl line 50, <SEQ> line ...
[ 7:22:03.34] P:\test>
##
##
timethis 10000, q[@m = ('acgt'x250 ) =~ m[(acgtacgtacgtacgtacgtacgta)]g ];
timethis 10000: 1 wallclock secs ( 1.16 usr + 0.00 sys = 1.16 CPU) @ 8643.04/s (n=10000)
##
##
#! perl -slw
## Fuzzy-match a set of 25-character probes ("25-ers") against every
## 25-character window of every line of a sequence file.
##
## Usage:  perl -slw SCRIPT [-FUZZY=n] PROBE_FILE SEQUENCE_FILE >RESULTS
##
##   PROBE_FILE     one probe per line
##   SEQUENCE_FILE  one sequence per line
##   -FUZZY=n       maximum number of mismatching characters tolerated
##                  (default 2); parsed by perl's -s switch
##
## Matches are printed to STDOUT; progress and summary go to STDERR.
## The -l switch supplies the newline after each print statement.
use strict;
use warnings;
use bytes;

$| = 1;

## Use a defined-ness test rather than ||= so that an explicit -FUZZY=0
## (exact matches only) is honoured instead of being replaced by the default.
our $FUZZY;
$FUZZY = 2 unless defined $FUZZY;

## Length of each probe and of the sliding window compared against it.
my $PROBE_LEN = 25;

## Load the probes. They are hash keys so duplicates collapse; the values
## are reserved for the optional packed match records (see below).
open my $fuz_fh, '<', $ARGV[ 0 ] or die "$ARGV[ 0 ] : $!";
my %fuz;
while( my $probe = <$fuz_fh> ) {
    chomp $probe;
    $fuz{ $probe } = '';
}
close $fuz_fh;
warn "Loaded ${ \scalar keys %fuz } 25-ers";

## The set of probes never changes after loading, so build the list once
## instead of once per window.
my @probes = keys %fuz;

open my $seq_fh, '< :raw', $ARGV[ 1 ] or die "$ARGV[ 1 ] : $!";
my $totalLen   = 0;
my $fuzzyComps = 0;
my $seqCount   = 0;
while( my $seq = <$seq_fh> ) {
    chomp $seq;
    $seqCount++;
    $totalLen += length $seq;

    ## Sequences shorter than a probe give an empty range and are skipped.
    for my $offset ( 0 .. length( $seq ) - $PROBE_LEN ) {
        my $window = substr $seq, $offset, $PROBE_LEN;
        printf STDERR "\rProcessing sequence %5d offset %05d", $seqCount, $offset;

        for my $fuz ( @probes ) {
            $fuzzyComps++;

            ## XORing two strings yields a NUL byte wherever the characters
            ## agree; tr counts those, so $m is the number of mismatches.
            my $m = $PROBE_LEN - ( ( $fuz ^ $window ) =~ tr[\0][\0] );
            next if $m > $FUZZY;

            ## This stores the lineno/offset/fuzziness where each 25-er matched
            ## in a compact form for further process; sorting etc.
            # $fuz{ $fuz } .= pack 'nnn', $seqCount, $offset, $m;

            ## Or just print out the data to a file.
            print "Matched '$fuz' -v- '",
                $window,
                "' in line: ", $seqCount, ' @ ', $offset,
                ' with fuzziness: ', $m;
        }
    }
}

warn "\n\nProcessed $seqCount sequences";
## Guard the average so an empty sequence file does not divide by zero.
warn "Average length: ", ( $seqCount ? $totalLen / $seqCount : 0 );
warn "Total fuzzy comparisons: ", $fuzzyComps;
close $seq_fh;