Category: | Text Processing |
Author/Contact Info | Mastering Algorithms with Perl, with an interface by nothingmuch/nothingmuch@altern.org |
Description: | The Manber-Wu algorithm implemented in Perl. This subroutine generates approximate matchers, each pre-initialized for a certain pattern. You then call the returned anonymous sub with the strings you want to match against. A pure-Perl alternative to String::Approx. |
# mkamatch(PATTERN [, K])
#
# Builds and returns an anonymous sub (a closure) that approximately matches
# PATTERN against the strings it is later called with, using bit-parallel
# state rows (one row per allowed error count 0 .. K).
#
# Parameters:
#   PATTERN - the pattern string; its characters are read with unpack("C*").
#             Only 32 bit masks are tabulated, so pattern positions beyond
#             the 32nd get no mask and a pattern longer than 32 characters
#             never reports a match.
#   K       - number of mismatches tolerated ("kmismatches"). When omitted it
#             defaults to 10% of the pattern length (integer division) plus 1.
#
# Returns: a code ref. Called as $matcher->($text), it returns
#   -1          if no state row reached the match bit, otherwise
#   $i - $m     (or 0 when $i <= $m), where $i is the 0-based index of the
#               text character at which the match bit was first seen and $m
#               is the pattern length. This is an approximate start offset,
#               not an exact one.
#
# Every call of the returned matcher starts from a fresh copy of the initial
# state, so one matcher can safely be applied to many strings.
sub mkamatch { # (pattern,kmismatches)
    use strict;
    use integer;

    my $p = shift;
    my $k = shift;
    my $m = length($p);

    # Default tolerance: 10% of the pattern length, plus one.
    $k = (10 * $m) / 100 + 1 unless defined $k;

    # Single-bit masks for pattern positions 0 .. 31.
    my @p2 = map { 1 << $_ } 0 .. 31;

    # Character table: $t[c] has bit i set when pattern position i holds
    # the character with code c.
    my @t = (0) x 256; # alphabet size
    my $pos = 0;
    $t[$_] |= $p2[$pos++] for unpack("C*", $p);

    # Initial "previous" rows: row j starts with its j lowest bits set.
    # This array is never modified after construction; each matcher call
    # works on its own copy.
    my @r0 = (0);
    $r0[$_ + 1] = $r0[$_] | $p2[$_] for 0 .. $k - 1;

    # Bit that signals "whole pattern consumed"; it does not depend on the
    # text, so compute it once here rather than on every call.
    my $mb = $p2[$m - 1];

    return sub {
        my $key = shift;
        my $n   = length($key);
        my @str = unpack("C*", $key);

        my @r   = @r0;   # previous rows, reset for every call
        my @s   = (0);   # current rows
        my $ret = -1;

        for (my $i = 0; $i < $n; $i++) {
            my $tc = $t[ $str[$i] ];

            # Row 0: exact matching.
            $s[0] = (($s[0] << 1) | 1) & $tc;

            # Rows 1 .. k: each combines the same row of the previous step
            # with the row above it (previous and current step).
            for my $j (1 .. $k) {
                $s[$j] = (($r[$j] << 1) & $tc)
                       | (($r[$j - 1] | $s[$j - 1]) << 1)
                       | $r[$j - 1]
                       | 1;
            }

            if ($s[$k] & $mb) {
                $ret = $i > $m ? $i - $m : 0;
                last;
            }

            @r = @s;
        }

        return $ret;
    };
}
|
Back to
Code Catacombs