http://qs321.pair.com?node_id=205241
Category: Text Processing
Author/Contact Info Mastering Algorithms with Perl, with an interface by nothingmuch/nothingmuch@altern.org
Description: The Wu-Manber approximate string matching algorithm implemented in Perl. This subroutine generates approximate matchers, pre-initialized for a given pattern. You then pass the strings to be matched to the returned anonymous sub. A pure-Perl alternative to String::Approx.
# mkamatch(PATTERN [, K])
#
# Builds and returns an anonymous sub that approximately matches strings
# against PATTERN using bit-parallel state vectors (one bit per pattern
# position, one vector per allowed error count).
#
# Parameters:
#   PATTERN - the string to look for; at most 32 characters, because the
#             bit table used for the state vectors has 32 entries.
#   K       - maximum number of differences tolerated.  When omitted it
#             defaults to 10% of the pattern length (integer division)
#             plus one.
#
# Returns a code reference.  Called as $matcher->($string) it returns -1
# when no approximate match is found.  Otherwise it returns an approximate
# start offset: the index of the character at which the match was detected
# minus the pattern length, or 0 if that index does not exceed the pattern
# length.
#
# Croaks if PATTERN is longer than 32 characters.
#
# Every call of the returned matcher starts from a fresh copy of the
# initial state, so one matcher can be reused safely on many strings.
sub mkamatch { # (pattern,kmismatches)
    use integer;
    my $p = shift;
    my $k = shift;
    my $m = length($p);

    # A pattern longer than the bit table could never set its accept bit,
    # so the matcher would silently report "no match" for every input.
    if ($m > 32) {
        require Carp;
        Carp::croak("mkamatch: pattern is $m characters long, at most 32 are supported");
    }

    $k = (10 * $m) / 100 + 1 unless defined $k;

    my @p2 = map { 1 << $_ } 0 .. 31;   # $p2[$n] has only bit $n set

    # $t[$code] has bit $n set when the pattern's character at offset $n
    # unpacks (with "C") to $code.
    my @t = (0) x 256; # alphabet size
    my $i = -1;
    $t[$_] |= $p2[++$i] for (unpack("C*",$p));

    # Initial state vectors: the vector for $j allowed errors starts with
    # its lowest $j bits set.  This array is never modified after this
    # point; each matcher call works on its own copy.
    my @r_init = (0);
    $r_init[$_ + 1] = $r_init[$_] | $p2[$_] for 0 .. $k - 1;

    # Bit that is set in a state vector once the whole pattern is matched.
    my $mb = $p2[$m-1];

    return sub {
        my $key = shift;
        my @str = unpack("C*",$key);

        my @r = @r_init;    # state after the previous character
        my @s = (0);        # state after the current character

        for my $i (0 .. length($key) - 1) {
            my $tc = $t[$str[$i]];

            # Zero errors: extend the exact match by one character.
            $s[0] = (($s[0] << 1) | 1) & $tc;

            # $j errors: extend a $j-error match by one matching character,
            # or spend one more error on top of a ($j-1)-error state.
            for my $j (1 .. $k) {
                $s[$j] = (($r[$j] << 1) & $tc)
                       | (($r[$j-1] | $s[$j-1]) << 1)
                       | $r[$j-1]
                       | 1;
            }

            return ($i > $m ? $i - $m : 0) if $s[$k] & $mb;
            @r = @s;
        }

        return -1;
    };
}