Here's a small benchmark to highlight the speed differences between some of the chosen strategies.
#! /usr/bin/perl -wl
use Benchmark 'cmpthese';
# Alphabet for the test data: the first 20 lowercase letters ('a' .. 't').
my @v = ('a' .. 'z')[0 .. 19];

# One million records, each the concatenation of four distinct random
# letters, in whatever order the hash hands its keys back.
my @b;
while (@b < 1e6) {
    my %picked;
    # Keep drawing until four different letters have been collected;
    # a repeated draw simply lands on an existing key.
    while (keys(%picked) != 4) {
        $picked{ $v[rand @v] } = undef;
    }
    push @b, join('', keys %picked);
}
# Per record, the letter indexes (0 = 'a' ... 19 = 't') of its four letters.
my @AoA;
for my $record (@b) {
    push @AoA, [ map { $_ - 97 } unpack 'C*', $record ];
}

# One bit vector per letter of @v, in @v order: bit i is set when record i
# of @b contains that letter.
my @vectors = map {
    my $letter = $_;
    pack 'b*', join '', map { index($_, $letter) < 0 ? '0' : '1' } @b;
} @v;

# All letter indexes flattened into one byte string, four bytes per record.
my $AoA = join '', map { pack 'C*', @$_ } @AoA;
# Compare three ways of counting the records that contain none of the four
# letters of the first record.  The negative count asks Benchmark to run each
# candidate for at least 5 CPU seconds; the last expression of every sub is
# the resulting count.
cmpthese -5, {
    # OR together the per-letter bit vectors of the letters to avoid: every
    # set bit then marks a record containing at least one of those letters.
    bitmap => sub {
        my $res = '';
        $res |= $vectors[$_] for @{ $AoA[0] };
        # "%32b*" is a 32-bit checksum over the bits, i.e. the number of
        # set bits; subtracting it from the record count leaves the records
        # with none of the letters.
        int(@AoA) - unpack "%32b*", $res;
    },
    # Scan the flattened byte string in 4-byte steps and count the groups
    # made up solely of bytes outside the omitted set.
    regex1 => sub {
        my $omit = $AoA[0];
        # The negated class must hold exactly the four omitted byte codes.
        # Any extra character would be excluded as well; a newline, for
        # instance, is byte 10, which is the index of the letter 'k'.
        my $pat = '[^' . quotemeta( pack( 'C*', @$omit ) ) . ']{4}';
        # \G plus whole 4-byte skips keep every match aligned on a record
        # boundary; /s lets '.' match byte 10 as ordinary data.
        my $re = qr/\G(?:.{4})*?($pat)/s;
        int( () = $AoA =~ /$re/g );
    },
    # Test every record string against a class of the first record's letters.
    regex2 => sub {
        # Copy to a plain scalar so the interpolation cannot be read as
        # "$b" followed by a character class.
        my $letters = $b[0];
        my $pat = qr/[$letters]/;
        my $cnt = 0;
        $cnt += !/$pat/ for @b;
        $cnt;
    },
};