my $expected = (1-exp(-$cnt_rex/$VecSize))**$num_vecs;
####
$cum_collisions += $expected;
##
##
$ ./analyze_bloom.pl 14 3 17
NumVecs=3
avg avg calc --------- # collisions --------- expected
BI Samples one bits pct 1s pct 1s all 1 2 3 p(coll) false pos
-- ---------- -------- ------ ------ ------- ------- ------- ------- ------------ -------------
14 256: 253 1.55 1.55 7 7 0 0 0.0000037264 0.0002414791
14 512: 504 3.08 3.08 22 22 0 0 0.0000291236 0.0037774699
14 1024: 993 6.06 6.06 89 87 2 0 0.0002224011 0.0581208314
14 2048: 1926 11.76 11.75 336 309 26 1 0.0016223627 0.8630370129
14 4096: 3637 22.20 22.12 1185 1005 170 10 0.0108230772 11.9418752510
14 8192: 6414 39.15 39.35 3986 2802 1022 162 0.0609161842 144.4751474482
14 16384: 10300 62.87 63.21 11216 5615 4166 1435 0.2525804578 1374.7071068207
14 32768: 14151 86.37 86.47 27361 7789 10655 8917 0.6464623148 8946.4018915720
14 65536: 16105 98.30 98.17 60117 8201 15657 36259 0.9460533270 36391.1792023071
$ ./analyze_bloom.pl 16 4 18
NumVecs=4
avg avg calc ------------- # collisions ------------- expected
BI Samples one bits pct 1s pct 1s all 1 2 3 4 p(coll) false pos
-- ---------- -------- ------ ------ ------- ------- ------- ------- ------- ------------ ----------
16 256: 255 0.39 0.39 3 3 0 0 0 0.0000000002 0.0000000120
16 512: 510 0.78 0.78 5 5 0 0 0 0.0000000037 0.0000003784
16 1024: 1018 1.55 1.55 21 21 0 0 0 0.0000000578 0.0000119226
16 2048: 2021 3.08 3.08 107 106 1 0 0 0.0000008960 0.0003713063
16 4096: 3979 6.07 6.06 444 423 20 1 0 0.0000134746 0.0112771478
16 8192: 7749 11.82 11.75 1615 1466 141 8 0 0.0001906326 0.3256727521
16 16384: 14569 22.23 22.12 5843 4608 1066 158 11 0.0023940562 8.5228328538
16 32768: 25931 39.57 39.35 18293 11116 5475 1529 173 0.0239686508 185.0883611550
16 65536: 41690 63.61 63.21 49093 18714 17246 10354 2779 0.1596613002 2882.5123468408
16 131072: 56843 86.74 86.47 114278 21592 29488 36446 26752 0.5589731543 26626.3779623356
##
##
#!/usr/bin/perl
#
# analyze_bloom.pl
#
# Experiment with Bloom filter variant by BUk
#
use strict;
use warnings;
use autodie;
use Bit::Vector;
use Math::BigInt;
my $VecSizeBits=shift or die "Missing # of bits!";
my $num_vecs =shift or die "missing # of vectors!";
my $iterations =shift or die "missing iteration count!";
if ($iterations<33) {
$iterations = 1<<$iterations;
}
my $VecSize = 1 << $VecSizeBits;
my $VecMask = $VecSize-1;
print "NumVecs=$num_vecs\n";
my @col2numbits = (0, 1);
while (@col2numbits < 1<<$num_vecs) {
@col2numbits = (@col2numbits, map { $_+1 } @col2numbits);
}
my @cnt_colbits = (0) x (1+$num_vecs);
my %report_cnts = map { (1<<$_)=>0 } (8 .. 24);
my @report_cnts = sort { $a<=>$b } keys %report_cnts;
my $next_rpt = shift @report_cnts;
my @vectors;
my @cnt_bits = (0) x $num_vecs;
$vectors[$_] = Bit::Vector::new(0,$VecSize) for 0 .. $num_vecs-1;
{
my @HDR = (
q{ avg avg calc },
q{BI Samples one bits pct 1s pct 1s },
q{-- ---------- -------- ------ ------ },
);
my $t = int(($num_vecs+1) * 8 - length(" # collisions "))/2;
$HDR[0] .= "-"x$t . " # collisions " . "-"x$t . " expected";
$HDR[1] .= " all ";
$HDR[1] .= sprintf(" % 2u ", $_) for 1 .. $num_vecs;
$HDR[1] .= " p(coll) false pos";
$HDR[2] .= ("-"x7 . " ") x ($num_vecs+1) . "------------ -------------";
print join("\n", @HDR, "");
}
my $cnt_coll=0;
my $cnt_coll_f=0;
my $cnt_rex=0;
my $cum_collisions=0;
while (++$cnt_rex < $iterations) {
my @h = map { int(rand($VecSize)) } 0 .. $VecMask;
my $bits=32;
my $collision=0;
for my $i (0 .. $num_vecs-1) {
if ($vectors[$i]->bit_test($h[$i])) {
$collision |= 1<<$i
}
else {
++$cnt_bits[$i]
}
$vectors[$i]->Bit_On($h[$i]);
}
if ($collision) {
++$cnt_colbits[$col2numbits[$collision]];
++$cnt_colbits[0]; # 'any' collision
++$cnt_coll;
if ($collision == $VecMask) {
++$cnt_coll_f;
}
}
my $expected = (1-exp(-$cnt_rex/$VecSize))**$num_vecs;
$cum_collisions += $expected;
if ($cnt_rex == $next_rpt) {
$next_rpt = shift @report_cnts;
my $ttl_bits=0;
$ttl_bits += $_ for @cnt_bits;
my $avg_bits = $ttl_bits / $num_vecs;
my $avg_pct_full = 100.0 * $ttl_bits / ($num_vecs*$VecSize);
my $calc_pct_full = 100.0 * (1-exp(-$cnt_rex/$VecSize));
printf "% 2u %10u: % 7u %6.2f %6.2f",
$VecSizeBits, $cnt_rex, $avg_bits, $avg_pct_full, $calc_pct_full;
print " ",
join(" ",
map {sprintf"% 7u",$_} @cnt_colbits
);
printf " %.10f", $expected;
printf " %.10f", $cum_collisions;
print "\n";
}
}