#!/usr/bin/perl # # analyze_bloom.pl # # Experiment with Bloom filter variant by BUk # use strict; use warnings; use autodie; use Bit::Vector; use Math::BigInt; my $VecSizeBits=shift or die "Missing # of bits!"; my $num_vecs =shift or die "missing # of vectors!"; my $iterations =shift or die "missing iteration count!"; if ($iterations<33) { $iterations = 1<<$iterations; } my $VecSize = 1 << $VecSizeBits; my $VecMask = $VecSize-1; print "NumVecs=$num_vecs\n"; my @col2numbits = (0, 1); while (@col2numbits < 1<<$num_vecs) { @col2numbits = (@col2numbits, map { $_+1 } @col2numbits); } my @cnt_colbits = (0) x (1+$num_vecs); my %report_cnts = map { (1<<$_)=>0 } (8 .. 24); my @report_cnts = sort { $a<=>$b } keys %report_cnts; my $next_rpt = shift @report_cnts; my @vectors; my @cnt_bits = (0) x $num_vecs; $vectors[$_] = Bit::Vector::new(0,$VecSize) for 0 .. $num_vecs-1; { my @HDR = ( q{ avg avg calc }, q{BI Samples one bits pct 1s pct 1s }, q{-- ---------- -------- ------ ------ }, ); my $t = int(($num_vecs+1) * 8 - length(" # collisions "))/2; $HDR[0] .= "-"x$t . " # collisions " . "-"x$t . " expected"; $HDR[1] .= " all "; $HDR[1] .= sprintf(" % 2u ", $_) for 1 .. $num_vecs; $HDR[1] .= " p(coll) false pos"; $HDR[2] .= ("-"x7 . " ") x ($num_vecs+1) . "------------ -------------"; print join("\n", @HDR, ""); } my $cnt_coll=0; my $cnt_coll_f=0; my $cnt_rex=0; my $cum_collisions=0; while (++$cnt_rex < $iterations) { my @h = map { int(rand($VecSize)) } 0 .. $VecMask; my $bits=32; my $collision=0; for my $i (0 .. $num_vecs-1) { if ($vectors[$i]->bit_test($h[$i])) { $collision |= 1<<$i } else { ++$cnt_bits[$i] } $vectors[$i]->Bit_On($h[$i]); } if ($collision) { ++$cnt_colbits[$col2numbits[$collision]]; ++$cnt_colbits[0]; # 'any' collision ++$cnt_coll; if ($collision == $VecMask) { ++$cnt_coll_f; } } my $expected = (1-exp(-$cnt_rex/$VecSize))**$num_vecs; $cum_collisions += $expected; if ($cnt_rex == $next_rpt) { $next_rpt = shift @report_cnts; my $ttl_bits=0; $ttl_bits += $_ for @cnt_bits; my $avg_bits = $ttl_bits / $num_vecs; my $avg_pct_full = 100.0 * $ttl_bits / ($num_vecs*$VecSize); my $calc_pct_full = 100.0 * (1-exp(-$cnt_rex/$VecSize)); printf "% 2u %10u: % 7u %6.2f %6.2f", $VecSizeBits, $cnt_rex, $avg_bits, $avg_pct_full, $calc_pct_full; print " ", join(" ", map {sprintf"% 7u",$_} @cnt_colbits ); printf " %.10f", $expected; printf " %.10f", $cum_collisions; print "\n"; } }