use strict;
use warnings;
use Benchmark qw(timethese);
sub popcount1 {
my $x = shift;
my $count;
for ($count = 0; $x; ++$count) { $x &= $x - 1 }
return $count;
}
sub popcount2 {
return sprintf('%b', shift) =~ tr/1//;
}
# Update: unpack('%b*', pack('J', shift)) is better (see Rosetta ref. below)
# This works but is slower: unpack('b*', pack('J', shift)) =~ tr/1//;
sub popcount3 {
return unpack('%32b*', pack('Q', shift));
}
my $start = 2**32 - 42;
my $end = $start + 1000000;
print "sanity test for correctness\n";
for my $i (0 .. 256, $start .. $end) {
my $one = popcount1($i);
my $two = popcount2($i);
my $three = popcount3($i);
# print "$i: $one $two $three\n";
$one == $two or die;
$one == $three or die;
}
timethese 50, {
One => sub { popcount1($_) for $start .. $end },
Two => sub { popcount2($_) for $start .. $end },
Three => sub { popcount3($_) for $start .. $end },
};
####
sanity test for correctness
Benchmark: timing 50 iterations of One, Three, Two...
One: 29 wallclock secs (28.41 usr + 0.00 sys = 28.41 CPU) @ 1.76/s (n=50)
Three: 10 wallclock secs (10.00 usr + 0.00 sys = 10.00 CPU) @ 5.00/s (n=50)
Two: 10 wallclock secs (10.03 usr + 0.00 sys = 10.03 CPU) @ 4.98/s (n=50)
##
##
// Make GCC __builtin_popcountll available with MSVC
// (used to calculate the Hamming weight efficiently)
#ifdef _MSC_VER
#include
#define __builtin_popcountll __popcnt64
#define __builtin_popcount __popcnt
#endif