$ diff pm_11113750_orig.pl pm_11113750_a.pl
26a27
> UV arg;
30,31c31,33
< IV neg_t = -(SvIV(ST(i)));
< IV last_set = SvUV(ST(i)) & neg_t;
---
> arg = SvUV(ST(i));
> IV neg_t = -arg;
> IV last_set = arg & neg_t;
42c44
< if(!(SvUV(ST(i)) & invalid_bits)) count++;
---
> if(!(arg & invalid_bits)) count++;
####
$ PERL5OPT= perl pm_11113750_orig.pl
Benchmark: timing 5 iterations of uv_fits_double3, uv_fits_double_bitfiddle...
uv_fits_double3: 7 wallclock secs ( 6.70 usr + 0.00 sys = 6.70 CPU) @ 0.75/s (n=5)
uv_fits_double_bitfiddle: 17 wallclock secs (17.36 usr + 0.00 sys = 17.36 CPU) @ 0.29/s (n=5)
46875 46875
$ PERL5OPT= perl pm_11113750_a.pl
Benchmark: timing 5 iterations of uv_fits_double3, uv_fits_double_bitfiddle...
uv_fits_double3: 7 wallclock secs ( 6.75 usr + 0.00 sys = 6.75 CPU) @ 0.74/s (n=5)
uv_fits_double_bitfiddle: 6 wallclock secs ( 6.67 usr + 0.00 sys = 6.67 CPU) @ 0.75/s (n=5)
46875 46875
##
##
/*
 * Count the integers i in the half-open range [the_min, the_max) that can be
 * held exactly in a double: once trailing zero bits are shifted out, the
 * remaining value must not exceed 2**53.
 *
 * the_min, the_max: range bounds, each read once with SvUV().
 * Returns the number of fitting values as an int.
 *
 * Assumes UV is 64 bits wide (the limit below is built with a 53-bit shift).
 */
int uv_fits_double3x(SV* the_min, SV* the_max) {
    /*
     * dXSARGS is intentionally absent: this function has a fixed argument
     * list and never uses ST()/items, and the macro pops a mark-stack entry
     * as a side effect.
     */
    const UV i_min = SvUV(the_min);
    const UV i_max = SvUV(the_max);
    const UV limit = (UV)1 << 53;   /* same bound as "< 9007199254740993" */
    UV i;
    int count = 0;

    for (i = i_min; i < i_max; i++) {
        UV arg = i;

        if (arg == 0) {
            /* Zero is exactly representable; it must also bypass the
             * shift loop below, which would never terminate for it. */
            count++;
            continue;
        }

        /* Drop trailing zero bits: they only affect the exponent. */
        while (!(arg & 1)) arg >>= 1;

        if (arg <= limit) count++;
    }
    return count;
}
/*
 * Loop-free variant of the "fits in a double" check, applied to every
 * integer i in the half-open range [the_min, the_max).
 *
 * For each i the lowest set bit is isolated and shifted up by 53 places;
 * every bit at or above that position lies outside the 53-bit window that
 * starts at the lowest set bit, so i fits iff none of those bits is set.
 *
 * the_min, the_max: range bounds, each read once with SvUV().
 * Returns the number of fitting values as an int.
 *
 * Assumes UV is 64 bits wide.
 */
int uv_fits_double_bitfiddle_3x(SV* the_min, SV* the_max) {
    /*
     * dXSARGS is intentionally absent: this function has a fixed argument
     * list and never uses ST()/items, and the macro pops a mark-stack entry
     * as a side effect.
     */
    const UV i_min = SvUV(the_min);
    const UV i_max = SvUV(the_max);
    UV i;
    int count = 0;

    for (i = i_min; i < i_max; i++) {
        /* i & -i keeps only the lowest set bit (0 when i is 0). */
        const UV lowest_set = i & -i;

        /*
         * Kept unsigned on purpose: when the lowest set bit is bit 10 this
         * is 2**63, and subtracting 1 from that as a signed value would
         * overflow.  When the shift pushes the bit out of the word the
         * result is 0, and 0 - 1 wraps to all ones (every bit is valid).
         */
        const UV smallest_invalid = lowest_set << 53;
        const UV valid_bits = smallest_invalid - 1;
        const UV invalid_bits = ~valid_bits;

        if (!(i & invalid_bits)) count++;
    }
    return count;
}
##
##
$ PERL5OPT= perl pm_11113750_x.pl
Name "main::count4" used only once: possible typo at pm_11113750_x.pl line 147.
=== Test 1: roughly equivalent speed
Benchmark: timing 5 iterations of uv_fits_double3, uv_fits_double_bitfiddle...
uv_fits_double3: 28 wallclock secs (27.22 usr + 0.08 sys = 27.30 CPU) @ 0.18/s (n=5)
uv_fits_double_bitfiddle: 27 wallclock secs (26.72 usr + 0.00 sys = 26.72 CPU) @ 0.19/s (n=5)
33046878 33046878 (48000004)
=== TEST 2: twiddle has an advantage if we don't use SvUV()
Benchmark: timing 10000 iterations of bitfiddle_3x, double3x, empty_func, empty_loop...
bitfiddle_3x: 18 wallclock secs (18.39 usr + 0.00 sys = 18.39 CPU) @ 543.74/s (n=10000)
double3x: 30 wallclock secs (29.55 usr + 0.00 sys = 29.55 CPU) @ 338.44/s (n=10000)
empty_func: 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU)
(warning: too few iterations for a reliable count)
empty_loop: 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU)
(warning: too few iterations for a reliable count)
33046875 33046875 48000000 1416858175
##
##
use warnings;
use Benchmark;
use Inline C => Config =>
BUILD_NOISY => 1;
use Inline C => <<'EOC';
/*
 * Count how many of the arguments can be held exactly in a double: once
 * trailing zero bits are shifted out, the remaining value must not exceed
 * 2**53.
 *
 * x, ...: the values to test.  The named parameter is not used directly;
 *         every argument is read from the Perl stack via SvUV(ST(i)).
 * Returns the number of fitting arguments as an int.
 *
 * Assumes UV is 64 bits wide (the limit below is built with a 53-bit shift).
 */
int uv_fits_double3(SV * x, ...) {
    dXSARGS;    /* provides items and ST() for the variadic arguments */
    const UV limit = (UV)1 << 53;   /* same bound as "< 9007199254740993" */
    int i, count = 0;

    for (i = 0; i < items; i++) {
        UV arg = SvUV(ST(i));

        if (arg == 0) {
            /* Zero is exactly representable; it must also bypass the
             * shift loop below, which would never terminate for it. */
            count++;
            continue;
        }

        /* Drop trailing zero bits: they only affect the exponent. */
        while (!(arg & 1)) arg >>= 1;

        if (arg <= limit) count++;
    }
    return count;
}
/*
 * Loop-free variant of the "fits in a double" check, applied to every
 * argument.
 *
 * For each value the lowest set bit is isolated and shifted up by 53
 * places; every bit at or above that position lies outside the 53-bit
 * window that starts at the lowest set bit, so the value fits iff none of
 * those bits is set.
 *
 * t, ...: the values to test.  The named parameter is not used directly;
 *         every argument is read from the Perl stack via SvUV(ST(i)).
 * Returns the number of fitting arguments as an int.
 *
 * Assumes UV is 64 bits wide.
 */
int uv_fits_double_bitfiddle(SV * t, ...) {
    dXSARGS;    /* provides items and ST() for the variadic arguments */
    int i, count = 0;

    for (i = 0; i < items; i++) {
        const UV arg = SvUV(ST(i));

        /* arg & -arg keeps only the lowest set bit (0 when arg is 0). */
        const UV lowest_set = arg & -arg;

        /*
         * Kept unsigned on purpose: when the lowest set bit is bit 10 this
         * is 2**63, and subtracting 1 from that as a signed value would
         * overflow.  When the shift pushes the bit out of the word the
         * result is 0, and 0 - 1 wraps to all ones (every bit is valid).
         */
        const UV smallest_invalid = lowest_set << 53;
        const UV valid_bits = smallest_invalid - 1;
        const UV invalid_bits = ~valid_bits;

        if (!(arg & invalid_bits)) count++;
    }
    return count;
}
/*
 * Range-driven version of the shift-loop check: count the integers i in the
 * half-open range [the_min, the_max) that can be held exactly in a double.
 * A value fits when, after its trailing zero bits are shifted out, what is
 * left does not exceed 2**53.
 *
 * the_min, the_max: range bounds, each read once with SvUV().
 * Returns the number of fitting values as an int.
 *
 * Assumes UV is 64 bits wide (the limit below is built with a 53-bit shift).
 */
int uv_fits_double3x(SV* the_min, SV* the_max) {
    /*
     * No dXSARGS here: the argument list is fixed, ST()/items are never
     * used, and the macro pops a mark-stack entry as a side effect.
     */
    const UV i_min = SvUV(the_min);
    const UV i_max = SvUV(the_max);
    const UV limit = (UV)1 << 53;   /* same bound as "< 9007199254740993" */
    UV i;
    int count = 0;

    for (i = i_min; i < i_max; i++) {
        UV arg = i;

        if (arg == 0) {
            /* Zero is exactly representable, and the shift loop below
             * would spin forever on it, so handle it up front. */
            count++;
            continue;
        }

        /* Trailing zero bits only change the exponent; discard them. */
        while (!(arg & 1)) arg >>= 1;

        if (arg <= limit) count++;
    }
    return count;
}
/*
 * Range-driven version of the loop-free check: count the integers i in the
 * half-open range [the_min, the_max) that can be held exactly in a double.
 *
 * The lowest set bit of i is isolated and shifted up by 53 places; all bits
 * at or above that position fall outside the 53-bit window anchored at the
 * lowest set bit, so i fits iff it has none of them set.
 *
 * the_min, the_max: range bounds, each read once with SvUV().
 * Returns the number of fitting values as an int.
 *
 * Assumes UV is 64 bits wide.
 */
int uv_fits_double_bitfiddle2(SV* the_min, SV* the_max) {
    /*
     * No dXSARGS here: the argument list is fixed, ST()/items are never
     * used, and the macro pops a mark-stack entry as a side effect.
     */
    const UV i_min = SvUV(the_min);
    const UV i_max = SvUV(the_max);
    UV i;
    int count = 0;

    for (i = i_min; i < i_max; i++) {
        /* i & -i keeps only the lowest set bit (0 when i is 0). */
        const UV lowest_set = i & -i;

        /*
         * Unsigned on purpose: with the lowest set bit at bit 10 this is
         * 2**63, and "- 1" on the signed equivalent would overflow.  If the
         * shift pushes the bit out of the word the result is 0, and 0 - 1
         * wraps to all ones, i.e. every bit is valid.
         */
        const UV smallest_invalid = lowest_set << 53;
        const UV valid_bits = smallest_invalid - 1;
        const UV invalid_bits = ~valid_bits;

        if (!(i & invalid_bits)) count++;
    }
    return count;
}
/*
 * Baseline for the benchmark: performs the same argument unpacking as the
 * range-based checkers but no per-value work.
 *
 * the_min, the_max: range bounds, each read once with SvUV().
 * Returns the_max - the_min, narrowed to int.
 */
int uv_empty_func(SV* the_min, SV* the_max) {
    /*
     * No dXSARGS here: the argument list is fixed, ST()/items are never
     * used, and the macro pops a mark-stack entry as a side effect.
     */
    const UV i_min = SvUV(the_min);
    const UV i_max = SvUV(the_max);

    /* Explicit cast documents the UV -> int narrowing of the return type. */
    return (int)(i_max - i_min);
}
/*
 * Baseline for the benchmark: walks the half-open range [the_min, the_max)
 * and only increments a counter, with no per-value check.
 *
 * the_min, the_max: range bounds, each read once with SvUV().
 * Returns count * the_max - the_min, narrowed to int.
 * NOTE(review): that expression is not the iteration count and overflows
 * int for the ranges used by the driver; presumably it exists only so the
 * loop result is consumed -- confirm before relying on the value.
 */
int uv_empty_loop(SV* the_min, SV* the_max) {
    /*
     * No dXSARGS here: the argument list is fixed, ST()/items are never
     * used, and the macro pops a mark-stack entry as a side effect.
     */
    UV i_min = SvUV(the_min);
    UV i_max = SvUV(the_max);
    UV i;
    int count = 0;

    for (i = i_min; i < i_max; i++) {
        UV arg = i;     /* copy kept to mirror the shape of the real loops */
        // boring
        count++;
    }
    return count * i_max - i_min;
}
EOC
# Benchmark driver.
#
# Everything the timed code strings touch is a package variable (our), not a
# lexical: Benchmark evals those strings outside this file's lexical scope,
# so "my" variables would be invisible to them.

# [min, max] bounds of the integer ranges fed to the checkers.
our @in2 = (
    [ 1844674407366955161, 1844674407378955161 ],
    [ 9007199248740992,    9007199260740992 ],
    [ 184467436737095,     184467448737095 ],
    [ 184463440737,        184475440737 ],
);

# Test 1 input: every integer of every range (both ends included),
# flattened into one argument list for the variadic checkers.
our @in;
push @in, $_->[0] .. $_->[1] for @in2;

# Results written by the timed code strings.
our ($count1, $count2, $count3, $count4) = (0, 0, 0, 0);

print "=== Test 1: roughly equivalent speed\n";
timethese(5, {
    'uv_fits_double3'          => '$count1 = uv_fits_double3(@in);',
    'uv_fits_double_bitfiddle' => '$count2 = uv_fits_double_bitfiddle(@in);',
});
print "$count1 $count2 (", scalar(@in), ")\n";
print "!!!! MISMATCH !!!!\n"
    if $count1 != $count2;

print "\n=== TEST 2: twiddle has an advantage if we don't use SvUV()\n";
($count1, $count2, $count3, $count4) = (0, 0, 0, 0);

# Each entry sums one function's result over all four ranges; the functions
# loop over the range in C, so SvUV() runs once per bound, not once per value.
timethese(10000, {
    'double3x' => '$count1 = uv_fits_double3x(@{$in2[0]})'
                . ' + uv_fits_double3x(@{$in2[1]})'
                . ' + uv_fits_double3x(@{$in2[2]})'
                . ' + uv_fits_double3x(@{$in2[3]})',
    'bitfiddle_3x' => '$count2 = uv_fits_double_bitfiddle2(@{$in2[0]})'
                    . ' + uv_fits_double_bitfiddle2(@{$in2[1]})'
                    . ' + uv_fits_double_bitfiddle2(@{$in2[2]})'
                    . ' + uv_fits_double_bitfiddle2(@{$in2[3]})',
    'empty_func' => '$count3 = uv_empty_func(@{$in2[0]})'
                  . ' + uv_empty_func(@{$in2[1]})'
                  . ' + uv_empty_func(@{$in2[2]})'
                  . ' + uv_empty_func(@{$in2[3]})',
    'empty_loop' => '$count4 = uv_empty_loop(@{$in2[0]})'
                  . ' + uv_empty_loop(@{$in2[1]})'
                  . ' + uv_empty_loop(@{$in2[2]})'
                  . ' + uv_empty_loop(@{$in2[3]})',
});
print "$count1 $count2 $count3 $count4\n";
print "!!!! MISMATCH !!!!\n" if $count1 != $count2;