$ diff pm_11113750_orig.pl pm_11113750_a.pl 26a27 > UV arg; 30,31c31,33 < IV neg_t = -(SvIV(ST(i))); < IV last_set = SvUV(ST(i)) & neg_t; --- > arg = SvUV(ST(i)); > IV neg_t = -arg; > IV last_set = arg & neg_t; 42c44 < if(!(SvUV(ST(i)) & invalid_bits)) count++; --- > if(!(arg & invalid_bits)) count++; #### $ PERL5OPT= perl pm_11113750_orig.pl Benchmark: timing 5 iterations of uv_fits_double3, uv_fits_double_bitfiddle... uv_fits_double3: 7 wallclock secs ( 6.70 usr + 0.00 sys = 6.70 CPU) @ 0.75/s (n=5) uv_fits_double_bitfiddle: 17 wallclock secs (17.36 usr + 0.00 sys = 17.36 CPU) @ 0.29/s (n=5) 46875 46875 $ PERL5OPT= perl pm_11113750_a.pl Benchmark: timing 5 iterations of uv_fits_double3, uv_fits_double_bitfiddle... uv_fits_double3: 7 wallclock secs ( 6.75 usr + 0.00 sys = 6.75 CPU) @ 0.74/s (n=5) uv_fits_double_bitfiddle: 6 wallclock secs ( 6.67 usr + 0.00 sys = 6.67 CPU) @ 0.75/s (n=5) 46875 46875 #### int uv_fits_double3x(SV* the_min, SV* the_max) { dXSARGS; UV i_min = SvUV(the_min); UV i_max = SvUV(the_max); UV i; int count = 0; UV arg; for(i = i_min; i < i_max; i++) { arg = i; if(arg) { while(!(arg & 1)) arg >>= 1; if(arg < 9007199254740993) count++; } } return count; } int uv_fits_double_bitfiddle_3x(SV* the_min, SV* the_max) { dXSARGS; UV i_min = SvUV(the_min); UV i_max = SvUV(the_max); UV i; int count = 0; for(i = i_min; i < i_max; i++) { UV arg = i; IV neg = -arg; IV smallest_invalid = (arg & -arg)<<53; UV valid_bits = smallest_invalid-1; UV invalid_bits = ~valid_bits; if (! (arg & invalid_bits)) count++; } return count; } #### $ PERL5OPT= perl pm_11113750_x.pl Name "main::count4" used only once: possible typo at pm_11113750_x.pl line 147. === Test 1: roughly equivalent speed Benchmark: timing 5 iterations of uv_fits_double3, uv_fits_double_bitfiddle... 
uv_fits_double3: 28 wallclock secs (27.22 usr + 0.08 sys = 27.30 CPU) @ 0.18/s (n=5)
uv_fits_double_bitfiddle: 27 wallclock secs (26.72 usr + 0.00 sys = 26.72 CPU) @ 0.19/s (n=5)
33046878 33046878 (48000004)
=== TEST 2: twiddle has an advantage if we don't use SvUV()
Benchmark: timing 10000 iterations of bitfiddle_3x, double3x, empty_func, empty_loop...
bitfiddle_3x: 18 wallclock secs (18.39 usr + 0.00 sys = 18.39 CPU) @ 543.74/s (n=10000)
double3x: 30 wallclock secs (29.55 usr + 0.00 sys = 29.55 CPU) @ 338.44/s (n=10000)
empty_func: 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU)
            (warning: too few iterations for a reliable count)
empty_loop: 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU)
            (warning: too few iterations for a reliable count)
33046875 33046875 48000000 1416858175
####
# Benchmark comparing two ways of counting unsigned integers that pass a
# "significant bits fit below 2^53 + 1" test: a shift loop and a loop-free
# bit-twiddling mask. Test 1 passes every integer as a Perl argument; Test 2
# passes only range bounds so that the per-element SvUV() cost disappears.
use warnings;
use Benchmark;
use Inline C => Config => BUILD_NOISY => 1;
use Inline C => <<'EOC';

/*
 * Count the arguments whose value, once trailing zero bits are shifted out,
 * is below 9007199254740993 (2^53 + 1). Arguments are read from the Perl
 * stack via ST(i) for i in [0, items). A zero argument is never counted.
 */
int uv_fits_double3(SV * x, ...) {
    dXSARGS;
    int i, count = 0;
    UV arg;

    for(i = 0; i < items; i++) {
        arg = SvUV(ST(i));
        if(arg) {
            /* Strip trailing zero bits so only the significant bits remain. */
            while(!(arg & 1)) arg >>= 1;
            if(arg < 9007199254740993) count++;
        }
    }
    return count;
}

/*
 * Loop-free variant: count the arguments that have no bit set 53 or more
 * positions above their lowest set bit. SvUV() is called once per argument
 * and the result is reused. A zero argument IS counted here (the mask of
 * invalid bits is empty), unlike in uv_fits_double3().
 */
int uv_fits_double_bitfiddle(SV * t, ...) {
    dXSARGS;
    int i, count = 0;
    UV arg;

    for(i = 0; i < items; i++) {
        arg = SvUV(ST(i));
        /* (arg & -arg) isolates the lowest set bit. */
        IV smallest_invalid = (arg & -arg) << 53;
        UV valid_bits = smallest_invalid - 1;
        UV invalid_bits = ~valid_bits;
        if ( !(arg & invalid_bits)) count++;
    }
    return count;
}

/*
 * Same test as uv_fits_double3(), but applied to every integer in
 * [the_min, the_max) generated inside C instead of being passed as arguments.
 *
 * NOTE(review): none of the variables set up by dXSARGS are used in the
 * fixed-arity functions from here on - confirm the macro is really needed.
 */
int uv_fits_double3x(SV* the_min, SV* the_max) {
    dXSARGS;
    UV i_min = SvUV(the_min);
    UV i_max = SvUV(the_max);
    UV i;
    int count = 0;
    UV arg;

    for(i = i_min; i < i_max; i++) {
        arg = i;
        if(arg) {
            while(!(arg & 1)) arg >>= 1;
            if(arg < 9007199254740993) count++;
        }
    }
    return count;
}

/*
 * Same test as uv_fits_double_bitfiddle(), applied to every integer in
 * [the_min, the_max) generated inside C.
 */
int uv_fits_double_bitfiddle2(SV* the_min, SV* the_max) {
    dXSARGS;
    UV i_min = SvUV(the_min);
    UV i_max = SvUV(the_max);
    UV i;
    int count = 0;

    for(i = i_min; i < i_max; i++) {
        UV arg = i;
        /* If the shift pushes the isolated bit off the top of UV the result
         * is 0 and no bit is treated as invalid. */
        IV smallest_invalid = (arg & -arg)<<53;
        UV valid_bits = smallest_invalid-1;
        UV invalid_bits = ~valid_bits;
        if (! (arg & invalid_bits)) count++;
    }
    return count;
}

/*
 * Baseline with no loop at all: reads both bounds and returns their
 * difference (converted to int).
 */
int uv_empty_func(SV* the_min, SV* the_max) {
    dXSARGS;
    UV i_min = SvUV(the_min);
    UV i_max = SvUV(the_max);
    return i_max - i_min;
}

/*
 * Baseline with a loop that only counts its iterations.
 * NOTE(review): the return expression mixes the counter with the bounds,
 * presumably just so the loop result is consumed - confirm the intent.
 */
int uv_empty_loop(SV* the_min, SV* the_max) {
    dXSARGS;
    UV i_min = SvUV(the_min);
    UV i_max = SvUV(the_max);
    UV i;
    int count = 0;

    for(i = i_min; i < i_max; i++) {
        UV arg = i; // boring
        count++;
    }
    return count * i_max - i_min;
}

EOC

# [lower, upper] bounds of the integer ranges under test.
@in2 = (
    [ 1844674407366955161, 1844674407378955161 ],
    [ 9007199248740992, 9007199260740992 ],
    [ 184467436737095, 184467448737095 ],
    [ 184463440737, 184475440737 ],
);

# Test 1 input: every integer of every range, both endpoints included.
push @in, $_->[0] .. $_->[1] for @in2;

print "=== Test 1: roughly equivalent speed\n";

# All four counters are declared and reset here so that $count4 (filled in by
# the empty_loop benchmark) does not trigger a "used only once" warning.
our ($count1, $count2, $count3, $count4);
($count1, $count2) = (0, 0);

timethese (5, {
    'uv_fits_double3' => '$count1 = uv_fits_double3(@in);',
    'uv_fits_double_bitfiddle' => '$count2 = uv_fits_double_bitfiddle(@in);',
});

print "$count1 $count2 (", scalar(@in), ")\n";
print "!!!! MISMATCH !!!!\n" if $count1 != $count2;

print "\n=== TEST 2: twiddle has an advantage if we don't use SvUV()\n";

($count1, $count2, $count3, $count4) = (0, 0, 0, 0);

# Each benchmark sums the per-range results; the C side treats the upper
# bound as exclusive.
timethese (10000, {
    'double3x' => '$count1 = uv_fits_double3x(@{$in2[0]})'
                 .' + uv_fits_double3x(@{$in2[1]})'
                 .' + uv_fits_double3x(@{$in2[2]})'
                 .' + uv_fits_double3x(@{$in2[3]})'
    ,
    'bitfiddle_3x'=>'$count2 = uv_fits_double_bitfiddle2(@{$in2[0]})'
                 .' + uv_fits_double_bitfiddle2(@{$in2[1]})'
                 .' + uv_fits_double_bitfiddle2(@{$in2[2]})'
                 .' + uv_fits_double_bitfiddle2(@{$in2[3]})'
    ,
    'empty_func'=>'$count3 = uv_empty_func(@{$in2[0]})'
                 .' + uv_empty_func(@{$in2[1]})'
                 .' + uv_empty_func(@{$in2[2]})'
                 .' + uv_empty_func(@{$in2[3]})'
    ,
    'empty_loop'=>'$count4 = uv_empty_loop(@{$in2[0]})'
                 .' + uv_empty_loop(@{$in2[1]})'
                 .' + uv_empty_loop(@{$in2[2]})'
                 .' + uv_empty_loop(@{$in2[3]})'
    ,
});

print "$count1 $count2 $count3 $count4\n";
print "!!!! MISMATCH !!!!\n" if $count1 != $count2;