See Tuning Algorithm::Diff
The problem with that benchmark is it only tests the call to the functions; not the subsequent accessing of the returned information.
Accessing the bits from Pure Perl will kill the performance stony dead.
Update: Added another few tests, including a bitvector version to the benchmark:
#! perl -slw
use strict; use Config;
use Inline C => Config => BUILD_NOISY => 1;
use Inline C => <<'END_C', NAME => 'diffBench', CLEAN_AFTER_BUILD =>0
+;
SV *diffAoA( U32 n ) {
AV *av = newAV();
U32 i;
for( i = 0; i < n/2; ++i ) {
AV *av2 = newAV();
av_push( av2, newSViv( i*2 ) );
av_push( av2, newSViv( i*2+1 ) );
av_push( av, (SV*)av2 );
}
return newRV_noinc( (SV*)av );
}
SV *diffPacked( U32 n ) {
U32 *diffs = malloc( sizeof( U32 ) * n );
SV *packed;
U32 i;
for( i = 0; i < n; ++i ) {
diffs[ i ] = i;
}
packed = newSVpv( (char *)diffs, sizeof( U32 ) * n );
free( diffs );
return packed;
}
void diffPackedList( U32 n ) {
inline_stack_vars;
U32 i;
inline_stack_reset;
for( i = 0; i < n/2; ++i ) {
U32 a[ 2 ] = { i*2, i*2+1 };
inline_stack_push( sv_2mortal( newSVpv( (char*)a, 8 ) ) );
}
inline_stack_done;
inline_stack_return( n/2 );
return;
}
SV *diff2dString( U32 n ) {
SV *diffs = newSVpv( "", 0 );
U32 i;
for( i = 0; i < n/2; ++i ) {
sv_catpvf( diffs, "%u:%u ", i*2, i*2+1 );
}
return diffs;
}
void diff1dList( U32 n ) {
inline_stack_vars;
U32 i;
inline_stack_reset;
for( i = 0; i < n/2; ++i ) {
inline_stack_push( sv_2mortal( newSVpvf( "%u:%u", i*2, i*2+1 )
+ ) );
}
inline_stack_done;
inline_stack_return( n/2 );
return;
}
void diffList( U32 n ) {
inline_stack_vars;
U32 i;
inline_stack_reset;
for( i = 0; i < n; ++i ) {
inline_stack_push( sv_2mortal( newSViv( i ) ) );
}
inline_stack_done;
inline_stack_return( n );
return;
}
void diffLoA( U32 n ) {
inline_stack_vars;
U32 i;
inline_stack_reset;
for( i = 0; i < n/2; ++i ) {
AV *av2 = newAV();
av_push( av2, newSViv( i*2 ) );
av_push( av2, newSViv( i*2+1 ) );
inline_stack_push( newRV_noinc( (SV*)av2 ) );
}
inline_stack_done;
inline_stack_return( n/2 );
return;
}
SV * diffBits( U32 n ) {
unsigned __int64 bits = 0;
U32 i;
for( i = 0; i < n/2; ++i ) {
bits |= ( 1ull << ( i*2) );
bits |= ~( 1ull << ( i*2+1 ) );
}
return newSViv( bits );
}
END_C
use Data::Dump qw[ pp ];
use Benchmark qw[ cmpthese ];
our $N //= 10;
cmpthese -1, {
AoA => q[
my $AoA = diffAoA( $N ); # pp $AoA;
for my $pair ( @{ $AoA } ) {
my( $x, $y ) = @{ $pair };
}
],
packed => q[
my $packed = diffPacked( $N ); # pp $packed;
while( length( $packed ) ) {
my( $x, $y ) = unpack 'VV', substr( $packed, 0, 8, '' );
}
],
packedList => q[
for my $pair ( diffPackedList( $N ) ) {
my( $x, $y ) = unpack 'VV', $pair;
}
],
twoDel => q[
my $string2d = diff2dString( $N ); # pp $string2d;
for my $pair ( split ' ', $string2d ) {
my( $x, $y ) = split ':', $pair;
}
],
oneDelList => q[
for my $pair ( diff1dList( $N ) ) {
my( $x, $y ) = split ':', $pair;
}
],
list => q[
my @array = diffList( $N ); # pp \@array;
while( @array ) {
my( $x, $y ) = ( shift @array, shift @array );
}
],
LoA => q[
my @array = diffLoA( $N ); # pp \@array;
for my $pair ( @array ) {
my( $x, $y ) = @{ $pair };
}
],
bits => q[
my $bits = diffBits( $N );
for( my $i =0; $i < 64; ++$i ) {
my( $x, $y ) = ( ( $bits & ( 1 << ( $i*2 ) ) ) >> ( $i *2
+), ( $bits & ( 1 << ( $i*2+1) ) ) >> ( $i*2+1 ) );
}
],
};
__END__
C:\test>diffBench.pl -N=10
Rate bits twoDel oneDelList AoA LoA packedList pa
+cked list
bits 12197/s -- -81% -84% -89% -90% -90%
+-91% -93%
twoDel 65723/s 439% -- -13% -42% -46% -47%
+-51% -61%
oneDelList 75443/s 519% 15% -- -33% -37% -39%
+-44% -55%
AoA 113180/s 828% 72% 50% -- -6% -9%
+-16% -32%
LoA 120618/s 889% 84% 60% 7% -- -3%
+-11% -28%
packedList 123986/s 917% 89% 64% 10% 3% --
+ -8% -26%
packed 135458/s 1011% 106% 80% 20% 12% 9%
+ -- -19%
list 167440/s 1273% 155% 122% 48% 39% 35%
+ 24% --
C:\test>diffBench.pl -N=64
Rate twoDel bits oneDelList packedList AoA LoA pa
+cked list
twoDel 11273/s -- -13% -13% -53% -54% -55%
+-61% -65%
bits 12979/s 15% -- -0% -46% -47% -48%
+-55% -60%
oneDelList 13013/s 15% 0% -- -46% -47% -48%
+-55% -60%
packedList 24162/s 114% 86% 86% -- -1% -3%
+-17% -25%
AoA 24506/s 117% 89% 88% 1% -- -2%
+-16% -24%
LoA 25004/s 122% 93% 92% 3% 2% --
+-14% -23%
packed 29133/s 158% 124% 124% 21% 19% 17%
+ -- -10%
list 32367/s 187% 149% 149% 34% 32% 29%
+ 11% --
|