typedef struct {
U64 addr;
char *name;
} PAIR;
typedef struct {
PAIR **byInt;
U32 *byStr;
U32 size;
U32 used;
double factor;
} BIMAP;
####
C:\test>1112165-hash
Start: mem: 7,920 K
23762752
Hash built: mem: 4,382,828 K
Fetch by Name took 7.539046 seconds
Fetch by Addr took 8.349479 seconds
Hash queried: check mem: 4,382,852 K
##
##
C:\test>BiMap -S=5 -F=0.71 -I=12000000 -D=0
bless(do{\(my $o = 52848976)}, "BiMap")
Fetch by Addr & Fetch by Name took 22.257803 seconds for 11881376 items in a 16777216 sized BiMap
Mem: 580,552 K
##
##
C:\test>BiMap -S=5 -F=0.70 -I=20000000 -D=0
bless(do{\(my $o = 52848976)}, "BiMap")
Fetch by Addr & Fetch by Name took 19.896479 seconds for 11881376 items in a 33554432 sized BiMap
Mem: 777,532 K
##
##
#! perl -slw
package BiMap; use strict; #use Config;
use Inline C => Config => BUILD_NOISY => 1; #, ccflags => $Config{ccflags} . '-D_CRT_SECURE_NO_WARNINGS';
use Inline C => <<'END_C', NAME => 'BiMap', CLEAN_AFTER_BUILD =>0, TYPEMAPS => '/test/BiMap.typemap';;
#undef malloc
#undef calloc
#undef free
#define CLASS "BiMap"
#define HASH_SEED 0xc0d1f1ed
#define U64_HIBIT 0x8000000000000000ull
U32 __inline hash( const unsigned char *str, const STRLEN len) {
const unsigned char * const end = (const unsigned char *)str + len;
U32 hash = HASH_SEED;
while (str < end) {
hash += *str++;
hash += (hash << 10);
hash ^= (hash >> 6);
}
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
return hash;
}
U32 __inline nextPowerOfTwo( U32 v ) {
v--;
v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16;
v += (v == 0);
return ++v;
}
typedef unsigned __int64 U64;
typedef struct {
U64 addr;
char *name;
} PAIR;
typedef struct {
PAIR **byInt;
U32 *byStr;
U32 size;
U32 used;
double factor;
} BIMAP;
void DESTROY ( BIMAP *bm ) {
U32 i;
for( i=0; i < bm->size; ++i ) {
if( bm->byInt[ i ] ) {
if( !( (U64)bm->byInt[ i ]->name & ~U64_HIBIT ) ) free( bm->byInt[ i ]->name );
free( bm->byInt[ i ] );
}
}
free( bm->byInt );
free( bm->byStr );
free( bm );
}
void dump( BIMAP *bm, int dumpBody ) {
U32 i;
printf( "\n\nObject:%8p byInt:%8p byStr:%8p size:%u used:%u\n", bm, bm->byInt, bm->byStr, bm->size, bm->used );
if( dumpBody )
for( i = 0; i < bm->size; ++i ) {
printf( "%4d: %10.10I64u %-10s [ byStr: %6u ]\n", i
, bm->byInt[ i ] ? bm->byInt[ i ]->addr : 0
, bm->byInt[ i ] ? bm->byInt[ i ]->name : " n/a"
, bm->byStr[ i ] ? bm->byStr[ i ] : 0
);
}
}
BIMAP *new( U32 initSize, double factor ) {
BIMAP *bm = (BIMAP*)malloc( sizeof( BIMAP ) );
initSize = nextPowerOfTwo( initSize );
bm->byInt = (PAIR**)calloc( initSize, sizeof( PAIR ) );
bm->byStr = (U32*)calloc( initSize, sizeof( U32 ) );
bm->size = initSize;
bm->used = 0;
bm->factor = factor;
return bm;
}
U32 used( BIMAP *bm ) { return bm->used; }
U32 size( BIMAP *bm ) { return bm->size; }
double factor( BIMAP *bm ) { return bm->factor; }
U32 addPair( BIMAP *bm, PAIR *pair );
U32 add( BIMAP *bm, U64 i, SV *sv );
void resize( BIMAP *bm, U32 newSize ) {
BIMAP *newBm = new( newSize, bm->factor );
U32 i;
// printf( "Resize: from %u(%u) to %u\n", bm->size, bm->used, newSize );
for( i = 0; i < bm->size; ++i ) {
if( bm->byInt[ i ] ) {
addPair( newBm, bm->byInt[ i ] );
}
}
free( bm->byInt );
free( bm->byStr );
bm->byInt = newBm->byInt;
bm->byStr = newBm->byStr;
bm->size = newBm->size;
bm->used = newBm->used;
free( newBm );
return;
}
U32 __inline addPair( BIMAP *bm, PAIR *pair ) {
U32 nameLen = (U32)strlen( (U64)pair->name & U64_HIBIT ? (char*)&pair->name : pair->name );
register U32 mask = bm->size - 1;
register U32 iHash = hash( (char*)&pair->addr, 8 ) & mask,
sHash = hash( (U64)pair->name & U64_HIBIT ? (char*)&pair->name : pair->name, nameLen ) & mask;
U32 iIters = 0, sIters = 0;
if( bm->used > (U32)( bm->size * bm->factor ) ) {
resize( bm, bm->size * 2 );
mask = bm->size - 1;
iHash = hash( (char*)&pair->addr, 8 ) & mask;
sHash = hash( (U64)pair->name & U64_HIBIT ? (char*)&pair->name : pair->name, nameLen ) & mask;
}
while( bm->byInt[ iHash ] ) {
++iIters;
iHash = ( iHash + 1 ) & mask;
}
while( bm->byStr[ sHash ] ) {
++sIters;
sHash = ( sHash + 1 ) & mask;
}
bm->byInt[ iHash ] = pair;
bm->byStr[ sHash ] = iHash+1;
bm->used++;
return iIters + sIters;
}
U32 add( BIMAP *bm, U64 i, SV *sv ) {
STRLEN l;
char *s = SvPV( sv, l );
PAIR *pair = (PAIR*)calloc( 1, sizeof( PAIR ) );
pair->addr = i;
if( l < 7 ) {
strncpy( (char*)(&pair->name), s, l );
(U64)pair->name |= U64_HIBIT;
}
else {
pair->name = _strdup( s );
}
return addPair( bm, pair );
}
U64 findByStr( BIMAP *bm, char *s ) {
U32 sLen = (U32)strlen( s );
register U32 mask = bm->size - 1, sIters = 0;
register U32 sHash = hash( s, sLen ) & mask;
register PAIR **byInt = bm->byInt;
register U32 *byStr = bm->byStr;
register char *name;
if( !byStr[ sHash ] ) return -1;
name = (U64)byInt[ byStr[ sHash ]-1 ]->name & U64_HIBIT
? (char*)&(U64)byInt[ byStr[ sHash ]-1 ]->name
: byInt[ byStr[ sHash ]-1 ]->name;
while( strcmp( name, s ) ) {
sHash = ( sHash + 1 ) & mask;
if( !byStr[ sHash ] || !byInt[ byStr[ sHash ]-1 ] ) return -1;
name = (U64)byInt[ byStr[ sHash ]-1 ]->name & U64_HIBIT
? (char*)&(U64)byInt[ byStr[ sHash ]-1 ]->name
: byInt[ byStr[ sHash ]-1 ]->name;
}
return byInt[ byStr[ sHash ]-1 ]->addr;
}
char *findByInt( BIMAP *bm, U64 i ) {
register U32 mask = bm->size - 1;
register U32 iHash = hash( (char*)&i, 8 ) & mask;
register PAIR **byInt = bm->byInt;
if( !byInt[ iHash ] ) return "$^&* NOT FOUND ON FIRST TRY *&^$";
while( byInt[ iHash ]->addr != i ) {
if( ! byInt[ iHash = ( iHash + 1 ) & mask ] ) return "$^&* NOT FOUND AT ALL *&^$";
}
return (U64)( byInt[ iHash ]->name ) & U64_HIBIT ? (char*)&byInt[ iHash ]->name : byInt[ iHash ]->name;
}
END_C
package main;
use Time::HiRes qw[ time ];
use Data::Dump qw[ pp ];
use List::Util qw[ sum ];
$|++;
our $S //= 4;
our $F //= 0.9;
our $I //= 26**$S;
our $D //= 0;
sub mem{ `tasklist /nh /fi "PID eq $$"` =~ m[(\S+ K)$] }
my $bm = BiMap::new( $I, $F );
pp $bm;
my %counts;
my $i = 1;
if( $D ) {
++$counts{ $bm->add( $i++, $_ ) } for 'a' x $S .. 'z' x $S;
}
else {
$bm->add( $i++, $_ ) for 'a' x $S .. 'z' x $S;
}
#$bm->dump( 1 );
my $start = time;
for my $_ ( 'a' x $S .. 'z' x $S ) {
$_ eq $bm->findByInt( $bm->findByStr( $_ ) ) or die "Failed to find '$_'\n";
}
printf "Fetch by Addr & Fetch by Name took %f seconds for %u items in a %u sized BiMap \n", time() - $start, $bm->used, $bm->size;
print "Mem: ", mem;
if( $D ) {
printf "'%s'\n", $bm->findByInt( 26**($S+1) );
printf "'%I64u'\n", $bm->findByStr( 'z' x ($S+1) );
pp \%counts;
print my $total = sum( map{ $_ * $counts{ $_ } } keys %counts );
print sum( values %counts );
print $total / $bm->used;
}