typedef struct { U64 addr; char *name; } PAIR; typedef struct { PAIR **byInt; U32 *byStr; U32 size; U32 used; double factor; } BIMAP; #### C:\test>1112165-hash Start: mem: 7,920 K 23762752 Hash built: mem: 4,382,828 K Fetch by Name took 7.539046 seconds Fetch by Addr took 8.349479 seconds Hash queried: check mem: 4,382,852 K #### C:\test>BiMap -S=5 -F=0.71 -I=12000000 -D=0 bless(do{\(my $o = 52848976)}, "BiMap") Fetch by Addr & Fetch by Name took 22.257803 seconds for 11881376 items in a 16777216 sized BiMap Mem: 580,552 K #### C:\test>BiMap -S=5 -F=0.70 -I=20000000 -D=0 bless(do{\(my $o = 52848976)}, "BiMap") Fetch by Addr & Fetch by Name took 19.896479 seconds for 11881376 items in a 33554432 sized BiMap Mem: 777,532 K #### #! perl -slw package BiMap; use strict; #use Config; use Inline C => Config => BUILD_NOISY => 1; #, ccflags => $Config{ccflags} . '-D_CRT_SECURE_NO_WARNINGS'; use Inline C => <<'END_C', NAME => 'BiMap', CLEAN_AFTER_BUILD =>0, TYPEMAPS => '/test/BiMap.typemap';; #undef malloc #undef calloc #undef free #define CLASS "BiMap" #define HASH_SEED 0xc0d1f1ed #define U64_HIBIT 0x8000000000000000ull U32 __inline hash( const unsigned char *str, const STRLEN len) { const unsigned char * const end = (const unsigned char *)str + len; U32 hash = HASH_SEED; while (str < end) { hash += *str++; hash += (hash << 10); hash ^= (hash >> 6); } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); return hash; } U32 __inline nextPowerOfTwo( U32 v ) { v--; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v += (v == 0); return ++v; } typedef unsigned __int64 U64; typedef struct { U64 addr; char *name; } PAIR; typedef struct { PAIR **byInt; U32 *byStr; U32 size; U32 used; double factor; } BIMAP; void DESTROY ( BIMAP *bm ) { U32 i; for( i=0; i < bm->size; ++i ) { if( bm->byInt[ i ] ) { if( !( (U64)bm->byInt[ i ]->name & ~U64_HIBIT ) ) free( bm->byInt[ i ]->name ); free( bm->byInt[ i ] ); } } free( bm->byInt ); free( bm->byStr ); free( bm ); } void dump( BIMAP *bm, int dumpBody ) { U32 i; printf( "\n\nObject:%8p byInt:%8p byStr:%8p size:%u used:%u\n", bm, bm->byInt, bm->byStr, bm->size, bm->used ); if( dumpBody ) for( i = 0; i < bm->size; ++i ) { printf( "%4d: %10.10I64u %-10s [ byStr: %6u ]\n", i , bm->byInt[ i ] ? bm->byInt[ i ]->addr : 0 , bm->byInt[ i ] ? bm->byInt[ i ]->name : " n/a" , bm->byStr[ i ] ? bm->byStr[ i ] : 0 ); } } BIMAP *new( U32 initSize, double factor ) { BIMAP *bm = (BIMAP*)malloc( sizeof( BIMAP ) ); initSize = nextPowerOfTwo( initSize ); bm->byInt = (PAIR**)calloc( initSize, sizeof( PAIR ) ); bm->byStr = (U32*)calloc( initSize, sizeof( U32 ) ); bm->size = initSize; bm->used = 0; bm->factor = factor; return bm; } U32 used( BIMAP *bm ) { return bm->used; } U32 size( BIMAP *bm ) { return bm->size; } double factor( BIMAP *bm ) { return bm->factor; } U32 addPair( BIMAP *bm, PAIR *pair ); U32 add( BIMAP *bm, U64 i, SV *sv ); void resize( BIMAP *bm, U32 newSize ) { BIMAP *newBm = new( newSize, bm->factor ); U32 i; // printf( "Resize: from %u(%u) to %u\n", bm->size, bm->used, newSize ); for( i = 0; i < bm->size; ++i ) { if( bm->byInt[ i ] ) { addPair( newBm, bm->byInt[ i ] ); } } free( bm->byInt ); free( bm->byStr ); bm->byInt = newBm->byInt; bm->byStr = newBm->byStr; bm->size = newBm->size; bm->used = newBm->used; free( newBm ); return; } U32 __inline addPair( BIMAP *bm, PAIR *pair ) { U32 nameLen = (U32)strlen( (U64)pair->name & U64_HIBIT ? (char*)&pair->name : pair->name ); register U32 mask = bm->size - 1; register U32 iHash = hash( (char*)&pair->addr, 8 ) & mask, sHash = hash( (U64)pair->name & U64_HIBIT ? (char*)&pair->name : pair->name, nameLen ) & mask; U32 iIters = 0, sIters = 0; if( bm->used > (U32)( bm->size * bm->factor ) ) { resize( bm, bm->size * 2 ); mask = bm->size - 1; iHash = hash( (char*)&pair->addr, 8 ) & mask; sHash = hash( (U64)pair->name & U64_HIBIT ? (char*)&pair->name : pair->name, nameLen ) & mask; } while( bm->byInt[ iHash ] ) { ++iIters; iHash = ( iHash + 1 ) & mask; } while( bm->byStr[ sHash ] ) { ++sIters; sHash = ( sHash + 1 ) & mask; } bm->byInt[ iHash ] = pair; bm->byStr[ sHash ] = iHash+1; bm->used++; return iIters + sIters; } U32 add( BIMAP *bm, U64 i, SV *sv ) { STRLEN l; char *s = SvPV( sv, l ); PAIR *pair = (PAIR*)calloc( 1, sizeof( PAIR ) ); pair->addr = i; if( l < 7 ) { strncpy( (char*)(&pair->name), s, l ); (U64)pair->name |= U64_HIBIT; } else { pair->name = _strdup( s ); } return addPair( bm, pair ); } U64 findByStr( BIMAP *bm, char *s ) { U32 sLen = (U32)strlen( s ); register U32 mask = bm->size - 1, sIters = 0; register U32 sHash = hash( s, sLen ) & mask; register PAIR **byInt = bm->byInt; register U32 *byStr = bm->byStr; register char *name; if( !byStr[ sHash ] ) return -1; name = (U64)byInt[ byStr[ sHash ]-1 ]->name & U64_HIBIT ? (char*)&(U64)byInt[ byStr[ sHash ]-1 ]->name : byInt[ byStr[ sHash ]-1 ]->name; while( strcmp( name, s ) ) { sHash = ( sHash + 1 ) & mask; if( !byStr[ sHash ] || !byInt[ byStr[ sHash ]-1 ] ) return -1; name = (U64)byInt[ byStr[ sHash ]-1 ]->name & U64_HIBIT ? (char*)&(U64)byInt[ byStr[ sHash ]-1 ]->name : byInt[ byStr[ sHash ]-1 ]->name; } return byInt[ byStr[ sHash ]-1 ]->addr; } char *findByInt( BIMAP *bm, U64 i ) { register U32 mask = bm->size - 1; register U32 iHash = hash( (char*)&i, 8 ) & mask; register PAIR **byInt = bm->byInt; if( !byInt[ iHash ] ) return "$^&* NOT FOUND ON FIRST TRY *&^$"; while( byInt[ iHash ]->addr != i ) { if( ! byInt[ iHash = ( iHash + 1 ) & mask ] ) return "$^&* NOT FOUND AT ALL *&^$"; } return (U64)( byInt[ iHash ]->name ) & U64_HIBIT ? (char*)&byInt[ iHash ]->name : byInt[ iHash ]->name; } END_C package main; use Time::HiRes qw[ time ]; use Data::Dump qw[ pp ]; use List::Util qw[ sum ]; $|++; our $S //= 4; our $F //= 0.9; our $I //= 26**$S; our $D //= 0; sub mem{ `tasklist /nh /fi "PID eq $$"` =~ m[(\S+ K)$] } my $bm = BiMap::new( $I, $F ); pp $bm; my %counts; my $i = 1; if( $D ) { ++$counts{ $bm->add( $i++, $_ ) } for 'a' x $S .. 'z' x $S; } else { $bm->add( $i++, $_ ) for 'a' x $S .. 'z' x $S; } #$bm->dump( 1 ); my $start = time; for my $_ ( 'a' x $S .. 'z' x $S ) { $_ eq $bm->findByInt( $bm->findByStr( $_ ) ) or die "Failed to find '$_'\n"; } printf "Fetch by Addr & Fetch by Name took %f seconds for %u items in a %u sized BiMap \n", time() - $start, $bm->used, $bm->size; print "Mem: ", mem; if( $D ) { printf "'%s'\n", $bm->findByInt( 26**($S+1) ); printf "'%I64u'\n", $bm->findByStr( 'z' x ($S+1) ); pp \%counts; print my $total = sum( map{ $_ * $counts{ $_ } } keys %counts ); print sum( values %counts ); print $total / $bm->used; }