Inline::C

I thought I'd whip up a little test to compare the Perl hash-based solution to an Inline::C solution. It looks like the C version is about 85x faster...

Benchmark: timing 20 iterations of hash_string, inline...
hash_string: 37 wallclock secs (37.08 usr +  0.01 sys = 37.09 CPU) @  0.54/s (n=20)
    inline:  1 wallclock secs ( 0.44 usr +  0.00 sys =  0.44 CPU) @ 45.45/s (n=20)

#!/usr/bin/perl

use strict;
use warnings;

# The language name is quoted so it is not a bareword under strict.
use Inline 'C';
use Benchmark;

# Benchmark input: "atgcgc" repeated 500000 times (3 million characters).
my $gen = "atgcgc" x 500000;

# Candidates handed to Benchmark::timethese, keyed by the label that
# appears in the report.
my %tests = (
    inline      => sub { string_inline_c($gen, length($gen)) },
    hash_string => sub { hash_string($gen) },
);

timethese(20, \%tests);

# Count every overlapping two-character window of a string.
#
#   $genome - the string to scan.
#
# Returns a hash reference mapping each two-character substring to the
# number of start positions at which it occurs. Strings shorter than two
# characters (and undef) yield an empty hash.
sub hash_string {
    my ($genome) = @_;
    my %count;
    return \%count unless defined $genome;

    # The last window starts two characters before the end; for strings
    # shorter than two characters the range is empty.
    my $last_start = length($genome) - 2;
    for my $start (0 .. $last_start) {
        $count{ substr($genome, $start, 2) }++;
    }

    return \%count;
}
__END__
__C__

/*
 * Count overlapping two-letter pairs over the alphabet a/c/g/t in the
 * first `len` bytes of `genome`.
 *
 * genome - buffer to scan; the caller must make at least `len` bytes
 *          readable.
 * len    - number of bytes to scan; values below 2 count nothing.
 *
 * Always returns 1. The counts stay in a local table and are not handed
 * back to the caller.
 */
int string_inline_c(char *genome, int len)
{
    int i;
    int prev_ok = 0;    /* was the previous byte one of a/c/g/t? */

    /* Bucket index is 4*(first char - 'a') + (second char - 'a'),  */
    /* e.g. the bucket for gg is 4*('g'-'a')+'g'-'a' = 30.          */
    /* The 16 pairs over a/c/g/t land on distinct indices 0..95:    */
    /*   aa= 0  ac= 2  ag= 6  at=19                                 */
    /*   ca= 8  cc=10  cg=14  ct=27                                 */
    /*   ga=24  gc=26  gg=30  gt=43                                 */
    /*   ta=76  tc=78  tg=82  tt=95                                 */
    int hash[96] = {0};

    if (!genome)
        return(1);

    for (i = 0; i < len; i++)
    {
        int cur_ok;

        switch (genome[i])
        {
            case 'a': case 'c': case 'g': case 't':
                cur_ok = 1;
                break;
            default:
                cur_ok = 0;
                break;
        }

        /* Only count a pair when both bytes are a/c/g/t. Any other    */
        /* byte would index outside the table or alias a real bucket.  */
        /* prev_ok starts at 0, so genome[i-1] is never read at i == 0. */
        if (prev_ok && cur_ok)
            hash[4*(genome[i-1]-'a')+(genome[i]-'a')]++;

        prev_ok = cur_ok;
    }

    /* returning the proper perl hash is left as an */
    /* exercise for the reader                      */
    /* see also the Inline-C Cookbook               */
    return(1);
}
[download]

Comment on Inline::C Download Code

Replies are listed 'Best First'.

Re: Inline::C -- complete
by sleepingsquirrel (Chaplain) on Nov 26, 2003 at 05:09 UTC

#!/usr/bin/perl

use strict;
use warnings;

# The language name is quoted so it is not a bareword under strict.
use Inline 'C';
use Benchmark;

# Benchmark input: "atgcgc" repeated 500000 times (3 million characters).
my $gen = "atgcgc" x 500000;

# Receives the hash reference returned by the C implementation.
my $h_ref;

# Candidates handed to Benchmark::timethese, keyed by the label that
# appears in the report.
my %tests = (
    inline      => sub { $h_ref = string_inline_c($gen, length($gen)) },
    hash_string => sub { hash_string($gen) },
);

timethese(2, \%tests);

# Count every overlapping two-character window of a string.
#
#   $genome - the string to scan.
#
# Returns a hash reference mapping each two-character substring to the
# number of start positions at which it occurs. Strings shorter than two
# characters (and undef) yield an empty hash.
sub hash_string {
    my ($genome) = @_;
    my %count;
    return \%count unless defined $genome;

    # The last window starts two characters before the end; for strings
    # shorter than two characters the range is empty.
    my $last_start = length($genome) - 2;
    for my $start (0 .. $last_start) {
        $count{ substr($genome, $start, 2) }++;
    }

    return \%count;
}
__END__
__C__

/*
 * Count overlapping two-letter pairs over the alphabet a/c/g/t in the
 * first `len` bytes of `genome` and return the counts as a Perl hash.
 *
 * genome - buffer to scan; the caller must make at least `len` bytes
 *          readable.
 * len    - number of bytes to scan; values below 2 count nothing.
 *
 * Returns a new reference to a hash holding all 16 keys "aa" .. "tt",
 * each mapped to an integer count (0 for pairs that never occur).
 * Pairs that contain any byte other than a/c/g/t are not counted.
 */
SV* string_inline_c(char *genome, int len)
{
    int i;
    int prev_ok = 0;    /* was the previous byte one of a/c/g/t? */

    /* Bucket index is 4*(first char - 'a') + (second char - 'a'),  */
    /* e.g. the bucket for gg is 4*('g'-'a')+'g'-'a' = 30.          */
    /* The 16 pairs over a/c/g/t land on distinct indices 0..95.    */
    int hash[96] = {0};
    HV* perl_hash = newHV();

    if (genome)
    {
        for (i = 0; i < len; i++)
        {
            int cur_ok;

            switch (genome[i])
            {
                case 'a': case 'c': case 'g': case 't':
                    cur_ok = 1;
                    break;
                default:
                    cur_ok = 0;
                    break;
            }

            /* Only count a pair when both bytes are a/c/g/t. Any other  */
            /* byte would index outside the table or alias a real        */
            /* bucket. prev_ok starts at 0, so genome[i-1] is never      */
            /* read at i == 0.                                           */
            if (prev_ok && cur_ok)
                hash[4*(genome[i-1]-'a')+(genome[i]-'a')]++;

            prev_ok = cur_ok;
        }
    }

    /*move our values over from the 'C' hash to the perl hash*/

    /* hv_store takes ownership of the value only when it succeeds; */
    /* on failure the new SV must be released here or it leaks.     */
#define h(c,i) do { \
        SV* count_sv = newSViv(hash[(i)]); \
        if (!hv_store(perl_hash, (c), sizeof((c))-1, count_sv, 0)) \
            SvREFCNT_dec(count_sv); \
    } while (0)

    h("aa", 0); h("ac", 2); h("ag", 6); h("at",19);
    h("ca", 8); h("cc",10); h("cg",14); h("ct",27);
    h("ga",24); h("gc",26); h("gg",30); h("gt",43);
    h("ta",76); h("tc",78); h("tg",82); h("tt",95);

#undef h

    return newRV_noinc((SV*) perl_hash); /*return a ref to a hash*/
}
[download]

[reply]
[d/l]


Just another Perl shrine
	PerlMonks