Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

Re: Powerset short-circuit optimization

by creamygoodness (Curate)
on Nov 06, 2006 at 01:56 UTC ( [id://582359]=note: print w/replies, xml ) Need Help??


in reply to Powerset short-circuit optimization

Greets,

Here's another entry for you, Limbic~Region. I believe it meets your criteria of checking the longest strings first.

#!/usr/bin/perl use strict; use warnings; use Inline 'C'; for my $file (@ARGV) { open(my $fh, '<', $file) or die "Unable to open '$file' for readin +g: $!"; while (<$fh>) { my ($set) = $_ =~ /^(\w+)/; #print "INPUT: $set\n"; powerset($set, length($set)); } } __END__ __C__ /* Generate a powerset for the set of all unique lowercase letters det +ected in * a string of length [len]. */ void powerset(char *set, unsigned len); /* Set up. */ void init(void); /* Start with a number. Print it's associated powerset if we haven't +seen it * before. Iterate over the bits, negating one each time and recurse. */ void reduce(long fingerprint); /* Print the letter associated with each bit in a 32-bit bitmask */ void decode_and_print(long fingerprint); /* 0x4000000 is 2**26. This array has one slot for every possible unor +dered * set of unique lower-case letters. * * Note: if memory consumption were a concern, this would be implement +ed using * a bit vector rather than an array of char. */ static char seen[0x4000000]; /* Map of individual letters to a bit mask with a single bit set. */ static long letter_masks[128]; void powerset(char *set, unsigned len) { long fingerprint = 0; unsigned i; /* call init the first time this function is invoked */ static bool init_flag = FALSE; if (!init_flag) { init(); init_flag = TRUE; } /* generate a bitmask representing this set */ for (i = 0; i < len; i++) { long bit = letter_masks[ set[i] ]; if (bit == 0) continue; fingerprint |= bit; } /* reduce the bitmask bit by bit */ reduce(fingerprint); } void init(void) { char letter; unsigned bitshift; /* zero out the "seen" array and the "letter_masks" array */ memset(seen, 0, sizeof(seen)); memset(letter_masks, 0, sizeof(letter_masks)); /* skip the null set, 'cause Limbic~Region sez so */ seen[0] = 1; /* assign one bit mask for each letter (assumes contiguity of a-z) + */ for (letter = 'a', bitshift = 0; letter <= 'z'; letter++, bitshift++ ) { letter_masks[letter] = 0x1 << bitshift; } return; } void reduce(long fingerprint) { unsigned bit; /* if we've seen this set, we've seen all its subsets, so bail */ if (seen[fingerprint]) return; /* first time we've seen this set, so print it */ seen[fingerprint] = 1; decode_and_print(fingerprint); /* iterate over the bits in the fingerprint */ for (bit = 1; bit <= 26; bit++) { long single_bit_mask = 0x1 << (bit - 1); if (fingerprint & single_bit_mask) { /* turn off one bit and recurse */ reduce( fingerprint ^ single_bit_mask ); } } } void decode_and_print(long fingerprint) { unsigned bit; char letter; /* shift one bit left, increment one letter */ for (bit = 1, letter = 'a'; bit <= 26; bit++, letter++ ) { /* print the letter if its associated bit is on */ long single_bit_mask = 0x1 << (bit - 1); if (fingerprint & single_bit_mask) { putc(letter, stdout); } } /* finish up with a newline */ putc('\n', stdout); }
--
Marvin Humphrey
Rectangular Research ― http://www.rectangular.com

Replies are listed 'Best First'.
Re^2: Powerset short-circuit optimization
by Limbic~Region (Chancellor) on Nov 06, 2006 at 15:59 UTC
    creamygoodness,
    Thanks. This will give me something to chew on for a while. I actually have a pretty advanced project that I would like to use C for but don't currently have the skillset for. Practice makes ... er um, well yeah.

    Update: Since you asked in the CB how this performed in comparison to mine. It takes only 61% of the time to produce the same output in my very rough benchmark (wall clock on a non-idle system).

    Cheers - L~R

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://582359]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others musing on the Monastery: (2)
As of 2024-04-19 19:38 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found