Greets,
Here's another entry for you, Limbic~Region. I believe it meets your criteria of checking the longest strings first.
#!/usr/bin/perl
use strict;
use warnings;
use Inline 'C';
for my $file (@ARGV) {
open(my $fh, '<', $file) or die "Unable to open '$file' for readin
+g: $!";
while (<$fh>) {
my ($set) = $_ =~ /^(\w+)/;
#print "INPUT: $set\n";
powerset($set, length($set));
}
}
__END__
__C__
/* Generate a powerset for the set of all unique lowercase letters det
+ected in
* a string of length [len].
*/
void
powerset(char *set, unsigned len);
/* Set up.
*/
void
init(void);
/* Start with a number. Print it's associated powerset if we haven't
+seen it
* before. Iterate over the bits, negating one each time and recurse.
*/
void
reduce(long fingerprint);
/* Print the letter associated with each bit in a 32-bit bitmask */
void
decode_and_print(long fingerprint);
/* 0x4000000 is 2**26. This array has one slot for every possible unor
+dered
* set of unique lower-case letters.
*
* Note: if memory consumption were a concern, this would be implement
+ed using
* a bit vector rather than an array of char.
*/
static char seen[0x4000000];
/* Map of individual letters to a bit mask with a single bit set. */
static long letter_masks[128];
void
powerset(char *set, unsigned len) {
long fingerprint = 0;
unsigned i;
/* call init the first time this function is invoked */
static bool init_flag = FALSE;
if (!init_flag) {
init();
init_flag = TRUE;
}
/* generate a bitmask representing this set */
for (i = 0; i < len; i++) {
long bit = letter_masks[ set[i] ];
if (bit == 0)
continue;
fingerprint |= bit;
}
/* reduce the bitmask bit by bit */
reduce(fingerprint);
}
void
init(void) {
char letter;
unsigned bitshift;
/* zero out the "seen" array and the "letter_masks" array */
memset(seen, 0, sizeof(seen));
memset(letter_masks, 0, sizeof(letter_masks));
/* skip the null set, 'cause Limbic~Region sez so */
seen[0] = 1;
/* assign one bit mask for each letter (assumes contiguity of a-z)
+ */
for (letter = 'a', bitshift = 0;
letter <= 'z';
letter++, bitshift++
) {
letter_masks[letter] = 0x1 << bitshift;
}
return;
}
void
reduce(long fingerprint) {
unsigned bit;
/* if we've seen this set, we've seen all its subsets, so bail */
if (seen[fingerprint])
return;
/* first time we've seen this set, so print it */
seen[fingerprint] = 1;
decode_and_print(fingerprint);
/* iterate over the bits in the fingerprint */
for (bit = 1; bit <= 26; bit++) {
long single_bit_mask = 0x1 << (bit - 1);
if (fingerprint & single_bit_mask) {
/* turn off one bit and recurse */
reduce( fingerprint ^ single_bit_mask );
}
}
}
void
decode_and_print(long fingerprint) {
unsigned bit;
char letter;
/* shift one bit left, increment one letter */
for (bit = 1, letter = 'a';
bit <= 26;
bit++, letter++
) {
/* print the letter if its associated bit is on */
long single_bit_mask = 0x1 << (bit - 1);
if (fingerprint & single_bit_mask) {
putc(letter, stdout);
}
}
/* finish up with a newline */
putc('\n', stdout);
}