#!/usr/bin/perl use strict; use warnings; use Inline 'C'; for my $file (@ARGV) { open(my $fh, '<', $file) or die "Unable to open '$file' for reading: $!"; while (<$fh>) { my ($set) = $_ =~ /^(\w+)/; #print "INPUT: $set\n"; powerset($set, length($set)); } } __END__ __C__ /* Generate a powerset for the set of all unique lowercase letters detected in * a string of length [len]. */ void powerset(char *set, unsigned len); /* Set up. */ void init(void); /* Start with a number. Print it's associated powerset if we haven't seen it * before. Iterate over the bits, negating one each time and recurse. */ void reduce(long fingerprint); /* Print the letter associated with each bit in a 32-bit bitmask */ void decode_and_print(long fingerprint); /* 0x4000000 is 2**26. This array has one slot for every possible unordered * set of unique lower-case letters. * * Note: if memory consumption were a concern, this would be implemented using * a bit vector rather than an array of char. */ static char seen[0x4000000]; /* Map of individual letters to a bit mask with a single bit set. */ static long letter_masks[128]; void powerset(char *set, unsigned len) { long fingerprint = 0; unsigned i; /* call init the first time this function is invoked */ static bool init_flag = FALSE; if (!init_flag) { init(); init_flag = TRUE; } /* generate a bitmask representing this set */ for (i = 0; i < len; i++) { long bit = letter_masks[ set[i] ]; if (bit == 0) continue; fingerprint |= bit; } /* reduce the bitmask bit by bit */ reduce(fingerprint); } void init(void) { char letter; unsigned bitshift; /* zero out the "seen" array and the "letter_masks" array */ memset(seen, 0, sizeof(seen)); memset(letter_masks, 0, sizeof(letter_masks)); /* skip the null set, 'cause Limbic~Region sez so */ seen[0] = 1; /* assign one bit mask for each letter (assumes contiguity of a-z) */ for (letter = 'a', bitshift = 0; letter <= 'z'; letter++, bitshift++ ) { letter_masks[letter] = 0x1 << bitshift; } return; } void reduce(long fingerprint) { unsigned bit; /* if we've seen this set, we've seen all its subsets, so bail */ if (seen[fingerprint]) return; /* first time we've seen this set, so print it */ seen[fingerprint] = 1; decode_and_print(fingerprint); /* iterate over the bits in the fingerprint */ for (bit = 1; bit <= 26; bit++) { long single_bit_mask = 0x1 << (bit - 1); if (fingerprint & single_bit_mask) { /* turn off one bit and recurse */ reduce( fingerprint ^ single_bit_mask ); } } } void decode_and_print(long fingerprint) { unsigned bit; char letter; /* shift one bit left, increment one letter */ for (bit = 1, letter = 'a'; bit <= 26; bit++, letter++ ) { /* print the letter if its associated bit is on */ long single_bit_mask = 0x1 << (bit - 1); if (fingerprint & single_bit_mask) { putc(letter, stdout); } } /* finish up with a newline */ putc('\n', stdout); }