For set A B C, you can enumerate the powerset in binary: 111 ABC 110 AB 101 A C 100 A 011 BC 010 B 001 C 000 For BCD, it would be thus: 111 BCD 110 BC 101 B C 100 B 011 CD 010 C 001 D 000 Instead, we want to offset the values in BCD and leave an empty slot for A. We'll end up with this: 0111 BCD 0110 BC 0101 B C 0100 B 0011 CD 0010 C 0001 D 0000 #### (for ABC) Have we seen A before? No? A = slot 0 Have we seen B before? No? B = slot 1 Have we seen C before? No? C = slot 2 (for BCD) Have we seen B before? YES - return 1 Have we seen C before? YES - return 2 Have we seen D before? No? D == slot 3 #### By using a dead bit for A, we actually have this: 1111 invalid 1110 invalid 1101 invalid 1100 invalid 1011 invalid 1010 invalid 1001 invalid 1000 invalid 0111 BCD 0110 BC 0101 B C 0100 B 0011 CD 0010 C 0001 D 0000 #### 11111111111111111111111111 invalid 11111111111111111111111110 invalid 11111111111111111111111101 invalid . . . 00000000000000000000000010 invalid 00000000000000000000000001 valid (z) #### From up above, set (z) start at 11111111111111111111111111 deadbit string = 11111111111111111111111110 start & deadbit = 11111111111111111111111110 this is > 0, so xor. (start ^ deadbit) & start = 00000000000000000000000001 Bang! one and, a conditional, and an xor jumped you past 67 million some odd invalid sets. Another one, from above, set (ABCD) start at 1111 deadbit string = 1110 start & deadbit = 1110 this is > 0, so xor (start ^ deadbit) & start = 0111 (starts at BCD Here's a more complex example. Assume our first set was ABC, and our next was ABD. (A = 0, B = 1, C = 2, D = 3). When we hit ABD, we flag C as our deadbit and our deadbit string would be 0010. Let's try a complete runthrough: 1111 1111 & 0010 > 0 -> (1111 ^ 0010) & 1111 -> new index is 1101 1101 ABD (1101 & 0010 == 0) 1100 AB (1100 & 0010 == 0) 1011 (1011 & 0010 == 0) 1011 & 0010 > 0 -> (1011 ^ 0010) & 1011 -> new index is 1001 1001 A D (1001 & 0010 == 0) 1000 A (1000 & 0010 == 0) 0111 0111 & 0010 > 0 -> (0111 ^ 0010) & 0111 -> new index is 0101 0101 B D (0101 & 0010 == 0) 0100 B (0100 & 0010 == 0) 0011 0011 & 0010 > 0 -> (0011 ^ 0010) & 0010 -> new index is 0001 0001 D (0001 & 0010 == 0) 0000 () (0000 & 0010 == 0) #### use strict; use warnings; # returns _3_ closures to generate certain powersets #arbitrary benchmark device, used to se how many times the iterator was called. my $calls = 0; sub limbic_power_generator { my $set = shift; # we re-define skippers as an array and allow the user to pass it in, so we # can keep track of previously skipped values. The value of the hash was # that it prevents dupes. Note - it would be better to mod the old version # to always use a descendingly sorted array, but is left to the reader. my $skippers = shift || {}; my $deadbits = shift || 0; #we start with the original set and count down to the null set my $set_idx = 2 ** @$set; #these are the set indexes we should skip my %skippers = (); # our first closure generates subsets my $generator = sub { # arbitrary benchmark device, that way you can see how may times the iterator # was called $calls++; #bow out if we're out of sets return () unless $set_idx; # Start decrementing the set_idx. We always do this at least once, so # we get to the next set. Our original number is 2 ** @set, so we start # at 2 ** @set - 1 after the decrement while (1) { $set_idx--; # check to see if this set contains a deadbit, and if so hop over it. if ($set_idx & $deadbits) { # make sure that we don't accidentally jump up to a higher set index. # this can happen if you have deadbits beyond the length of your set $set_idx = ($set_idx ^ $deadbits) & $set_idx; } #bow out if this set is NOT a subset of any set we're skipping last unless $skippers->{$set_idx}; #bow out of the function completely with the null set if we've hit 0. return () unless $set_idx; } # Now we convert our set_idx to binary. Each bit stores whether the element # is in this subset. For example, set_idx 11 would be 1011, so we keep # elements 0, 2, and 3. my @in_set = split //, unpack("b*", pack("V",$set_idx)); # now we return a list. The first element is an arrayref which is the actual # subset we generated, the second is our set_idx. return ([map { $set->[$_] } grep { $in_set[$_] } (0..$#$set)], $set_idx); }; # our second closure allows you to add sets to skip # it also returns the list of skipped values my $skipper = sub { if (@_) { my $skip_key = shift; $skippers->{$skip_key}++; } return $skippers; }; # return both of our closures. return ($generator, $skipper) } # we'll use the example sets from node 580625 my $limbic_sets = [ [qw(A B C)], [qw(A B D)], [qw(A B)], [qw(B C)], [qw(E)], [qw(A B C E)], [qw(A B C D E)], ]; # our index lookup hash. There are potential savings by pre-caching these values # if all elements are known in advance. my %idx = (); my $next_open_idx = 0; # our sets to skip my %skippers = (); foreach my $limbic_set (@$limbic_sets) { print "checks set @$limbic_set\n"; # we need to keep track of which indexes are dead, so we copy the # known indexes my %dead_idx = %idx; # here we'll keep track of the bits that are dead my $deadbits = 0; # we now need to iterate over our set. If we know the index of that element # then great. That means we've seen it before, and it's currently live, so # delete it from our list of dead bits. # # otherwise, assign it a new index. foreach my $elem (@$limbic_set) { if (defined $idx{$elem}) { delete $dead_idx{$elem}; } else { $idx{$elem} = $next_open_idx++; } } #here we're going to store the indexes which are dead my %dead_lookup = (); # iterate over our dead elements list, and toss it into the deadbits string # and add its index to the lookup foreach my $idx (values %dead_idx) { $deadbits |= 2 ** $idx; $dead_lookup{$idx}++; } # we need to pad out set with dead bits. So if we call with (ABC), then later # with (ABD), we need to turn that into (AB D) my $padded_limbic_set = []; my $padded_limbic_idx = 0; foreach my $idx (0..$#$limbic_set) { # if that index is dead, then toss in a placeholder and shift the array # element forward. This is using parallel indexes, there may be a more # efficient method. if ($dead_lookup{$padded_limbic_idx}) { $padded_limbic_set->[$padded_limbic_idx++] = undef; redo; } $padded_limbic_set->[$padded_limbic_idx++] = $limbic_set->[$idx]; } # get our iterators, using the padded set, skippers, and deadbits. my ($limbic_iterator, $limbic_skipper) = limbic_power_generator($padded_limbic_set, \%skippers, $deadbits); #as we see an eleemnt, we're going to add it to this list, so we skip it on the next pass. my %future_skippers = (); #and start cruising over our powersets. while ( my ($set, $idx) = $limbic_iterator->() ) { #fancy crap to get it to print out properly. my $display = {map {$_ => 1} grep {defined} @$set}; my $format = "%2s" x scalar(@$padded_limbic_set) . " (%d)\n"; printf($format, (map {defined $_ && $display->{$_} ? $_ : ' '} @$padded_limbic_set), $idx); #we don't skip anything in this pass, but we'll do it the next time around. $future_skippers{$idx}++; } @skippers{keys %future_skippers} = values %future_skippers; } print "TOTAL CALLS $calls\n"; #### #!/usr/bin/perl use strict; use warnings; sub set_generator { my ($set, $set_idx, $skippers, $deadbits) = @_; # Start decrementing the set_idx. We always do this at least once, so # we get to the next set. Our original number is 2 ** @set, so we start # at 2 ** @set - 1 after the decrement while ($set_idx--) { # check to see if this set contains a deadbit, and if so hop over it. # make sure that we don't accidentally jump up to a higher set index. # this can happen if you have deadbits beyond the length of your set $set_idx = ($set_idx ^ $deadbits) & $set_idx; #bow out if this set is NOT a subset of any set we're skipping last unless $skippers->{$set_idx}; #bow out of the function completely with the null set if we've hit 0. } #bow out if we're out of sets return () unless $set_idx; # Now we convert our set_idx to binary. Each bit stores whether the element # is in this subset. For example, set_idx 11 would be 1011, so we keep # elements 0, 2, and 3. my @in_set = split //, unpack("b*", pack("L",$set_idx)); # now we return a list. The first element is an arrayref which is the actual # subset we generated, the second is our set_idx. return ([map { $set->[$_] } grep { $in_set[$_] } (0..$#$set)], $set_idx); }; # our index lookup hash. Assume letters 'a'-'z'. The original version of # dynamically assigning indexes had a few bugs. Whoops. So now you're required # to know all elements in advance. Damn. I'll fix this later, I guess. my @letters = ('a'..'z'); my %idx = map {$letters[$_] => $_} (0..25); # our sets to skip my %skippers = (); while (my $limbic_string = <>) { chomp $limbic_string; my $limbic_set = [split //, $limbic_string]; # we need to keep track of which indexes are dead, so we copy the # known indexes my %dead_idx = %idx; # here we'll keep track of the bits that are dead my $deadbits = 0; #remove the live bits from the dead set. delete @dead_idx{@$limbic_set}; # iterate over our dead elements list, and toss it into the deadbits string # and splice in an undef to our list. foreach my $idx (sort {$a <=> $b} values %dead_idx) { $deadbits |= 2 ** $idx; splice @$limbic_set, $idx, 0, undef unless $idx >= @$limbic_set; } #as we see an element, we're going to add it to this list, so we skip it on the next pass. my @future_skippers = (); my $set_idx = 2 ** @$limbic_set; #and start cruising over our powersets. while (my ($set, $newidx) = set_generator($limbic_set, $set_idx, \%skippers, $deadbits)) { print @$set,"\n"; #note our new set index $set_idx = $newidx; #we don't skip anything in this pass, but we'll do it the next time around. push @future_skippers, $set_idx; } @skippers{@future_skippers} = (1) x @future_skippers; }