halley,
In between the commercials of Lost and Alias (literally), I came up with the following:
#!/usr/bin/perl
use strict;
use warnings;
my %items = (
a => [ qw/one six/ ],
b => [ qw/two three five/ ],
c => [ qw/one two five/ ],
);
my $tuple = 2;
# $common may be 0
my ($common, @list) = tuples(\%items, $tuple);
print "Requiring each key contain a minimum of $tuple in common,\n";
print "$common keys is the most that can be found:\n";
print "$_\n" for @list;
sub tuples {
my ($data, $by) = @_;
my ($max, $bit) = (0, 0);
my (@key, %tuple);
for ( keys %$data ) {
for ( @{ $data->{ $_ } } ) {
$tuple{ $_ } = '' if ! exists $tuple{ $_ };
vec($tuple{ $_ }, $bit, 1) = 1;
}
++$bit;
}
my $next = combo($by, sort keys %tuple);
while ( my @combo = $next->() ) {
my $common;
$common = defined $common ? $common & $tuple{ $_ } : $tuple{ $
+_ } for @combo;
my $tot = unpack("b*", $common) =~ tr/1//;
if ( $tot > $max ) {
($max, @key) = ($tot, "@combo");
}
elsif ( $tot == $max ) {
push @key, "@combo";
}
}
return ($max, @key);
}
sub combo {
my $by = shift;
return sub { () } if ! $by || $by =~ /\D/ || @_ < $by;
my @list = @_;
my @position = (0 .. $by - 2, $by - 2);
my @stop = @list - $by .. $#list;
my $end_pos = $#position;
my $done = undef;
return sub {
return () if $done;
my $cur = $end_pos;
{
if ( ++$position[ $cur ] > $stop[ $cur ] ) {
$position[ --$cur ]++;
redo if $position[ $cur ] > $stop[ $cur ];
my $new_pos = $position[ $cur ];
@position[ $cur .. $end_pos ] = $new_pos .. $new_pos +
+ $by;
}
}
$done = 1 if $position[0] == $stop[0];
return @list[ @position ];
}
}
I think it does what you want but it is quite rough around the edges. I will see about cleaning it up tomorrow. Do you happen to have a bigger sample so that I can benchmark?
Update: Made minor cleanups to code