A divide-and-conquer approach may help you. There is a very fast algorithm (nearly order N) called UnionFind which will divide the problem into groups of items that have
keys in common. Then each partition could be treated separately.
#!/usr/bin/perl
use strict;
use warnings;
use Bit::Vector::Overload;
use Graph::UnionFind;
# Insert real data here.
my %items = (
a => [ qw/one six/ ],
b => [ qw/two three five/ ],
c => [ qw/one two five/ ],
);
my $icount = keys %items;
print "Doing $icount items\n";
# Assign an index position to each item.
# Form a mapping from items to bit positions.
# Collect a list of bitmaps for combination work.
my @revipos;
my %ipos;
my @ilst;
my $ix = 0;
for my $itm ( sort keys %items ) {
$ipos{$itm} = $ix;
push @revipos,$itm;
my $set1 = new Bit::Vector($icount + 1);
$set1->Bit_On($ix);
push @ilst, $set1;
++$ix;
}
# Form a mapping from keywords to bit positions.
my $scount = 0;
my @revkpos;
my %kpos;
for my $itm ( @revipos ) {
for my $keyw ( @{ $items{ $itm }} ) {
if (!exists $kpos{$keyw}) {
$kpos{$keyw} = $scount++;
push @revkpos, $keyw;
}
}
}
# Also form a reverse index for later printing.
#my %revkpos = reverse(%kpos);
my $uf = Graph::UnionFind->new;
# Supplemental data to look up by union number.
my %union_data;
# Form bit vectors with ones in the keyword positions,
# one for every item.
my %keyword_vecs;
my @lst1;
my $i = 0;
for my $itm ( @revipos ) {
#print "doing item $i\n";
my $set0 = new Bit::Vector($scount + 1);
$keyword_vecs{$itm} = $set0;
for my $keyw ( @{ $items{ $itm }} ) {
$set0->Bit_On($kpos{$keyw});
}
# hold both sets - items and keywords together.
#push @lst1,[$ilst[$ipos{$itm}], $set0];
# unionfind to detect partitions.
$uf->add($i);
my $unioned = 0;
my @ukeys = keys %union_data;
for my $uk (@ukeys) {
my $partu = $uf->find($uk);
if ($uk != $partu) {
delete $union_data{$uk};
}
if ($set0 & $union_data{$partu}) {
$unioned = 1;
$uf->union( $i, $partu);
my $parti = $uf->find( $i );
my $newset = $set0 | $union_data{$partu};
delete $union_data{$partu} if ($partu != $parti);
$union_data{$parti} = $newset;
}
}
$union_data{$i} = $set0 if (!$unioned);
++$i;
}
print "Expecting ",scalar(keys %union_data)," partitions\n";
my @parts;
my %unique_parts;
my $pcount = 0;
for $i (0 .. $#revipos) {
my $parti = $uf->find( $i );
if (!exists $unique_parts{$parti}) {
$unique_parts{$parti} = $pcount;
push @{ $parts[$pcount] }, $i;
$pcount++;
} else {
push @{ $parts[ $unique_parts{$parti} ] }, $i;
}
}
print "Found $pcount partitions\n";
for $i (0 .. $#parts) {
print scalar( @{ $parts[$i] })," items in partition $i\n";
}