#!/usr/bin/perl use strict; use warnings; use Bit::Vector::Overload; use Graph::UnionFind; # Insert real data here. my %items = ( a => [ qw/one six/ ], b => [ qw/two three five/ ], c => [ qw/one two five/ ], ); my $icount = keys %items; print "Doing $icount items\n"; # Assign an index position to each item. # Form a mapping from items to bit positions. # Collect a list of bitmaps for combination work. my @revipos; my %ipos; my @ilst; my $ix = 0; for my $itm ( sort keys %items ) { $ipos{$itm} = $ix; push @revipos,$itm; my $set1 = new Bit::Vector($icount + 1); $set1->Bit_On($ix); push @ilst, $set1; ++$ix; } # Form a mapping from keywords to bit positions. my $scount = 0; my @revkpos; my %kpos; for my $itm ( @revipos ) { for my $keyw ( @{ $items{ $itm }} ) { if (!exists $kpos{$keyw}) { $kpos{$keyw} = $scount++; push @revkpos, $keyw; } } } # Also form a reverse index for later printing. #my %revkpos = reverse(%kpos); my $uf = Graph::UnionFind->new; # Supplemental data to look up by union number. my %union_data; # Form bit vectors with ones in the keyword positions, # one for every item. my %keyword_vecs; my @lst1; my $i = 0; for my $itm ( @revipos ) { #print "doing item $i\n"; my $set0 = new Bit::Vector($scount + 1); $keyword_vecs{$itm} = $set0; for my $keyw ( @{ $items{ $itm }} ) { $set0->Bit_On($kpos{$keyw}); } # hold both sets - items and keywords together. #push @lst1,[$ilst[$ipos{$itm}], $set0]; # unionfind to detect partitions. $uf->add($i); my $unioned = 0; my @ukeys = keys %union_data; for my $uk (@ukeys) { my $partu = $uf->find($uk); if ($uk != $partu) { delete $union_data{$uk}; } if ($set0 & $union_data{$partu}) { $unioned = 1; $uf->union( $i, $partu); my $parti = $uf->find( $i ); my $newset = $set0 | $union_data{$partu}; delete $union_data{$partu} if ($partu != $parti); $union_data{$parti} = $newset; } } $union_data{$i} = $set0 if (!$unioned); ++$i; } print "Expecting ",scalar(keys %union_data)," partitions\n"; my @parts; my %unique_parts; my $pcount = 0; for $i (0 .. $#revipos) { my $parti = $uf->find( $i ); if (!exists $unique_parts{$parti}) { $unique_parts{$parti} = $pcount; push @{ $parts[$pcount] }, $i; $pcount++; } else { push @{ $parts[ $unique_parts{$parti} ] }, $i; } } print "Found $pcount partitions\n"; for $i (0 .. $#parts) { print scalar( @{ $parts[$i] })," items in partition $i\n"; }