1 4 round apple,ball,orange,pumpkin 2 2 red,round apple,ball 2 3 plant,round apple,orange,pumpkin 3 1 red,round,toy ball 3 2 fruit,plant,round apple,orange 3 2 orange,plant,round orange,pumpkin 4 1 fruit,orange,plant,round orange 4 1 fruit,plant,red,round apple 4 1 orange,plant,round,vetetable pumpkin #### my $item2attr = { apple => {red=>1, round=>1,plant=>1,fruit=>1}, orange => {orange=>1,round=>1,plant=>1,fruit=>1}, pumpkin => {orange=>1,round=>1,plant=>1, vetetable=>1}, ball => {red=>1, round=>1, toy=>1}, }; ## alternatively in the block below, generate a random data set with ## 500 items and 75 attributes with randomly 2-10 attributes per item =pod my $n_items = 500; my $n_attributes = 75; my $min_attr_in_item = 2; my $max_attr_in_item = 10; $item2attr = {}; for my $i (1..$n_items) { my $item = sprintf("it%03d",$i); my $n_attr = $min_attr_in_item + rand(1+$max_attr_in_item-$min_attr_in_item) ; my @attrs = sort ((map { sprintf("at%03d",$_) } (sort {rand() <=> rand() } (1..75)))[0..$n_attr-1]); #printinfo($item,int(@attrs),@attrs); map {$item2attr->{$item}{$_} = 1} @attrs; } =cut # list of all items and attributes my @items = sort keys %$item2attr; my @attr = sort(uniq( map { keys %$_ } values %$item2attr)); # flip the hash my $attr2item; for my $attr (@attr) { map { $attr2item->{$attr}{$_} = $item2attr->{$_}{$attr} if $item2attr->{$_}{$attr} } @items; } report_sets($item2attr); report_sets($attr2item,-swap=>1); sub report_sets { my ($hash,%args) = @_; my $sets; for my $key (keys %$hash) { my $set_hash_str = join(",", sort keys %{$hash->{$key}}); $sets->{$set_hash_str}{$key}++; } for my $set_hash_str (keys %$sets) { my @attr = split(",",$set_hash_str); my @shared_attr = shared_items($hash,@attr); if($args{-swap}) { printinfo(int(@shared_attr),int(@attr),join(",",@shared_attr),join(",",@attr)); } else { printinfo(int(@attr),int(@shared_attr),join(",",@attr),join(",",@shared_attr)); } } } sub shared_items { my ($hash,@attr) = @_; my @shared_items; my @items = keys %$hash; for my $item (@items) { my $n = grep($hash->{$item}{$_}, @attr); push @shared_items, $item if $n == @attr; } return sort @shared_items; }