# Agglomerative clustering driver: repeatedly replace the two closest entries
# of @$items by a single cluster until at most 10 entries remain.  An entry is
# either a document (hash ref with a {words} hash) or a cluster (two-element
# array ref produced by merge()).
while ( @$items > 10 ) {
    _merge_closest_pair($items);

    # Progress display: blank the line, then show how many entries are left.
    print " \r";
    print scalar @$items, "\r";
}

# Finds the two closest entries of @$items, removes both from the list and
# appends the cluster built from them.
#   $items - array ref of documents/clusters; modified in place.
# Returns the new cluster, or nothing when fewer than two entries exist.
sub _merge_closest_pair {
    my ($items) = @_;

    return if @$items < 2;

    my ( $low_idx, $high_idx ) = _closest_pair($items);

    # Remove the higher index first: splicing out the lower one first would
    # shift every later element down by one and make the second splice hit
    # the wrong entry.
    my ($second) = splice( @$items, $high_idx, 1 );
    my ($first)  = splice( @$items, $low_idx,  1 );

    my $cluster = merge( $first, $second );
    push @$items, $cluster;

    return $cluster;
}

# Returns the indices ($i, $j), with $i < $j, of the pair of entries in
# @$items that has the smallest max_diff().  Ties go to the first pair found
# when scanning in index order.  Returns an empty pair (undef, undef) when
# the list holds fewer than two entries.
sub _closest_pair {
    my ($items) = @_;

    my ( $best_i, $best_j, $best_diff );

    # max_diff() is symmetric, so only pairs with $j > $i need to be checked.
    PAIR: for my $i ( 0 .. $#{$items} - 1 ) {
        for my $j ( $i + 1 .. $#{$items} ) {
            my $diff = max_diff( $items->[$i], $items->[$j] );

            # $best_diff starts undefined instead of using a magic "large"
            # number, so arbitrarily distant pairs are still selectable.
            next if defined $best_diff and $diff >= $best_diff;

            ( $best_i, $best_j, $best_diff ) = ( $i, $j, $diff );

            # Nothing can beat identical word sets; stop searching.
            last PAIR if $best_diff == 0;
        }
    }

    return ( $best_i, $best_j );
}

# Combines two entries into a new two-element cluster (array ref).
#   $x, $y - documents (hash refs) or clusters (array refs).
# When exactly one argument is a cluster and it is passed second, the cluster
# is placed first; in every other case the order is [$x, $y].
# Dies if either argument is neither a hash ref nor an array ref.
sub merge {
    my ( $x, $y ) = @_;

    for my $type ( ref $x, ref $y ) {
        die "Wtf? $x $y" unless $type eq 'HASH' or $type eq 'ARRAY';
    }

    return [ $y, $x ] if ref $x eq 'HASH' and ref $y eq 'ARRAY';
    return [ $x, $y ];
}

# Distance between two entries.
#   $d1, $d2 - documents (hash refs with a {words} hash ref) or clusters
#              (array refs whose elements [0] and [1] are entries).
# For two documents the distance is the size of the symmetric difference of
# their {words} key sets.  When a cluster is involved, the distance is the
# largest distance between any member of one side and any member of the
# other, computed recursively.
# Dies if either argument is neither a hash ref nor an array ref.
sub max_diff {
    my ( $d1, $d2 ) = @_;

    my ( $type1, $type2 ) = ( ref $d1, ref $d2 );
    die "Wtffffff $d1 $d2"
        if grep { $_ ne 'HASH' and $_ ne 'ARRAY' } $type1, $type2;

    if ( $type1 eq 'HASH' and $type2 eq 'HASH' ) {
        my ( $words1, $words2 ) = ( $d1->{words}, $d2->{words} );

        # Count the words that appear on one side only.
        my $only_in_1 = grep { !exists $words2->{$_} } keys %$words1;
        my $only_in_2 = grep { !exists $words1->{$_} } keys %$words2;

        return $only_in_1 + $only_in_2;
    }

    # At least one side is a cluster: treat a document as a one-member group
    # and a cluster as its two members, then take the worst pairing.
    my @left  = $type1 eq 'ARRAY' ? @{$d1}[ 0, 1 ] : ($d1);
    my @right = $type2 eq 'ARRAY' ? @{$d2}[ 0, 1 ] : ($d2);

    return max(
        map {
            my $member = $_;
            map { max_diff( $member, $_ ) } @right
        } @left
    );
}