# Test data keyed by node name ('iaa' .. 'izz').  After build_test_data()
# has run, each value is a reference to a sorted array of keywords.
my %Items;

# build_test_data([ANYTHING])
#
# Fills the file-level %Items with deterministic pseudo-random test data:
# one entry per node name 'iaa' .. 'izz', each holding a reference to an
# asciibetically sorted array of unique keywords.
#
# Passing any argument at all (only the presence of @_ is checked)
# additionally prints a Data::Dumper dump of %Items.
#
# Side effect: reseeds Perl's global random number generator.
#
# The sub may be called repeatedly; every call rebuilds the same data.
sub build_test_data
{
    # Reproducible case: a fixed seed yields the same data on every call.
    srand(12345);

    # Sorted by prevalence.  Keyword 'kaa' is way more common than 'kzz',
    # because the product of two uniform draws below is skewed towards 0,
    # i.e. towards the front of this list.
    my @keywords = 'kaa' .. 'kzz';

    # Each node is associated with an asciibetical list of unique keywords.
    for my $node ('iaa' .. 'izz')
    {
        # 4 to 11 draws per node; duplicate draws collapse into one key.
        my $draws = int(rand(8)) + 4;

        # Collect into a lexical hash rather than into $Items{$node}
        # itself: the slot ends up holding an array ref, so using it as a
        # hash here would die ("Not a HASH reference") on a second call.
        my %seen;
        $seen{ $keywords[ int(rand() * rand() * @keywords) ] }++
            for 1 .. $draws;

        # We groom out the top keywords which are basically noise.
        delete @seen{ 'kaa' .. 'kab' };

        $Items{$node} = [ sort keys %seen ];
    }

    return unless @_;

    # Fully qualified call: a bare "print Dumper \%Items" is parsed as a
    # print to a filehandle named Dumper whenever the function has not
    # been imported before this line is compiled.
    require Data::Dumper;
    print Data::Dumper::Dumper(\%Items);    # lots of raw data!
}

# Populate %Items now.  No argument is passed, so the raw data dump is skipped.
build_test_data();