# Sum the counts of tab-separated "word<TAB>count" records read from the files
# named on the command line, then print "word<TAB>total" lines ordered by
# descending total and, within equal totals, ascending word (bytewise).
#
# Each distinct word owns one fixed-width record inside the string $data:
#   bytes 0-1  : ($COUNT_MAX - total), 16-bit big-endian ('n')
#   bytes 2-11 : the word, NUL-padded ('Z10' always writes a trailing NUL,
#                so at most 9 bytes of the word can be kept)
# Because the complement of the total is stored, an ascending bytewise sort of
# the records puts the largest totals first.
#
# Progress, timings and memory usage go to STDERR; results go to STDOUT.
use strict;
use warnings;

use Judy::HS qw/ Set Get Free /;
use Sort::Packed 'sort_packed';
use Memory::Usage;

my $DATA_TEMPLATE    = 'nZ10';
my $DATA_SIZE        = 12;
my $COUNT_SIZE_BYTES = 2;
my $COUNT_SIZE_BITS  = 16;
my $COUNT_MAX        = int( 2 ** $COUNT_SIZE_BITS - 1 );

# Longest word a record can hold: record size minus the count field minus the
# NUL terminator that 'Z' reserves.
my $WORD_MAX_BYTES = $DATA_SIZE - $COUNT_SIZE_BYTES - 1;

# A record's count field, expressed as an index in 16-bit units for vec().
my $FIELDS_PER_RECORD = $DATA_SIZE / $COUNT_SIZE_BYTES;

# Abort when a running total no longer fits the 16-bit count field; letting it
# wrap would silently print wrong totals.
sub die_count_out_of_range {
    my ( $word, $fname ) = @_;
    die "$fname line $.: total for '$word' is outside 0..$COUNT_MAX\n";
}

@ARGV or die "usage: $0 file...\n";
my @llil_files = @ARGV;

warn "my_test start\n";
my $tstart1 = time;

# $data holds the packed records; $judy maps each word to its record number;
# $current is the record number the next new word will receive.
my ( $data, $current ) = ( '', 0 );
my $judy;

for my $fname ( @llil_files ) {
    open( my $fh, '<', $fname ) or die "Cannot open '$fname': $!\n";

    LINE:
    while ( my $line = <$fh> ) {
        chomp $line;
        my ( $word, $count ) = split /\t/, $line;

        # Blank lines and lines without a tab carry no usable record.
        unless ( defined $count ) {
            warn "$fname line $.: skipping malformed record\n";
            next LINE;
        }

        my ( undef, $slot ) = Get( $judy, $word );

        if ( defined $slot ) {
            # Known word: subtracting from the stored complement adds to the
            # total.
            my $index  = $slot * $FIELDS_PER_RECORD;
            my $stored = vec( $data, $index, $COUNT_SIZE_BITS ) - $count;
            die_count_out_of_range( $word, $fname )
                if $stored < 0 || $stored > $COUNT_MAX;
            vec( $data, $index, $COUNT_SIZE_BITS ) = $stored;
        }
        else {
            # New word: 'Z10' would silently cut it to $WORD_MAX_BYTES bytes,
            # making distinct long words print as the same truncated word.
            die "$fname line $.: word '$word' is longer than $WORD_MAX_BYTES bytes\n"
                if length( $word ) > $WORD_MAX_BYTES;

            my $stored = $COUNT_MAX - $count;
            die_count_out_of_range( $word, $fname )
                if $stored < 0 || $stored > $COUNT_MAX;

            $data .= pack $DATA_TEMPLATE, $stored, $word;
            Set( $judy, $word, $current );
            $current++;
        }
    }

    close $fh or die "Cannot close '$fname': $!\n";
}
Free( $judy );

my $tend1 = time;
warn "get_properties : ", $tend1 - $tstart1, " secs\n";

my $tstart2 = time;

# Sort whole records as runs of $DATA_SIZE unsigned bytes.
sort_packed "C$DATA_SIZE", $data;

# Peel records off the front of the buffer one at a time. Test the length, not
# string truth, so the loop cannot stop early on a buffer that is just "0".
while ( length $data ) {
    my ( $stored, $word ) = unpack $DATA_TEMPLATE, substr $data, 0, $DATA_SIZE, '';
    printf "%s\t%d\n", $word, $COUNT_MAX - $stored;
}

my $tend2 = time;
warn "sort + output : ", $tend2 - $tstart2, " secs\n";
warn "total : ", $tend2 - $tstart1, " secs\n";

# Report field 3 of the first Memory::Usage sample (presumably the resident
# set size in kB -- confirm against the Memory::Usage documentation).
my $m = Memory::Usage->new;
$m->record;
warn $m->state->[0][3], " Kbytes of RAM were used\n";