> But at this point I'd most probably just try Huffman coding with a single table and even higher compression.
Or maybe not, I just tried to calculate the Huffman codes, and the resulting extra win is only at 5% more.
use strict;
use warnings;
use Data::Dump qw/pp dd/;
# my @freq = qw/5 7 10 15 20 45/;
# my $sym = "1";
# my @tree = map { [ $_ => $sym++ ] } @freq;
my %freq = (
"" => 592,
"2" => 74,
"24" => 60,
"246" => 24,
"247" => 24,
"248" => 24,
"249" => 24,
"25" => 84,
"257" => 24,
"258" => 24,
"259" => 24,
"26" => 84,
"268" => 24,
"269" => 24,
"27" => 84,
"279" => 24,
"28" => 84,
"29" => 60,
"3" => 208,
"35" => 144,
"357" => 48,
"358" => 48,
"359" => 48,
"36" => 192,
"368" => 48,
"369" => 48,
"37" => 192,
"379" => 48,
"38" => 192,
"39" => 144,
"4" => 366,
"46" => 228,
"468" => 72,
"469" => 72,
"47" => 300,
"479" => 72,
"48" => 300,
"49" => 228,
"5" => 524,
"57" => 312,
"579" => 96,
"58" => 408,
"59" => 312,
"6" => 682,
"68" => 396,
"69" => 396,
"7" => 840,
"79" => 336,
"8" => 830,
"9" => 508,
);
my @tree = map { [ $freq{$_} => $_ ] } keys %freq;
#warn pp \@tree;
#exit;
while ( @tree > 1 ) {
@tree = sort { $a->[0] <=> $b->[0] } @tree;
my $left = shift @tree;
my $right = shift @tree;
unshift @tree, [ ($left->[0] + $right->[0]) => [ $left, $right ]
+ ];
}
#warn pp \@tree;
my %huff;
create_code($tree[0],"");
#warn pp \%huff;
my $avr = 0;
$avr += $freq{$_} * length($huff{$_}) / 10000
for keys %freq;
my $classic = log(50)/log(2);
my $old=0;
$old += $freq{$_} * length($_) * 8 / 10000
for keys %freq;
$old += 8/9 ; # \n
warn pp {
"old" => $old,
"line" => $old*9/8 . " bytes",
"huff average" => $avr ,
"classic length" => $classic ,
"huff win" => $avr/$old*100 ." %",
"classic win" => $classic/$old*100 ." %",
};
sub create_code {
my ($node,$code) = @_;
my $sym = $node->[1];
if ( ref $sym ) {
create_code( $sym->[0], $code."0" );
create_code( $sym->[1], $code."1" );
} else {
$huff{$sym} = $code;
}
}
Bits needed per group:
{
"classic length" => 5.64385618977472,
"classic win" => "42.155801598081 %",
"huff average" => 4.9824,
"huff win" => "37.215169703086 %",
"line" => "15.0616 bytes",
"old" => 13.3880888888889,
} at d:/exp/huffman.pl line 98.
Update
OTOH does the Huffman with ~5 bits/group need 1/6 less than you did with 6 bits/group (the 54 bit/line solution)
Depends on the POV I think! :)
FWIW here the Huffman code
{
"" => "0110",
"2" => 1001000,
"24" => "0101000",
"246" => 101111110,
"247" => 101111011,
"248" => 101111010,
"249" => 110010100,
"25" => 1001010,
"257" => 101111001,
"258" => 101111101,
"259" => 101111000,
"26" => 1011100,
"268" => 101111100,
"269" => 101111111,
"27" => 1001011,
"279" => 110010101,
"28" => 1001001,
"29" => 11101011,
"3" => 111011,
"35" => "010111",
"357" => 11001011,
"358" => 11001001,
"359" => 10111011,
"36" => 110000,
"368" => 11101010,
"369" => 10111010,
"37" => 110001,
"379" => 11001000,
"38" => 110011,
"39" => "010110",
"4" => 10110,
"46" => "00100",
"468" => "0101011",
"469" => "0101010",
"47" => "01110",
"479" => "0101001",
"48" => "01111",
"49" => "00101",
"5" => "0100",
"57" => 10000,
"579" => 1110100,
"58" => 11100,
"59" => 10001,
"6" => 1010,
"68" => 11011,
"69" => 11010,
"7" => "000",
"79" => 10011,
"8" => 1111,
"9" => "0011",
} at d:/exp/huffman.pl line 83.
|