Just because I wanted to see what the compress and decompress subs looked like :)
#!/usr/bin/perl
# https://perlmonks.org/?node_id=1233613
use strict;
use warnings;
use re 'eval';
my %huffman = ( # from LanX https://perlmonks.org/?node_id=111
+00047
"" => "0110",
"2" => 1001000,
"24" => "0101000",
"246" => 101111110,
"247" => 101111011,
"248" => 101111010,
"249" => 110010100,
"25" => 1001010,
"257" => 101111001,
"258" => 101111101,
"259" => 101111000,
"26" => 1011100,
"268" => 101111100,
"269" => 101111111,
"27" => 1001011,
"279" => 110010101,
"28" => 1001001,
"29" => 11101011,
"3" => 111011,
"35" => "010111",
"357" => 11001011,
"358" => 11001001,
"359" => 10111011,
"36" => 110000,
"368" => 11101010,
"369" => 10111010,
"37" => 110001,
"379" => 11001000,
"38" => 110011,
"39" => "010110",
"4" => 10110,
"46" => "00100",
"468" => "0101011",
"469" => "0101010",
"47" => "01110",
"479" => "0101001",
"48" => "01111",
"49" => "00101",
"5" => "0100",
"57" => 10000,
"579" => 1110100,
"58" => 11100,
"59" => 10001,
"6" => 1010,
"68" => 11011,
"69" => 11010,
"7" => "000",
"79" => 10011,
"8" => 1111,
"9" => "0011",
);
my %tohuff = map { $_, $huffman{ ('23456789' & tr/01/ ?/r) =~ tr/ //dr
+ } }
grep !/11/ && tr/1// <= 3, glob '{0,1}' x 8;
my @allcodes = map "$huffman{$_}(?{$_})", keys %huffman;
my $fromhuff = do { local $" = '|'; qr/(?:@allcodes)/ };
sub compress
{
my $coded = '';
for ( shift =~ /(.*)\n/g )
{
my @lookup = (0) x 123;
@lookup[ unpack 'C*', $_ ] = (1) x length;
for( my $group = 35; $group < 123; $group += 10 )
{
$coded .= $tohuff{ join '', @lookup[$group .. $group + 7] };
}
}
return pack 'b*', $coded;
}
sub decompress
{
my $decoded = '';
my $decade = 33;
local $_ = unpack 'b*', shift;
while( /$fromhuff/g )
{
$decoded .= pack 'C*', map $decade + $_, split //, ($^R // '');
($decade += 10) >= 123 and $decade = 33, $decoded .= "\n";
}
return $decoded =~ s/^.+\z//mr;
}
my $input = '';
for (1 .. 100)
{
for (my $x=0; $x<90; $x+=10)
{
my @c;
push(@c, int (rand(10)+$x));
push(@c, int (rand(10)+$x));
push(@c, int (rand(10)+$x));
push(@c, int (rand(10)+$x));
@c = sort{$a<=>$b}@c;
for (my $i = 1; $i < @c; $i++)
{
$input .= chr(33+$c[$i]) if $c[$i] != $c[$i-1] && $c[$i] != $c[$
+i-1]+1;
}
}
$input .= "\n";
}
#use Data::Dump 'dd'; dd $_ for $input =~ /.*\n?/g;
print "\n input length ", length $input, "\n";
my $compressed = compress($input);
my $compressedlength = length $compressed;
print "compressed length $compressedlength\n";
my $restored = decompress($compressed);
if( $input eq $restored )
{
printf "\nMatched, compression ratio = %.1f%%\n",
100 * (1 - length($compressed) / length($restored));
}
else
{
print "----------------------------------------failed\n";
use Data::Dump 'dd'; dd $_ for $restored =~ /.*\n?/g;
}
Typical output:
input length 1512
compressed length 557
Matched, compression ratio = 63.2%