#!/usr/bin/perl # https://perlmonks.org/?node_id=1233613 use strict; use warnings; use re 'eval'; my %huffman = ( # from LanX https://perlmonks.org/?node_id=11100047 "" => "0110", "2" => 1001000, "24" => "0101000", "246" => 101111110, "247" => 101111011, "248" => 101111010, "249" => 110010100, "25" => 1001010, "257" => 101111001, "258" => 101111101, "259" => 101111000, "26" => 1011100, "268" => 101111100, "269" => 101111111, "27" => 1001011, "279" => 110010101, "28" => 1001001, "29" => 11101011, "3" => 111011, "35" => "010111", "357" => 11001011, "358" => 11001001, "359" => 10111011, "36" => 110000, "368" => 11101010, "369" => 10111010, "37" => 110001, "379" => 11001000, "38" => 110011, "39" => "010110", "4" => 10110, "46" => "00100", "468" => "0101011", "469" => "0101010", "47" => "01110", "479" => "0101001", "48" => "01111", "49" => "00101", "5" => "0100", "57" => 10000, "579" => 1110100, "58" => 11100, "59" => 10001, "6" => 1010, "68" => 11011, "69" => 11010, "7" => "000", "79" => 10011, "8" => 1111, "9" => "0011", ); my %tohuff = map { $_, $huffman{ ('23456789' & tr/01/ ?/r) =~ tr/ //dr } } grep !/11/ && tr/1// <= 3, glob '{0,1}' x 8; my @allcodes = map "$huffman{$_}(?{$_})", keys %huffman; my $fromhuff = do { local $" = '|'; qr/(?:@allcodes)/ }; sub compress { my $coded = ''; for ( shift =~ /(.*)\n/g ) { my @lookup = (0) x 123; @lookup[ unpack 'C*', $_ ] = (1) x length; for( my $group = 35; $group < 123; $group += 10 ) { $coded .= $tohuff{ join '', @lookup[$group .. $group + 7] }; } } return pack 'b*', $coded; } sub decompress { my $decoded = ''; my $decade = 33; local $_ = unpack 'b*', shift; while( /$fromhuff/g ) { $decoded .= pack 'C*', map $decade + $_, split //, ($^R // ''); ($decade += 10) >= 123 and $decade = 33, $decoded .= "\n"; } return $decoded =~ s/^.+\z//mr; } my $input = ''; for (1 .. 100) { for (my $x=0; $x<90; $x+=10) { my @c; push(@c, int (rand(10)+$x)); push(@c, int (rand(10)+$x)); push(@c, int (rand(10)+$x)); push(@c, int (rand(10)+$x)); @c = sort{$a<=>$b}@c; for (my $i = 1; $i < @c; $i++) { $input .= chr(33+$c[$i]) if $c[$i] != $c[$i-1] && $c[$i] != $c[$i-1]+1; } } $input .= "\n"; } #use Data::Dump 'dd'; dd $_ for $input =~ /.*\n?/g; print "\n input length ", length $input, "\n"; my $compressed = compress($input); my $compressedlength = length $compressed; print "compressed length $compressedlength\n"; my $restored = decompress($compressed); if( $input eq $restored ) { printf "\nMatched, compression ratio = %.1f%%\n", 100 * (1 - length($compressed) / length($restored)); } else { print "----------------------------------------failed\n"; use Data::Dump 'dd'; dd $_ for $restored =~ /.*\n?/g; } #### input length 1512 compressed length 557 Matched, compression ratio = 63.2%