Thank you all very much for your suggestions.
I tried my hand at writing an iterative version, but it didn't perform as well as I'd hoped. Can anyone suggest a better implementation?
Not surprisingly, the version which just deletes keys from the source hash is by far the fastest.
More surprising to me was that the recursive version is quite a lot faster than both the map/grep and slice based solutions.
#! /usr/bin/env perl
use strict;
use warnings;
use Carp;
use Test::More;
use Benchmark qw(timethese cmpthese);
use Deep::Hash::Utils qw(reach nest deepvalue);
my $t = {
source => {
f1 => 'garbage',
f2 => 'more garbage',
f3 => 'important data',
f4 => {
this => 'sub hash',
is => 'garbage'
},
f5 => {
f6 => 'more important data',
f7 => {
more => 'garbage',
f8 => 'important data',
},
f9 => 'garbage',
},
f10 => [ 'important', 'data' ],
f11 => [ 'more', 'garbage' ]
},
filter => {
f3 => 1,
f5 => {
f6 => 1,
f7 => {
f8 => 1
}
},
f10 => 1
},
output => {
f3 => 'important data',
f5 => {
f6 => 'more important data',
f7 => {
f8 => 'important data',
}
},
f10 => [ 'important', 'data' ],
},
};
sub hash_filter_recursive {
my $source = shift;
my $filter = shift;
my %output;
foreach ( keys %$filter ) {
if ( exists $source->{$_} ) {
if ( ref $filter->{$_} eq 'HASH' ) {
croak "bad filter: on '$_', expected HASH\n"
unless ( ref $source->{$_} eq 'HASH' );
$output{$_} = hash_filter_recursive( $source->{$_}, $filter->{
+$_} );
}
else {
$output{$_} = $source->{$_};
}
}
}
return \%output;
}
sub hash_filter_iterative {
# I expected this one to be faster...
# Can anyone suggest a better implementation?
my $source = shift;
my $filter = shift;
my $output = {};
my @queue = ( [ $source, $filter, $output ] );
while ( my $a = shift @queue ) {
my ( $s, $f, $o ) = @{$a};
foreach ( keys %$f ) {
if ( exists $s->{$_} ) {
if ( ref $f->{$_} eq 'HASH' ) {
croak "bad filter: on '$_', expected HASH\n"
unless ( ref $s->{$_} eq 'HASH' );
$o->{$_} = {};
push @queue, [ $s->{$_}, $f->{$_}, $o->{$_} ];
}
else {
$o->{$_} = $s->{$_};
}
}
}
}
return $output;
}
sub hash_filter_delete {
my $source = shift;
my $filter = shift;
foreach ( keys %$source ) {
if ( exists $filter->{$_} ) {
if ( ref $filter->{$_} eq 'HASH' ) {
croak "bad filter: on '$_', expected HASH\n"
unless ( ref $source->{$_} eq 'HASH' );
hash_filter_delete( $source->{$_}, $filter->{$_} );
}
}
else {
delete $source->{$_};
}
}
return $source;
}
sub hash_slice {
# contributed by Veltro
my $s = $_[0];
my $f = $_[1];
my $n = {};
my @keysToGet = keys %{$f};
@{$n}{@keysToGet} = @{$s}{@keysToGet};
foreach ( keys %{$n} ) {
if ( ref $n->{$_} eq 'HASH' ) {
$n->{$_} = hash_slice( $s->{$_}, $f->{$_} );
}
}
return $n;
}
sub map_grep {
# contributed by shmem
my $source = shift;
my $filter = shift;
return {
map {
ref $filter->{$_} eq 'HASH'
? ref $source->{$_} eq 'HASH'
? ( $_, map_grep( $source->{$_}, $filter->{$_} ) )
: croak "bad filter: on '$_', expected HASH\n"
: ( $_, $source->{$_} )
} grep { exists $source->{$_} } keys %$filter
};
}
sub map_grep_2 {
# contributed by shmem
return {
map {
ref $_[1]->{$_} eq 'HASH'
? ref $_[0]->{$_} eq 'HASH'
? ( $_, map_grep_2( $_[0]->{$_}, $_[1]->{$_} ) )
: croak "bad filter: on '$_', expected HASH\n"
: ( $_, $_[0]->{$_} )
} grep { exists $_[0]->{$_} } keys %{ $_[1] }
};
}
sub hash_deep_utils {
# contributed by mr_ron
my ( $source, $filter ) = @_;
my %rc;
while ( my @l = reach($filter) ) {
pop @l;
if ( defined( my $source_val = deepvalue( $source, @l ) ) ) {
# hint: work around nest behavior on even vs odd key count
nest( \%rc, @l )->{ $l[$#l] } = $source_val;
}
}
\%rc;
}
is_deeply( $_->( $t->{source}, $t->{filter} ), $t->{output} )
foreach ( \&hash_filter_recursive, \&hash_filter_iterative,
\&hash_filter_delete, \&hash_slice, \&map_grep, \&map_grep_2,
\&hash_deep_utils );
done_testing();
cmpthese(
1000000,
{
recursive => sub { hash_filter_recursive( $t->{source}, $t->{filt
+er} ) },
iterative => sub { hash_filter_iterative( $t->{source}, $t->{filt
+er} ) },
delete => sub { hash_filter_delete( $t->{source}, $t->{filt
+er} ) },
map_grep => sub { map_grep( $t->{source}, $t->{filt
+er} ) },
map_grep_2 => sub { map_grep_2( $t->{source}, $t->{filt
+er} ) },
slice => sub { hash_slice( $t->{source}, $t->{filt
+er} ) },
}
);