Fastest way to "pick without replacement"

Interesting question. First thought was full slices. It's faster than grep but still nowhere near splice.

use warnings;
use strict;
use Benchmark qw/cmpthese/;

my @numbers = ( 0..20 );
my $index = 3;
my @expect = ( 0..2,4..20 );
use constant TEST => 0;

cmpthese(-2, {
    splice => sub {
        my @output = @numbers;
        splice @output, $index, 1;
        join("\0", @output) eq join("\0", @expect) or die if TEST;
    },
    grep => sub { # https://www.perlmonks.org/?node_id=11123877
        my @output = @numbers[ grep $_ != $index, 0 .. $#numbers ];
        join("\0", @output) eq join("\0", @expect) or die if TEST;
    },
    slice => sub {
        my @output = @numbers[0..$index-1,$index+1..$#numbers];
        join("\0", @output) eq join("\0", @expect) or die if TEST;
    },
});

__END__

           Rate   grep  slice splice
grep   291881/s     --   -38%   -66%
slice  468110/s    60%     --   -46%
splice 859381/s   194%    84%     --
[download]

🦛

[reply]
[d/l]

haukex the test you are proposing removes only 1 item from @numbers. If the test removed more than one numbers (or all) one after the other (after a shuffle) could be more realistic (and my hash-based approach could get a chance too :)):

use warnings;
use strict;
use Benchmark qw/cmpthese/;

use List::Util qw/shuffle/;

my @numbers = ( 0..20 );
my @indices = shuffle 0..$#numbers;

my @expect = ( 0..2,4..20 );
use constant TEST => 0;

cmpthese(-2, {
    splice => sub {
        my @output = @numbers;
        for my $index (@indices){
            splice @output, 0, 1;
        }
        join("\0", @output) eq join("\0", @expect) or die if TEST;
    },
    grep => sub { # https://www.perlmonks.org/?node_id=11123877
        my @output;
        for my $index (@indices){
            @output = @numbers[ grep $_ != $index, 0 .. $#numbers ];
        }
        join("\0", @output) eq join("\0", @expect) or die if TEST;
    },
    hash => sub {
        my %ha; @ha{0..$#numbers} = @numbers;
        for my $index (@indices){
            delete $ha{$index};
        }
        my @output = map { $ha{$_} } sort keys %ha;
        join("\0", @output) eq join("\0", @expect) or die if TEST;
    },
});

__END__

           Rate   grep   hash splice
grep    30916/s     --   -85%   -96%
hash   205775/s   566%     --   -71%
splice 707918/s  2190%   244%     --
[download]

With the above, grep is disadvantaged and splice it just a guess by me (it always removes the first item).

The bottom line: in a loop of removals, hash-based approach can be a contestant too although splice is by far the best but it messes the indices.

bw, bliako

[reply]
[d/l]
[select]

#!/usr/bin/perl
use warnings;
use strict;

use Benchmark qw{ cmpthese };

use List::Util qw{ shuffle };

my @numbers = 0 .. 20;
my @indices = shuffle(grep $_ % 2, 0 .. $#numbers);

my @expect = (0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20);
use constant TEST => 1;

cmpthese(-2, {
    splice => sub {
        my @output = @numbers;
        for my $index (sort { $b <=> $a } @indices){
            splice @output, $index, 1;
        }
        join("\0", @output) eq join("\0", @expect) or die "@output\n@e
+xpect" if TEST;
    },
    grep => sub {
        my @output = @numbers[
            grep {
                my $number = $_;
                ! grep $number == $_, @indices
            } 0 .. $#numbers
        ];
        join("\0", @output) eq join("\0", @expect) or die "@output\n@e
+xpect" if TEST;
    },
    hash => sub {
        my %ha; @ha{0..$#numbers} = ();
        for my $index (@indices){
            delete $ha{$index};
        }
        my @output = @numbers[ sort { $a <=> $b } keys %ha ];
        join("\0", @output) eq join("\0", @expect) or die "@output\n@e
+xpect" if TEST;
    },
    hash_slice => sub {
        my %indices;
        @indices{0 .. $#numbers} = ();
        delete @indices{@indices};
        my @output = @numbers[sort { $a <=> $b } keys %indices];
        join("\0", @output) eq join("\0", @expect) or die "@output\n@e
+xpect" if TEST;
    }
});
[download]

               Rate       grep       hash hash_slice     splice
grep        65163/s         --       -42%       -45%       -89%
hash       112778/s        73%         --        -5%       -81%
hash_slice 118725/s        82%         5%         --       -80%
splice     580859/s       791%       415%       389%         --
[download]

swl

Update: But using a nested loop (grep in grep) is inefficient and is probably not how I would have changed the original code removing a single element. I'd rather use something like

    grep => sub {
        my %indices;
        @indices{@indices} = ();
        my @output = @numbers[
            grep ! exists $indices{$_}, 0 .. $#numbers
        ];
        join("\0", @output) eq join("\0", @expect) or die "@output\n@e
+xpect" if TEST;
    },
[download]

which leads to (TEST = 0)

               Rate       hash hash_slice       grep     splice
hash       113323/s         --        -6%       -41%       -80%
hash_slice 120876/s         7%         --       -37%       -79%
grep       190934/s        68%        58%         --       -66%
splice     568237/s       401%       370%       198%         --
[download]

map{substr$_->[0],$_->[1]||0,1}[\*||{},3],[[]],[ref qr-1,-,-1],[{}],[sub{}^*ARGV,3]

[reply]
[d/l]
[select]

Using a slice in the delete call will sidestep the for-loop. The map can also be replaced with a slice.

    hash_slice => sub {
        my %ha; @ha{0..$#numbers} = @numbers;
        delete @ha{@indices};
        my @output = @ha{sort keys %ha};
        join("\0", @output) eq join("\0", @expect) or die if TEST;
    },
[download]

[reply]
[d/l]

The sort should also be numeric.

    hash_slice => sub {
        my %ha; @ha{0..$#numbers} = @numbers;
        delete @ha{@indices};
        my @output = @ha{sort {$a <=> $b} keys %ha};
        join("\0", @output) eq join("\0", @expect) or die if TEST;
    },
[download]

Update: Although I now see Choroba has already done that in 11123952.

[reply]
[d/l]

coolz!

[reply]

If you don't care about order, replacing the deleted element by the last in the array works as well. And I was expecting it to be faster than splice (since I was expecting the latter to move every element after the one that is removed) but the performances are actually very similar:

use warnings;
use strict;
use Benchmark qw/cmpthese/;

my $size = 1e3;
my @numbers = ( 0..$size );
my $index = 3;
my @expect = ( 0..2,4..$size );
use constant TEST => 0;

cmpthese(-2, {
  splice => sub {
    my @output = @numbers;
    splice @output, $index, 1;
    join("\0", sort @output) eq join("\0", sort @expect) or die if TES
+T;
  },
  grep => sub { # https://www.perlmonks.org/?node_id=11123877
    my @output = @numbers[ grep $_ != $index, 0 .. $#numbers ];
    join("\0", sort @output) eq join("\0", sort @expect) or die if TES
+T;
  },
  swap => sub {
    my @output = @numbers;
    $output[$index] = pop @output;
    join("\0", sort @output) eq join("\0", sort @expect) or die if TES
+T;
  },
});
[download]

C:\Projets\perl>perl pm_select_11123879.pm
          Rate   grep   swap splice
grep   14287/s     --   -73%   -73%
swap   52519/s   268%     --    -0%
splice 52549/s   268%     0%     --
[download]

NB: I checked, it works fine when $index is the last element in the array as well.

[reply]
[d/l]
[select]

knapsack problem

I have good experience solving such stuff with a branch and bound algorithm combined with clever normalization (i.e. grouping similar cases) and caching in a hash (no need to descend a sub-tree which has already been investigated before).

In this case the normalization would be the sum of the partial solution, which would also be the key in the cache-hash.

for instance: 2+3 = 5 so once you can cache all further path for 5 without needing to branch.

2+3 +95 => $cache{5} = [ [95] ]

5 +95 => seen!

HTH! :)

PS: I wrote many posts about branch-and-bound here try searching the archives if interested.

Cheers Rolf
_{(addicted to the Perl Programming Language :)

Wikisyntax for the Monastery}

[reply]
[d/l]
[select]

Thank you for the replies everyone! I've put all the solutions together here - I've kept the behavior of deleting only a single value, but the comparison with deleting multiple indicies by choroba here is very interesting too! I've included Eily's solution that leaves the array out of its original order too.

use warnings;
use strict;
use Benchmark qw/cmpthese/;

my @numbers = ( 'a'..'t' );
my $index = 3;
my $expect = 'abcefghijklmnopqrst';
use constant TEST => 0;

cmpthese(-2, {
    splice => sub {
        my @output = @numbers;
        splice @output, $index, 1;
        join("", @output) eq $expect or die if TEST;
    },
    grep => sub { # choroba
        my @output = @numbers[ grep $_ != $index, 0 .. $#numbers ];
        join("", @output) eq $expect or die if TEST;
    },
    slice => sub { # hippo
        my @output = @numbers[ 0..$index-1, $index+1..$#numbers ];
        join("", @output) eq $expect or die if TEST;
    },
    hash => sub { # bliako
        my %ha; @ha{0..$#numbers} = @numbers;
        delete $ha{$index};
        my @output = map { $ha{$_} } sort {$a <=> $b} keys %ha;
        join("", @output) eq $expect or die if TEST;
    },
    hash_slice => sub { # swl
        my %ha; @ha{0..$#numbers} = @numbers;
        delete $ha{$index};
        my @output = @ha{ sort {$a <=> $b} keys %ha };
        join("", @output) eq $expect or die if TEST;
    },
    swap => sub { # Eily
        my @output = @numbers;
        $output[$index] = pop @output;
        join("", sort @output) eq $expect or die if TEST;
    },
    pop_sort => sub { # Eily's + sort
        my @output = @numbers;
        $output[$index] = pop @output;
        @output = sort @output;
        join("", @output) eq $expect or die if TEST;
    },
});

__END__

               Rate     hash hash_slice    grep pop_sort   slice    sw
+ap  splice
hash       113253/s       --       -10%    -78%     -82%    -84%    -8
+7%    -87%
hash_slice 126029/s      11%         --    -76%     -80%    -83%    -8
+5%    -86%
grep       518838/s     358%       312%      --     -17%    -28%    -3
+9%    -41%
pop_sort   627138/s     454%       398%     21%       --    -14%    -2
+6%    -28%
slice      725216/s     540%       475%     40%      16%      --    -1
+4%    -17%
swap       846179/s     647%       571%     63%      35%     17%      
+--     -3%
splice     873809/s     672%       593%     68%      39%     20%      
+3%      --
[download]

[reply]
[d/l]

I was curious if storing the values in a hash in the first place might be better. Turns out, no, even if the output is a hash as well.

use warnings;
use strict;
use Benchmark qw/cmpthese/;

my @array = ( 'a'..'t' );
my %hash = map { $_ => $array[$_] } 0..$#array;
my $index = 3;
my $expect = 'abcefghijklmnopqrst';
use constant TEST => 0;

cmpthese(-2, {
    splice => sub {
        my @output = @array;
        splice @output, $index, 1;
        join("", @output) eq $expect or die if TEST;
    },
    hash_slice => sub {
        my %ha; @ha{0..$#array} = @array;
        delete $ha{$index};
        my @output = @ha{ sort {$a <=> $b} keys %ha };
        join("", @output) eq $expect or die if TEST;
    },
    swap => sub {
        my @output = @array;
        $output[$index] = pop @output;
        join("", sort @output) eq $expect or die if TEST;
    },
    prehash_del => sub {
        my %ha = %hash;
        delete $ha{$index};
        my @output = @ha{ sort {$a <=> $b} keys %ha };
        join("", @output) eq $expect or die if TEST;
    },
    prehash_local => sub {
        delete local $hash{$index};
        my @output = @hash{ sort {$a <=> $b} keys %hash };
        join("", @output) eq $expect or die if TEST;
    },
    prehash_grep => sub {
        my @output = @hash{ sort {$a <=> $b}
            grep { $_ != $index } keys %hash };
        join("", @output) eq $expect or die if TEST;
    },
    outhash => sub {
        my %output = %hash;
        delete $output{$index};
        join("", @output{ sort {$a <=> $b} keys %output }) eq $expect
            or die if TEST;
    },
});

__END__

                  Rate hash_slice prehash_del prehash_local prehash_gr
+ep outhash swap splice
hash_slice    127891/s         --        -10%          -40%         -5
+0%    -68% -85%   -85%
prehash_del   142822/s        12%          --          -33%         -4
+5%    -64% -83%   -84%
prehash_local 214633/s        68%         50%            --         -1
+7%    -46% -75%   -75%
prehash_grep  258191/s       102%         81%           20%           
+--    -36% -70%   -70%
outhash       400518/s       213%        180%           87%          5
+5%      -- -53%   -54%
swap          847448/s       563%        493%          295%         22
+8%    112%   --    -2%
splice        868423/s       579%        508%          305%         23
+6%    117%   2%     --
[download]

[reply]
[d/l]

Every set bit would represent a used number. �

Of course this would mean that the overall algorithm needs to be adjusted ...

Anyway in my experience you are doing micro-optimization here, the exponential growth of the search space makes better bound conditions to cut sub-trees far more important than tuning Perl.

Cheers Rolf
_{(addicted to the Perl Programming Language :)

Wikisyntax for the Monastery}

�) the strings can't get that big, a result set for 32 input numbers would already be beyond limits.

[reply]

Out of interest, I ran the code through Devel::NYTProf, setting the number of iterations to 200,000. The sort functions in the hash-based methods are the main slow-points, with populating the hash coming a close second (i.e. @ha{0..$#array} = @array;).

haukex has already shown that pre-initialising the hash does not change the order by much.

Another point is that the hash slice subs don't need to get the results using a slice - they can just get the values directly.

The code below adds two subs that sort the values call on %ha.

When run on my system (Strawberry perl 5.28) both variants are about 20%-25% faster than their respective originals. The top few subs are, unsurprisingly, unchanged. I have not done prehash_local but would expect the same relative difference.

use warnings;
use strict;
use Benchmark qw/cmpthese/;

my @array = ( 'a'..'t' );
my %hash = map { $_ => $array[$_] } 0..$#array;
my $index = 3;
my $expect = 'abcefghijklmnopqrst';
use constant TEST => 0;

cmpthese(-2, {
    splice => sub {
        my @output = @array;
        splice @output, $index, 1;
        join("", @output) eq $expect or die if TEST;
    },
    hash_slice => sub {
        my %ha; @ha{0..$#array} = @array;
        delete $ha{$index};
        my @output = @ha{ sort {$a <=> $b} keys %ha };
        join("", @output) eq $expect or die if TEST;
    },
    h_slice_2 => sub {
        my %ha; @ha{0..$#array} = @array;
        delete $ha{$index};
        my @output = sort values %ha;
        join("", @output) eq $expect or die if TEST;
    },
    swap => sub {
        my @output = @array;
        $output[$index] = pop @output;
        join("", sort @output) eq $expect or die if TEST;
    },
    prehash_del => sub {
        my %ha = %hash;
        delete $ha{$index};
        my @output = @ha{ sort {$a <=> $b} keys %ha };
        join("", @output) eq $expect or die if TEST;
    },
    prehash_del_2 => sub {
        my %ha = %hash;
        delete $ha{$index};
        my @output = sort values %ha;
        join("", @output) eq $expect or die if TEST;
    },
    prehash_local => sub {
        delete local $hash{$index};
        my @output = @hash{ sort {$a <=> $b} keys %hash };
        join("", @output) eq $expect or die if TEST;
    },
    prehash_grep => sub {
        my @output = @hash{ sort {$a <=> $b}
            grep { $_ != $index } keys %hash };
        join("", @output) eq $expect or die if TEST;
    },
    outhash => sub {
        my %output = %hash;
        delete $output{$index};
        join("", @output{ sort {$a <=> $b} keys %output }) eq $expect
            or die if TEST;
    },
});

__END__

                  Rate hash_slice h_slice_2 prehash_del prehash_del_2 
+prehash_local prehash_grep outhash swap splice
hash_slice     50341/s         --      -20%        -21%          -36% 
+         -52%         -59%    -67% -79%   -79%
h_slice_2      62792/s        25%        --         -2%          -20% 
+         -40%         -48%    -58% -74%   -74%
prehash_del    63796/s        27%        2%          --          -19% 
+         -39%         -48%    -58% -74%   -74%
prehash_del_2  78613/s        56%       25%         23%            -- 
+         -25%         -35%    -48% -68%   -68%
prehash_local 104733/s       108%       67%         64%           33% 
+           --         -14%    -31% -57%   -57%
prehash_grep  121518/s       141%       94%         90%           55% 
+          16%           --    -19% -50%   -50%
outhash       150826/s       200%      140%        136%           92% 
+          44%          24%      -- -38%   -38%
swap          243449/s       384%      288%        282%          210% 
+         132%         100%     61%   --    -1%
splice        244776/s       386%      290%        284%          211% 
+         134%         101%     62%   1%     --
[download]

[reply]
[d/l]
[select]


more useful options
	PerlMonks