FWIW, push seems to be faster than direct indexing on a pre-grown array, as the following table shows.
The only difference between the windex and windex_2 alternatives is that the former uses push and the latter uses direct indexing on a pre-grown array. The string is 100_000 characters long and contains about 3800 matches. (Full code within the readmore tags.)
            Rate   wregex windex_2   windex
wregex     212/s       --     -24%     -32%
windex_2   277/s      31%       --     -11%
windex     310/s      47%      12%       --
use strict;
use warnings;
use Benchmark 'cmpthese';
# Seed the generator with a constant so the test string is reproducible
# from one run to the next.
srand(0);

# Test subject: 100_000 random lowercase ASCII letters (chr 97 is 'a').
my $s = join( '', map { chr( 97 + int( rand(26) ) ) } 1 .. 100_000 );

# Number of 'a' characters in $s. tr/a/a/ maps 'a' to itself, so it only
# counts; windex_2 uses this count to pre-size its array.
my $n = ( $s =~ tr/a/a/ );
print "$n\n";

# Candidates to compare. A negative count tells Benchmark to run each one
# for at least that many CPU seconds rather than a fixed number of times.
my %candidates = (
    windex   => \&windex,
    windex_2 => \&windex_2,
    wregex   => \&wregex,
);
cmpthese( -1, \%candidates );
# Benchmark candidate: find the offset of every 'a' in the file-level string
# $s with index(), appending each offset to an initially empty array via push.
# Takes no arguments. The offsets are discarded and nothing is returned; only
# the time spent collecting them is of interest.
sub windex {
    my @offsets;
    my $pos = -1;

    # Each search resumes one character past the previous hit; index()
    # yields -1 once there is no further 'a'.
    while ( ( $pos = index( $s, 'a', $pos + 1 ) ) >= 0 ) {
        push @offsets, $pos;
    }

    return;
}
# Benchmark candidate: same index() scan as windex, but the result array is
# grown up front and filled by direct element assignment instead of push.
# Takes no arguments. The offsets are discarded and nothing is returned.
sub windex_2 {
    my @offsets;

    # Pre-extend to $n slots, where $n is the file-level count of 'a'
    # characters in $s, so the array never has to grow inside the loop.
    $#offsets = $n - 1;

    my $pos  = -1;
    my $slot = 0;

    # index() yields -1 once there is no further 'a' past $pos.
    while ( ( $pos = index( $s, 'a', $pos + 1 ) ) >= 0 ) {
        $offsets[ $slot++ ] = $pos;
    }

    return;
}
# Benchmark candidate: collect the offset of every 'a' in the file-level
# string $s using a single regex match with an embedded code block.
# Takes no arguments. The offsets are discarded and nothing is returned.
sub wregex {
my @o;
# Each time 'a' matches, the (?{ ... }) block runs; pos() is then one past
# the matched 'a', so pos() - 1 is that character's offset. The trailing
# (?!) can never match, so the overall match always fails and the engine
# retries from later start positions.
# NOTE(review): perls older than 5.18 did not reliably close over lexicals
# such as @o inside (?{ ... }) -- confirm the perl version if results look odd.
$s =~ m/a(?{ push @o, pos() - 1 })(?!)/;
return;
}
__END__
3797
            Rate   wregex windex_2   windex
wregex     212/s       --     -24%     -32%
windex_2   277/s      31%       --     -11%
windex     310/s      47%      12%       --