Time to haul out cmpthese:
use warnings;
use strict;
use Benchmark qw(cmpthese);
my $str;
my $len = 0;
while ($len < 1000000) {
my $runLen = int rand (50);
$str .= chr (ord ('a') + int rand (26)) x $runLen;
$len += $runLen;
}
my ($runlen, $start) = Index ();
print "Index: Run from $start for $runlen\n";
($runlen, $start) = Linear ();
print "Linear: Run from $start for $runlen\n";
($runlen, $start) = RegexSort ();
print "RegexSort: Run from $start for $runlen\n";
cmpthese (-5,
{
Index => \&Index,
Linear => \&Linear,
RegexSort => \&RegexSort,
}
);
sub Index {
my $sstr = substr ($str, 0, 1) . (substr ($str, 1) ^ $str);
my @bestRuns;
my $match = "\0";
my $bestRunLen = 1;
my $scan = 0;
while (-1 != ($scan = index $sstr, $match, $scan)) {
my $runLen = length ((substr ($sstr, $scan) =~ /(\0+)/)[0]);
if ($runLen > $bestRunLen) {
# new best match
@bestRuns = ();
$bestRunLen = $runLen;
$match = "\0" x ($bestRunLen);
}
push @bestRuns, $scan - 1;
$scan += $bestRunLen;
}
return ($bestRunLen + 1, $bestRuns[0]);
}
sub Linear {
my ($c, $maxn, $n, $maxc) = ('', 0);
my $bestEnd = 0;
for my $index (0..(length($str)-1)) {
$_ = substr($str, $index, 1);
if ($_ ne $c) {
$n = 1;
$c = $_;
}
else {
$n++;
if ($n > $maxn) {
$maxn = $n;
$maxc = $c;
$bestEnd = $index
}
}
}
return ($maxn, $bestEnd - $maxn + 1);
}
sub RegexSort {
return (length ((sort {length $b <=> length $a} $str =~ m[((.)\2+)
+]g)[0]), -1);
}
Results (using various values for the run length generator):
Index: Run from 701117 for 30
Linear: Run from 701117 for 30
RegexSort: Run from -1 for 30
(warning: too few iterations for a reliable count)
s/iter RegexSort Linear Index
RegexSort 2.21 -- -54% -97%
Linear 1.03 116% -- -94%
Index 6.16e-002 3494% 1564% --
Index: Run from 670331 for 125
Linear: Run from 670331 for 125
RegexSort: Run from -1 for 125
Rate Linear RegexSort Index
Linear 1.06/s -- -51% -90%
RegexSort 2.14/s 102% -- -79%
Index 10.2/s 865% 377% --
Index: Run from 749633 for 459
Linear: Run from 749633 for 459
RegexSort: Run from -1 for 459
Rate Linear RegexSort Index
Linear 1.05/s -- -77% -82%
RegexSort 4.56/s 334% -- -21%
Index 5.77/s 450% 27% --
Note that the first three lines of each group are the check results. RegexSort doesn't generate a start index for the match so -1 is shown. However the same length is generated in each case so it is presumed that the same longest match is being found.
DWIM is Perl's answer to Gödel