Out of curiosity, and for a bit of fun,
I wondered how a pure Perl solution would stack up against the two solutions using CPAN modules.
#!/usr/bin/env perl
use strict;
use warnings;
use Benchmark 'cmpthese';
use Test::More tests => 3;
my $TM_part1 = "25-40,74-93,95-120,130-149";
my $TM_part2 = "31-47,84-99,107-123,137-151";
my @split_TM1 = split ',', $TM_part1;
my @split_TM2 = split ',', $TM_part2;
my $union = '31-40,84-93,107-120,137-149';
is _span(), $union;
is _pair(), $union;
is _perl(), $union;
cmpthese 0 => {
Span => \&_span,
Pair => \&_pair,
Perl => \&_perl,
};
sub _span {
use Set::IntSpan;
my @u_set;
for my $i (0 .. $#split_TM1) {
my $set1 = Set::IntSpan->new($split_TM1[$i]);
my $set2 = Set::IntSpan->new($split_TM2[$i]);
my $u_set = intersect $set1 $set2;
push @u_set, $u_set;
}
return join ',', @u_set;
}
sub _pair {
use List::AllUtils qw( max min pairwise );
return join ',', pairwise {
my $low = max map /^(\d+)/, $a, $b;
my $high = min map /-(\d+)/, $a, $b;
$low <= $high ? "$low-$high" : ();
} @split_TM1, @split_TM2;
}
sub _perl {
my @u_set;
for my $i (0 .. $#split_TM1) {
my %seen;
for ([split /-/, $split_TM1[$i]], [split /-/, $split_TM2[$i]])
+ {
++$seen{$_} for $_->[0] .. $_->[1];
}
push @u_set, join '-', (sort grep $seen{$_} == 2, keys %seen)[
+0, -1];
}
return join ',', @u_set;
}
I ran that five times; here's a median result:
1..3
ok 1
ok 2
ok 3
Rate Span Perl Pair
Span 16937/s -- -20% -84%
Perl 21145/s 25% -- -80%
Pair 106033/s 526% 401% --
If you're interested, feel free to tweak the code for better (different) results.