Anonymous Monk has asked for the wisdom of the Perl Monks concerning the following question:
Hi Monks,
I have some rangers of numbers, and I want to find their union, BUT only comparing them pair-wise. Example:
25-40,74-93,99-120,130-149
31-47,84-99,107-123,137-151
My code:
use Set::IntSpan;
$TM_part1="25-40,74-93,95-120,130-149";
$TM_part2="31-47,84-99,107-123,137-151";
@split_TM1 = split("\t", $TM_part1);
@split_TM2 = split("\t", $TM_part2);
for ($i=0; $i<=$#split_TM1; $i++)
{
$tm_range1 = $split_TM1[$i];
$tm_range2 = $split_TM2[$i];
$set1 = new Set::IntSpan $tm_range1;
$set2 = new Set::IntSpan $tm_range2;
$u_set = intersect $set1 $set2;
}
print "Union of strings:".$u_set."\n";
Output: 31-40,84-93,95-99,107-120,137-149
Problem here is that I get an extra one, i.e. 95-99, because the 3rd range of $set1 partially overlaps with the 2nd range of $set2. Is there a way to tell my script to only create union of strings in a pairwise manner? Like compare 1st range of $set1 to 1st range of $set2, create the union of these two, then proceed to create the union between the 2nd range of $set1 and 2nd range of $set2 etc.
Note: the number of ranges is always the same between $set1 and $set2.
Re: Create union from ranges, but compare respectively
by GrandFather (Saint) on Jun 10, 2022 at 01:49 UTC
|
Strictures are your friend (use strict; use warnings; - see The strictures, according to Seuss). In this case forcing you to declare variables with my may have made you think about the scope of $u_set and that may have led you to realize that the print is in the wrong place, and that may have led you to realize that the for loop was only performing one iteration, and that may have led you to notice that you used a tab character to split on instead of a comma. A better version of the code might look like:
use strict;
use warnings;
use Set::IntSpan;
my $TM_part1 = "25-40,74-93,95-120,130-149";
my $TM_part2 = "31-47,84-99,107-123,137-151";
my @split_TM1 = split(",", $TM_part1);
my @split_TM2 = split(",", $TM_part2);
for my $i (0 .. $#split_TM1) {
my $set1 = Set::IntSpan->new($split_TM1[$i]);
my $set2 = Set::IntSpan->new($split_TM2[$i]);
my $u_set = intersect $set1 $set2;
print "Union of strings: $u_set\n";
}
Note the Perlish for loop and avoidance of object indirect calling of the Set::IntSpan constructor.
Optimising for fewest key strokes only makes sense transmitting to Pluto or beyond
| [reply] [d/l] |
Re: Create union from ranges, but compare respectively
by tybalt89 (Monsignor) on Jun 10, 2022 at 02:51 UTC
|
It you want to do something pair-wise, just use pairwise :)
#!/usr/bin/perl
use strict; # https://perlmonks.org/?node_id=11144630
use warnings;
use List::AllUtils qw( max min pairwise );
my $TM_part1="25-40,74-93,95-120,130-149";
my $TM_part2="31-47,84-99,107-123,137-151";
my @split_TM1 = split(",", $TM_part1);
my @split_TM2 = split(",", $TM_part2);
my $result = join ',', pairwise {
my $low = max map /^(\d+)/, $a, $b;
my $high = min map /-(\d+)/, $a, $b;
$low <= $high ? "$low-$high" : ();
} @split_TM1, @split_TM2;
print "$result\n";
Outputs:
31-40,84-93,107-120,137-149
| [reply] [d/l] [select] |
Re: Create union from ranges, but compare respectively
by hv (Prior) on Jun 10, 2022 at 01:08 UTC
|
It looks like your strings are comma-separated, but you are splitting them on tabs. You are also overwriting $u_set each time round the loop, so you'll need to accumulate the result differently.
I'd probably aim to write it something like this:
use strict; # always
use warnings; # always
use Set::IntSpan;
my $TM_part1 = "25-40,74-93,95-120,130-149";
my $TM_part2 = "31-47,84-99,107-123,137-151";
my @split_TM1 = split ',', $TM_part1;
my @split_TM2 = split ',', $TM_part2;
my $u_set = join ',', map {
my $set1 = Set::IntSpan->new($split_TM1[$_]);
my $set2 = Set::IntSpan->new($split_TM2[$_]);
intersect $set1 $set2;
} 0 .. $#split_TM1;
print "Union of strings: $u_set\n";
.. but if you're not comfortable with map, you can accumulate the results in an array instead:
use strict; # always
use warnings; # always
use Set::IntSpan;
my $TM_part1 = "25-40,74-93,95-120,130-149";
my $TM_part2 = "31-47,84-99,107-123,137-151";
my @split_TM1 = split ',', $TM_part1;
my @split_TM2 = split ',', $TM_part2;
my @u_set;
for my $i (0 .. $#split_TM1) {
my $set1 = Set::IntSpan->new($split_TM1[$i]);
my $set2 = Set::IntSpan->new($split_TM2[$i]);
push @u_set, intersect $set1 $set2;
}
my $u_set = join ',', @u_set;
print "Union of strings: $u_set\n";
| [reply] [d/l] [select] |
Re: Create union from ranges, but compare respectively
by kcott (Archbishop) on Jun 10, 2022 at 09:07 UTC
|
Out of curiosity, and for a bit of fun,
I wondered how a pure Perl solution would stack up against the two solutions using CPAN modules.
#!/usr/bin/env perl
use strict;
use warnings;
use Benchmark 'cmpthese';
use Test::More tests => 3;
my $TM_part1 = "25-40,74-93,95-120,130-149";
my $TM_part2 = "31-47,84-99,107-123,137-151";
my @split_TM1 = split ',', $TM_part1;
my @split_TM2 = split ',', $TM_part2;
my $union = '31-40,84-93,107-120,137-149';
is _span(), $union;
is _pair(), $union;
is _perl(), $union;
cmpthese 0 => {
Span => \&_span,
Pair => \&_pair,
Perl => \&_perl,
};
sub _span {
use Set::IntSpan;
my @u_set;
for my $i (0 .. $#split_TM1) {
my $set1 = Set::IntSpan->new($split_TM1[$i]);
my $set2 = Set::IntSpan->new($split_TM2[$i]);
my $u_set = intersect $set1 $set2;
push @u_set, $u_set;
}
return join ',', @u_set;
}
sub _pair {
use List::AllUtils qw( max min pairwise );
return join ',', pairwise {
my $low = max map /^(\d+)/, $a, $b;
my $high = min map /-(\d+)/, $a, $b;
$low <= $high ? "$low-$high" : ();
} @split_TM1, @split_TM2;
}
sub _perl {
my @u_set;
for my $i (0 .. $#split_TM1) {
my %seen;
for ([split /-/, $split_TM1[$i]], [split /-/, $split_TM2[$i]])
+ {
++$seen{$_} for $_->[0] .. $_->[1];
}
push @u_set, join '-', (sort grep $seen{$_} == 2, keys %seen)[
+0, -1];
}
return join ',', @u_set;
}
I ran that five times; here's a median result:
1..3
ok 1
ok 2
ok 3
Rate Span Perl Pair
Span 16937/s -- -20% -84%
Perl 21145/s 25% -- -80%
Pair 106033/s 526% 401% --
If you're interested, feel free to tweak the code for better (different) results.
| [reply] [d/l] [select] |
|
Rate Span SQL (Pg) Perl Pair
Span 8489/s -- -44% -56% -86%
SQL (Pg) 15132/s 78% -- -22% -75%
Perl 19388/s 128% 28% -- -68%
Pair 60727/s 615% 301% 213% --
adding:
sub _sql {
return $dbh->selectrow_arrayref("
select array_to_string( array(
select lower(r) || '-' || upper(r)-1 from (
select
(select unnest(r1) limit 1 offset n -1) *
(select unnest(r2) limit 1 offset n -1) as r
from (values
(('{[' || replace(replace('$TM_part1', ',', '],['), '-', ','
+) || ']}')::int4multirange,
('{[' || replace(replace('$TM_part2', ',', '],['), '-', ','
+) || ']}')::int4multirange
)) as f(r1,r2)
, lateral generate_series(1, 4) as g(n)
) as g
), ',') as h
;
")->[0] ;
}
Update: see my next message; it has improved/faster SQL and different Benchmark | [reply] [d/l] [select] |
|
I improved the SQL, this one here is a bit faster. I also include below changes to your test program, as lines to be added. The DBI connect will inevitably need some tweaking, depending on host machine.
use DBI;
#my $dbh = DBI->connect or die "hm - $@\n"; # easier when ENV vars are
+ set
my $dbh = DBI->connect('dbi:Pg:', "app_user1", "P4ss_W0rd1") or die "
+meh - no db connection - $@\n" ;
# two test lines:
is _sql() , $union;
"SQL/Pg" => \&_sql,
# and the statement itself
sub _sql {
return $dbh->selectrow_arrayref("
select array_to_string( array(
select lower(isect) || '-' || upper(isect)-1 from(
select
('['||replace(unnest(string_to_array('$TM_part1', ',')), '-', ',
+')||']')::int4range
* ('['||replace(unnest(string_to_array('$TM_part2', ',')), '-', ',
+')||']')::int4range
) as h(isect)
), ',')
")->[0] ;
}
and this is the new result:
Rate Span Perl SQL/Pg Pair oneliner
Span 8373/s -- -57% -68% -86% -100%
Perl 19261/s 130% -- -27% -69% -100%
SQL/Pg 26397/s 215% 37% -- -57% -100%
Pair 61443/s 634% 219% 133% -- -99%
oneliner 5644800/s 67313% 29207% 21284% 9087% --
| [reply] [d/l] [select] |
Re: Create union from ranges, but compare respectively -- oneliner
by Discipulus (Canon) on Jun 10, 2022 at 12:13 UTC
|
perl -le "for($ARGV[0]=~/\-/g){my@a;map{$a[$_]++for(eval s/\-/\.\./r)}
+map{s/^([^,]*),?//;$1}@ARGV;print+(join'-',(grep$a[$_]==2,0..$#a)[0,-
+1])}"
"25-40,74-93,95-120,130-149" "31-47,84-99,107-123,137-151"
31-40
84-93
107-120
137-149
L*
PS this should be the sub to add to the above bench ( UPDATE it had many errors: corrected)
sub _oneliner{
my @res;
for($TM_part1=~/\-/g) {
my @arr;
# map{ $arr[$_]++ for (eval s/\-/\.\./r) } map{ s/^([^,]*)
+,?//; $1 }$TM_part1, $TM_part2;
# semplified a bit, see below
map{s/^([^,]*),?//;$arr[$_]++ for( eval $1 =~ s/\-/\.\./r
+)}$TM_part1,$TM_part2;
push @res,(join'-',(grep { defined $arr[$_] and $arr[$_] =
+= 2 }0..$#arr)[0,-1] );
}
return join ',', @res;
}
__END__
1..4
ok 1
ok 2
ok 3
ok 4
Rate Span Perl Pair Oneliner
Span 12055/s -- -27% -84% -100%
Perl 16553/s 37% -- -79% -100%
Pair 77602/s 544% 369% -- -99%
Oneliner 11429533/s 94715% 68949% 14628% --
UPDATE the double map is really redundant and unneeded in the above sub:
# map{$arr[$_]++ for(eval s/\-/\.\./r)}map{s/^([^,]*),?//;$1}$TM_part1
+,$TM_part2;
# should be:
map{s/^([^,]*),?//;$arr[$_]++ for( eval $1 =~ s/\-/\.\./r )}$TM_part1,
+$TM_part2;
# so also the oneliner can be reduced:
perl -le "for($ARGV[0]=~/\-/g){my@a;map{s/^([^,]*),?//;$a[$_]++for eva
+l$1=~s/\-/\.\./r}@ARGV;print join'-',(grep$a[$_]==2,0..$#a)[0,-1]}"
"25-40,74-93,95-120,130-149" "31-47,84-99,107-123,137-151"
31-40
84-93
107-120
137-149
PS I fixed 1..$#a with 0..$#a in the above code snippets
There are no rules, there are no thumbs..
Reinvent the wheel, then learn The Wheel; may be one day you reinvent one of THE WHEELS.
| [reply] [d/l] [select] |
|
|