This is a somewhat tricky problem because later data may require that separate groups that were formed by earlier data need to be merged. Where the code is a little tricky it's worth being a little long winded, use explicit identifier names and comment each case. Consider:
use strict;
use List::Util;
my %nodes;
my @groups;
while (<DATA>) {
chomp;
my ($node1, $node2) = split;
if (! exists $nodes{$node1} && ! exists $nodes{$node2}) {
# New group
$nodes{$node1} = @groups;
$nodes{$node2} = @groups;
push @groups, [$node1, $node2];
next;
}
if (! exists $nodes{$node1}) {
# node1 is part of node2's group
push @{$groups[$nodes{$node2}]}, $node1;
$nodes{$node1} = $nodes{$node2};
}
if (! exists $nodes{$node2}) {
# node2 is part of node1's group
push @{$groups[$nodes{$node1}]}, $node2;
$nodes{$node2} = $nodes{$node1};
}
next # Already met this pairing
if $nodes{$node1} == $nodes{$node2};
# node1 and node2 are in different groups. Merge the groups
my ($group, $nulGroup) = ($nodes{$node1}, $nodes{$node2});
push @{$groups[$group]}, @{$groups[$nulGroup]};
$nodes{$_} = $group for @{$groups[$nulGroup]};
$groups[$nulGroup] = undef;
}
@groups = grep {defined} @groups;
for my $group (0 .. $#groups) {
print 'Group', $group + 1, ': ', join (', ', @{$groups[$group]}),
+"\n";
}
__DATA__
Contig1 Contig2
Contig1 Contig3
Contig2 Contig1
Contig2 Contig3
Contig3 Contig1
Contig3 Contig2
Contig3 Contig4
Contig4 Contig3
Contig4 Contig5
Contig6 Contig7
Contig7 Contig6
Contig8 Contig9
Contig9 Contig10
Contig10 Contig8
Contig10 Contig11
Contig11 Contig10
C12 Contig11
C12 Contig5
Prints:
Group1: Contig6, Contig7
Group2: Contig8, Contig9, Contig10, Contig11, C12, Contig1, Contig2, C
+ontig3, Contig4, Contig5
True laziness is hard work
|