in reply to find index of specific array value that occurs multiple times
Your problem is complicated by the use of spaces in the consensus lines. If we simply split on whitespace, then we would get an incorrect index/offset.
Working, tested code:
Output:use strict; use warnings; push @ARGV, 'pm_962355_01.dat' if not @ARGV; my @consensus_lines; my $seq_start_column = 0; while (<>) { chomp; die "This algorithm relies on spaces - no tabs allowed!" if /\t/; if ( !$seq_start_column and /^(\d+\.\S+\s+)\S/ ) { $seq_start_column = length $1; } next if /^CLUSTAL/; # Header row next if !/\S/; # Blank rows if ( /\*/ ) { push @consensus_lines, $_; } } if (!$seq_start_column) { die "Failed to calculate start column for sequences"; } my $consensus = join '', map { substr $_, $seq_start_column } @consensus_lines; # Just for debugging use Data::Dumper; $Data::Dumper::Useqq = 1; print Dumper $consensus; my @indexes = 0..length($consensus); my @index_c = grep { substr($consensus,$_,1) eq '*' } @indexes; my @index_n = grep { substr($consensus,$_,1) ne '*' } @indexes; # If positions are 0-based: print " Conserved: ", join(',', @index_c), "\n"; print "Not conserved: ", join(',', @index_n), "\n"; # If positions are 1-based: #my @offset_c = map { $_ + 1 } @index_c; #my @offset_n = map { $_ + 1 } @index_n; #print " Conserved: ", join(',', @offset_c), "\n"; #print "Not conserved: ", join(',', @offset_n), "\n";
$VAR1 = " :*** * .** *::***** **:::::**: ::*:*** :*****:*.** +*******: .:* * **:.*.. *****.***:***:: ..*: **.* ****:************* +"; Conserved: 9,10,11,14,19,20,22,25,26,27,28,29,31,32,38,39,44,46,47 +,48,51,52,53,54,55,57,59,60,61,62,63,64,65,66,67,72,74,78,79,82,86,87 +,88,89,90,92,93,94,96,97,98,104,107,108,110,112,113,114,115,117,118,1 +19,120,121,122,123,124,125,126,127,128,129 Not conserved: 0,1,2,3,4,5,6,7,8,12,13,15,16,17,18,21,23,24,30,33,34,3 +5,36,37,40,41,42,43,45,49,50,56,58,68,69,70,71,73,75,76,77,80,81,83,8 +4,85,91,95,99,100,101,102,103,105,106,109,111,116,130
|
---|
Replies are listed 'Best First'. | |
---|---|
Re^2: find index of specific array value that occurs multiple times
by AWallBuilder (Beadle) on Mar 29, 2012 at 14:31 UTC | |
by davido (Cardinal) on Mar 29, 2012 at 15:51 UTC |
In Section
Seekers of Perl Wisdom