# Bump the tally in %absent for every capital letter that has no key in %prot.
foreach my $letter ( 'A' .. 'Z' ) {
    next if exists $prot{$letter};
    $absent{$letter}++;
}
####
# Emit one "LETTER=COUNT" line per key of %absent, in sorted key order.
printf "%s=%d\n", $_, $absent{$_} for sort keys %absent;
##
##
#!/usr/bin/perl
use strict;
use warnings;
# Report destination. It is opened (and therefore truncated) before the user
# is asked for the input file.
my $report_name = 'aa_report.txt';
open my $out_file, '>', $report_name
    or die "Cannot open '$report_name' because: $!";

# Ask for the input file name. The readline on STDIN is essential: without a
# right-hand side the assignment is a syntax error and the script cannot run.
print 'PLEASE ENTER THE FILENAME OF THE PROTEIN SEQUENCE: ';
my $prot_filename = <STDIN>;
die "No filename entered\n" unless defined $prot_filename;    # EOF on STDIN
chomp $prot_filename;

open my $PROTFILE, '<', $prot_filename
    or die "Cannot open '$prot_filename' because: $!";

# Paragraph mode: each read from here on returns one blank-line-separated
# chunk. This must stay after the STDIN read above, which needs line mode.
$/ = '';

my @count  = ();    # one [ count, name ] pair per paragraph read
my %absent = ();    # letter => number of paragraphs that lack it
my $name;           # text of the most recent '>' header line
# Read the input one paragraph at a time and, for each paragraph, count how
# often each capital letter A-Z occurs. Results go to the report file, a short
# progress line goes to STDOUT, and @count / %absent are updated for the
# summary that follows the loop.
while ( my $record = <$PROTFILE> ) {

    # Remove the first fasta header line (">...") and remember its text.
    # $name lives outside the loop, so a paragraph without a header keeps
    # the name of the previous paragraph.
    if ( $record =~ s/^>(.*)//m ) {
        $name = $1;
    }

    # If no header has been seen yet $name is still undef; substitute a
    # placeholder so the substr/printf/print calls below do not warn about
    # uninitialized values and @count never stores an undef name.
    $name = '(unnamed)' unless defined $name;

    # Remove comment line(s)
    $record =~ s/^\s*#.*//mg;

    # Tally each remaining capital letter without modifying the record.
    my %tally;
    $tally{$_}++ for $record =~ /[A-Z]/g;

    # Number of distinct letters present in this paragraph. Note that the
    # report labels this value "Number of proteins".
    my $distinct = scalar keys %tally;
    push @count, [ $distinct, $name ];
    printf "Counted %d for %s ..\n", $distinct, substr( $name, 0, 50 );

    print $out_file "$name\n";
    print $out_file join( ' ', map { "$_=$tally{$_}" } sort keys %tally ), "\n";
    printf $out_file "Number of proteins = %d\n\n", $distinct;

    # count absent: letters that never occurred in this paragraph
    for my $letter ( 'A' .. 'Z' ) {
        ++$absent{$letter} unless exists $tally{$letter};
    }
}
# Summary section of the report: the entries with the lowest count, then all
# entries in ascending order of count.

# sort [ count, name ] pairs by count in ascending order to get lowest first
my @sorted = sort { $a->[0] <=> $b->[0] } @count;

my $lowest;
my @lowest;

if (@sorted) {
    $lowest = $sorted[0][0];

    # maybe more than 1 lowest
    printf $out_file "Least number of proteins is %d in these entries\n", $lowest;
    @lowest = grep { $_->[0] == $lowest } @sorted;
    print $out_file "$_->[1]\n" for @lowest;

    # show all results
    print $out_file "\nAll results in ascending count\n";
    printf $out_file "%d %s\n", @$_ for @sorted;
}
else {
    # Nothing was read. Looking at $sorted[0]->[0] here would autovivify a
    # bogus empty entry and print a meaningless "least" of 0 with warnings.
    print $out_file "No sequences found\n";
}

# Buffered write errors only surface at close, so check it.
close $out_file
    or die "Cannot close '$report_name' because: $!";
print "Results in $report_name\n";
# List every letter recorded in %absent with its tally, as "LETTER=COUNT",
# in sorted key order.
foreach my $letter ( sort keys %absent ) {
    printf "%s=%d\n", $letter, $absent{$letter};
}