#!/usr/bin/perl
use strict;
use warnings;
# Reads a FASTA-style protein file one blank-line-separated paragraph at a
# time, tallies how often each uppercase letter occurs in every paragraph,
# and writes a per-entry breakdown plus a summary (entries with the lowest
# distinct-letter count, then all entries in ascending order) to the report.
#
# NOTE(review): the report wording says "Number of proteins", but the value
# printed is the number of DISTINCT uppercase letters seen in the entry.
# The strings are kept unchanged so the report format stays compatible.
my $report_name = 'aa_report.txt';
open my $out_file, '>', $report_name
    or die "Cannot open '$report_name' because: $!";

print 'PLEASE ENTER THE FILENAME OF THE PROTEIN SEQUENCE: ';
my $prot_filename = <STDIN>;
die "No protein filename was supplied on STDIN\n"
    unless defined $prot_filename;
chomp $prot_filename;

open my $PROTFILE, '<', $prot_filename
    or die "Cannot open '$prot_filename' because: $!";

# Each element is [ distinct-letter count, entry name ].
my @count;

# Declared outside the loop on purpose: a paragraph without its own '>'
# header line keeps the name of the previous entry. The placeholder avoids
# undef warnings when the very first paragraph has no header.
my $name = '(unnamed)';

{
    # Paragraph mode, restricted to this block so that other reads in the
    # program are not affected.
    local $/ = '';

    while ( my $para = <$PROTFILE> ) {

        # Remove fasta header line, remembering its text as the entry name
        if ( $para =~ s/^>(.*)//m ) {
            $name = $1;
        }

        # Remove comment line(s)
        $para =~ s/^\s*#.*//mg;

        # Tally every uppercase letter left in the paragraph
        my %prot;
        $prot{$1}++ while $para =~ /([A-Z])/g;

        my $num = scalar keys %prot;
        push @count, [ $num, $name ];

        printf "Counted %d for %s ..\n", $num, substr( $name, 0, 50 );

        print $out_file "$name\n";
        print $out_file join( ' ', map "$_=$prot{$_}", sort keys %prot ), "\n";
        printf $out_file "Number of proteins = %d\n\n", $num;
    }
}
close $PROTFILE;

if (@count) {

    # sort names by count in ascending order to get lowest
    my @sorted = sort { $a->[0] <=> $b->[0] } @count;
    my $lowest = $sorted[0]->[0];

    # maybe more than 1 lowest
    printf $out_file "Least number of proteins is %d in these entries\n",
        $lowest;
    my @lowest = grep { $_->[0] == $lowest } @sorted;
    print $out_file "$_->[1]\n" for @lowest;

    # show all results
    print $out_file "\nAll results in ascending count\n";
    for my $entry (@sorted) {
        printf $out_file "%d %s\n", @$entry;
    }
}
else {
    # Nothing was read: say so instead of reporting on a phantom entry.
    print $out_file "No sequence entries found in '$prot_filename'\n";
}

# Buffered write errors only surface at close, so check it.
close $out_file
    or die "Cannot close '$report_name' because: $!";
print "Results in $report_name\n";