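The simplest way to overcome the problem you are encountering is to count the number of fields in @$col and handle the trailing fields separately: the first six fields are joined with tabs, and everything after them is joined with single spaces into one last column.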
#! /usr/bin/perl
use warnings;
use strict;

# Group the whitespace-separated records of the DATA section by the
# five-digit number embedded in their fourth field (for example
# "S00010GM001" belongs to group "00010"), then print every group under a
# "FILE: output_<n>.txt" heading.  The first six fields of a record are
# tab-separated; any fields after them are kept together as one space-joined
# trailing column.

# Number of leading fields that are always emitted as separate columns.
use constant FIXED_FIELDS => 6;

# Return the five-digit group key embedded in an identifier of the form
# S<5 digits>GM<3 digits>.  Returns nothing (undef in scalar context) when
# the identifier is undefined or has any other shape.
sub group_key_of {
    my ($id) = @_;
    return unless defined $id && $id =~ /\AS(\d{5})GM\d{3}\z/;
    return $1;
}

# Build the output file name for a group key, dropping the key's zero
# padding ("00010" -> "output_10.txt").
sub output_name_for {
    my ($key) = @_;

    # The lookahead keeps the final digit, so an all-zero key becomes "0"
    # instead of an empty suffix ("output_.txt").
    ( my $suffix = $key ) =~ s/\A0+(?=\d)//;
    return "output_$suffix.txt";
}

# Render one record (an array reference of fields) as a single line without
# a trailing newline: the first FIXED_FIELDS fields tab-separated, followed
# by the remaining fields joined with single spaces as one last column.
sub format_row {
    my ($col) = @_;
    my @cells = @{$col};    # copy, so the caller's record is left untouched
    if ( @cells > FIXED_FIELDS ) {
        my @lead = splice @cells, 0, FIXED_FIELDS;
        @cells = ( @lead, join ' ', @cells );
    }
    return join "\t", @cells;
}

# Pass 1: collect the records of every group, preserving input order.
my %hash;
while ( my $line = <DATA> ) {
    chomp $line;
    my @col = split ' ', $line;
    next if @col < 4;    # no identifier column on this line
    my $key = group_key_of( $col[3] );
    next unless defined $key;
    push @{ $hash{$key} }, [@col];
}

# Pass 2: emit the groups.  Keys are fixed-width and zero-padded, so the
# default string sort also puts them in numeric order.
for my $key ( sort keys %hash ) {
    my $file = output_name_for($key);

    # To write real files instead of printing to STDOUT, open a handle here
    # and print the rows to it:
    #   open my $out, '>', $file or die "Cannot open file $file: $!\n";
    #   print {$out} format_row($col) . "\n";    (inside the loop below)
    #   close $out or die "Cannot close file $file: $!\n";
    print "FILE: $file\n";
    for my $col ( @{ $hash{$key} } ) {
        print format_row($col) . "\n";
    }
}
__DATA__
11880 13417 - S00010GM001 sml_056 sp|YV02233 desc
13804 14685 - S00010GM002 sml_045 sp|YV02643 desc
15525 18026 - S00001GM001 sml_032 sp|V023334 desc
32763 34239 + S00002GM001 sml_028 sp|YV02376 desc
67929 68933 - S00003GM001 sml_025 sp|YV02346 desc not found
90562 91368 + S00012GM001 sml_025 sp|YV02376 desc not found
10209 10433 - S00012GM002 sml_046 sp|YV02355 desc
12522 12576 + S00013GM001 sml_027 sp|0235777 desc
13247 13349 - S00013GM002 sml_088 sp|YV02375 desc
The output is:
C:\Code>perl onfour.pl
FILE: output_1.txt
15525 18026 - S00001GM001 sml_032 sp|V023334 desc
FILE: output_2.txt
32763 34239 + S00002GM001 sml_028 sp|YV02376 desc
FILE: output_3.txt
67929 68933 - S00003GM001 sml_025 sp|YV02346 desc not found
FILE: output_10.txt
11880 13417 - S00010GM001 sml_056 sp|YV02233 desc
13804 14685 - S00010GM002 sml_045 sp|YV02643 desc
FILE: output_12.txt
90562 91368 + S00012GM001 sml_025 sp|YV02376 desc not found
10209 10433 - S00012GM002 sml_046 sp|YV02355 desc
FILE: output_13.txt
12522 12576 + S00013GM001 sml_027 sp|0235777 desc
13247 13349 - S00013GM002 sml_088 sp|YV02375 desc