“file1.dat”:
Gene exp1 exp2 exp3 exp4
1 a b c d
2 e f g h
3 i j k l
4 m n o p
5 q r s t
6 u v w x
“file2.dat”:
Gene exp1 exp2 exp3 exp4
1 aa bb cc dd
2 ee ff gg hh
3 ii jj kk ll
4 mm nn oo pp
5 qq rr ss tt
6 uu vv ww xx
Script:
#! perl
use strict;
use warnings;
use Tie::File;
use constant
{
    FIELD => 3,     # Zero-based index of the field to extract from each record
    SEP   => ',',   # Separator for fields in each output file record
};

# Purpose: pull one whitespace-separated column (FIELD) out of every input
# file and write the columns side by side, one input file per output column.
# The first output record holds the input file names; each following record
# holds the FIELD value of the corresponding data record of every file.

my @infile_names = qw( file1.dat file2.dat );
my $outfile_name = 'exp3.dat';

die "No input files given\n" unless @infile_names;

# The table is built in memory and written once at the end. Only one field
# per record per file is kept, so this stays small, and it avoids rewriting
# the output file for every appended cell.
#   $rows[0]   - header row (input file names)
#   $rows[$n]  - values taken from the $n-th data record of each file
my @rows = ([]);

for my $file_idx (0 .. $#infile_names)
{
    my $file = $infile_names[$file_idx];

    open(my $in, '<', $file)
        or die "Cannot open file '$file' for reading: $!";

    <$in>;    # Discard header
    $rows[0][$file_idx] = $file;

    my $row_num = 0;
    while (my $line = <$in>)
    {
        # split ' ' (unlike split /\s+/) skips leading whitespace, so an
        # indented record does not shift every field by one position.
        my @fields = split ' ', $line;
        next unless @fields;    # Ignore blank lines

        # A record with too few fields contributes an empty cell instead of
        # an undefined value.
        $row_num++;
        $rows[$row_num][$file_idx] =
            defined $fields[FIELD] ? $fields[FIELD] : '';
    }

    close $in or die "Cannot close file '$file': $!";
}

open(my $out, '>', $outfile_name)
    or die "Cannot open file '$outfile_name' for writing: $!";

for my $row (@rows)
{
    # Pad every row to one cell per input file so that columns stay aligned
    # even when the input files have different numbers of records.
    my @cells = map { defined $row->[$_] ? $row->[$_] : '' }
                0 .. $#infile_names;
    print $out join(SEP, @cells), "\n";
}

# Buffered write errors only surface here, so the close must be checked.
close $out or die "Cannot close file '$outfile_name': $!";
Output in “exp3.dat”:
file1.dat,file2.dat
c,cc
g,gg
k,kk
o,oo
s,ss
w,ww
Another approach you should consider is to load the data from all of your hundreds of files into a database, and then extract whatever you need via SQL.