# Scan every record of a FASTA file in parallel and report each occurrence of
# the motif "abc" + 10 characters + "def", together with the 5 characters on
# either side of it.
#
# Usage: perl script.pl [input.fasta]
#   The input path defaults to "input_file.fasta" when no argument is given.

use strict;
use warnings;

use BioUtil::Seq;
use constant { HDR => 0, SEQ => 1 };    # indexes into a FastaReader record

use MCE::Flow;
use MCE::Shared;

# Input file: first command-line argument, or the historical default name.
my $input_file = @ARGV ? $ARGV[0] : 'input_file.fasta';

# Workers print through this handle, opened on STDOUT with MCE::Shared's
# mce_open.
# NOTE(review): presumably the shared handle keeps output from concurrent
# workers from being interleaved mid-write -- confirm against MCE::Shared docs.
mce_open my $out_fh, '>', \*STDOUT or die "open error: $!\n";

# From the BioUtil::Seq documentation:
#
#   FastaReader returns an anonymous subroutine which, when called, returns
#   a fasta record: a reference to an array containing the fasta header and
#   sequence. By default, spaces and \r?\n are trimmed from the sequence.
#
# NOTE(review): each worker prints its own result, so output order presumably
# does not follow input order -- confirm if ordering matters.
mce_flow {
    max_workers => 4,
    chunk_size  => 1,
    input_data  => FastaReader($input_file),
},
sub {
    # With chunk_size => 1 the chunk holds exactly one record, so
    # "my $fa = $_;" would be equivalent to the two lines below.
    my ( $mce, $chunk_ref, $chunk_id ) = @_;
    my $fa = $chunk_ref->[0];

    # Debugging aid: echo the record back in FASTA form.
    # print ">$fa->[HDR]\n$fa->[SEQ]\n";

    # Record name = header text up to the first space.
    my $name = ( split( / /, $fa->[HDR], 2 ) )[0];

    # Start from an empty string so the length() test below is well defined
    # even when the sequence contains no match.
    my $output = '';

    # $1 = the 5 characters before "abc" (captured inside the lookbehind),
    # $2 = the 10 characters between "abc" and "def",
    # $3 = the 5 characters after "def" (captured inside the lookahead).
    # The lookarounds do not consume text, so flanking characters remain
    # available to later matches; a motif with fewer than 5 characters on
    # either side is not reported.
    while ( $fa->[SEQ] =~ /(?<=(.....))abc(.{10})def(?=(.....))/g ) {
        $output .= "$name: $1, $2, $3\n";
    }

    # One print per record, and only when something matched.
    print $out_fh $output if length($output);
};