use 5.026;      # also turns on strict and the "say" feature
use warnings;

use Data::Dumper;

# Demonstration input: a handful of FASTA records held in an in-memory
# file.  The heredoc terminator is single-quoted so that nothing in the
# data (a stray "$" or "@" in a header, say) is ever interpolated.
open my $inFH, q{<}, \ <<'__EOD__' or die "Cannot open in-memory FASTA data: $!";
>NM_030643.4 Homo sapiens apolipoprotein L4 (APOL4)
GAGGTGCTGGGGAGCAGCGTGTTTGCTGTGCTTGATTGTGAGCTGCTGGGAAGTTGTGACTTTCATTTTA
CCTTTCGAATTCCTGGGTATATCTTGGGGGCTGGAGGACGTGTCTGGTTATTATATAGGTGCACAGCTGG
AGGTGAGATCCACACAGCTCAGACCAGCTGGATCTTGCTCAGTCTCTGTCAGAGGAAGATCCCTTGGAGG
AGGCCCCGCAGCGACATGGAGGGAGCTGCTTTGCTGAAAATCTTTGTCGTCTGCATCTGGAACCAAAATC
>NM_001198855.1 Homo sapiens cytochrome P450 family 2 subfamily C member 8 (CYP2C8)
ACATGTCAAAGAGACACACACTAAATTAGCAGGGAGTGTTATAAAAACTTTGGAGTGCAAGCTCACAGCT
GTCTTAATAAGAAGAGAAGGCTTCAATGGAACCTTTTGTGGTCCTGGTGCTGTGTCTCTCTTTTATGCTT
CTCTTTTCACTCTGGAGACAGAGCTGTAGGAGAAGGAAGCTCCCTCCTGGCCCCACTCCTCTTCCTATTA
>NR_029834.1 Homo sapiens microRNA 200a (MIR200A), microRNA
CCGGGCCCCTGTGAGCATCTTACCGGACAGTGCTGGATTTCCCAGCTTGACTCTAACACTGTCTGGTAAC
GATGTTCAAAGGTGACCCGC
>AC067940.1 Homo sapiens clone RP11-818E9, LOW-PASS SEQUENCE SAMPLING
AAATACAACTTTAAATCAAAACGGTAAAAATTCCACTCTTTCATACTAACTTCAAAAGTATTTGCTTTAA
AAAAAAAGNNNNNNNNNNAAACTGAATTTCTATTAAGCATCTATTTATAGAAGAGAGTAAACACCCCGTG
AATAAAAGACAGAGAATTGTAGCAGCCCGAAGTCCCTTTTCTCTCCTCCCAAGCATTTGGCTCTGGTCCA
AATTCACATATCCTGCTCCGTAAAACAAAGTGCCTTGGTTAACCTAACGTTATTCCTTGAACAGTAGTTT
AGTGATCAACTAGTTTTTGTTGTTGTTGTTGTTTGAGACAGAGTCTCACTCTGTCGCCCAGGCTGGAGTG
CAGTGGCGAGATCTCAGCTCACTGCAACCTCTGCTGCCCAGGTTCAAGGGATTCTCCTGCCTCAGCCTCC
CAAGTAGCTGGTATTACAGGCACCTGCCACCGCGCCTGGCTAATTTTTTTTTTTTTTTTTTTTTGTATTT
__EOD__

my %sequences = %{ readFasta( $inFH ) };
close $inFH or die "Cannot close in-memory FASTA data: $!";

# List the headers, then dump the whole header => sequence table.
say for sort keys %sequences;
say q{-} x 50;
print Data::Dumper
    ->new( [ \ %sequences ], [ qw{ *sequences } ] )
    ->Sortkeys( 1 )
    ->Dump();

# readFasta( $fh )
#
# Reads FASTA records from the open filehandle $fh.
#
# Returns a reference to a hash that maps each header line (the leading
# ">" is kept, trailing whitespace is removed) to its sequence, i.e. the
# record's data lines joined together without line breaks.
#
# Blank lines are skipped.  Sequence data that appears before the first
# header belongs to no record: it is ignored and reported once via warn.
sub readFasta
{
    my( $fh ) = @_;

    my %seqFor;
    my $seqTitle;               # header of the record being read, if any
    my $accumulator  = q{};     # sequence collected so far for that record
    my $warnedOrphan = 0;

    while ( my $line = <$fh> )
    {
        # Strip the line ending together with any trailing blanks or CR so
        # that neither ends up inside a hash key or a sequence.
        $line =~ s{\s+\z}{};
        next unless length $line;

        if ( $line =~ m{\A>} )
        {
            # A new header closes the record that was being read.
            addSequence( \ %seqFor, $seqTitle, $accumulator )
                if defined $seqTitle;
            $seqTitle    = $line;
            $accumulator = q{};
        }
        elsif ( defined $seqTitle )
        {
            $accumulator .= $line;
        }
        elsif ( not $warnedOrphan ++ )
        {
            warn "Ignoring sequence data before the first header at line $.\n";
        }
    }

    # The last record has no following header to close it.
    addSequence( \ %seqFor, $seqTitle, $accumulator )
        if defined $seqTitle;

    return \ %seqFor;
}

# addSequence( $seqForRef, $seqTitle, $sequence )
#
# Stores $sequence under $seqTitle in the hash referenced by $seqForRef.
# A header that has been seen before is reported via warn; the later
# record replaces the earlier one.
sub addSequence
{
    my( $seqForRef, $seqTitle, $sequence ) = @_;

    warn "Duplicate header, keeping the later record: $seqTitle\n"
        if exists $seqForRef->{ $seqTitle };
    $seqForRef->{ $seqTitle } = $sequence;

    return;
}

####
# Output of the script above (header keys carry no trailing whitespace):
#
# >AC067940.1 Homo sapiens clone RP11-818E9, LOW-PASS SEQUENCE SAMPLING
# >NM_001198855.1 Homo sapiens cytochrome P450 family 2 subfamily C member 8 (CYP2C8)
# >NM_030643.4 Homo sapiens apolipoprotein L4 (APOL4)
# >NR_029834.1 Homo sapiens microRNA 200a (MIR200A), microRNA
# --------------------------------------------------
# %sequences = (
#                '>AC067940.1 Homo sapiens clone RP11-818E9, LOW-PASS SEQUENCE SAMPLING' => 'AAATACAACTTTAAATCAAAACGGTAAAAATTCCACTCTTTCATACTAACTTCAAAAGTATTTGCTTTAAAAAAAAAGNNNNNNNNNNAAACTGAATTTCTATTAAGCATCTATTTATAGAAGAGAGTAAACACCCCGTGAATAAAAGACAGAGAATTGTAGCAGCCCGAAGTCCCTTTTCTCTCCTCCCAAGCATTTGGCTCTGGTCCAAATTCACATATCCTGCTCCGTAAAACAAAGTGCCTTGGTTAACCTAACGTTATTCCTTGAACAGTAGTTTAGTGATCAACTAGTTTTTGTTGTTGTTGTTGTTTGAGACAGAGTCTCACTCTGTCGCCCAGGCTGGAGTGCAGTGGCGAGATCTCAGCTCACTGCAACCTCTGCTGCCCAGGTTCAAGGGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGTATTACAGGCACCTGCCACCGCGCCTGGCTAATTTTTTTTTTTTTTTTTTTTTGTATTT',
#                '>NM_001198855.1 Homo sapiens cytochrome P450 family 2 subfamily C member 8 (CYP2C8)' => 'ACATGTCAAAGAGACACACACTAAATTAGCAGGGAGTGTTATAAAAACTTTGGAGTGCAAGCTCACAGCTGTCTTAATAAGAAGAGAAGGCTTCAATGGAACCTTTTGTGGTCCTGGTGCTGTGTCTCTCTTTTATGCTTCTCTTTTCACTCTGGAGACAGAGCTGTAGGAGAAGGAAGCTCCCTCCTGGCCCCACTCCTCTTCCTATTA',
#                '>NM_030643.4 Homo sapiens apolipoprotein L4 (APOL4)' => 'GAGGTGCTGGGGAGCAGCGTGTTTGCTGTGCTTGATTGTGAGCTGCTGGGAAGTTGTGACTTTCATTTTACCTTTCGAATTCCTGGGTATATCTTGGGGGCTGGAGGACGTGTCTGGTTATTATATAGGTGCACAGCTGGAGGTGAGATCCACACAGCTCAGACCAGCTGGATCTTGCTCAGTCTCTGTCAGAGGAAGATCCCTTGGAGGAGGCCCCGCAGCGACATGGAGGGAGCTGCTTTGCTGAAAATCTTTGTCGTCTGCATCTGGAACCAAAATC',
#                '>NR_029834.1 Homo sapiens microRNA 200a (MIR200A), microRNA' => 'CCGGGCCCCTGTGAGCATCTTACCGGACAGTGCTGGATTTCCCAGCTTGACTCTAACACTGTCTGGTAACGATGTTCAAAGGTGACCCGC'
#              );