use strict;
use warnings;

# Splits the text that follows __END__ into records (each starting with
# "ENTRY"), then splits every record into its ENTRY / TITLE / ORGANISM /
# ACCESSIONS fields. Each record and each field is printed followed by a
# 50-character separator line.
#
# NOTE(review): the line breaks in the data section below were reconstructed
# (one field header per line, sequence on its own line); the patterns only
# need a newline before each header -- confirm against the real input file.

# One record: from "ENTRY" up to and including the last newline before the
# next "ENTRY" or the end of the text. /s lets "." cross newlines; /x allows
# layout whitespace inside the pattern.
my $rxRecord = qr{
    (ENTRY.*?\n)
    (?=ENTRY|\z)
}xs;

# Keywords that start a field inside a record.
my $rxFieldHdrs = qr{(?:ENTRY|TITLE|ORGANISM|ACCESSIONS)};

# One field: from a header keyword up to and including the last newline
# before the next header keyword or the end of the record.
my $rxField = qr{
    ($rxFieldHdrs.*?\n)
    (?=$rxFieldHdrs|\z)
}xs;

# Slurp the whole data section in one read. Undefining $/ only inside the
# do-block keeps the change from leaking into the rest of the program.
# In the top-level script __END__ leaves the remaining text readable through
# the DATA filehandle.
my $fileText = do {
    local $/;
    <DATA>;
};

# List-context match with /g returns every captured record.
my @records = $fileText =~ m{$rxRecord}g;

foreach my $record (@records) {
    print $record, q{+} x 50, qq{\n};

    my @fields = $record =~ m{$rxField}g;
    foreach my $field (@fields) {
        print $field, q{-} x 50, qq{\n};
    }

    print q{*} x 50, qq{\n};
}

__END__
ENTRY CCHU #type complete
TITLE cytochrome c [validated] - human Homo sapiens
ORGANISM #formal_name Homo sapiens #common_name man
ACCESSIONS A31764; A05676; I55192; A00001
MGDVEKGKKIFIMKCSQCHTVEMGDVEKGGKHKTGPNLHGMIYARAJLFGRKTSEKGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIP
ENTRY CCCZ #type complete
TITLE cytochrome c - chimpanzee (tentative sequence)
ORGANISM #formal_name Pan troglodytes #common_name chimpanzee
ACCESSIONS A00002
GDVEKGKKIFIMKCSQCHTSEKVEKGSSSKHKSSSTGPNLHGLMIYARAJFGRKTGSEKQAPGYSYTAANKNKGIIWGED
ENTRY CCMQR #type complete
TITLE cytochrome c - rhesus macaque (tentative sequence) Macaca mulatta
ORGANISM #formal_name Macaca mulatta #common_name rhesus macaque
ACCESSIONS A00003
GDVEKGKKIFIMKCSQSEKCHTVEKGGSSSSKHKTGPNLHGSSEKEMIYARAJKSEKLFGAAAAAAAARKTGQAPGYSYTAANKSSSSNKGITWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEE
ENTRY CCMKP #type complete
TITLE cytochrome c - spider monkey
ORGANISM #formal_name Ateles sp.
#common_name spider monkey ACCESSIONS A00004 GDVFKGKRIFIMKCSQCHTVESSSSKGGKHKTGPNLHGLMIYARAJSEKFGSSSSSSSSSSR #### ENTRY CCHU #type complete TITLE cytochrome c [validated] - human Homo sapiens ORGANISM #formal_name Homo sapiens #common_name man ACCESSIONS A31764; A05676; I55192; A00001 MGDVEKGKKIFIMKCSQCHTVEMGDVEKGGKHKTGPNLHGMIYARAJLFGRKTSEKGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIP ++++++++++++++++++++++++++++++++++++++++++++++++++ ENTRY CCHU #type complete -------------------------------------------------- TITLE cytochrome c [validated] - human Homo sapiens -------------------------------------------------- ORGANISM #formal_name Homo sapiens #common_name man -------------------------------------------------- ACCESSIONS A31764; A05676; I55192; A00001 MGDVEKGKKIFIMKCSQCHTVEMGDVEKGGKHKTGPNLHGMIYARAJLFGRKTSEKGQAPGYSYTAANKNKGIIWGEDTLMEYLENPKKYIP -------------------------------------------------- ************************************************** ENTRY CCCZ #type complete TITLE cytochrome c - chimpanzee (tentative sequence) ORGANISM #formal_name Pan troglodytes #common_name chimpanzee ACCESSIONS A00002 GDVEKGKKIFIMKCSQCHTSEKVEKGSSSKHKSSSTGPNLHGLMIYARAJFGRKTGSEKQAPGYSYTAANKNKGIIWGED ++++++++++++++++++++++++++++++++++++++++++++++++++ ENTRY CCCZ #type complete -------------------------------------------------- TITLE cytochrome c - chimpanzee (tentative sequence) -------------------------------------------------- ORGANISM #formal_name Pan troglodytes #common_name chimpanzee -------------------------------------------------- ACCESSIONS A00002 GDVEKGKKIFIMKCSQCHTSEKVEKGSSSKHKSSSTGPNLHGLMIYARAJFGRKTGSEKQAPGYSYTAANKNKGIIWGED -------------------------------------------------- ************************************************** ENTRY CCMQR #type complete TITLE cytochrome c - rhesus macaque (tentative sequence) Macaca mulatta ORGANISM #formal_name Macaca mulatta #common_name rhesus macaque ACCESSIONS A00003 
GDVEKGKKIFIMKCSQSEKCHTVEKGGSSSSKHKTGPNLHGSSEKEMIYARAJKSEKLFGAAAAAAAARKTGQAPGYSYTAANKSSSSNKGITWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEE ++++++++++++++++++++++++++++++++++++++++++++++++++ ENTRY CCMQR #type complete -------------------------------------------------- TITLE cytochrome c - rhesus macaque (tentative sequence) Macaca mulatta -------------------------------------------------- ORGANISM #formal_name Macaca mulatta #common_name rhesus macaque -------------------------------------------------- ACCESSIONS A00003 GDVEKGKKIFIMKCSQSEKCHTVEKGGSSSSKHKTGPNLHGSSEKEMIYARAJKSEKLFGAAAAAAAARKTGQAPGYSYTAANKSSSSNKGITWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEE -------------------------------------------------- ************************************************** ENTRY CCMKP #type complete TITLE cytochrome c - spider monkey ORGANISM #formal_name Ateles sp. #common_name spider monkey ACCESSIONS A00004 GDVFKGKRIFIMKCSQCHTVESSSSKGGKHKTGPNLHGLMIYARAJSEKFGSSSSSSSSSSR ++++++++++++++++++++++++++++++++++++++++++++++++++ ENTRY CCMKP #type complete -------------------------------------------------- TITLE cytochrome c - spider monkey -------------------------------------------------- ORGANISM #formal_name Ateles sp. #common_name spider monkey -------------------------------------------------- ACCESSIONS A00004 GDVFKGKRIFIMKCSQCHTVESSSSKGGKHKTGPNLHGLMIYARAJSEKFGSSSSSSSSSSR -------------------------------------------------- **************************************************