use strict;
use warnings;

# Extract two pieces of a GenBank-style record stored after __DATA__:
#   * the CDS feature (from the "CDS" key line through the next feature key)
#   * the ORIGIN section (from the "ORIGIN" line onward)
# and print each one under its own heading.

$| = 1;    # unbuffered STDOUT

my @CDS;       # lines collected for the CDS feature
my @ORIGIN;    # lines collected for the ORIGIN section

while ( my $line = <DATA> ) {
    next unless ( $line =~ /\S/ );    # skip blank data lines

    # Both ranges use the three-dot form of the range operator, which does
    # NOT test the right-hand operand on the same line that switched the
    # range on.  That matters for ORIGIN: the "ORIGIN" line itself has no
    # digit, so a two-dot range would close immediately.

    # CDS range: opens on an indented "CDS" key and closes on (and includes)
    # the next line whose first non-blank character is a lowercase letter.
    # Qualifier lines start with "/" and the continuation lines of the
    # translation are uppercase, so neither of them closes the range.
    push( @CDS, $line )
        if ( $line =~ /^\s+CDS/ ... $line =~ /^\s*[a-z]+/ );

    # ORIGIN range: opens on the "ORIGIN" line and closes on the first later
    # line that contains no digit at all.  In the sample data below every
    # following line has a digit, so the range stays open to end of data.
    push( @ORIGIN, $line )
        if ( $line =~ /^\s*ORIGIN/ ... $line !~ /\s*\d/ );
}

print "CDS section:\n";
print @CDS;
print "\nORIGIN section:\n";
print @ORIGIN;

=PRINTS: ########## a copy from my command line output ####

CDS section:
     CDS             10..1011
                     /gene="hemoglobin"
                     /codon_start=1
                     /product="hemoglobin"
                     /protein_id="AAA29796.1"
                     /translation="MHSSIVLATVLFVAIASASKTRELCMKSLEHAKVGTSKEAKQDG
                     IDLYKHMFEHYPAMKKYFKHRENYTPADVQKDPFFIKQGQNILLACHVLCATYDDRET
                     FDAYVGELMARHERDHVKVPNDVWNHFWEHFIEFLGSKTTLDEPTKHAWQEIGKEFSH
                     EISHHGRHSVRDHCMNSLEYIAIGDKEHQKQNGIDLYKHMFEHYPHMRKAFKGRENFT
                     KEDVQKDAFFVNKDTRFCWPFVCCDSSYDDEPTFDYFVDALMDRHIKDDIHLPQEQWH
                     EFWKLFAEYLNEKSHQHLTEAEKHAWSTIGEDFAHEADKHAKAEKDHHEGEHKEEHH"
     sig_peptide     10..63

ORIGIN section:
ORIGIN
        1 ggaaccatta tgcactcttc aatagttttg gccaccgtgc tctttgtagc gattgcttca
       61 gcatcaaaaa cgcgagagct atgcatgaaa tcgctcgagc atgccaaggt tggcaccagc

=cut

## Data is abbreviated from https://www.ncbi.nlm.nih.gov/nuccore/M85050.1
## example URL courtesy of [glycine]
## NOTE(review): the column alignment of the record below was rebuilt to the
## usual GenBank flat-file layout -- confirm against the original record.
## The code only depends on "CDS" being indented and on each record line
## being on its own line.

__DATA__
COMMENT     Original source text: Pseudoterranova decipiens larval cDNA to mRNA.
FEATURES             Location/Qualifiers
     source          1..1353
                     /organism="Pseudoterranova decipiens"
                     /mol_type="mRNA"
                     /db_xref="taxon:6271"
                     /dev_stage="larval"
     gene            1..1353
                     /gene="hemoglobin"
     5'UTR           1..9
                     /gene="hemoglobin"
     CDS             10..1011
                     /gene="hemoglobin"
                     /codon_start=1
                     /product="hemoglobin"
                     /protein_id="AAA29796.1"
                     /translation="MHSSIVLATVLFVAIASASKTRELCMKSLEHAKVGTSKEAKQDG
                     IDLYKHMFEHYPAMKKYFKHRENYTPADVQKDPFFIKQGQNILLACHVLCATYDDRET
                     FDAYVGELMARHERDHVKVPNDVWNHFWEHFIEFLGSKTTLDEPTKHAWQEIGKEFSH
                     EISHHGRHSVRDHCMNSLEYIAIGDKEHQKQNGIDLYKHMFEHYPHMRKAFKGRENFT
                     KEDVQKDAFFVNKDTRFCWPFVCCDSSYDDEPTFDYFVDALMDRHIKDDIHLPQEQWH
                     EFWKLFAEYLNEKSHQHLTEAEKHAWSTIGEDFAHEADKHAKAEKDHHEGEHKEEHH"
     sig_peptide     10..63
                     /gene="hemoglobin"
     mat_peptide     64..1008
                     /gene="hemoglobin"
                     /product="hemoglobin"
     misc_feature    696..737
                     /gene="hemoglobin"
                     /phenotype="'altered epitope (frameshift)'"
     3'UTR           1013..1353
                     /gene="hemoglobin"
ORIGIN
        1 ggaaccatta tgcactcttc aatagttttg gccaccgtgc tctttgtagc gattgcttca
       61 gcatcaaaaa cgcgagagct atgcatgaaa tcgctcgagc atgccaaggt tggcaccagc