# ----- prediction on sequence number 1 (length = 105, name = seq_01) -- # # Constraints/Hints: # (none) # Predicted genes for sequence number 1 on both strands # start gene g1 seq_01 CHECKED gene 28503 30196 0.89 + . g1 seq_01 CHECKED transcript 28503 30196 0.89 + . g1.t1 seq_01 CHECKED start_codon 28503 28505 . + 0 transcript_id "g1.t1"; gene_id "g1"; # coding sequence = [atgtcgtccctccccactctcatctttctccaccc # atcgctgcggtcctcgccgacccttttgtgccggaagtagggaccgg] # protein sequence = [MTASAFVLGTVAFLHNRLRRSRPRQASTAHR # GTETPLLRSDKENLTTVLDATILVHSLGQKTNLALGATSSSLDLQKTNLAL # VAALTPGIVFPLPSPFVATGLCLQKTNLALGATSSSLDL] # end gene g1 ### # start gene g2 seq_01 CHECKED gene 77978 79779 0.44 + . g2 seq_01 CHECKED transcript 77978 79779 0.44 + . g2.t1 seq_01 CHECKED start_codon 77978 77980 . + 0 transcript_id "g2.t1"; gene_id "g2"; # coding sequence = [atgccgtcctcgtcaaagcagctggcgatgcc # tcggcccctccttctgcaaaccgccctgccgcccgcctcggctcctccgaa # gccgagcagcctacgcaggggccgcagatgctcgcgggagggaatatcgg] # protein sequence =[MPLDSSSTPTSNPAPSHSSTAYLLFERLHIAEQ # CCPGQGIRHGKWSPGSSEAPT] # end gene g2 ### # # ----- prediction on sequence number 2 (length = 710, name = seq_02) ----- # # Constraints/Hints: # (none) # Predicted genes for sequence number 2 on both strands # start gene g3 seq_02 CHECKED gene 150 2800 0.31 + . g3 seq_02 CHECKED transcript 150 2800 0.31 + . g3.t1 seq_02 CHECKED intron 1 149 0.75 + . transcript_id "g3.t1"; gene_id "g3"; # coding sequence = [agctgccctcctcggggccagccttctcttaactc # tttgagaccttcaatcctgaggcgtgagacgcagtctggaggagcagctc] # protein sequence = [LRRETQSGGAALCSLFDPPPTPTACAHANSP] # end gene g3 ### # # ----- prediction on sequence number 3 (length = 713, name = seq_03) ----- # # Constraints/Hints: # (none) # Predicted genes for sequence number 3 on both strands # start gene g4 .... [as same as above]......so on and on... 
#### Desired output
#
# FILE 1 (file1.txt, coding sequences):
# >seq_01 g1
# atgtcgtccctccccactctcatctttctccacccatcgctgcggtcctcgccgacccttttgtgccggaagtagggaccgg
# >seq_01 g2
# atgccgtcctcgtcaaagcagctggcgatgcctcggcccctccttctgcaaaccgccctgccgcccgcctcggctcctccgaagccgagcagcctacgcaggggccgcagatgctcgcgggagggaatatcgg
# >seq_02 g3
# agctgccctcctcggggccagccttctcttaactctttgagaccttcaatcctgaggcgtgagacgcagtctggaggagcagctc
# >seq_03 g4
# ......so on...
#
# FILE 2 (file2.txt, protein sequences):
# >seq_01 g1
# MTASAFVLGTVAFLHNRLRRSRPRQASTAHRGTETPLLRSDKENLTTVLDATILVHSLGQKTNLALGATSSSLDLQKTNLALVAALTPGIVFPLPSPFVATGLCLQKTNLALGATSSSLDL
# >seq_01 g2
# MPLDSSSTPTSNPAPSHSSTAYLLFERLHIAEQCCPGQGIRHGKWSPGSSEAPT
# >seq_02 g3
# LRRETQSGGAALCSLFDPPPTPTACAHANSP
# >seq_03 g4
# ......so on...
####

#!/usr/bin/perl
#
# Extract the coding and protein sequences of every predicted gene from an
# AUGUSTUS output file and write them as FASTA-like records.
#
# Usage:  perl script.pl <augustus_output_file>
#
# Output: file1.txt - one ">seq_name gene_id" header + coding sequence per gene
#         file2.txt - one ">seq_name gene_id" header + protein sequence per gene

use strict;
use warnings;

# Output file names are fixed, as in the original script.
my $CDS_OUTPUT     = 'file1.txt';
my $PROTEIN_OUTPUT = 'file2.txt';

# Matches one "# start gene <id> ... # end gene <id>" block.  The /s flag lets
# the body span line breaks; the back-reference plus (?!\S) makes sure that
# "g1" is not closed by "# end gene g10".
my $GENE_BLOCK_RE = qr{
    \# \s* start \s+ gene \s+ (\S+)     # capture 1: gene id
    (.*?)                               # capture 2: everything inside the block
    \# \s* end \s+ gene \s+ \1 (?!\S)
}xs;

# parse_augustus_genes($text)
#
# Scans the full text of an AUGUSTUS output and returns a list of hash
# references, one per gene block, in file order.  Each record has the keys:
#   gene_id  - identifier taken from the "# start gene" line
#   seq_name - first column of the first feature line in the block
#              (undef if the block has no feature line)
#   cds      - coding sequence with "# " prefixes and whitespace removed
#              (undef if the block has no "coding sequence = [...]" entry)
#   protein  - protein sequence, cleaned the same way (undef if absent)
#
# Only whitespace-insensitive matching is used, so the parser works both on
# regular multi-line output and on text whose line breaks were collapsed.
# If a gene block lists several transcripts, only the first coding/protein
# sequence of that block is reported.
sub parse_augustus_genes {
    my ($text) = @_;
    return if !defined $text;

    my @genes;
    while ( $text =~ /$GENE_BLOCK_RE/g ) {
        my ( $gene_id, $body ) = ( $1, $2 );

        # The sequence name is the first token after the "start gene" line,
        # unless that token is the start of a comment.
        my ($seq_name) = $body =~ m{\A \s* ([^\s\#] \S*)}x;

        # "=\s*\[" accepts both "= [" and "=[", which occur in real output.
        my ($cds) =
            $body =~ m{ coding  \s+ sequence \s* = \s* \[ ([^\]]*) \] }x;
        my ($protein) =
            $body =~ m{ protein \s+ sequence \s* = \s* \[ ([^\]]*) \] }x;

        # Multi-line sequences are continued on "# ..." lines; drop the
        # comment markers and all whitespace to get the bare residues.
        for my $sequence ( $cds, $protein ) {
            $sequence =~ s/[#\s]+//g if defined $sequence;
        }

        push @genes, {
            gene_id  => $gene_id,
            seq_name => $seq_name,
            cds      => $cds,
            protein  => $protein,
        };
    }
    return @genes;
}

# format_fasta_record($header, $sequence)
#
# Returns ">header\nsequence\n".
sub format_fasta_record {
    my ( $header, $sequence ) = @_;
    return ">$header\n$sequence\n";
}

# main($input_path)
#
# Reads the AUGUSTUS output at $input_path and writes file1.txt / file2.txt
# in the current directory.  Dies with a message on any I/O failure.
sub main {
    my ($input_path) = @_;
    die "Usage: $0 <augustus_output_file>\n" if !defined $input_path;

    open my $in_fh, '<', $input_path
        or die "Cannot open '$input_path' for reading: $!\n";
    my $text = do { local $/; <$in_fh> };    # slurp the whole file
    close $in_fh
        or die "Cannot close '$input_path': $!\n";
    $text = '' if !defined $text;

    open my $cds_fh, '>', $CDS_OUTPUT
        or die "Cannot open '$CDS_OUTPUT' for writing: $!\n";
    open my $protein_fh, '>', $PROTEIN_OUTPUT
        or die "Cannot open '$PROTEIN_OUTPUT' for writing: $!\n";

    for my $gene ( parse_augustus_genes($text) ) {
        my $header =
            defined $gene->{seq_name}
            ? "$gene->{seq_name} $gene->{gene_id}"
            : $gene->{gene_id};

        # A block may lack either sequence (e.g. AUGUSTUS run without coding
        # sequence output); only write what is actually present.
        print {$cds_fh} format_fasta_record( $header, $gene->{cds} )
            if defined $gene->{cds};
        print {$protein_fh} format_fasta_record( $header, $gene->{protein} )
            if defined $gene->{protein};
    }

    # Buffered write errors only surface at close, so check it.
    close $cds_fh
        or die "Cannot close '$CDS_OUTPUT': $!\n";
    close $protein_fh
        or die "Cannot close '$PROTEIN_OUTPUT': $!\n";
    return;
}

# Run only when executed as a script, so the parsing subs can be loaded
# and tested without touching any files.
main(@ARGV) unless caller;

1;