#!/usr/bin/perl
use 5.010;
use strict;
use warnings;

# Rename FASTA-style header lines read from the __DATA__ section below.
#
# Every key="value" attribute on a line is collected.  The record is then
# re-emitted as
#
#     >ID key="value" key="value" ...
#
# where ID is the value of the GI attribute or, when there is no GI, the
# value of protein_id.  The attribute used as the ID is removed from the
# attribute list; all remaining attributes are printed in sorted key order
# so that the output is identical from run to run (Perl's native hash
# order is randomized).  The original identifier that followed '>' is not
# carried over.  Lines that have neither a GI nor a protein_id attribute
# are skipped.

LINE:
while ( my $line = <DATA> ) {

    # A global match in list context returns the captures as a flat
    # (key, value, key, value, ...) list, which loads straight into a hash.
    my %attr_of = $line =~ /(\S+)="([^"]+)"/g;

    # Prefer GI; fall back to protein_id.  Defined-or (rather than ||) keeps
    # a legitimate but false-looking ID such as "0" from being passed over.
    my $header = delete( $attr_of{GI} ) // delete( $attr_of{protein_id} );
    next LINE if !defined $header;

    say join ' ', ">$header",
        map { qq{$_="$attr_of{$_}"} } sort keys %attr_of;
}

__DATA__
>1001585.MDS_0001 protein_id="YP_004377784.1" product="chromosomal replication initiation protein" GI="330500915" GeneID="10459818"
>1001585.MDS_0002 protein_id="YP_004377785.1" product="DNA polymerase III subunit beta" GI="330500916" GeneID="10454784"
>1001585.MDS_0003 protein_id="YP_004377786.1" product="recombination protein F" GI="330500917" GeneID="10454785"
>1001585.MDS_0004 protein_id="YP_004377787.1" product="DNA gyrase subunit B" GI="330500918" GeneID="10454786"
>1001585.MDS_0005 protein_id="YP_004377788.1" GI="330500919" GeneID="10454787"
>1001585.MDS_0006 protein_id="YP_004377789.1" GI="330500920" GeneID="10454788"
>1001585.MDS_0007 protein_id="YP_004377790.1" GI="330500921" GeneID="10454789"
>1001585.MDS_0008 protein_id="YP_004377791.1" GI="330500922" GeneID="10454790"
>1001585.MDS_0009 protein_id="YP_004377792.1" product="ABC transporter permease" GI="330500923" GeneID="10454791"
>1001585.MDS_0010 protein_id="YP_004377793.1" product="ABC transporter ATP-binding protein" GI="330500924" GeneID="10454792"
>245014.CK3_35030 protein_id="CBL42879.1" product="Predicted transcription factor, homolog of eukaryotic MBF1"
>245014.CK3_35040 protein_id="CBL42880.1" product="Bacterial protein of unknown function (DUF961)."