#!/usr/local/bin/perl -w
use strict;
use FileHandle;

# Converts a GenBank-style flat file (Scorpion database) into BioWare format.
#
# Usage: <script> infile outfile
#
# Input lines handled (all other non-blank lines are ignored):
#   DATE  d-mon-yyyy      -> "DBACC\t<accession>" then "DATE\t\"d-mon yyyy\""
#   /exon="a;b;..."       -> one "Exon\t{Translation%<part>}" line per part
#   /intron="a;b;..."     -> one "Intron\t{Translation%<part>}" line per part
#   //                    -> a blank line (end of record)
#
# Dies with a descriptive message if either file cannot be opened or closed.

# Autoflush the currently selected output handle (STDOUT).
$|++;

my $infile  = shift || die "usage: $0 infile outfile\n";
my $outfile = shift || die "usage: $0 infile outfile\n";

# Accession counter. String auto-increment turns 'D000001' into 'D000002',
# so the letter prefix and zero padding are preserved.
my $item = 'D000001';

my $IN  = FileHandle->new;
my $OUT = FileHandle->new;

# Three-argument open: the file name is never interpreted as a mode.
open($IN,  '<', $infile)  or die "Can't open $infile. $!";
open($OUT, '>', $outfile) or die "Can't open $outfile. $!";

# Writes the BioWare lines for one /exon= or /intron= qualifier.
#   $out   - output file handle
#   $label - "Exon" or "Intron"
#   $value - text found between the double quotes (may be empty)
sub _print_features {
    my ($out, $label, $value) = @_;

    # Empty qualifier: emit the placeholder. Testing length (rather than
    # truth) keeps a literal "0" from being mistaken for an empty value.
    print $out "$label\t{Translation -}\n" unless length $value;

    # One output line per ';'-separated part.
    for my $part (split /;/, $value) {
        print $out "$label\t{Translation\%", $part, "}\n";
    }
    return;
}

while (<$IN>) {
    chomp;

    # Skip blank lines.
    next if /^\s*$/;

    # "//" on a line by itself terminates a record.
    if (m{^//$}) {
        print $OUT "\n";
        next;
    }

    # Date of the form 1or2-three-four characters, e.g. 7-JAN-1999.
    if (/^DATE\s+(..?)-(...)-(....)$/) {
        print $OUT "DBACC\t", $item++, "\n";
        print $OUT "DATE\t\"$1-$2 $3\"\n";
    }
    # Non-greedy match between the double quotes.
    elsif (/^\s*\/exon="(.*?)"$/) {
        _print_features($OUT, 'Exon', $1);
    }
    elsif (/^\s*\/intron="(.*?)"$/) {
        _print_features($OUT, 'Intron', $1);
    }
}

# Buffered write errors only surface at close, so check both.
close($IN)  or die "Can't close $infile. $!";
close($OUT) or die "Can't close $outfile. $!";