#!/usr/bin/perl -w
use strict;
use Bio::DB::GenBank;
use Bio::SeqIO;
use File::Basename;
use Pod::Usage;
use Getopt::Long;
# Command-line configuration, initialised to its defaults.
my ($verbose,$help,$format,$filename);
$verbose  = 0;             # --verbose:  report each fetched sequence
$help     = 0;             # --help:     show the manual and exit
$format   = 'GCG';         # --format:   output format handed to Bio::SeqIO
$filename = 'ncbi_seq';    # --filename: base name for the initial $outfile

# GetOptions returns false when it meets an unknown or malformed option;
# stop with the usage message instead of fetching with half-parsed settings.
GetOptions(
    'verbose'    => \$verbose,
    'filename=s' => \$filename,
    'format=s'   => \$format,
    'help'       => \$help,
) or pod2usage(2);

# Full manual on request, before the filesystem is probed for an output name.
pod2usage( -verbose => 2 ) if $help;

# At least one accession number is required.
pod2usage(2) unless @ARGV;

# Initial output name derived from --filename (suffixed if already taken).
my $outfile  = make_outputfile($filename);
my $progname = basename $0;
# start program flow
# Request every accession given on the command line as a single stream,
# retrieved in FASTA format with the 'tempfile' retrieval type.
my $gb = Bio::DB::GenBank->new(
    -retrievaltype => 'tempfile',
    -format        => 'fasta',
);
my $seqout;
my $seqio = $gb->get_Stream_by_acc( \@ARGV );
my $count = 0;
while ( my $sequence = $seqio->next_seq ) {
    # Name each file after the accession at the same position in @ARGV.
    # NOTE(review): assumes the stream yields records in request order -- confirm.
    # If more records arrive than were requested, fall back to the record's id.
    my $name = $count < @ARGV ? $ARGV[$count] : $sequence->display_id;
    $count++;    # advance so the next record does not reuse this accession

    $outfile = make_outputfile($name);
    print "outfile = $outfile\n";

    # One writer per record: every sequence goes to its own file.
    $seqout = Bio::SeqIO->new(
        -file   => ">$outfile",
        -format => $format,
    );
    $seqout->write_seq($sequence);

    if ($verbose) {
        print "Successfully fetched ", $sequence->display_id, "\n";
    }
}
# make_outputfile($file)
#
# Return a name that does not collide with an existing regular file.
# When $file is not already a plain file it is returned untouched;
# otherwise the first free name of the form "$file.N" (N = 0, 1, 2, ...)
# is returned. Only -f tests are performed; nothing is created on disk.
sub make_outputfile {
    my ($file) = @_;

    # Nothing in the way: use the requested name as is.
    return $file unless -f $file;

    # Walk the numeric suffixes until an unused one turns up.
    my $suffix = 0;
    $suffix++ while -f "$file.$suffix";

    return "$file.$suffix";
}
=head1 NAME
ncbi-fetch - fetch sequences directly from NCBI sequence databases
=head1 SYNOPSIS
ncbi-fetch accession1 [accession2 ...]
options:
--format sequence_format
--combine
--help
--verbose
=head1 OPTIONS
=over 1
=item --verbose
Causes ncbi-fetch to print output as it fetches sequences
(turned off by default).
=item --format
Specifies the sequence file format, valid formats include GenBank,
FASTA, EMBL and GCG.
=item --help
Prints a help message.
=back
=head1 EXAMPLES
# Fetch the entire E. coli K-12 genome from the NCBI
$ ncbi-fetch U00096
# Fetch some cdk7 sequences from the NCBI
$ ncbi-fetch NM_001239 NM_078489 NM_021128
=head1 DESCRIPTION
B<ncbi-fetch> will fetch sequences from the NCBI using the Bio::DB::GenBank Perl
module (available as part of the BioPerl package). Each sequence is saved to a
separate file named by accession number. This program will introduce a
three-second delay in between successive requests in order to avoid placing too
much stress on the NCBI servers.
=head1 AUTHOR
Tex Thompson <tex@biosysadmin.com>
=head1 LICENSE
B<ncbi-fetch> is licensed under the GNU GPL license, available from
http://www.gnu.org/.
=cut