1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
|
#!/usr/bin/perl
#
# Fetch sequence data via OBDA registry system
#
# usage: rfetch [-i <file_with_accession_list>] [-a] [-v] [-d database] [-f format] [-s start] [-e end] [id ...]
#
use Bio::DB::Registry;
use Bio::SeqIO;
use Getopt::Long;
use strict;
use warnings;

# Fetch each requested sequence from a database obtained through the OBDA
# registry and write it to STDOUT, optionally truncated to a sub-range.
#
# Identifiers come from the file given with -i (first whitespace-separated
# field of every non-blank line) or, when -i is absent, from the remaining
# command-line arguments.

# Option defaults; each is overridden by the matching GetOptions() switch.
my $database = 'embl_biosql';    # name handed to the registry's get_database()
my $start    = undef;            # start coordinate handed to $seq->trunc()
my $end      = undef;            # end coordinate handed to $seq->trunc()
my $format   = 'fasta';          # output format handed to Bio::SeqIO
my $file     = undef;            # file holding the identifiers to fetch
my $acc      = undef;            # true: look up by accession instead of id
my $verbose  = undef;            # true: report every fetch on STDERR

# GetOptions() returns false when it met an unknown or malformed switch;
# stop here rather than run with options the user did not intend.
GetOptions(
    'd|database:s' => \$database,
    's|start:i'    => \$start,
    'e|end:i'      => \$end,
    'f|format:s'   => \$format,
    'i|input:s'    => \$file,
    'a|acc'        => \$acc,
    'v|verbose'    => \$verbose,
) or die "usage: rfetch [-i file] [-a] [-v] [-d database] [-f format] "
       . "[-s start] [-e end] [id ...]\n";

my $registry = Bio::DB::Registry->new();
my $db       = $registry->get_database($database);

# Without a database handle every fetch below would fail with an opaque
# "Can't call method ... on an undefined value" error.
die "No database named '$database' is available from the registry\n"
    unless defined $db;

my $seqout = Bio::SeqIO->new( '-format' => $format, '-fh' => \*STDOUT );

# Collect the identifiers to fetch.
my @ids;
if ( defined $file ) {
    open my $in, '<', $file or die "Could not read file '$file': $!\n";
    while ( my $line = <$in> ) {

        # split ' ' skips leading whitespace, so a blank line yields no
        # field at all; skip it instead of queueing an undefined id.
        my ($id) = split ' ', $line;
        next unless defined $id;
        push @ids, $id;
    }
    close $in;
}
else {
    @ids = @ARGV;
}

# Fetch, optionally truncate, and print every sequence.  A failure on one
# identifier is reported and skipped so the rest of the batch still runs.
my $failed = 0;
foreach my $id (@ids) {
    print STDERR "fetching $id\n" if $verbose;

    my $seq = eval {
        my $found = $acc ? $db->get_Seq_by_acc($id) : $db->get_Seq_by_id($id);
        die "no such entry\n" unless defined $found;

        # Either bound may be given on its own; the missing one defaults
        # to the corresponding end of the sequence.
        if ( defined $start || defined $end ) {
            my $from = defined $start ? $start : 1;
            my $to   = defined $end   ? $end   : $found->length;
            $found = $found->trunc( $from, $to );
        }
        $found;
    };

    if ( !defined $seq ) {

        # Force stringification: $@ may be an exception object.
        my $reason = $@ ? "$@" : 'no sequence returned';
        $reason =~ s/\s+\z//;
        warn "Could not fetch '$id' from '$database': $reason\n";
        $failed++;
        next;
    }

    $seqout->write_seq($seq);
}

# Signal partial failure to the caller through the exit status.
exit 1 if $failed;
|