File: rfetch.pl

package info (click to toggle)
bioperl 1.7.8-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 35,964 kB
  • sloc: perl: 94,019; xml: 14,811; makefile: 15
file content (69 lines) | stat: -rw-r--r-- 1,290 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/perl

#
# Fetch sequence data via OBDA registry system
#
# usage: rfetch [-i <file_with_accession_list>] [-a] [-v] [-d <database>] [-f <format>] [-s <start> -e <end>] [id ...]
#

use strict;
use warnings;

use Bio::DB::Registry;
use Bio::SeqIO;
use Getopt::Long;

# Command-line settings and their defaults.
my $database = 'embl_biosql';   # -d: name of the registry database to query
my $start    = undef;           # -s: start coordinate handed to trunc()
my $end      = undef;           # -e: end coordinate handed to trunc()
my $format   = 'fasta';         # -f: output format given to Bio::SeqIO
my $file     = undef;           # -i: file holding the identifiers to fetch
my $acc      = undef;           # -a: fetch by accession instead of by id
my $verbose  = undef;           # -v: report each fetch on STDERR

# Stop on unknown or malformed options instead of silently running with the
# defaults. Getopt::Long has already warned about the specific problem, so
# only the usage summary is added here.
GetOptions(
    'd|database:s' => \$database,
    's|start:i'    => \$start,
    'e|end:i'      => \$end,
    'f|format:s'   => \$format,
    'i|input:s'    => \$file,
    'a|acc'        => \$acc,
    'v|verbose'    => \$verbose,
) or die "usage: $0 [-i <id_file>] [-a] [-v] [-d <database>] [-f <format>] [-s <start> -e <end>] [id ...]\n";


# Create the registry object and ask it for the database named by -d.
my $registry = Bio::DB::Registry->new();

my $db = $registry->get_database($database);

# Without a database handle every later fetch would fail with an opaque
# "Can't call method ... on an undefined value" error, so stop here with a
# message that names the database that was requested.
die "No database named '$database' is available from the registry\n"
  unless defined $db;

# Every fetched sequence is written to STDOUT in the requested format.
my $seqout = Bio::SeqIO->new( '-format' => $format, '-fh' => \*STDOUT);

# Identifiers to fetch: the first whitespace-separated field of each line of
# the -i file, or the remaining command-line arguments when no file is given.
my @ids;

if( defined $file ) {
  open my $F, '<', $file or die "Could not read file '$file': $!\n";
  while( my $line = <$F> ) {
    my ($id) = split ' ', $line;
    # A blank (or whitespace-only) line has no fields; skip it rather than
    # queueing an undefined identifier.
    next unless defined $id;
    push(@ids,$id);
  }
  close $F;
} else {
  @ids = @ARGV;
}

# Fetch each requested entry, optionally cut it down to the -s/-e range, and
# write it to the output stream.
foreach my $id ( @ids ) {
  if( $verbose ){
    print STDERR "fetching $id\n";
  }

  # -a selects lookup by accession; otherwise the lookup is by id.
  my $seq = $acc ? $db->get_Seq_by_acc($id) : $db->get_Seq_by_id($id);

  # NOTE(review): presumably some back ends return undef for a missing entry
  # instead of throwing - confirm per database type. Report it and carry on,
  # so one bad identifier does not abort the remaining fetches.
  unless( defined $seq ) {
    warn "Could not fetch '$id' from database '$database'; skipping\n";
    next;
  }

  # Truncation is applied only when both -s and -e were supplied.
  if( defined $start && defined $end ) {
    $seq = $seq->trunc($start,$end);
  }

  $seqout->write_seq($seq);
}