File: gb_get

package info (click to toggle)
libboulder-perl 1.30-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 404 kB
  • ctags: 306
  • sloc: perl: 4,286; makefile: 41
file content (86 lines) | stat: -rwxr-xr-x 2,135 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl

use strict 'vars';
use lib '..','../blib/lib','.','./blib/lib';
use Boulder::Stream;
use Boulder::Genbank;
use Getopt::Long;
use Text::Abbrev;
# Option values filled in by GetOptions below.  $FIRST is not an option;
# the fetch loop at the end of the script uses it to count the records
# already retrieved.
my ($ACCESSOR,$DELAY,$FIRST,$BOULDER,$DB);

# Return the usage message shown when the command line cannot be parsed.
# The text ends in a newline, so die() prints it without appending the
# script name and line number.
sub usage {
  return <<USAGE;
Usage: $0 [options] [accession] [accession] ...

Retrieves Genbank entries from a list of accession
numbers.  If no accession numbers are present on the
command line, or if the magic "-" argument is given,
will read accession numbers from standard in.

Outputs a series of Genbank records for each accession
number.

Options:
  -accessor <Entrez|Yank>  Use the Entrez|Yank methods
                            for retrieving records.
  -boulder                  Return boulder format file
  -db       <m|p|n|t|c>     Database (default n)
  -delay    <seconds>       Seconds to sleep between retrievals (default 1)

Options may be abbreviated, i.e. -acc E

Database specifiers:
  m  MEDLINE
  p  Protein
  n  Nucleotide
  t  3-D structure
  c  Genome

USAGE
}

# Parse the command line; recognised options are removed from @ARGV,
# leaving only the accession numbers (and possibly "-").
GetOptions('accessor=s' => \$ACCESSOR,
           'boulder'    => \$BOULDER,
           'db=s'       => \$DB,
           'delay=i'    => \$DELAY) or die usage();

# Resolve an accessor name given on the command line to its canonical
# spelling, 'Entrez' or 'Yank'.  Any unambiguous prefix is accepted and
# letter case is ignored, so 'e', 'ENT' and 'yank' all resolve.
# Returns undef when the name matches neither accessor.
sub canonical_accessor {
  my ($name) = @_;
  # In scalar context abbrev() returns a hash ref mapping every
  # unambiguous prefix to its full word.
  my $lookup = abbrev(qw(Entrez Yank));
  my $key    = ucfirst(lc $name);
  return $lookup->{$key};
}

# Fall back to the Entrez accessor when none was requested.
$ACCESSOR ||= 'Entrez';

# Default to a one-second delay when -delay was not given, and say so.
unless (defined $DELAY) {
  warn "Warning: No -delay parameter. Introducing a 1 second delay between fetches\n";
  $DELAY = 1;
}

my @accession_numbers;
# True when nothing is left on the command line after option parsing;
# the accession numbers are then read from input further down.
my $from_stdin = !@ARGV;

my $accessor = canonical_accessor($ACCESSOR) || die "Unknown access method $ACCESSOR";

# Extract the accession number from one line of input.
# Returns the leading run of word characters, or undef for comment lines
# (starting with '#') and for lines that do not begin with a word character.
sub accession_from_line {
  my ($line) = @_;
  return if $line =~ /^\#/;
  my ($accession) = $line =~ /^(\w+)/;
  return $accession;
}

# Collect accession numbers from the command line.  A "-" argument requests
# that standard input be read as well; it may appear anywhere in the list,
# and accession numbers that follow it are still collected.  @ARGV is
# emptied in the process.
for my $arg (splice @ARGV) {
  if ($arg eq '-') {
    $from_stdin = 1;
    next;
  }
  push(@accession_numbers,$arg);
}

# Read further accession numbers from standard input, one per line.
# A defined() test is used so that a literal "0" is treated the same
# way here as it is on the command line.
if ($from_stdin) {
  while (my $line = <STDIN>) {
    my $accession = accession_from_line($line);
    push(@accession_numbers,$accession) if defined $accession;
  }
}

# Use database specifier 'n' (listed as Nucleotide in the usage text)
# unless -db selected another one.
$DB ||= 'n';

# Output stream used when records are emitted in boulder format.
my $stream = Boulder::Stream->new;

my $gb = Boulder::Genbank->new(-accessor => $accessor,
                               -fetch    => \@accession_numbers,
                               -db       => $DB)
  || die "Couldn't open new Boulder::Genbank object";

# get() is attempted at most one more time than there are accession
# numbers.  A false return is skipped but still counts as an attempt,
# which is what keeps the loop from running forever.
my $count;
until ($count++ > @accession_numbers) {
  my $s = $gb->get or next;

  # $FIRST counts the records retrieved so far, so the pause is skipped
  # for the first record only.
  # NOTE(review): the pause happens after get() has already returned, so
  # the first and second get() calls are not separated by a delay --
  # confirm this is intended.
  my $already_fetched = $FIRST++;
  sleep $DELAY if $already_fetched && $DELAY > 0;

  if ($BOULDER) {
    $stream->put($s);
  } else {
    print $s->Blob,"//\n";
  }
}