File: DBIdsClient.py

package info (click to toggle)
python-biopython 1.42-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 17,584 kB
  • ctags: 12,272
  • sloc: python: 80,461; xml: 13,834; ansic: 7,902; cpp: 1,855; sql: 1,144; makefile: 203
file content (297 lines) | stat: -rwxr-xr-x 10,926 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
"""Search and retrieve information given a set of database identifiers.

EUtils has two major modes.  One uses history while the other uses
database identifiers.  This is a high-level interface for working with
identifiers.  You should use this module to get information about a
set of known database identifiers.

See HistoryClient if you want to work with a large number of
identifiers or potentially large search results.

>>> from Bio import EUtils
>>> from Bio.EUtils import DBIdsClient
>>> client = DBIdsClient.DBIdsClient()
>>> result = client.search("dalke", retmax = 100)
>>> len(result)
30
>>> print result[0].efetch(retmode = "text", rettype = "abstract").read()

1: Pac Symp Biocomput  1997;:85-96

Using Tcl for molecular visualization and analysis.

Dalke A, Schulten K.

Beckman Institute, Urbana, IL 61801, USA.

Reading and manipulating molecular structure data is a standard task in every
molecular visualization and analysis program, but is rarely available in a form
readily accessible to the user. Instead, the development of new methods for
analysis, display, and interaction is often achieved by writing a new program,
rather than building on pre-existing software. We present the Tcl-based script
language used in our molecular modeling program, VMD, and show how it can access
information about the molecular structure, perform analysis, and graphically
display and animate the results. The commands are available to the user and make
VMD a useful environment for studying biomolecules.


PMID: 9390282 [PubMed - indexed for MEDLINE]

>>>


Find sequences similar to GI:4579714 which were published in 2002.

>>> protein = DBIdsClient.from_dbids(EUtils.DBIds("protein", "4579714"))
>>> neighbors = protein.neighbor_links("protein",
...        daterange = EUtils.DateRange("2002/01/01", "2002/12/31", "pdat"))
>>> dbids = neighbors.linksetdbs["protein_protein"].dbids
>>> len(dbids)
28
>>> print dbids
DBIds(u'protein', [u'4579714', u'25298947', u'24158913', u'24158914', u'24158915', u'17942993', u'17942994', u'17942995', u'20150921', u'20150922', u'20151159', u'25298949', u'19716034', u'20663737', u'20663738', u'20663741', u'24987328', u'25533128', u'25298946', u'25298948', u'23008597', u'20219020', u'21218340', u'21218344', u'19075395', u'21218338', u'21218342', u'21311795'])
>>> 
>>> print client.from_dbids(dbids[:5]).efetch(retmode="text",
...                                           rettype="summary").read()

1: BAA75200
Bacteriorhodopsin [Halobacterium sp.]
gi|4579714|dbj|BAA75200.1|[4579714]


2: H84300
bacteriorhodopsin [imported] - Halobacterium sp. NRC-1
gi|25298947|pir||H84300[25298947]


3: 1M0KA
Chain A, Bacteriorhodopsin K Intermediate At 1.43 A Resolution
gi|24158913|pdb|1M0K|A[24158913]


4: 1M0LA
Chain A, BacteriorhodopsinLIPID COMPLEX AT 1.47 A RESOLUTION
gi|24158914|pdb|1M0L|A[24158914]


5: 1M0MA
Chain A, Bacteriorhodopsin M1 Intermediate At 1.43 A Resolution
gi|24158915|pdb|1M0M|A[24158915]

>>>

"""

import types
import parse, Mixins, Config, ThinClient, Datatypes

class DBIdsLookup(object):
    """Query the server about a fixed collection of database identifiers

    The identifiers supplied at construction are kept in the
    "records_dbids" attribute.  The "dbids" attribute instead asks the
    server for its "uilist" and returns the identifiers found there.
    """
    def __init__(self, eutils, records_dbids):
        self.records_dbids = records_dbids
        self.eutils = eutils

    def esummary(self, retmode = 'xml', rettype = None):
        """run esummary for these DBIds; returns the socket handle"""
        # NOTE(review): retmode and rettype are accepted but are not
        # forwarded to the request -- confirm whether that is intended.
        handle = self.eutils.esummary_using_dbids(dbids = self.records_dbids)
        return handle

    def summary(self):
        """fetch the esummary XML and parse it with parse.parse_summary_xml"""
        xml_handle = self.esummary("xml")
        return parse.parse_summary_xml(xml_handle)

    def elink(self, db = "pubmed", cmd = "neighbor",
              term = None, field = None, daterange = None):
        """run elink for these DBIds; returns the socket handle

        The identifiers sent are taken from the "dbids" attribute, so
        the server's "uilist" is fetched before the link request is made.
        """
        send_request = self.eutils.elink_using_dbids
        link_request = {
            "dbids": self.dbids,
            "db": db,
            "cmd": cmd,
            "daterange": daterange,
            "term": term,
            "field": field,
            }
        return send_request(**link_request)

    def _get_dbids(self):
        # efetch is not defined on this class; it has to be provided by
        # whatever this class is combined with (see the *FetchMixin classes).
        uilist_handle = self.efetch(retmode = "text", rettype = "uilist")
        server_ids = parse.parse_fetch_identifiers(uilist_handle)
        return Datatypes.DBIds(self.records_dbids.db, server_ids)
    dbids = property(
        fget = _get_dbids,
        doc = "The DBIds for this results set, validated from the server's 'uilist'")
    
    
class DBIdsRecord(DBIdsLookup):
    """Lookup for exactly one record on the server"""
    def summary(self):
        """return the first entry of the parsed summary for this record"""
        # The base class parses the whole esummary response; a single
        # record only needs the first element of that result.
        parsed = DBIdsLookup.summary(self)
        return parsed[0]

class SequenceDBIdsFetchMixin:
    """Adds an 'efetch' that understands the sequence-specific options

    Expects the class it is mixed into to provide the "eutils" and
    "records_dbids" attributes.
    """
    def efetch(self, retmode = 'xml', rettype = None,
               seq_start = None, seq_stop = None, strand = None,
               complexity = None):
        """call efetch for these DBIds; returns what the eutils client returns

        Raises TypeError when strand is anything other than None, 1 or 2.
        """
        allowed_strands = (None, 1, 2)
        if not (strand in allowed_strands):
            raise TypeError("Strand can only be 1 (plus, default) or 2 (minus)")

        send_request = self.eutils.efetch_using_dbids
        fetch_request = {
            "dbids": self.records_dbids,
            "retmode": retmode,
            "rettype": rettype,
            "seq_start": seq_start,
            "seq_stop": seq_stop,
            "strand": strand,
            "complexity": complexity,
            }
        return send_request(**fetch_request)

class SequenceDBIdsRecord(
        Mixins.SequenceFetchMixin, SequenceDBIdsFetchMixin, DBIdsRecord):
    """One sequence record, addressed by its database identifier

    Adds no members of its own; everything comes from its bases.
    """

class PublicationDBIdsFetchMixin:
    """Adds an 'efetch' suitable for publication records

    Expects the class it is mixed into to provide the "eutils" and
    "records_dbids" attributes.
    """
    def efetch(self, retmode = "xml", rettype = None):
        """call efetch for these DBIds; returns what the eutils client returns"""
        send_request = self.eutils.efetch_using_dbids
        fetch_request = {
            "dbids": self.records_dbids,
            "retmode": retmode,
            "rettype": rettype,
            }
        return send_request(**fetch_request)

class PublicationDBIdsRecord(
        Mixins.PublicationFetchMixin, PublicationDBIdsFetchMixin, DBIdsRecord):
    """One publication record, addressed by its database identifier

    Adds no members of its own; everything comes from its bases.
    """

class BaseDBIdsRecordSet(DBIdsLookup):
    """Base class for dealing with a set of records, referenced by identifier

    Subclasses must define "_record_class", the class used to wrap a
    single record when the set is indexed with an integer.
    """
    def __init__(self, eutils, records_dbids, metadata = None):
        """Store the client, the identifiers and any optional metadata"""
        DBIdsLookup.__init__(self, eutils, records_dbids)
        self.metadata = metadata

    def __len__(self):
        """Number of records referenced by this RecordSet"""
        return len(self.records_dbids)

    def __getitem__(self, i):
        """Return a subset of the records, or a single record

        A slice gives a new record set of the same class built from the
        sliced identifiers; its metadata is not carried over.  Any other
        index gives a "_record_class" instance wrapping
        records_dbids.item(i).
        """
        # Use the builtin "slice" type rather than types.SliceType: the
        # two are the same object on Python 2, and only the builtin
        # still exists on Python 3.
        if isinstance(i, slice):
            # Keep the two- and three-argument slice forms separate so
            # records_dbids sees exactly the kind of slice that was asked for.
            if i.step is None:
                subset = self.records_dbids[i.start:i.stop]
            else:
                subset = self.records_dbids[i.start:i.stop:i.step]
            # Metadata is not passed downwards
            return self.__class__(self.eutils, subset)

        return self._record_class(self.eutils, self.records_dbids.item(i))
        
class SequenceDBIdsRecordSet(
        Mixins.SequenceFetchMixin,
        SequenceDBIdsFetchMixin,
        BaseDBIdsRecordSet):
    """A set of sequence records, addressed by database identifier

    Indexing with an integer yields a SequenceDBIdsRecord.
    """
    _record_class = SequenceDBIdsRecord

class PublicationDBIdsRecordSet(
        Mixins.PublicationFetchMixin,
        PublicationDBIdsFetchMixin,
        BaseDBIdsRecordSet):
    """A set of publication records, addressed by database identifier

    Indexing with an integer yields a PublicationDBIdsRecord.
    """
    _record_class = PublicationDBIdsRecord


def _get_recordset_constructor(db, dbtype):
    """pick the RecordSet class that matches a database

    The type is resolved with Config.databases.gettype(db, dbtype).
    Returns SequenceDBIdsRecordSet or PublicationDBIdsRecordSet and
    raises TypeError for any other resolved type.
    """
    resolved_type = Config.databases.gettype(db, dbtype)
    if resolved_type == Config.SEQUENCE_TYPE:
        return SequenceDBIdsRecordSet
    if resolved_type == Config.PUBLICATION_TYPE:
        return PublicationDBIdsRecordSet
    raise TypeError("Unknown database type: %r" % (resolved_type,))

def from_dbids(dbids, dbtype = None, eutils = None):
    """create a RecordSet interface for the set of database identifiers

    Convenience wrapper: builds a DBIdsClient and calls its
    from_dbids method.

    Parameters are:
      dbids -- a DBIds
      dbtype -- the dbtype to use (EUtils.Config.{SEQUENCE,PUBLICATION}_TYPE)
           in case dbids.db isn't in the list of known NCBI databases.
           Defaults to None.
      eutils -- the ThinClient to use, defaults to creating a new
           ThinClient.ThinClient()
    """
    client = DBIdsClient(eutils)
    return client.from_dbids(dbids, dbtype)

class DBIdsClient:
    """Build a RecordSet from an Entrez search or from known DBIds

    The constructor takes an optional ThinClient, which is then used
    for every request made through this client.
    """
    def __init__(self, eutils = None):
        # Only create the default ThinClient when the caller gave none
        if eutils is not None:
            self.eutils = eutils
        else:
            self.eutils = ThinClient.ThinClient()

    def from_dbids(self, dbids, dbtype = None):
        """Return a RecordSet for the given DBIds

        The RecordSet class is chosen from dbids.db and the optional
        dbtype.  The result can be used to fetch data from NCBI
        related to those DBIds.
        """
        recordset_class = _get_recordset_constructor(dbids.db, dbtype)
        # There is no search metadata when starting from explicit ids
        return recordset_class(self.eutils, dbids, None)

    def search(self,
               term,
               db = "pubmed",
               field = None,

               retstart = 0,
               retmax = 20,

               daterange = None,
               dbtype = None,
               ):
        """run an Entrez search and return the matches as a RecordSet

        The parameters are:
          'term' -- the query string in the Entrez query language; see
             http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
          'db' -- the database to search (default "pubmed")

          'field' -- the field to use for unqualified words
                  Eg, "dalke[au] AND gene" with field==None becomes
                    dalke[au] AND (genes[MeSH Terms] OR gene[Text Word])
                  and "dalke[au] AND gene" with field=="au" becomes
                    dalke[au] AND genes[Author]

          'retstart' -- position of the first identifier to include in
                   the output (default 0)
          'retmax' -- return at most 'retmax' identifiers (default 20)
          'daterange' -- a date restriction; either WithinNDays or DateRange

          'dbtype' -- (optional) the database type (Config.PUBLICATION_TYPE
                  or SEQUENCE_TYPE).  Overrides the type based on the 'db'

        The returned RecordSet holds the identifiers found by the search
        and has the parsed search result as its metadata.
        """
        # Resolve the RecordSet class first so an unknown database type
        # is reported before any request is sent.
        recordset_class = _get_recordset_constructor(db, dbtype)

        query = {
            "term": term,
            "db": db,
            "field": field,
            "retstart": retstart,
            "retmax": retmax,
            "daterange": daterange,
            }
        response = self.eutils.esearch(**query)

        # NOTE(review): the [None] second argument is presumably a
        # placeholder because this client does not use history; confirm
        # against parse.parse_search.
        search_result = parse.parse_search(response, [None])

        found_dbids = Datatypes.DBIds(db, search_result.ids)
        return recordset_class(self.eutils, found_dbids, search_result)