1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
|
"""Search and retrieve information given a set of database identifiers.
EUtils has two major modes. One uses history while the other uses
database identifiers. This is a high-level interface for working with
identifiers. You should use this module to get information about a
set of known database identifiers.
See HistoryClient if you want to work with a large number of
identifiers or potentially large search results.
>>> from Bio import EUtils
>>> from Bio.EUtils import DBIdsClient
>>> client = DBIdsClient.DBIdsClient()
>>> result = client.search("dalke", retmax = 100)
>>> len(result)
30
>>> print result[0].efetch(retmode = "text", rettype = "abstract").read()
1: Pac Symp Biocomput 1997;:85-96
Using Tcl for molecular visualization and analysis.
Dalke A, Schulten K.
Beckman Institute, Urbana, IL 61801, USA.
Reading and manipulating molecular structure data is a standard task in every
molecular visualization and analysis program, but is rarely available in a form
readily accessible to the user. Instead, the development of new methods for
analysis, display, and interaction is often achieved by writing a new program,
rather than building on pre-existing software. We present the Tcl-based script
language used in our molecular modeling program, VMD, and show how it can access
information about the molecular structure, perform analysis, and graphically
display and animate the results. The commands are available to the user and make
VMD a useful environment for studying biomolecules.
PMID: 9390282 [PubMed - indexed for MEDLINE]
>>>
Find sequences similar to GI:4579714 which were published in 2002.
>>> protein = DBIdsClient.from_dbids(EUtils.DBIds("protein", "4579714"))
>>> neighbors = protein.neighbor_links("protein",
... daterange = EUtils.DateRange("2002/01/01", "2002/12/31", "pdat"))
>>> dbids = neighbors.linksetdbs["protein_protein"].dbids
>>> len(dbids)
28
>>> print dbids
DBIds(u'protein', [u'4579714', u'25298947', u'24158913', u'24158914', u'24158915', u'17942993', u'17942994', u'17942995', u'20150921', u'20150922', u'20151159', u'25298949', u'19716034', u'20663737', u'20663738', u'20663741', u'24987328', u'25533128', u'25298946', u'25298948', u'23008597', u'20219020', u'21218340', u'21218344', u'19075395', u'21218338', u'21218342', u'21311795'])
>>>
>>> print client.from_dbids(dbids[:5]).efetch(retmode="text",
... rettype="summary").read()
1: BAA75200
Bacteriorhodopsin [Halobacterium sp.]
gi|4579714|dbj|BAA75200.1|[4579714]
2: H84300
bacteriorhodopsin [imported] - Halobacterium sp. NRC-1
gi|25298947|pir||H84300[25298947]
3: 1M0KA
Chain A, Bacteriorhodopsin K Intermediate At 1.43 A Resolution
gi|24158913|pdb|1M0K|A[24158913]
4: 1M0LA
Chain A, BacteriorhodopsinLIPID COMPLEX AT 1.47 A RESOLUTION
gi|24158914|pdb|1M0L|A[24158914]
5: 1M0MA
Chain A, Bacteriorhodopsin M1 Intermediate At 1.43 A Resolution
gi|24158915|pdb|1M0M|A[24158915]
>>>
"""
import types
import parse, Mixins, Config, ThinClient, Datatypes
class DBIdsLookup(object):
    """Base interface for asking the server about a set of DBIds.

    An instance keeps the client object it was given ('eutils') and the
    DBIds it was created with ('records_dbids').  The 'dbids' property
    returns the identifier list as reported by the server's "uilist".

    Note that 'elink' and 'dbids' rely on 'self.efetch', which is not
    defined in this class; it has to be supplied by a subclass or mixin.
    """

    def __init__(self, eutils, records_dbids):
        """Remember the client and the DBIds to operate on."""
        self.records_dbids = records_dbids
        self.eutils = eutils

    def esummary(self, retmode = 'xml', rettype = None):
        """call esummary on this DBIds; returns the socket handle

        'retmode' and 'rettype' are accepted but are not forwarded to
        the request; only the stored DBIds are sent.
        """
        request = self.eutils.esummary_using_dbids
        return request(dbids = self.records_dbids)

    def summary(self):
        """Request the XML summary for these DBIds and return it parsed
        by parse.parse_summary_xml."""
        handle = self.esummary("xml")
        return parse.parse_summary_xml(handle)

    def elink(self,
              db = "pubmed",
              cmd = "neighbor",
              term = None,
              field = None,
              daterange = None):
        """call elink on this DBIds; returns the socket handle

        The identifiers sent are those of the 'dbids' property (i.e.
        looked up through the server's "uilist"), not 'records_dbids'.
        """
        request = self.eutils.elink_using_dbids
        return request(
            db = db,
            cmd = cmd,
            term = term,
            field = field,
            daterange = daterange,
            dbids = self.dbids,
            )

    def _get_dbids(self):
        """Fetch the "uilist" for these records and wrap the parsed
        identifiers in a new Datatypes.DBIds for the same database."""
        uilist_handle = self.efetch(retmode = "text", rettype = "uilist")
        server_ids = parse.parse_fetch_identifiers(uilist_handle)
        database = self.records_dbids.db
        return Datatypes.DBIds(database, server_ids)

    dbids = property(
        _get_dbids,
        doc = "The DBIds for this results set, validated from the server's 'uilist'")
class DBIdsRecord(DBIdsLookup):
    """A single record on the server"""

    def summary(self):
        """Return the first entry of the summary produced by
        DBIdsLookup.summary for this record."""
        summaries = DBIdsLookup.summary(self)
        return summaries[0]
class SequenceDBIdsFetchMixin:
    """Support 'efetch' for sequence records

    The host class must provide the 'eutils' and 'records_dbids'
    attributes used by the request.
    """

    def efetch(self, retmode = 'xml', rettype = None,
               seq_start = None, seq_stop = None, strand = None,
               complexity = None):
        """Call efetch_using_dbids on the client for these DBIds.

        All arguments are forwarded unchanged as keyword arguments.
        'strand' must be None, 1 or 2; any other value raises
        TypeError before a request is made.
        """
        accepted_strands = (None, 1, 2)
        if strand in accepted_strands:
            fetch = self.eutils.efetch_using_dbids
            return fetch(
                dbids = self.records_dbids,
                retmode = retmode,
                rettype = rettype,
                seq_start = seq_start,
                seq_stop = seq_stop,
                strand = strand,
                complexity = complexity)
        raise TypeError("Strand can only be 1 (plus, default) or 2 (minus)")
class SequenceDBIdsRecord(
        Mixins.SequenceFetchMixin,
        SequenceDBIdsFetchMixin,
        DBIdsRecord):
    """a single sequence record, referenced by database identifier

    Adds nothing of its own; all behaviour comes from the base classes.
    """
class PublicationDBIdsFetchMixin:
    """Support 'efetch' for publication records

    The host class must provide the 'eutils' and 'records_dbids'
    attributes used by the request.
    """

    def efetch(self, retmode = "xml", rettype = None):
        """Call efetch_using_dbids on the client for these DBIds,
        forwarding 'retmode' and 'rettype' as keyword arguments."""
        fetch = self.eutils.efetch_using_dbids
        return fetch(
            dbids = self.records_dbids,
            rettype = rettype,
            retmode = retmode)
class PublicationDBIdsRecord(
        Mixins.PublicationFetchMixin,
        PublicationDBIdsFetchMixin,
        DBIdsRecord):
    """a single publication record, referenced by database identifier

    Adds nothing of its own; all behaviour comes from the base classes.
    """
class BaseDBIdsRecordSet(DBIdsLookup):
    """Base class for dealing with a set of records, referenced by identifier

    Concrete subclasses must define '_record_class', the class used to
    wrap a single record returned by integer indexing.
    """

    def __init__(self, eutils, records_dbids, metadata = None):
        """Store the client, the DBIds and optional 'metadata'."""
        DBIdsLookup.__init__(self, eutils, records_dbids)
        self.metadata = metadata

    def __len__(self):
        """Number of records referenced by this RecordSet"""
        identifiers = self.records_dbids
        return len(identifiers)

    def __getitem__(self, i):
        """Return a single record, or a subset of the records.

        A non-slice index gives a '_record_class' instance built from
        'records_dbids.item(i)'.  A slice gives a new record set of the
        same class; its metadata is not carried over.
        """
        if not isinstance(i, slice):
            single = self.records_dbids.item(i)
            return self._record_class(self.eutils, single)

        # Keep the two slicing forms separate so the underlying DBIds
        # sees exactly the same kind of subscript as before.
        if i.step is None:
            subset = self.records_dbids[i.start:i.stop]
        else:
            subset = self.records_dbids[i.start:i.stop:i.step]
        # Metadata is not passed downwards
        return self.__class__(self.eutils, subset)
class SequenceDBIdsRecordSet(
        Mixins.SequenceFetchMixin,
        SequenceDBIdsFetchMixin,
        BaseDBIdsRecordSet):
    """a set of sequence records, referenced by database identifier"""

    # Class used to wrap a single record when the set is indexed
    # with a non-slice index.
    _record_class = SequenceDBIdsRecord
class PublicationDBIdsRecordSet(
        Mixins.PublicationFetchMixin,
        PublicationDBIdsFetchMixin,
        BaseDBIdsRecordSet):
    """a set of publication records, referenced by database identifier"""

    # Class used to wrap a single record when the set is indexed
    # with a non-slice index.
    _record_class = PublicationDBIdsRecord
def _get_recordset_constructor(db, dbtype):
    """Pick the RecordSet class to use for a database.

    The type is resolved with Config.databases.gettype(db, dbtype).
    Sequence and publication types map to their RecordSet classes;
    any other resolved type raises TypeError.
    """
    resolved_type = Config.databases.gettype(db, dbtype)
    if resolved_type == Config.SEQUENCE_TYPE:
        return SequenceDBIdsRecordSet
    if resolved_type == Config.PUBLICATION_TYPE:
        return PublicationDBIdsRecordSet
    raise TypeError("Unknown database type: %r" % (resolved_type,))
def from_dbids(dbids, dbtype = None, eutils = None):
    """Create a RecordSet interface for a set of database identifiers.

    Convenience wrapper around DBIdsClient(eutils).from_dbids(...).

    Parameters are:
      dbids -- a DBIds
      dbtype -- the dbtype to use (EUtils.Config.{SEQUENCE,PUBLICATION}_TYPE)
           in case dbids.db isn't in the list of known NCBI databases.
           Defaults to None.
      eutils -- the ThinClient to use, defaults to creating a new
           ThinClient.ThinClient()
    """
    client = DBIdsClient(eutils)
    return client.from_dbids(dbids, dbtype)
class DBIdsClient:
    """Create a RecordSet either from a search or a set of dbids

    The constructor takes an optional ThinClient to use for
    connecting to NCBI.
    """

    def __init__(self, eutils = None):
        """Use the given client, or create a ThinClient.ThinClient()
        when none is supplied."""
        if eutils is not None:
            self.eutils = eutils
        else:
            self.eutils = ThinClient.ThinClient()

    def from_dbids(self, dbids, dbtype = None):
        """Return a RecordSet given the DBIds

        This RecordSet can be used to fetch data from NCBI
        related to the given DBIds.  The RecordSet class is chosen
        from 'dbids.db' and the optional 'dbtype'; the result carries
        no metadata.
        """
        recordset_class = _get_recordset_constructor(dbids.db, dbtype)
        return recordset_class(self.eutils, dbids, None)

    def search(self,
               term,
               db = "pubmed",
               field = None,
               retstart = 0,
               retmax = 20,
               daterange = None,
               dbtype = None,
               ):
        """do an Entrez search

        The parameters are:
          'term' -- the query string in the Entrez query language; see
             http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
          'db' -- the database to search
          'field' -- the field to use for unqualified words
                  Eg, "dalke[au] AND gene" with field==None becomes
                    dalke[au] AND (genes[MeSH Terms] OR gene[Text Word])
                  and "dalke[au] AND gene" with field=="au" becomes
                    dalke[au] AND genes[Author]
                 (Yes, I think the first "au" should be "Author" too)
          'retstart' -- include identifiers in the output, starting with
                   position 'retstart' (normally starts with 0)
          'retmax' -- return at most 'retmax' identifiers in the output
                   (if not specified, NCBI returns 20 identifiers)
          'daterange' -- a date restriction; either WithinNDays or DateRange
          'dbtype' -- (optional) the database type (Config.PUBLICATION_TYPE
                  or SEQUENCE_TYPE).  Overrides the type based on the 'db'

        Returns a RecordSet for the identifiers found; the parsed
        search result is attached to it as its metadata.
        """
        # Resolve the RecordSet class first so an unknown database type
        # fails before any request is sent.
        recordset_class = _get_recordset_constructor(db, dbtype)
        response = self.eutils.esearch(
            term = term,
            db = db,
            field = field,
            retstart = retstart,
            retmax = retmax,
            daterange = daterange)
        # NOTE(review): the one-element list is handed to parse_search
        # and never read back here; its purpose is defined by
        # parse.parse_search -- confirm there.
        search_result = parse.parse_search(response, [None])
        found_dbids = Datatypes.DBIds(db, search_result.ids)
        return recordset_class(self.eutils, found_dbids, search_result)
|