1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770
|
# Copyright 2012 by Wibowo Arindrarto. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Bio.SearchIO object to model search results from a single query."""
from __future__ import print_function
from Bio._py3k import basestring
from copy import deepcopy
from itertools import chain
from Bio._py3k import OrderedDict
from Bio._py3k import filter
from Bio._utils import trim_str
from Bio.SearchIO._utils import optionalcascade
from ._base import _BaseSearchObject
from .hit import Hit
class QueryResult(_BaseSearchObject):
"""Class representing search results from a single query.
QueryResult is the container object that stores all search hits from a
single search query. It is the top-level object returned by SearchIO's two
main functions, ``read`` and ``parse``. Depending on the search results and
search output format, a QueryResult object will contain zero or more Hit
objects (see Hit).
You can take a quick look at a QueryResult's contents and attributes by
invoking ``print`` on it::
>>> from Bio import SearchIO
>>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
>>> print(qresult)
Program: blastn (2.2.27+)
Query: 33211 (61)
mir_1
Target: refseq_rna
Hits: ---- ----- ----------------------------------------------------------
# # HSP ID + description
---- ----- ----------------------------------------------------------
0 1 gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 52...
1 1 gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA...
2 1 gi|270133242|ref|NR_032573.1| Macaca mulatta microRNA ...
3 2 gi|301171322|ref|NR_035857.1| Pan troglodytes microRNA...
4 1 gi|301171267|ref|NR_035851.1| Pan troglodytes microRNA...
5 2 gi|262205330|ref|NR_030198.1| Homo sapiens microRNA 52...
6 1 gi|262205302|ref|NR_030191.1| Homo sapiens microRNA 51...
7 1 gi|301171259|ref|NR_035850.1| Pan troglodytes microRNA...
8 1 gi|262205451|ref|NR_030222.1| Homo sapiens microRNA 51...
9 2 gi|301171447|ref|NR_035871.1| Pan troglodytes microRNA...
10 1 gi|301171276|ref|NR_035852.1| Pan troglodytes microRNA...
11 1 gi|262205290|ref|NR_030188.1| Homo sapiens microRNA 51...
...
If you just want to know how many hits a QueryResult has, you can invoke
``len`` on it. Alternatively, you can simply type its name in the interpreter::
>>> len(qresult)
100
>>> qresult
QueryResult(id='33211', 100 hits)
QueryResult behaves like a hybrid of Python's built-in list and dictionary.
You can retrieve its items (Hit objects) using the integer index of the
item, just like regular Python lists::
>>> first_hit = qresult[0]
>>> first_hit
Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps)
You can slice QueryResult objects as well. Slicing will return a new
QueryResult object containing only the sliced hits::
>>> sliced_qresult = qresult[:3] # slice the first three hits
>>> len(qresult)
100
>>> len(sliced_qresult)
3
>>> print(sliced_qresult)
Program: blastn (2.2.27+)
Query: 33211 (61)
mir_1
Target: refseq_rna
Hits: ---- ----- ----------------------------------------------------------
# # HSP ID + description
---- ----- ----------------------------------------------------------
0 1 gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 52...
1 1 gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA...
2 1 gi|270133242|ref|NR_032573.1| Macaca mulatta microRNA ...
Like Python dictionaries, you can also retrieve hits using the hit's ID.
This is useful for retrieving hits that you know should exist in a given
search::
>>> hit = qresult['gi|262205317|ref|NR_030195.1|']
>>> hit
Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps)
You can also replace a Hit in QueryResult with another Hit using either the
integer index or hit key string. Note that the replacing object must be a
Hit that has the same ``query_id`` property as the QueryResult object.
If you're not sure whether a QueryResult contains a particular hit, you can
use the hit ID to check for membership first::
>>> 'gi|262205317|ref|NR_030195.1|' in qresult
True
>>> 'gi|262380031|ref|NR_023426.1|' in qresult
False
Or, if you just want to know the rank / position of a given hit, you can
use the hit ID as an argument for the ``index`` method. Note that the values
returned will be zero-based. So zero (0) means the hit is the first in the
QueryResult, three (3) means the hit is the fourth item, and so on. If the
hit does not exist in the QueryResult, a ``ValueError`` will be raised.
>>> qresult.index('gi|262205317|ref|NR_030195.1|')
0
>>> qresult.index('gi|262205330|ref|NR_030198.1|')
5
>>> qresult.index('gi|262380031|ref|NR_023426.1|')
Traceback (most recent call last):
...
ValueError: ...
To ease working with a large number of hits, QueryResult has several
``filter`` and ``map`` methods, analogous to Python's built-in functions with
the same names. There are ``filter`` and ``map`` methods available for
operations over both Hit objects or HSP objects. As an example, here we are
using the ``hit_map`` method to rename all hit IDs within a QueryResult::
>>> def renamer(hit):
... hit.id = hit.id.split('|')[3]
... return hit
>>> mapped_qresult = qresult.hit_map(renamer)
>>> print(mapped_qresult)
Program: blastn (2.2.27+)
Query: 33211 (61)
mir_1
Target: refseq_rna
Hits: ---- ----- ----------------------------------------------------------
# # HSP ID + description
---- ----- ----------------------------------------------------------
0 1 NR_030195.1 Homo sapiens microRNA 520b (MIR520B), micr...
1 1 NR_035856.1 Pan troglodytes microRNA mir-520b (MIR520B...
2 1 NR_032573.1 Macaca mulatta microRNA mir-519a (MIR519A)...
...
The principle behind the other ``map`` and ``filter`` methods is similar: they
accept a function, apply it, and return a new QueryResult object.
There are also other methods useful for working with list-like objects:
``append``, ``pop``, and ``sort``. More details and examples are available in
their respective documentations.
Finally, just like Python lists and dictionaries, QueryResult objects are
iterable. Iteration over QueryResults will yield Hit objects::
>>> for hit in qresult[:4]: # iterate over the first four items
... hit
...
Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps)
Hit(id='gi|301171311|ref|NR_035856.1|', query_id='33211', 1 hsps)
Hit(id='gi|270133242|ref|NR_032573.1|', query_id='33211', 1 hsps)
Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
If you need access to all the hits in a QueryResult object, you can get
them in a list using the ``hits`` property. Similarly, access to all hit IDs is
available through the ``hit_keys`` property.
>>> qresult.hits
[Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps), ...]
>>> qresult.hit_keys
['gi|262205317|ref|NR_030195.1|', 'gi|301171311|ref|NR_035856.1|', ...]
"""
# Attributes we don't want to transfer when creating a new QueryResult object
# from this one: the new object builds its own hit container and its own
# alternative-ID lookup.
# ``self.__alt_hit_ids`` is assigned inside this class, so Python stores it on
# instances under the name-mangled key ``_QueryResult__alt_hit_ids``. The
# mangled spelling is listed as well, so that code comparing these names
# against instance ``__dict__`` keys can actually match the attribute.
# NOTE(review): confirm how ``_transfer_attrs`` (defined outside this file)
# consumes this tuple.
_NON_STICKY_ATTRS = ('_items', '__alt_hit_ids', '_QueryResult__alt_hit_ids', )
def __init__(self, hits=(), id=None,
             hit_key_function=lambda hit: hit.id):
    """Set up an empty QueryResult and fill it with the given hits.

    :param hits: iterator yielding Hit objects
    :type hits: iterable
    :param id: query sequence ID
    :type id: string
    :param hit_key_function: function to define hit keys
    :type hit_key_function: callable, accepts Hit objects, returns string

    """
    # identity of the query and the function used to derive hit keys
    self._id = id
    self._description = None
    self._hit_key_function = hit_key_function
    # hit storage: the hits in insertion order, plus a lookup from each
    # alternative hit ID to the key its hit is stored under
    self._items = OrderedDict()
    self.__alt_hit_ids = {}
    # placeholders for the search metadata
    self.program = '<unknown program>'
    self.version = '<unknown version>'
    self.target = '<unknown target>'
    # append() stores through __setitem__, which validates every hit
    for new_hit in hits:
        self.append(new_hit)
# Python 2's OrderedDict exposes ``iteritems`` (and friends) while Python 3's
# does not; its presence decides which flavour of the accessors is defined.

def iterhit_keys(self):
    """Returns an iterator over the ID of the Hit objects."""
    # iterating the ordered dict yields its keys on both Python versions
    for key in self._items:
        yield key

if hasattr(OrderedDict, 'iteritems'):
    # Python 2: values()/keys()/items() already return lists, and the
    # iter* methods provide the lazy variants

    def iterhits(self):
        """Returns an iterator over the Hit objects."""
        for stored_hit in self._items.itervalues():
            yield stored_hit

    def iteritems(self):
        """Returns an iterator yielding tuples of Hit ID and Hit objects."""
        for pair in self._items.iteritems():
            yield pair

    def __iter__(self):
        """Iterate over the Hit objects."""
        return self.iterhits()

    @property
    def hits(self):
        """Hit objects contained in the QueryResult."""
        return self._items.values()

    @property
    def hit_keys(self):
        """Hit IDs of the Hit objects contained in the QueryResult."""
        return self._items.keys()

    @property
    def items(self):
        """List of tuples of Hit IDs and Hit objects."""
        return self._items.items()

else:
    # Python 3: values()/keys()/items() return views, so the properties
    # wrap them in list() to keep returning lists

    def iterhits(self):
        """Returns an iterator over the Hit objects."""
        for stored_hit in self._items.values():
            yield stored_hit

    def iteritems(self):
        """Returns an iterator yielding tuples of Hit ID and Hit objects."""
        for pair in self._items.items():
            yield pair

    def __iter__(self):
        """Iterate over the Hit objects."""
        return iter(self.hits)

    @property
    def hits(self):
        """Hit objects contained in the QueryResult."""
        return list(self._items.values())

    @property
    def hit_keys(self):
        """Hit IDs of the Hit objects contained in the QueryResult."""
        return list(self._items.keys())

    @property
    def items(self):
        """List of tuples of Hit IDs and Hit objects."""
        return list(self._items.items())
def __contains__(self, hit_key):
    """Check whether a hit is stored in the QueryResult.

    :param hit_key: Hit object, hit key, or alternative hit ID to look for
    :type hit_key: Hit or string

    A Hit object is looked up by the key that the hit key function derives
    from it, or by its ``id`` when no key function is set (the same fallback
    ``append`` uses). Any other value is looked up among the hit keys first
    and among the alternative hit IDs second.
    """
    if isinstance(hit_key, Hit):
        key_func = self._hit_key_function
        # append() tolerates a missing key function by using the hit's ID;
        # do the same here instead of calling None
        derived_key = hit_key.id if key_func is None else key_func(hit_key)
        return derived_key in self._items
    return hit_key in self._items or hit_key in self.__alt_hit_ids
def __len__(self):
    """Return the number of Hit objects stored in the QueryResult."""
    hit_count = len(self._items)
    return hit_count
# Truth value of a QueryResult: Python 3 looks up ``__bool__`` ...
def __bool__(self):
    """Return True if the QueryResult holds at least one hit."""
    return len(self._items) > 0

# ... while Python 2 looks up ``__nonzero__``
__nonzero__ = __bool__
def __repr__(self):
    """Return a one-line summary: the query ID and the number of hits."""
    summary = (self.id, len(self))
    return "QueryResult(id=%r, %r hits)" % summary
def __str__(self):
    """Return a multi-line, human-readable summary of the QueryResult.

    The summary lists the program and version, the query ID (followed by
    ``seq_len`` when that attribute exists and by the description when there
    is one), the target, and a table of hits. Only the first 30 hits and the
    last three are shown; a ``~~~`` row marks the omitted ones.
    """
    # NOTE(review): the ' Hits: ' prefix is 7 characters wide while the table
    # rows right-align the index in 13 columns, so the header dashes do not
    # line up with the rows. The runs of spaces in these literals may have
    # been collapsed -- confirm the intended layout before changing them.
    lines = ['Program: %s (%s)' % (self.program, self.version)]

    # query line: ID, optional length, optional (trimmed) description
    qid_line = ' Query: %s' % self.id
    if hasattr(self, 'seq_len'):
        qid_line += ' (%i)' % self.seq_len
    if self.description:
        qid_line += trim_str('\n %s' % self.description, 80, '...')
    lines.append(qid_line)

    lines.append(' Target: %s' % self.target)

    # fetch the hits once; the property is re-evaluated on every access,
    # which the previous per-iteration ``len(self.hits)`` paid for each row
    hits = self.hits
    if not hits:
        lines.append(' Hits: 0')
        return '\n'.join(lines)

    lines.append(' Hits: %s %s %s' % ('-' * 4, '-' * 5, '-' * 58))
    pattern = '%13s %5s %s'
    lines.append(pattern % ('#', '# HSP', 'ID + description'))
    lines.append(pattern % ('-' * 4, '-' * 5, '-' * 58))
    n_hits = len(hits)
    for idx, hit in enumerate(hits):
        if idx < 30 or idx > n_hits - 4:
            # shown rows: the first 30 hits and the last three
            hid_line = '%s %s' % (hit.id, hit.description)
            if len(hid_line) > 58:
                hid_line = hid_line[:55] + '...'
            lines.append(pattern % (idx, str(len(hit)), hid_line))
        elif idx == 30:
            # a single marker row stands in for all omitted hits
            lines.append('%14s' % '~~~')
    return '\n'.join(lines)
def __getitem__(self, hit_key):
    """Retrieve hits by slice, integer index, hit key, or alternative hit ID.

    :param hit_key: what to retrieve
    :type hit_key: slice, int, or string

    A slice returns a new object of the same class holding the sliced hits.
    An integer returns the Hit at that position (negative values count from
    the end) and raises ``IndexError`` when out of range. Anything else is
    used as a hit key, falling back on the alternative hit IDs; a
    ``KeyError`` is raised when neither lookup succeeds.
    """
    # slices produce another QueryResult-like object rather than a list
    if isinstance(hit_key, slice):
        subset = self.__class__(list(self.hits)[hit_key], self.id,
                                self._hit_key_function)
        self._transfer_attrs(subset)
        return subset

    # integers address hits by position
    if isinstance(hit_key, int):
        size = len(self)
        # map a negative index onto its non-negative equivalent
        position = hit_key + size if hit_key < 0 else hit_key
        if 0 <= position < size:
            for offset, stored_hit in enumerate(self.iterhits()):
                if offset == position:
                    return stored_hit
        raise IndexError("list index out of range")

    # everything else is a dictionary-style lookup, trying the hit keys
    # first and the alternative IDs second
    try:
        return self._items[hit_key]
    except KeyError:
        return self._items[self.__alt_hit_ids[hit_key]]
def __setitem__(self, hit_key, hit):
    """Store a Hit under the given key, replacing any hit already there.

    :param hit_key: key to store the hit under
    :type hit_key: string
    :param hit: object to store
    :type hit: Hit

    Raises ``TypeError`` if the key is not a string or the value is not a
    Hit, and ``ValueError`` if the hit's query ID or query description
    differs from the ones already set on this object.
    """
    # only string keys and Hit values are accepted
    if not isinstance(hit_key, basestring):
        raise TypeError("QueryResult object keys must be a string.")
    if not isinstance(hit, Hit):
        raise TypeError("QueryResult objects can only contain Hit objects.")

    # the hit must carry this object's query ID; while the ID is still
    # None (default for empty objects) it is taken over from the hit
    own_id = self.id
    hit_query_id = hit.query_id
    if own_id is None:
        self.id = hit_query_id
    elif hit_query_id != own_id:
        raise ValueError("Expected Hit with query ID %r, found %r "
                         "instead." % (own_id, hit_query_id))

    # the same rule applies to the query description
    own_desc = self.description
    hit_query_desc = hit.query_description
    if own_desc is None:
        self.description = hit_query_desc
    elif hit_query_desc != own_desc:
        raise ValueError("Expected Hit with query description %r, "
                         "found %r instead." % (own_desc, hit_query_desc))

    alt_ids = self.__alt_hit_ids
    # a hit about to be replaced takes its alternative IDs with it
    if hit_key in self._items:
        for stale_id in self._items[hit_key].id_all[1:]:
            del alt_ids[stale_id]
    # a key that used to be an alternative ID stops being one
    if hit_key in alt_ids:
        del alt_ids[hit_key]

    # store the hit and register its alternative IDs
    self._items[hit_key] = hit
    for alt_id in hit.id_all[1:]:
        alt_ids[alt_id] = hit_key
def __delitem__(self, hit_key):
    """Delete one or more hits from the QueryResult.

    :param hit_key: what to delete
    :type hit_key: int, slice, or string

    An integer or slice selects hits by position; any other value is used as
    a hit key or as an alternative hit ID. The alternative IDs of every
    removed hit are dropped from the lookup as well, as ``pop`` does.

    Raises ``KeyError`` carrying the offending key when it matches neither a
    hit key nor an alternative ID.
    """
    # positions are translated into the keys stored at those positions
    if isinstance(hit_key, int):
        hit_keys = [list(self.hit_keys)[hit_key]]
    elif isinstance(hit_key, slice):
        hit_keys = list(self.hit_keys)[hit_key]
    else:
        hit_keys = [hit_key]

    alt_ids = self.__alt_hit_ids
    for key in hit_keys:
        # the key may name a stored hit, an alternative ID, or both
        stored_keys = []
        if key in self._items:
            stored_keys.append(key)
        if key in alt_ids:
            stored_keys.append(alt_ids.pop(key))
        if not stored_keys:
            raise KeyError(key)
        for stored_key in stored_keys:
            removed = self._items.pop(stored_key)
            # forget the alternative IDs that still point at the removed
            # hit, so they no longer report membership
            for alt_id in removed.id_all[1:]:
                if alt_ids.get(alt_id) == stored_key:
                    del alt_ids[alt_id]
# properties #
# ``id`` and ``description`` are both built by ``optionalcascade`` from a
# private attribute name ('_id' / '_description'), a second attribute name
# ('query_id' / 'query_description') and a docstring.
# NOTE(review): presumably the second name is the attribute that gets updated
# on the contained Hit objects when the property is set -- confirm against
# ``optionalcascade`` in Bio.SearchIO._utils.
id = optionalcascade('_id', 'query_id', """QueryResult ID string""")
description = optionalcascade('_description', 'query_description',
                              """QueryResult description""")
@property
def hsps(self):
    """HSP objects contained in the QueryResult."""
    # every hit iterates over its HSPs; flatten them into a single list
    return list(chain.from_iterable(self.hits))
@property
def fragments(self):
    """HSPFragment objects contained in the QueryResult."""
    # every HSP iterates over its fragments; flatten them into a single list
    return list(chain.from_iterable(self.hsps))
# public methods #
def absorb(self, hit):
    """Adds a Hit object to the end of QueryResult. If the QueryResult
    already has a Hit with the same ID, append the new Hit's HSPs into
    the existing Hit.

    :param hit: object to absorb
    :type hit: Hit

    This method is used for file formats that may output the same Hit in
    separate places, such as BLAT or Exonerate. In both formats, Hit
    with different strands are put in different places. However, SearchIO
    considers them to be the same as a Hit object should be all database
    entries with the same ID, regardless of strand orientation.

    If ``append`` fails with a ``ValueError`` and the hit's ID is not
    present in the QueryResult, that ``ValueError`` is re-raised.
    """
    try:
        self.append(hit)
    except ValueError:
        # a ValueError does not necessarily mean the ID is already stored
        # (the storing step can reject the hit for other reasons); merging
        # only makes sense when the hit is really there, so otherwise let
        # the original error through instead of relying on an ``assert``
        # that is stripped under ``python -O``
        if hit.id not in self:
            raise
        existing_hit = self[hit.id]
        for hsp in hit:
            existing_hit.append(hsp)
def append(self, hit):
    """Adds a Hit object to the end of QueryResult.

    :param hit: object to append
    :type hit: Hit

    Any Hit object appended must have the same ``query_id`` property as the
    QueryResult's ``id`` property. If the hit key already exists, a
    ``ValueError`` will be raised.
    """
    # the key comes from the custom hit key function when one is set,
    # and from the hit's ID otherwise
    key_func = self._hit_key_function
    hit_key = hit.id if key_func is None else key_func(hit)

    # neither the key nor any alternative ID may already be known
    id_taken = hit_key in self or any(alt_id in self
                                      for alt_id in hit.id_all[1:])
    if id_taken:
        raise ValueError("The ID or alternative IDs of Hit %r exists in "
                         "this QueryResult." % hit_key)
    self[hit_key] = hit
def hit_filter(self, func=None):
    """Creates a new QueryResult object whose Hit objects pass the filter
    function.

    :param func: filter function
    :type func: callable, accepts Hit, returns bool

    Here is an example of using ``hit_filter`` to select Hits whose
    description begins with the string 'Homo sapiens', case sensitive::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> def desc_filter(hit):
        ...     return hit.description.startswith('Homo sapiens')
        ...
        >>> len(qresult)
        100
        >>> filtered = qresult.hit_filter(desc_filter)
        >>> len(filtered)
        39
        >>> print(filtered[:4])
        Program: blastn (2.2.27+)
        Query: 33211 (61)
        mir_1
        Target: refseq_rna
        Hits: ---- ----- ----------------------------------------------------------
        # # HSP ID + description
        ---- ----- ----------------------------------------------------------
        0 1 gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 52...
        1 2 gi|262205330|ref|NR_030198.1| Homo sapiens microRNA 52...
        2 1 gi|262205302|ref|NR_030191.1| Homo sapiens microRNA 51...
        3 1 gi|262205451|ref|NR_030222.1| Homo sapiens microRNA 51...

    Note that instance attributes (other than the hits) from the unfiltered
    QueryResult are retained in the filtered object.

        >>> qresult.program == filtered.program
        True
        >>> qresult.target == filtered.target
        True

    """
    if func is None:
        # like the built-in filter(None, ...): keep the truthy hits
        kept = [hit for hit in self.hits if hit]
    else:
        kept = [hit for hit in self.hits if func(hit)]
    filtered = self.__class__(kept, self.id, self._hit_key_function)
    self._transfer_attrs(filtered)
    return filtered
def hit_map(self, func=None):
    """Creates a new QueryResult object, mapping the given function to its
    Hits.

    :param func: map function
    :type func: callable, accepts Hit, returns Hit

    The function is applied to deep copies of the hits, so the hits held by
    this QueryResult are left as they are. Without a function the new object
    simply holds the copies.

    Here is an example of using ``hit_map`` with a function that discards all
    HSPs in a Hit except for the first one::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> print(qresult[:8])
        Program: blastn (2.2.27+)
        Query: 33211 (61)
        mir_1
        Target: refseq_rna
        Hits: ---- ----- ----------------------------------------------------------
        # # HSP ID + description
        ---- ----- ----------------------------------------------------------
        0 1 gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 52...
        1 1 gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA...
        2 1 gi|270133242|ref|NR_032573.1| Macaca mulatta microRNA ...
        3 2 gi|301171322|ref|NR_035857.1| Pan troglodytes microRNA...
        4 1 gi|301171267|ref|NR_035851.1| Pan troglodytes microRNA...
        5 2 gi|262205330|ref|NR_030198.1| Homo sapiens microRNA 52...
        6 1 gi|262205302|ref|NR_030191.1| Homo sapiens microRNA 51...
        7 1 gi|301171259|ref|NR_035850.1| Pan troglodytes microRNA...
        >>> top_hsp = lambda hit: hit[:1]
        >>> mapped_qresult = qresult.hit_map(top_hsp)
        >>> print(mapped_qresult[:8])
        Program: blastn (2.2.27+)
        Query: 33211 (61)
        mir_1
        Target: refseq_rna
        Hits: ---- ----- ----------------------------------------------------------
        # # HSP ID + description
        ---- ----- ----------------------------------------------------------
        0 1 gi|262205317|ref|NR_030195.1| Homo sapiens microRNA 52...
        1 1 gi|301171311|ref|NR_035856.1| Pan troglodytes microRNA...
        2 1 gi|270133242|ref|NR_032573.1| Macaca mulatta microRNA ...
        3 1 gi|301171322|ref|NR_035857.1| Pan troglodytes microRNA...
        4 1 gi|301171267|ref|NR_035851.1| Pan troglodytes microRNA...
        5 1 gi|262205330|ref|NR_030198.1| Homo sapiens microRNA 52...
        6 1 gi|262205302|ref|NR_030191.1| Homo sapiens microRNA 51...
        7 1 gi|301171259|ref|NR_035850.1| Pan troglodytes microRNA...

    """
    # copy every hit first, then map: the function never sees the originals
    mapped = list(map(deepcopy, self.hits))
    if func is not None:
        mapped = list(map(func, mapped))
    result = self.__class__(mapped, self.id, self._hit_key_function)
    self._transfer_attrs(result)
    return result
def hsp_filter(self, func=None):
    """Creates a new QueryResult object whose HSP objects pass the filter
    function.

    :param func: filter function, handed to each Hit's ``filter`` method

    ``hsp_filter`` is the same as ``hit_filter``, except that it filters
    directly on each HSP object in every Hit. If the filtering removes
    all HSP objects in a given Hit, the entire Hit will be discarded. This
    will result in the QueryResult having fewer Hits after filtering.
    """
    kept = []
    for hit in self.hits:
        filtered_hit = hit.filter(func)
        # a falsy result means the hit has nothing left: drop it
        if filtered_hit:
            kept.append(filtered_hit)
    result = self.__class__(kept, self.id, self._hit_key_function)
    self._transfer_attrs(result)
    return result
def hsp_map(self, func=None):
    """Creates a new QueryResult object, mapping the given function to its
    HSPs.

    :param func: map function, handed to each Hit's ``map`` method

    ``hsp_map`` is the same as ``hit_map``, except that it applies the given
    function to all HSP objects in every Hit, instead of the Hit objects.
    Hits whose mapped result is falsy are left out of the new object.
    """
    mapped = []
    # work on a snapshot of the current hits
    for hit in list(self.hits):
        mapped_hit = hit.map(func)
        if mapped_hit:
            mapped.append(mapped_hit)
    result = self.__class__(mapped, self.id, self._hit_key_function)
    self._transfer_attrs(result)
    return result
# Sentinel used as pop()'s default value, so that "no default given" can be
# told apart from any value a caller might pass. The approach is adapted from
# Python's built-in OrderedDict.pop implementation.
__marker = object()

def pop(self, hit_key=-1, default=__marker):
    """Removes the specified hit key and return the Hit object.

    :param hit_key: key of the Hit object to return
    :type hit_key: int or string
    :param default: return value if no Hit exists with the given key
    :type default: object

    By default, ``pop`` will remove and return the last Hit object in the
    QueryResult object. To remove specific Hit objects, you can use its
    integer index or hit key.

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> len(qresult)
        100
        >>> for hit in qresult[:5]:
        ...     print(hit.id)
        ...
        gi|262205317|ref|NR_030195.1|
        gi|301171311|ref|NR_035856.1|
        gi|270133242|ref|NR_032573.1|
        gi|301171322|ref|NR_035857.1|
        gi|301171267|ref|NR_035851.1|

        # remove the last hit
        >>> qresult.pop()
        Hit(id='gi|397513516|ref|XM_003827011.1|', query_id='33211', 1 hsps)

        # remove the first hit
        >>> qresult.pop(0)
        Hit(id='gi|262205317|ref|NR_030195.1|', query_id='33211', 1 hsps)

        # remove hit with the given ID
        >>> qresult.pop('gi|301171322|ref|NR_035857.1|')
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)

    """
    if isinstance(hit_key, int):
        # mimic list.pop on an empty container
        if not self:
            raise IndexError("pop from empty list")
        # translate the position into the key stored at that position
        hit_key = list(self.hit_keys)[hit_key]

    try:
        popped = self._items.pop(hit_key)
        # drop the alternative IDs of the removed hit; IDs that are already
        # missing from the lookup are ignored
        for alt_id in popped.id_all[1:]:
            self.__alt_hit_ids.pop(alt_id, None)
        return popped
    except KeyError:
        # not a hit key: it may still be an alternative ID of a stored hit
        if hit_key in self.__alt_hit_ids:
            return self.pop(self.__alt_hit_ids[hit_key], default)
        # unknown key: hand back the default when the caller supplied one
        if default is not self.__marker:
            return default
        raise KeyError(hit_key)
def index(self, hit_key):
    """Returns the index of a given hit key, zero-based.

    :param hit_key: hit ID, or the Hit object itself
    :type hit_key: string or Hit

    This method is useful for finding out the integer index (usually
    correlated with search rank) of a given hit key. A Hit object is looked
    up by the key the hit key function derives from it (by its ``id`` when
    no key function is set). A string that is not a hit key is also tried as
    an alternative hit ID. A ``ValueError`` is raised when nothing matches.

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> qresult.index('gi|301171259|ref|NR_035850.1|')
        7

    """
    if isinstance(hit_key, Hit):
        key_func = self._hit_key_function
        # hits are stored under the key derived by the hit key function
        # (as append() and __contains__ do), which need not be the hit's ID
        stored_key = hit_key.id if key_func is None else key_func(hit_key)
        return list(self.hit_keys).index(stored_key)
    try:
        return list(self.hit_keys).index(hit_key)
    except ValueError:
        # fall back on the alternative IDs before giving up
        if hit_key in self.__alt_hit_ids:
            return self.index(self.__alt_hit_ids[hit_key])
        raise
def sort(self, key=None, reverse=False, in_place=True):
    # no cmp argument to make sort more Python 3-like
    """Sorts the Hit objects.

    :param key: sorting function
    :type key: callable, accepts Hit, returns key for sorting
    :param reverse: whether to reverse sorting results or not
    :type reverse: bool
    :param in_place: whether to do in-place sorting or not
    :type in_place: bool

    ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
    method, and returns None in that case. If you set the ``in_place``
    argument to False, it will return a new, sorted QueryResult object and
    keep the initial one unsorted.

    Without a ``key`` function the current order is kept, or simply reversed
    when ``reverse`` is True.
    """
    if key is not None:
        sorted_hits = sorted(self.hits, key=key, reverse=reverse)
    elif reverse:
        sorted_hits = list(self.hits)[::-1]
    else:
        # default options: a plain copy of the hits in their current order
        sorted_hits = list(self.hits)

    if not in_place:
        obj = self.__class__(sorted_hits, self.id, self._hit_key_function)
        self._transfer_attrs(obj)
        return obj

    # in-place: rebuild the ordered container in the new order; like
    # append(), fall back on the hit's ID when no key function is set
    key_func = self._hit_key_function
    new_hits = OrderedDict()
    for hit in sorted_hits:
        new_hits[hit.id if key_func is None else key_func(hit)] = hit
    self._items = new_hits
# when the file is executed directly instead of being imported, run the
# doctests through Biopython's helper
if __name__ == "__main__":
    from Bio import _utils
    _utils.run_doctest()
|