File: www_blast.py

package info (click to toggle)

python-biopython 1.68%2Bdfsg-3~bpo8%2B1

links: PTS, VCS
area: main
in suites: jessie-backports
size: 46,856 kB
sloc: python: 160,306; xml: 93,216; ansic: 9,118; sql: 1,208; makefile: 155; sh: 63

file content (50 lines) | stat: -rw-r--r-- 1,655 bytes

parent folder | download | duplicates (2)

#!/usr/bin/env python
"""Example showing how to deal with internet BLAST from Biopython.

This code is described in great detail in the BLAST section of the Biopython
documentation.
"""
# standard library
from __future__ import print_function

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

# biopython
from Bio import SeqIO
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML

# first get the sequence we want to parse from a FASTA file
f_record = next(SeqIO.parse('m_cold.fasta', 'fasta'))

print('Doing the BLAST and retrieving the results...')
result_handle = NCBIWWW.qblast('blastn', 'nr', f_record.format('fasta'))

# save the results for later, in case we want to look at it
with open('m_cold_blast.out', 'w') as save_file:
    blast_results = result_handle.read()
    save_file.write(blast_results)

print('Parsing the results and extracting info...')

# option 1 -- open the saved file to parse it
# option 2 -- create a handle from the string and parse it
string_result_handle = StringIO(blast_results)
b_record = NCBIXML.read(string_result_handle)

# now get the alignment info for all e values greater than some threshold
E_VALUE_THRESH = 0.1

for alignment in b_record.alignments:
    for hsp in alignment.hsps:
        if hsp.expect < E_VALUE_THRESH:
            print('****Alignment****')
            print('sequence: %s' % alignment.title)
            print('length: %i' % alignment.length)
            print('e value: %f' % hsp.expect)
            print(hsp.query[0:75] + '...')
            print(hsp.match[0:75] + '...')
            print(hsp.sbjct[0:75] + '...')