File: clustal_run.py

package info (click to toggle)
python-biopython 1.78%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 65,756 kB
  • sloc: python: 221,141; xml: 178,777; ansic: 13,369; sql: 1,208; makefile: 131; sh: 70
file content (61 lines) | stat: -rw-r--r-- 1,823 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
# Copyright 2000 Brad Chapman.  All rights reserved.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Run clustalw and parse the output.

Example code to show how to create a clustalw command line, run clustalw
and parse the results into an object that can be dealt with easily.
"""
# standard library


import sys
import subprocess

# biopython
from Bio.Align.Applications import ClustalwCommandline
from Bio import AlignIO
from Bio.Align import AlignInfo

# create the command line to run clustalw
# this assumes you've got clustalw somewhere on your path, otherwise
# you need to pass the full path of the executable to this via cmd="..."
cline = ClustalwCommandline(infile="opuntia.fasta", outfile="test.aln")

# actually perform the alignment
# the command is passed as one string, so it is run through the shell on
# every platform except win32
return_code = subprocess.call(str(cline), shell=(sys.platform != "win32"))

# Check the exit status explicitly instead of using ``assert``: assertions
# are stripped when Python runs with -O, which would let a failed ClustalW
# run fall through to parsing a missing or stale test.aln.
if return_code != 0:
    raise RuntimeError("Calling ClustalW failed (exit status %i)" % return_code)

# Read the clustal-format file written by the alignment step
alignment = AlignIO.read("test.aln", "clustal")

print(alignment)

# show the description and sequence of the first record in the alignment
first_record = alignment[0]
print("first description: %s" % first_record.description)
print("first sequence: %s" % first_record.seq)

# report the length of the alignment
alignment_length = alignment.get_alignment_length()
print("length %i" % alignment_length)

print(alignment)

# summarise the alignment: consensus, score matrix and information content
summary_align = AlignInfo.SummaryInfo(alignment)

consensus = summary_align.dumb_consensus()
print("consensus %s" % consensus)

# position specific score matrix against the consensus, ignoring "N"
my_pssm = summary_align.pos_specific_score_matrix(
    consensus,
    chars_to_ignore=["N"],
)
print(my_pssm)

# frequency table handed to information_content() as e_freq_table
expect_freq = {
    "A": 0.3,
    "G": 0.2,
    "T": 0.3,
    "C": 0.2,
}

# information content for the 5, 30 range of the alignment, ignoring "N"
info_content = summary_align.information_content(
    5, 30, e_freq_table=expect_freq, chars_to_ignore=["N"]
)
print("relative info content: %f" % info_content)