File: ACT_example.py

package info (click to toggle)
python-biopython 1.78%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 65,756 kB
  • sloc: python: 221,141; xml: 178,777; ansic: 13,369; sql: 1,208; makefile: 131; sh: 70
file content (130 lines) | stat: -rw-r--r-- 4,407 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#

"""Example of using GenomeDiagram cross-links to mimic ACT."""


import sys
import os
from reportlab.lib import colors
from reportlab.lib.units import cm

from Bio.Graphics.GenomeDiagram import Diagram, CrossLink
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO

# Modify this line to point at the Artemis/ACT example data which is online at:
# https://github.com/sanger-pathogens/Artemis/tree/master/etc
input_folder = "/Applications/Artemis/Artemis.app/Contents/artemis/etc"

name = "af063097_v_b132222"
file_a = "af063097.embl"
file_b = "b132222.embl"
format_a = "embl"
format_b = "embl"
file_a_vs_b = "af063097_v_b132222.crunch"

for f in [file_a, file_b, file_a_vs_b]:
    if not os.path.isfile(os.path.join(input_folder, f)):
        print("Missing input file %s.fna" % f)
        sys.exit(1)

# Only doing a_vs_b here, could also have b_vs_c and c_vs_d etc
genomes = [
    (os.path.join(input_folder, file_a), format_a),
    (os.path.join(input_folder, file_b), format_b),
]
comparisons = [os.path.join(input_folder, file_a_vs_b)]

# Create diagram with tracks, each with a feature set
assert len(genomes) >= 2 and len(genomes) == len(comparisons) + 1
gd_diagram = Diagram(name, track_size=0.35, circular=False)
tracks = {}
feature_sets = {}
records = {}
for f, format in genomes:
    records[f] = SeqIO.read(f, format)
    tracks[f] = gd_diagram.new_track(
        1,
        name=f,
        start=0,
        end=len(records[f]),
        scale_smalltick_interval=1000,
        scale_largetick_interval=10000,
        greytrack=True,
        greytrack_labels=0,
    )
    feature_sets[f] = tracks[f].new_set()

print("Drawing matches...")
for i, crunch_file in enumerate(comparisons):
    q = genomes[i + 1][0]  # query file
    s = genomes[i][0]  # subject file
    q_set = feature_sets[q]
    s_set = feature_sets[s]
    with open(crunch_file) as handle:
        for line in handle:
            if line[0] == "#":
                continue
            parts = line.rstrip("\n").split(None, 7)
            # 0 = score
            # 1 = id
            # 2 = S1
            # 3 = E1
            # 4 = seq1
            # 5 = S2
            # 6 = E2
            # 7 = seq2
            try:
                q_start, q_end = int(parts[2]), int(parts[3])
                s_start, s_end = int(parts[5]), int(parts[6])
            except IndexError:
                sys.stderr.write(repr(line) + "\n")
                sys.stderr.write(repr(parts) + "\n")
                raise
            flip = False
            if q_start > q_end:
                flip = not flip
                q_start, q_end = q_end, q_start
            if s_start > s_end:
                flip = not flip
                s_start, s_end = s_end, s_start
            if flip:
                c = colors.Color(0, 0, 1, alpha=0.25)
                b = False
            else:
                c = colors.Color(1, 0, 0, alpha=0.25)
                b = False
            q_feature = q_set.add_feature(
                SeqFeature(FeatureLocation(q_start - 1, q_end)), color=c, border=b
            )
            s_feature = s_set.add_feature(
                SeqFeature(FeatureLocation(s_start - 1, s_end)), color=c, border=b
            )
            gd_diagram.cross_track_links.append(CrossLink(q_feature, s_feature, c, b))
            # NOTE: We are using the same colour for all the matches,
            # with transparency. This means overlayed matches will appear darker.
            # It also means the drawing order not very important.
            # Note ACT puts long hits at the back, and colours by hit score

print("Drawing CDS features...")
for f, format in genomes:
    record = records[f]
    feature_set = feature_sets[f]
    # Mark the CDS features
    for cds in record.features:
        if cds.type != "CDS":
            continue
        feature_set.add_feature(
            cds, sigil="ARROW", color=colors.lightblue, border=colors.blue
        )

gd_diagram.draw(
    format="linear", fragments=3, orientation="landscape", pagesize=(20 * cm, 10 * cm)
)
gd_diagram.write(name + ".pdf", "PDF")

gd_diagram.draw(format="circular", orientation="landscape", pagesize=(20 * cm, 20 * cm))
gd_diagram.write(name + "_c.pdf", "PDF")