1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
|
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
"""Example of using GenomeDiagram cross-links to mimic ACT."""
import sys
import os
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics.GenomeDiagram import Diagram, CrossLink
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO
# Modify this line to point at the Artemis/ACT example data which is online at:
# https://github.com/sanger-pathogens/Artemis/tree/master/etc
input_folder = "/Applications/Artemis/Artemis.app/Contents/artemis/etc"
name = "af063097_v_b132222"
file_a = "af063097.embl"
file_b = "b132222.embl"
format_a = "embl"
format_b = "embl"
file_a_vs_b = "af063097_v_b132222.crunch"
for f in [file_a, file_b, file_a_vs_b]:
if not os.path.isfile(os.path.join(input_folder, f)):
print("Missing input file %s.fna" % f)
sys.exit(1)
# Only doing a_vs_b here, could also have b_vs_c and c_vs_d etc
genomes = [
(os.path.join(input_folder, file_a), format_a),
(os.path.join(input_folder, file_b), format_b),
]
comparisons = [os.path.join(input_folder, file_a_vs_b)]
# Create diagram with tracks, each with a feature set
assert len(genomes) >= 2 and len(genomes) == len(comparisons) + 1
gd_diagram = Diagram(name, track_size=0.35, circular=False)
tracks = {}
feature_sets = {}
records = {}
for f, format in genomes:
records[f] = SeqIO.read(f, format)
tracks[f] = gd_diagram.new_track(
1,
name=f,
start=0,
end=len(records[f]),
scale_smalltick_interval=1000,
scale_largetick_interval=10000,
greytrack=True,
greytrack_labels=0,
)
feature_sets[f] = tracks[f].new_set()
print("Drawing matches...")
for i, crunch_file in enumerate(comparisons):
q = genomes[i + 1][0] # query file
s = genomes[i][0] # subject file
q_set = feature_sets[q]
s_set = feature_sets[s]
with open(crunch_file) as handle:
for line in handle:
if line[0] == "#":
continue
parts = line.rstrip("\n").split(None, 7)
# 0 = score
# 1 = id
# 2 = S1
# 3 = E1
# 4 = seq1
# 5 = S2
# 6 = E2
# 7 = seq2
try:
q_start, q_end = int(parts[2]), int(parts[3])
s_start, s_end = int(parts[5]), int(parts[6])
except IndexError:
sys.stderr.write(repr(line) + "\n")
sys.stderr.write(repr(parts) + "\n")
raise
flip = False
if q_start > q_end:
flip = not flip
q_start, q_end = q_end, q_start
if s_start > s_end:
flip = not flip
s_start, s_end = s_end, s_start
if flip:
c = colors.Color(0, 0, 1, alpha=0.25)
b = False
else:
c = colors.Color(1, 0, 0, alpha=0.25)
b = False
q_feature = q_set.add_feature(
SeqFeature(FeatureLocation(q_start - 1, q_end)), color=c, border=b
)
s_feature = s_set.add_feature(
SeqFeature(FeatureLocation(s_start - 1, s_end)), color=c, border=b
)
gd_diagram.cross_track_links.append(CrossLink(q_feature, s_feature, c, b))
# NOTE: We are using the same colour for all the matches,
# with transparency. This means overlayed matches will appear darker.
# It also means the drawing order not very important.
# Note ACT puts long hits at the back, and colours by hit score
print("Drawing CDS features...")
for f, format in genomes:
record = records[f]
feature_set = feature_sets[f]
# Mark the CDS features
for cds in record.features:
if cds.type != "CDS":
continue
feature_set.add_feature(
cds, sigil="ARROW", color=colors.lightblue, border=colors.blue
)
gd_diagram.draw(
format="linear", fragments=3, orientation="landscape", pagesize=(20 * cm, 10 * cm)
)
gd_diagram.write(name + ".pdf", "PDF")
gd_diagram.draw(format="circular", orientation="landscape", pagesize=(20 * cm, 20 * cm))
gd_diagram.write(name + "_c.pdf", "PDF")
|