1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
|
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
from __future__ import print_function
import sys
import os
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics.GenomeDiagram import Diagram, CrossLink
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO
# Modify this line to point at the Artemis/ACT example data which is online at:
# https://github.com/sanger-pathogens/Artemis/tree/master/etc
input_folder = "/Applications/Artemis/Artemis.app/Contents/artemis/etc"
name = "af063097_v_b132222"
file_a = "af063097.embl"
file_b = "b132222.embl"
format_a = "embl"
format_b = "embl"
file_a_vs_b = "af063097_v_b132222.crunch"
for f in [file_a, file_b, file_a_vs_b]:
if not os.path.isfile(os.path.join(input_folder, f)):
print("Missing input file %s.fna" % f)
sys.exit(1)
# Only doing a_vs_b here, could also have b_vs_c and c_vs_d etc
genomes = [
(os.path.join(input_folder, file_a), format_a),
(os.path.join(input_folder, file_b), format_b),
]
comparisons = [os.path.join(input_folder, file_a_vs_b)]
# Create diagram with tracks, each with a feature set
assert len(genomes) >= 2 and len(genomes) == len(comparisons) + 1
gd_diagram = Diagram(name, track_size=0.35, circular=False)
tracks = dict()
feature_sets = dict()
records = dict()
for f, format in genomes:
records[f] = SeqIO.read(f, format)
tracks[f] = gd_diagram.new_track(1, name=f, start=0, end=len(records[f]),
scale_smalltick_interval=1000,
scale_largetick_interval=10000,
greytrack=True, greytrack_labels=0)
feature_sets[f] = tracks[f].new_set()
print("Drawing matches...")
for i, crunch_file in enumerate(comparisons):
q = genomes[i + 1][0] # query file
s = genomes[i][0] # subject file
q_set = feature_sets[q]
s_set = feature_sets[s]
with open(crunch_file) as handle:
for line in handle:
if line[0] == "#":
continue
parts = line.rstrip("\n").split(None, 7)
# 0 = score
# 1 = id
# 2 = S1
# 3 = E1
# 4 = seq1
# 5 = S2
# 6 = E2
# 7 = seq2
try:
q_start, q_end = int(parts[2]), int(parts[3])
s_start, s_end = int(parts[5]), int(parts[6])
except IndexError:
sys.stderr.write(repr(line) + "\n")
sys.stderr.write(repr(parts) + "\n")
raise
flip = False
if q_start > q_end:
flip = not flip
q_start, q_end = q_end, q_start
if s_start > s_end:
flip = not flip
s_start, s_end = s_end, s_start
if flip:
c = colors.Color(0, 0, 1, alpha=0.25)
b = False
else:
c = colors.Color(1, 0, 0, alpha=0.25)
b = False
q_feature = q_set.add_feature(SeqFeature(FeatureLocation(q_start - 1, q_end)),
color=c, border=b)
s_feature = s_set.add_feature(SeqFeature(FeatureLocation(s_start - 1, s_end)),
color=c, border=b)
gd_diagram.cross_track_links.append(CrossLink(q_feature, s_feature, c, b))
# NOTE: We are using the same colour for all the matches,
# with transparency. This means overlayed matches will appear darker.
# It also means the drawing order not very important.
# Note ACT puts long hits at the back, and colours by hit score
print("Drawing CDS features...")
for f, format in genomes:
record = records[f]
feature_set = feature_sets[f]
# Mark the CDS features
for cds in record.features:
if cds.type != "CDS":
continue
feature_set.add_feature(cds, sigil="ARROW",
color=colors.lightblue,
border=colors.blue)
gd_diagram.draw(format="linear", fragments=3,
orientation="landscape", pagesize=(20 * cm, 10 * cm))
gd_diagram.write(name + ".pdf", "PDF")
gd_diagram.draw(format="circular",
orientation="landscape", pagesize=(20 * cm, 20 * cm))
gd_diagram.write(name + "_c.pdf", "PDF")
|