1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
import os.path
from io import StringIO
from string import Template
from numpy import (
ceil,
log2,
transpose,
where,
)
PAD = 2
# Colors from rgb.txt,
DNA_DEFAULT_COLORS = {
"A": "0.00 1.00 0.00", # green
"C": "0.00 0.00 1.00", # blue
"G": "1.00 0.65 0.00", # orange red
"T": "1.00 0.00 0.00", # red
}
# Template is adapted from Jim Kent's lib/dnaMotif.pss to support aritrary
# alphabets.
TEMPLATE = "template.ps"
def freqs_to_heights(matrix):
"""
Calculate logo height using the method of:
Schneider TD, Stephens RM. "Sequence logos: a new way to display consensus
sequences." Nucleic Acids Res. 1990 Oct 25;18(20):6097-100.
"""
# Columns are sequence positions, rows are symbol counts/frequencies
f = matrix.values.transpose()
n, m = f.shape
# Ensure normalized
f = f / sum(f, axis=0)
# Shannon entropy (the where replaces 0 with 1 so that '0 log 0 == 0')
H = -sum(f * log2(where(f, f, 1)), axis=0)
# Height
return transpose(f * (log2(n) - H))
def eps_logo(matrix, base_width, height, colors=DNA_DEFAULT_COLORS):
"""
Return an EPS document containing a sequence logo for matrix where each
bases is shown as a column of `base_width` points and the total logo
height is `height` points. If `colors` is provided it is a mapping from
characters to rgb color strings.
"""
alphabet = matrix.sorted_alphabet
rval = StringIO()
# Read header ans substitute in width / height
template_path = os.path.join(os.path.dirname(__file__), "template.ps")
with open(template_path) as fh:
template_str = fh.read()
header = Template(template_str)
rval.write(
header.substitute(
bounding_box_width=ceil(base_width * matrix.width) + PAD, bounding_box_height=ceil(height) + PAD
)
)
# Determine heights
heights = freqs_to_heights(matrix)
height_scale = height / log2(len(alphabet))
# Draw each "row" of the matrix
for i, row in enumerate(heights):
x = i * base_width
y = 0
for j, base_height in enumerate(row):
char = alphabet[j]
page_height = height_scale * base_height
# print matrix.alphabet[j], base_height, height_scale, page_height
if page_height > 1:
# Draw letter
rval.write("{} setrgbcolor\n".format(colors.get(char, "0 0 0")))
rval.write(f"{x:3.2f} ")
rval.write(f"{y:3.2f} ")
rval.write("%3.2f " % (x + base_width))
rval.write("%3.2f " % (y + page_height))
rval.write(f"({char}) textInBox\n")
y += page_height
rval.write("showpage")
return rval.getvalue()
|