File: colorspace.py

package info (click to toggle)
python-sqt 0.8.0-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 824 kB
  • sloc: python: 5,964; sh: 38; makefile: 10
file content (93 lines) | stat: -rw-r--r-- 1,812 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
Colorspace conversion routines.

Inspired by agapython/util/Dibase.py from Corona lite,
but reimplemented to avoid licensing issues.

Encoding Table

  A C G T
A 0 1 2 3
C 1 0 3 2
G 2 3 0 1
T 3 2 1 0
"""

import string
import sys

__author__ = 'Marcel Martin'

# Python 3 has no xrange; alias it to range so the name is available on
# both major versions. Compare the numeric version_info tuple rather than
# the free-form sys.version string, whose lexicographic ordering is wrong
# for major versions with more than one digit.
if sys.version_info[0] >= 3:
	xrange = range


def _initialize_dicts():
	"""
	Create the colorspace encoding and decoding dictionaries.
	"""
	enc = {}
	for i, c1 in enumerate("ACGT"):
		enc['N' + c1] = '4'
		enc[c1 + 'N'] = '4'
		enc['.' + c1] = '4'
		enc[c1 + '.'] = '4'
		for j, c2 in enumerate("ACGT"):
			# XOR of nucleotides gives color
			enc[c1 + c2] = chr(ord('0') + (i ^ j))
	enc.update({ 'NN': '4', 'N.': '4', '.N': '4', '..': '4'})

	dec = {}
	for i, c1 in enumerate("ACGT"):
		dec['.' + str(i)] = 'N'
		dec['N' + str(i)] = 'N'
		dec[c1 + '4'] = 'N'
		for j, c2 in enumerate("ACGT"):
			# XOR of nucleotides gives color
			dec[c1 + chr(ord('0') + (i ^ j))] = c2
	dec['N4'] = 'N'

	return (enc, dec)


def encode(s):
	"""
	Convert a sequence of nucleotides to color space.

	The first nucleotide is kept unchanged; every following position is
	replaced by the color that ENCODE assigns to the pair formed by the
	previous and the current character. Only uppercase characters are
	allowed: a pair that is missing from ENCODE raises KeyError.

	An empty sequence is returned unchanged. The result has the same type
	as s (this relies on ENCODE having keys and values of that type).

	>>> encode("ACGGTC")
	'A13012'
	"""
	if not s:
		return s
	# Slice instead of indexing: on Python 3, indexing a bytes object gives
	# an int, whereas a slice keeps the type of s.
	pieces = [s[0:1]]
	for i in range(len(s) - 1):
		pieces.append(ENCODE[s[i:i+2]])
	# s[0:0] is the empty sequence of the same type as s ('' or b'').
	return s[0:0].join(pieces)


def decode(s):
	"""
	Decode a sequence of colors to nucleotide space.

	The first character in s must be a nucleotide; each following
	character is a color. Every color is resolved by looking up the
	previously decoded nucleotide plus the color in DECODE. Only uppercase
	characters are allowed: a combination that is missing from DECODE
	raises KeyError.

	Sequences shorter than two characters are returned unchanged. The
	result has the same type as s (this relies on DECODE having keys and
	values of that type).

	>>> decode("A13012")
	'ACGGTC'
	"""
	if len(s) < 2:
		return s
	# Work with one-element slices instead of indexing or iterating: on
	# Python 3, both yield ints for a bytes object, whereas slices keep
	# the type of s.
	previous = s[0:1]
	pieces = [previous]
	for i in range(1, len(s)):
		previous = DECODE[previous + s[i:i+1]]
		pieces.append(previous)
	# s[0:0] is the empty sequence of the same type as s ('' or b'').
	return s[0:0].join(pieces)


# Module-level lookup tables: ENCODE maps a pair of characters to a color,
# DECODE maps a previous character plus a color to a nucleotide.
(ENCODE, DECODE) = _initialize_dicts()

# Compare the numeric version_info tuple rather than the free-form
# sys.version string, whose lexicographic ordering is wrong for major
# versions with more than one digit.
if sys.version_info[0] >= 3:
	# On Python 3, str and bytes are distinct types. Register every entry
	# a second time with ASCII-encoded keys and values so that the tables
	# can be indexed with bytes as well as with str.
	def _str_dict_to_bytes(d):
		"""Return a new dict with all keys and values of d converted to ASCII bytes."""
		return dict((bytes(k, 'ascii'), bytes(v, 'ascii')) for k, v in d.items())
	ENCODE.update(_str_dict_to_bytes(ENCODE))
	DECODE.update(_str_dict_to_bytes(DECODE))