1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
"""
Model an interval on a reference.
"""
class Region:
    """
    An interval on a named reference sequence.

    Coordinates are stored the pythonic way: start is 0-based and inclusive,
    stop is exclusive. A stop of None means that the interval is open-ended.

    Attributes:
    reference -- name of the reference
    start -- 0-based start coordinate
    stop -- exclusive end coordinate, or None
    is_reverse_complement -- whether the region is marked as reverse-complemented
    """

    def __init__(self, specification, start=None, stop=None, reverse_complement=False):
        """
        specification -- description of the region as a string, such as
            "chr14:22-111"

        If start is given, the specification is considered to be a reference and
        the parameters start, stop, reverse_complement are used directly.
        If start is None, stop and reverse_complement are ignored and everything
        is taken from the parsed specification.

        Raises ValueError if the specification needs to be parsed and is invalid.
        """
        if start is None:
            (self.reference, self.start, self.stop,
                self.is_reverse_complement) = self._parse_region(specification)
        else:
            self.reference = specification
            self.start = start
            self.stop = stop
            self.is_reverse_complement = reverse_complement

    @staticmethod
    def _parse_region(s):
        """
        Parse a string like "name:begin-end" or "name:begin..end".
        The returned tuple is (name, start, stop, revcomp).

        start is begin-1, stop is equal to end.
        That is, this function converts from 1-based intervals to pythonic
        open intervals!

        The string may be prefixed with "rc:", in which case revcomp is set to True.

        If 'end' is an empty string (as in "chrx:1-"), then stop is set to None.
        If no range is given, as in "chrx:27", then stop is set to start+1.
        If only 'name' is given (or "rc:name"), start is set to 0 and stop to None.

        Commas within the numbers (thousands separators) are ignored.

        Raises ValueError if a coordinate is not an integer, if begin is less
        than 1 or if end is less than begin.
        """
        revcomp = s.startswith('rc:')
        if revcomp:
            s = s[3:]
        # Split at the last colon only, so that reference names may themselves
        # contain colons.
        fields = s.rsplit(':', 1)
        if len(fields) == 1:
            return (fields[0], 0, None, revcomp)

        name, interval = fields
        sep = '..' if '..' in interval else '-'
        coords = interval.split(sep, maxsplit=1)
        start = int(coords[0].replace(',', ''))
        if len(coords) == 1:
            # A single position: 1-based 'begin' doubles as the exclusive stop.
            stop = start
        elif coords[1] == '':
            stop = None
        else:
            stop = int(coords[1].replace(',', ''))

        # Validate explicitly instead of using assert, which would be stripped
        # when Python runs with -O and let invalid coordinates through.
        if start < 1:
            raise ValueError(
                "Region start must be at least 1 in {!r}".format(s))
        if stop is not None and stop < start:
            raise ValueError(
                "Region end must not be less than start in {!r}".format(s))
        return (name, start - 1, stop, revcomp)

    def __str__(self):
        """
        Return the region as a 1-based specification string in the
        "name:begin-end" notation understood by _parse_region, with an "rc:"
        prefix if the region is reverse-complemented.

        A region covering the entire reference is written as just the name,
        a region of length one as "name:position" and an open-ended region
        as "name:begin-".
        """
        prefix = 'rc:' if self.is_reverse_complement else ''
        if self.start == 0 and self.stop is None:
            return prefix + self.reference
        if self.start + 1 == self.stop:
            return "{}{}:{}".format(prefix, self.reference, self.start + 1)
        stop = '' if self.stop is None else self.stop
        return "{}{}:{}-{}".format(prefix, self.reference, self.start + 1, stop)

    def __repr__(self):
        """Return a representation of the form Region('name:begin-end')."""
        return "Region({!r})".format(str(self))

    def __eq__(self, other):
        """
        Two regions are equal if reference, start, stop and orientation agree.
        Comparing with an object that is not a Region yields NotImplemented,
        so that such a comparison evaluates to False instead of failing.
        """
        if not isinstance(other, Region):
            return NotImplemented
        return (self.reference == other.reference and self.start == other.start
            and self.stop == other.stop
            and self.is_reverse_complement == other.is_reverse_complement)

    def __hash__(self):
        """
        Hash over the same fields that __eq__ compares, so that equal regions
        can be used interchangeably in sets and as dictionary keys.
        Do not modify a region while it is stored in such a container.
        """
        return hash((self.reference, self.start, self.stop,
            self.is_reverse_complement))
|