1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
"""
Support for scores in the `wiggle`_ file format used by the UCSC Genome
Browser.
Positions in the wiggle format are 1-relative; however, the positions
returned here follow the BED/interval convention, which is zero-based, half-open.
.. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html
"""
def parse_header(line):
    """
    Parse a wiggle declaration line into a dict of its ``key=value`` fields.

    The first whitespace-separated token (the declaration keyword, e.g.
    ``fixedStep``) is skipped. Every remaining token is split on its first
    ``=`` only, so a value may itself contain ``=``. Keys and values are
    returned as strings.

    Raises ``ValueError`` if a token after the keyword contains no ``=``.
    """
    # maxsplit=1 keeps each pair at exactly two items even when the value
    # holds further "=" characters.
    return dict(field.split("=", 1) for field in line.split()[1:])
def IntervalReader(f):
    """
    Generate ``(chrom, start, end, strand, value)`` tuples from wiggle lines.

    ``f`` is any iterable of text lines. Coordinates are zero-based,
    half-open: the 1-relative positions of ``variableStep`` / ``fixedStep``
    sections are shifted down by one, while bed-style rows are passed
    through unchanged. Blank lines and lines starting with ``track``, ``#``
    or ``browser`` are skipped, and bed-style rows with fewer than four
    fields (no score) produce nothing.
    """
    chrom = None
    position = None
    step = None
    span = None
    # wiggle sections carry no strand of their own, so "+" is reported
    default_strand = "+"
    # until a step declaration is seen, rows are treated as bed-style
    mode = "bed"
    for line in f:
        if line.isspace() or line.startswith(("track", "#", "browser")):
            continue

        if line.startswith("variableStep"):
            header = parse_header(line)
            chrom = header["chrom"]
            position = None
            step = None
            span = int(header["span"]) if "span" in header else 1
            mode = "variableStep"
            continue

        if line.startswith("fixedStep"):
            header = parse_header(line)
            chrom = header["chrom"]
            # declared start is 1-relative; keep it zero-based internally
            position = int(header["start"]) - 1
            step = int(header["step"])
            span = int(header["span"]) if "span" in header else 1
            mode = "fixedStep"
            continue

        if mode == "bed":
            columns = line.split()
            if len(columns) <= 3:
                # no score column: nothing to report for this row
                continue
            # a sixth column, when present, overrides the default strand
            row_strand = columns[5] if len(columns) > 5 else default_strand
            yield columns[0], int(columns[1]), int(columns[2]), row_strand, float(columns[3])
        elif mode == "variableStep":
            columns = line.split()
            begin = int(columns[0]) - 1
            yield chrom, begin, begin + span, default_strand, float(columns[1])
        elif mode == "fixedStep":
            score = float(line.split()[0])
            yield chrom, position, position + span, default_strand, score
            position += step
        else:
            raise ValueError(f"Unexpected input line: {line.strip()}")
class Reader:
    """
    Per-position view of wiggle data.

    Iterating yields ``(chrom, position, value)`` with zero-based
    positions: every interval produced by ``IntervalReader`` is expanded
    into one tuple per position it covers. Positions without a score never
    appear.
    """

    def __init__(self, f):
        # iterable of wiggle text lines, consumed when the reader is iterated
        self.file = f

    def __iter__(self):
        for chrom, start, end, _strand, score in IntervalReader(self.file):
            # strand is dropped; each covered position repeats the score
            yield from ((chrom, base, score) for base in range(start, end))
|