File: wiggle.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (85 lines) | stat: -rw-r--r-- 2,785 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""
Support for scores in the `wiggle`_ file format used by the UCSC Genome
Browser.

The positions in the wiggle format are 1-relative; however, the positions
returned match the BED/interval format, which is zero-based and half-open.

.. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html
"""


def parse_header(line):
    """
    Parse a wiggle declaration line into a dict of its ``key=value`` fields.

    The first whitespace-separated token (for example ``variableStep``) is
    skipped. Every remaining token is split at its first ``=`` only, so a
    value may itself contain ``=``. Keys and values are returned as strings.

    Raises ValueError if a token after the first contains no ``=``.
    """
    return dict(field.split("=", 1) for field in line.split()[1:])


def IntervalReader(f):
    """
    Iterator yielding chrom, start, end, strand, value.
    Values are zero-based, half-open.
    Regions which lack a score are ignored.

    `f` is any iterable of text lines. Blank lines (including empty strings)
    and lines starting with ``track``, ``#`` or ``browser`` are skipped. A
    ``variableStep`` or ``fixedStep`` declaration line switches the parsing
    mode for the data lines that follow; until one is seen, lines are parsed
    as BED-style ``chrom start end value [name strand ...]`` records.

    In BED mode the strand is taken from the sixth column when more than five
    columns are present and is "+" otherwise; lines with three or fewer
    columns are skipped.

    Raises KeyError if a declaration line lacks a required field (``chrom``,
    plus ``start`` and ``step`` for ``fixedStep``), ValueError if a numeric
    field cannot be converted, and IndexError if a ``variableStep`` data line
    has fewer than two columns.
    """
    current_chrom = None
    current_pos = None
    current_step = None
    current_span = 1

    # always for wiggle data
    strand = "+"

    mode = "bed"

    for line in f:
        # `not line.strip()` also catches the empty string, which
        # `str.isspace()` reports as False; such a line would otherwise reach
        # the data branches below and fail on the first column lookup.
        if not line.strip() or line.startswith(("track", "#", "browser")):
            continue
        if line.startswith("variableStep"):
            header = parse_header(line)
            current_chrom = header["chrom"]
            # Positions come from each data line in this mode.
            current_pos = None
            current_step = None
            current_span = int(header.get("span", 1))
            mode = "variableStep"
        elif line.startswith("fixedStep"):
            header = parse_header(line)
            current_chrom = header["chrom"]
            # The declared start is 1-relative; convert to zero-based.
            current_pos = int(header["start"]) - 1
            current_step = int(header["step"])
            current_span = int(header.get("span", 1))
            mode = "fixedStep"
        elif mode == "bed":
            fields = line.split()
            # Records without a fourth (score) column are ignored.
            if len(fields) > 3:
                if len(fields) > 5:
                    yield fields[0], int(fields[1]), int(fields[2]), fields[5], float(fields[3])
                else:
                    yield fields[0], int(fields[1]), int(fields[2]), strand, float(fields[3])
        elif mode == "variableStep":
            fields = line.split()
            # Data-line positions are 1-relative; convert to zero-based.
            pos = int(fields[0]) - 1
            yield current_chrom, pos, pos + current_span, strand, float(fields[1])
        elif mode == "fixedStep":
            yield current_chrom, current_pos, current_pos + current_span, strand, float(line.split()[0])
            current_pos += current_step
        else:
            # Defensive: `mode` is only ever set to the three values above.
            raise ValueError(f"Unexpected input line: {line.strip()}")


class Reader:
    """
    Per-position view of wiggle data.

    Iterating yields one ``(chrom, position, value)`` tuple for every
    zero-based position covered by each interval that `IntervalReader`
    produces from the wrapped file. Regions which lack a score are not
    yielded.
    """

    def __init__(self, f):
        # Only stored here; it is handed to IntervalReader on iteration.
        self.file = f

    def __iter__(self):
        for record in IntervalReader(self.file):
            chrom, first, stop, _strand, score = record
            # Expand the half-open interval [first, stop) position by position.
            position = first
            while position < stop:
                yield chrom, position, score
                position += 1