File: _HTSeq_internal.py

package info (click to toggle)
htseq 2.0.9%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 103,476 kB
  • sloc: python: 6,280; sh: 211; cpp: 147; makefile: 80
file content (85 lines) | stat: -rw-r--r-- 3,486 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Internal HTSeq functions, not part of the API
import HTSeq
import numpy


def GenomicInterval_range(gi, step):
    """Yield a GenomicPosition for every ``step``-th coordinate of ``gi``.

    Coordinates run from ``gi.start`` (inclusive) to ``gi.end`` (exclusive),
    following the semantics of the built-in ``range``. Each position is
    created on ``gi.chrom`` with ``gi.strand``.
    """
    coords = range(gi.start, gi.end, step)
    yield from (
        HTSeq.GenomicPosition(gi.chrom, coord, gi.strand) for coord in coords
    )


def GenomicInterval_xranged(gi, step):
    """Yield GenomicPositions from ``gi.start_d`` towards ``gi.end_d``.

    The sign of ``step`` is flipped when ``gi.strand`` is ``"-"``, so that
    the walk proceeds from ``start_d`` to ``end_d`` on the minus strand too.
    NOTE(review): ``start_d``/``end_d`` are presumably the strand-aware
    ("directional") bounds of the interval -- confirm against
    GenomicInterval.
    """
    signed_step = step * -1 if gi.strand == "-" else step
    coords = range(gi.start_d, gi.end_d, signed_step)
    yield from (
        HTSeq.GenomicPosition(gi.chrom, coord, gi.strand) for coord in coords
    )


def ChromVector_steps(cv):
    '''Iterate over the runs of constant value held by a ChromVector.

    Yields ``(HTSeq.GenomicInterval, value)`` pairs. How the runs are found
    depends on the type of ``cv.array``:

    - ``numpy.ndarray``: the positions ``cv.iv.start`` .. ``cv.iv.end`` are
      scanned one by one and neighbouring equal values are merged.
    - ``HTSeq.StepVector.StepVector``: the steps returned by ``get_steps()``
      for the slice covered by ``cv.iv`` are passed on.
    - ``HTSeq.StretchVector``: each stretch is scanned on its own, like an
      ndarray, except that a run of consecutive NaNs forms a single step.

    NOTE: storage objects are indexed from 0, while the yielded intervals are
          in genomic coordinates; ``cv.offset`` converts between the two
          (storage index = genomic position - offset).

    Raises:
        SystemError: if ``cv.array`` is none of the types above. As this is a
            generator, the error surfaces when iteration starts.
    '''

    def interval(begin, end):
        # One step's interval, on the chromosome and strand of the vector.
        return HTSeq.GenomicInterval(cv.iv.chrom, begin, end, cv.iv.strand)

    # ndarray storage (numpy.memmap is an ndarray subclass, so it lands here)
    if isinstance(cv.array, numpy.ndarray):
        run_start = cv.iv.start
        run_value = None  # None means "no run opened yet"
        for pos in range(cv.iv.start, cv.iv.end):
            current = cv.array[pos - cv.offset]
            if run_value is not None:
                # Tested with != (not ==) so that a value which is unequal
                # to itself, such as NaN, always opens a new step.
                if not (current != run_value):
                    continue
                # The value changed: the previous run ends right before pos.
                yield (interval(run_start, pos), run_value)
            run_value = current
            run_start = pos
        # The last run is closed by the end of the interval.
        yield (interval(run_start, cv.iv.end), run_value)
        return

    # StepVector storage: the steps already exist, only shift them back to
    # genomic coordinates.
    if isinstance(cv.array, HTSeq.StepVector.StepVector):
        window = cv.array[cv.iv.start - cv.offset: cv.iv.end - cv.offset]
        for first, last, value in window.get_steps():
            yield (interval(first + cv.offset, last + cv.offset), value)
        return

    if not isinstance(cv.array, HTSeq.StretchVector):
        raise SystemError("Unknown array type.")

    # StretchVector storage: handled like a dense array, one stretch at a
    # time, with NaN standing in for missing values.
    # NOTE: a vectorised version (e.g. based on numpy.diff) would be faster;
    # the plain loop is kept because StretchVector is meant to be used for
    # stretches, not steps.
    for stretch_iv, stretch in cv.array:
        run_start = cv.offset + stretch_iv.start
        run_value = None  # None means "no run opened yet"
        for pos, current in enumerate(stretch, cv.offset + stretch_iv.start):
            if run_value is not None:
                # NaN following NaN: still the same run.
                if numpy.isnan(run_value) and numpy.isnan(current):
                    continue
                if not (current != run_value):
                    continue
                # A run can only be emitted once the first differing item
                # is met, i.e. once its end is known.
                yield (interval(run_start, pos), run_value)
            run_value = current
            run_start = pos
        # Close the last run of this stretch at the end of the stretch.
        yield (interval(run_start, cv.offset + stretch_iv.end), run_value)


def GenomicArray_steps(ga):
    """Yield the steps of every ChromVector of a GenomicArray, one after another.

    ``ga.chrom_vectors`` is walked as a two-level mapping (outer values are
    mappings themselves, inner values provide a ``steps()`` method); each
    ``(interval, value)`` pair produced by ``steps()`` is yielded unchanged,
    in the iteration order of the mappings.
    """
    for per_strand in ga.chrom_vectors.values():
        for vector in per_strand.values():
            for interval, value in vector.steps():
                yield interval, value