File: tss1.py

package info (click to toggle)

htseq 2.0.9%2Bdfsg-1

links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 103,476 kB
sloc: python: 6,280; sh: 211; cpp: 147; makefile: 80

file content (31 lines) | stat: -rw-r--r-- 906 bytes

parent folder | download | duplicates (2)

import HTSeq
import numpy
from matplotlib import pyplot as plt


bamfile = HTSeq.BAM_Reader("SRR001432_head.bam")
gtffile = HTSeq.GFF_Reader("Homo_sapiens.GRCh37.56_chrom1.gtf")
halfwinwidth = 3000
fragmentsize = 200

coverage = HTSeq.GenomicArray("auto", stranded=False, typecode="i")
for almnt in bamfile:
    if almnt.aligned:
        almnt.iv.length = fragmentsize
        coverage[almnt.iv] += 1

tsspos = set()
for feature in gtffile:
    if feature.type == "exon" and feature.attr["exon_number"] == "1":
        tsspos.add(feature.iv.start_d_as_pos)

profile = numpy.zeros(2 * halfwinwidth, dtype="i")
for p in tsspos:
    window = HTSeq.GenomicInterval(
        p.chrom, p.pos - halfwinwidth, p.pos + halfwinwidth, "."
    )
    wincvg = numpy.fromiter(coverage[window], dtype="i", count=2 * halfwinwidth)
    if p.strand == "+":
        profile += wincvg
    else:
        profile += wincvg[::-1]