1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
|
import HTSeq
import numpy
from matplotlib import pyplot as plt
bamfile = HTSeq.BAM_Reader("SRR001432_head.bam")
gtffile = HTSeq.GFF_Reader("Homo_sapiens.GRCh37.56_chrom1.gtf")
halfwinwidth = 3000
fragmentsize = 200
coverage = HTSeq.GenomicArray("auto", stranded=False, typecode="i")
for almnt in bamfile:
if almnt.aligned:
almnt.iv.length = fragmentsize
coverage[almnt.iv] += 1
tsspos = set()
for feature in gtffile:
if feature.type == "exon" and feature.attr["exon_number"] == "1":
tsspos.add(feature.iv.start_d_as_pos)
profile = numpy.zeros(2 * halfwinwidth, dtype="i")
for p in tsspos:
window = HTSeq.GenomicInterval(
p.chrom, p.pos - halfwinwidth, p.pos + halfwinwidth, "."
)
wincvg = numpy.fromiter(coverage[window], dtype="i", count=2 * halfwinwidth)
if p.strand == "+":
profile += wincvg
else:
profile += wincvg[::-1]
|