File: example_3_no_comments

package info (click to toggle)
python-pybedtools 0.10.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 16,620 kB
  • sloc: python: 10,030; cpp: 899; makefile: 142; sh: 57
file content (49 lines) | stat: -rw-r--r-- 1,360 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import sys
import multiprocessing
import pybedtools

gff = pybedtools.example_filename('gdc.gff')
bam = pybedtools.example_filename('gdc.bam')

g = pybedtools.BedTool(gff).remove_invalid().saveas()


def featuretype_filter(feature, featuretype):
    if feature[2] == featuretype:
        return True
    return False


def subset_featuretypes(featuretype):
    result = g.filter(featuretype_filter, featuretype).saveas()
    return pybedtools.BedTool(result.fn)


def count_reads_in_features(features_fn):
    """
    Callback function to count reads in features
    """

    return pybedtools.BedTool(bam).intersect(
                             b=features_fn,
                             stream=True).count()


pool = multiprocessing.Pool()

featuretypes = ('intron', 'exon')
introns, exons = pool.map(subset_featuretypes, featuretypes)

exon_only = exons.subtract(introns).merge().remove_invalid().saveas().fn
intron_only = introns.subtract(exons).merge().remove_invalid().saveas().fn
intron_and_exon = exons.intersect(introns).merge().remove_invalid().saveas().fn

features = (exon_only, intron_only, intron_and_exon)
results = pool.map(count_reads_in_features, features)

labels = ('      exon only:',
          '    intron only:',
          'intron and exon:')

for label, reads in zip(labels, results):
    sys.stdout.write('%s %s\n' % (label, reads))