File: test_writeBedGraph.py

package info (click to toggle)
python-deeptools 3.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 34,624 kB
  • sloc: python: 14,765; xml: 4,090; sh: 38; makefile: 11
file content (143 lines) | stat: -rw-r--r-- 5,082 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from unittest import TestCase
from nose.tools import *
import os

import deeptools.writeBedGraph as wr
from deeptools.writeBedGraph import scaleCoverage

# Directory containing the alignment fixtures used by the tests below.
# The trailing slash is kept so file names can be appended directly.
ROOT = "{}/test_data/".format(os.path.dirname(os.path.abspath(__file__)))

__author__ = 'fidel'


class TestWriteBedGraph(TestCase):
    """Tests for ``WriteBedGraph.writeBedGraph_worker`` using the files under ROOT."""

    def setUp(self):
        """
        The distribution of reads between the two bam files is as follows.

        They cover 200 bp::

              0                              100                           200
              |------------------------------------------------------------|
            A                                ==============>
                                                            <==============


            B                 <==============               ==============>
                                             ==============>
                                                            ==============>
        """

        self.root = ROOT
        self.bamFile1 = self.root + "testA.bam"
        self.bamFile2 = self.root + "testB.bam"
        self.bamFile_PE = self.root + "test_paired2.bam"
        self.chrom = '3R'

        self.step_size = 50
        self.bin_length = 50
        self.func_args = {'scaleFactor': 1.0}

        self.c = wr.WriteBedGraph([self.bamFile1],
                                  binLength=self.bin_length,
                                  stepSize=self.step_size)

    def _worker_lines(self, chrom, start, end, func_args):
        """
        Run ``self.c.writeBedGraph_worker`` with ``scaleCoverage`` and return
        the lines of the temporary file it produced.

        :param chrom: chromosome name passed to the worker
        :param start: region start passed to the worker
        :param end: region end passed to the worker
        :param func_args: keyword dict forwarded to ``scaleCoverage``
        :return: list of lines (with trailing newlines) read from the file
        """
        worker_result = self.c.writeBedGraph_worker(chrom, start, end,
                                                    scaleCoverage, func_args)
        # Element 3 of the worker's return value is the path of the temp file.
        temp_path = worker_result[3]
        try:
            with open(temp_path, 'r') as handle:
                return handle.readlines()
        finally:
            # Remove the file before any assertion runs, so a failing test
            # does not leave temporary files behind.
            os.remove(temp_path)

    def test_writeBedGraph_worker(self):
        # zero-coverage bins must be written out as explicit 0 entries
        self.c.zerosToNans = False
        self.c.skipZeros = False

        res = self._worker_lines('3R', 0, 200, self.func_args)
        self.assertEqual(res, ['3R\t0\t100\t0\n', '3R\t100\t200\t1\n'])

    def test_writeBedGraph_worker_zerotonan(self):
        # turn on zeroToNan
        self.c.zerosToNans = True
        res = self._worker_lines('3R', 0, 200, self.func_args)
        self.assertEqual(res, ['3R\t100\t200\t1\n'])

    def test_writeBedGraph_worker_scaling(self):
        # a scale factor of 3 is expected to triple the reported coverage
        func_args = {'scaleFactor': 3.0}
        res = self._worker_lines('3R', 0, 200, func_args)
        self.assertEqual(res, ['3R\t0\t100\t0\n', '3R\t100\t200\t3\n'])

    def test_writeBedGraph_worker_ignore_duplicates(self):
        # rebuild the writer on the second file with duplicate removal enabled
        self.c = wr.WriteBedGraph([self.bamFile2],
                                  binLength=self.bin_length,
                                  stepSize=self.step_size, ignoreDuplicates=True)
        self.c.zerosToNans = True

        res = self._worker_lines('3R', 0, 200, self.func_args)
        self.assertEqual(res, ['3R\t50\t200\t1\n'])

    def test_writeBedGraph_worker_smoothing(self):
        # 20 bp bins with a smoothing length of 60 bp
        self.c.binLength = 20
        self.c.stepSize = 20
        self.c.smoothLength = 60
        res = self._worker_lines('3R', 100, 200, self.func_args)
        self.assertEqual(res, ['3R\t100\t120\t1\n', '3R\t120\t180\t1.33333\n', '3R\t180\t200\t1\n'])

    def test_writeBedGraph_cigar(self):
        """
        The bamFile1 contains a read at position 10
        with the following CIGAR: 10S20M10N10M10S
        that maps to a chromosome named chr_cigar.
        """

        # turn off read extension
        self.c.extendPairedEnds = False
        self.c.binLength = 10
        self.c.stepSize = 10
        res = self._worker_lines('chr_cigar', 0, 100, self.func_args)

        # the single read is split into bin 10-30, and then 40-50
        self.assertEqual(res, ['chr_cigar\t0\t10\t0\n',
                               'chr_cigar\t10\t30\t1\n',
                               'chr_cigar\t30\t40\t0\n',
                               'chr_cigar\t40\t50\t1\n',
                               'chr_cigar\t50\t100\t0\n'])


class TestWriteBedGraphCRAM(TestWriteBedGraph):
    # Re-runs every test inherited from TestWriteBedGraph; only the fixture
    # files differ.

    def setUp(self):
        """
        Same setup as TestWriteBedGraph.setUp, but pointing at the CRAM files.
        """
        # shared test parameters
        self.chrom = '3R'
        self.bin_length = 50
        self.step_size = 50
        self.func_args = dict(scaleFactor=1.0)

        # CRAM counterparts of the BAM fixtures
        self.root = ROOT
        self.bamFile1 = self.root + "testA.cram"
        self.bamFile2 = self.root + "testB.cram"
        self.bamFile_PE = self.root + "test_paired2.cram"

        writer_options = dict(binLength=self.bin_length,
                              stepSize=self.step_size)
        self.c = wr.WriteBedGraph([self.bamFile1], **writer_options)