File: test_pwm_parsers.py

package info (click to toggle)
python-cogent 2024.5.7a1+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 74,600 kB
  • sloc: python: 92,479; makefile: 117; sh: 16
file content (40 lines) | stat: -rw-r--r-- 1,499 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from unittest import TestCase

from numpy import array
from numpy.testing import assert_allclose, assert_array_equal

from cogent3.core.moltype import get_moltype
from cogent3.parse import cisbp, jaspar


class TestPwmParsers(TestCase):
    """Tests for the ``jaspar`` and ``cisbp`` readers from ``cogent3.parse``.

    Both tests load fixture files through paths relative to the current
    working directory (``data/...``), so they need to be run from the
    directory that contains ``data/``.
    """

    def test_jaspar(self):
        """correctly load jaspar formatted counts matrix"""
        path = "data/sample.jaspar"
        mid, pwm = jaspar.read(path)
        self.assertEqual(mid, ["PSSMid", "HGNCsymbol"], "ID line wrong")
        # note state indices are ordered by moltype: ``expect`` holds one row
        # per state and is transposed before comparison, so the first index
        # of ``pwm`` selects a column of ``expect``. The spot checks at the
        # end pin "T" to the first row and "A" to the third row.
        expect = [
            [35, 374, 30, 121, 6, 121, 33],
            [0, 10, 0, 0, 3, 2, 44],
            [352, 3, 354, 268, 360, 222, 155],
            [2, 2, 5, 0, 10, 44, 157],
        ]
        assert_array_equal(pwm.array, array(expect).T)
        self.assertEqual(pwm[0, "A"], 352)
        self.assertEqual(pwm[3, "T"], 121)

    def test_cisbp(self):
        """correctly read a weights matrix"""
        path = "data/M0926_1.02.txt"
        pfm = cisbp.read(path)
        # values rounded to two decimals, hence the absolute tolerance on the
        # whole-matrix comparison below
        expect = [
            [0.37, 0.55, 0.17, 0.06, 0.64, 0.28, 0.18, 0.26, 0.35],
            [0.24, 0.19, 0.07, 0.09, 0.15, 0.28, 0.08, 0.29, 0.28],
            [0.2, 0.17, 0.53, 0.78, 0.11, 0.16, 0.26, 0.18, 0.19],
            [0.18, 0.09, 0.24, 0.06, 0.1, 0.28, 0.48, 0.27, 0.18],
        ]
        assert_allclose(pfm.array, array(expect).T, atol=1e-2)
        # full-precision spot checks (presumably the unrounded file values);
        # compared with a tolerance rather than exact float equality so the
        # test does not depend on bit-identical float parsing
        assert_allclose(pfm[0, "A"], 0.199862209150251)
        assert_allclose(pfm[6, "C"], 0.0787969447816471)