File: pwm_tests.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (88 lines) | stat: -rw-r--r-- 3,327 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from numpy import (
    allclose,
    isnan,
)

from bx.motif import pwm


def test_create():
    """
    Build a frequency matrix from the CTCF rows and inspect its attributes.

    Checks the sorted alphabet, the character-to-index lookup (including a
    character outside the alphabet) and the first and last rows of values.
    """
    matrix = pwm.FrequencyMatrix.from_rows(["A", "C", "G", "T"], get_ctcf_rows())

    # Alphabet sort
    assert matrix.sorted_alphabet == ["A", "C", "G", "T"]

    # Character to index mapping: each alphabet symbol maps to its rank
    for expected_index, symbol in enumerate("ACGT"):
        assert matrix.char_to_index[ord(symbol)] == expected_index
    # A character that is not in the alphabet maps to -1
    assert matrix.char_to_index[ord("Q")] == -1

    # Values: first and last rows of the 20-row motif
    expected_rows = (
        (0, [2620, 2052, 3013, 2314]),
        (19, [3144, 3231, 3056, 567]),
    )
    for row_index, expected in expected_rows:
        assert allclose(matrix.values[row_index], expected)


def test_scoring():
    """
    Check ungapped Stormo scoring of the CTCF motif on both strands.

    Builds the frequency matrix from the CTCF rows, converts it with
    ``to_stormo_scoring_matrix`` and compares the first score returned by
    ``score_string`` with reference values, for the matrix itself and for
    its reverse complement. Also checks that ``score_string`` returns only
    NaN when no position can be scored.
    """
    m = pwm.FrequencyMatrix.from_rows(["A", "C", "G", "T"], get_ctcf_rows())
    # Stormo method
    sm = m.to_stormo_scoring_matrix()
    # Forward matches
    assert allclose(sm.score_string("AATCACCACCTCCTGGCAGG")[0], -156.8261261)
    assert allclose(sm.score_string("TGCCTGCCTCTGTAGGCTCC")[0], -128.8106842)
    assert allclose(sm.score_string("GTTGCCAGTTGGGGGAAGCA")[0], 4.65049839)
    assert allclose(sm.score_string("GCAGACACCAGGTGGTTCAG")[0], 1.60168743)
    # Reverse matches
    rc = sm.reverse_complement()
    assert allclose(rc.score_string("AATCACCACCTCCTGGCAGG")[0], 0.014178276062)
    assert allclose(rc.score_string("TGCCTGCCTCTGTAGGCTCC")[0], 0.723828315735)
    assert allclose(rc.score_string("GTTGCCAGTTGGGGGAAGCA")[0], -126.99407196)
    assert allclose(rc.score_string("GCAGACACCAGGTGGTTCAG")[0], -86.9560623169)
    # Nothing valid: exercise the ungapped scorer here, since the gapped
    # variant of this check already lives in test_scoring_with_gaps.
    assert isnan(sm.score_string("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")).all()
    # Too short: the string has fewer characters than the motif has rows
    assert isnan(sm.score_string("TTTT")).all()


def test_scoring_with_gaps():
    """
    Check gap-aware Stormo scoring of the CTCF motif.

    Each scored case gives a sequence containing ``-`` characters, the index
    to read from the result of ``score_string_with_gaps`` and the expected
    score. The remaining cases are sequences for which every returned entry
    must be NaN.
    """
    frequencies = pwm.FrequencyMatrix.from_rows(["A", "C", "G", "T"], get_ctcf_rows())
    # Stormo method
    scorer = frequencies.to_stormo_scoring_matrix()
    score = scorer.score_string_with_gaps

    # Forward matches: (sequence, index into the result, expected score)
    scored_cases = (
        ("GTTGCCAGT----TGGGGGAAGCATTT---AA", 0, 4.65049839),
        ("GCAGA--CACCAGGTGG--TTCAG---", 0, 1.60168743),
        ("----GTTGCCAGTTGGGGGAAGCA", 4, 4.65049839),
        ("TTT--GTT--GCCA--GTTGGGG-G-A-A-G-C-A-", 5, 4.65049839),
    )
    for sequence, index, expected in scored_cases:
        assert allclose(score(sequence)[index], expected)
    # Index 4 of this sequence is a gap character; no score is expected there
    assert isnan(score("TTT--GTT--GCCA--GTTGGGG-G-A-A-G-C-A-")[4])

    all_nan_cases = (
        # Nothing valid
        "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
        "------------------------------------",
        # Too short
        "TTTT",
        "TTTT----",
    )
    for sequence in all_nan_cases:
        assert isnan(score(sequence)).all()


def get_ctcf_rows():
    """
    Return the rows of the CTCF primary site motif.

    The result is a newly built list of 20 rows, each a list of four
    integers, so callers may modify it without affecting later calls.
    """
    motif = (
        (2620, 2052, 3013, 2314),
        (0, 3580, 1746, 4672),
        (2008, 1790, 4497, 1703),
        (3362, 0, 6637, 0),
        (0, 10000, 0, 0),
        (0, 10000, 0, 0),
        (7467, 0, 1310, 1222),
        (786, 4890, 4323, 0),
        (1179, 6288, 829, 1703),
        (10000, 0, 0, 0),
        (0, 0, 10000, 0),
        (4847, 0, 5152, 0),
        (0, 0, 6200, 3799),
        (0, 0, 10000, 0),
        (0, 0, 10000, 0),
        (1572, 7467, 0, 960),
        (3842, 0, 5545, 611),
        (0, 5895, 4104, 0),
        (1615, 4192, 1397, 2794),
        (3144, 3231, 3056, 567),
    )
    # Convert to lists so the return type matches a plain nested list literal
    return [list(position) for position in motif]