File: _pwm.pyx

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (102 lines) | stat: -rw-r--r-- 4,041 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
Extensions used by the `pwm` module.
"""

from cpython.version cimport PY_MAJOR_VERSION


cdef extern from "Python.h":
    int PyBytes_AsStringAndSize(object obj, char **buffer, Py_ssize_t* length) except -1

cdef extern from "numpy/arrayobject.h":
    ctypedef int intp
    ctypedef extern class numpy.ndarray [object PyArrayObject]:
        cdef char *data
        cdef int nd
        cdef intp *dimensions
        cdef intp *strides
        cdef int flags
    # These might be other types in the actual header depending on platform 
    ctypedef int npy_int16
    ctypedef float npy_float32

def score_string( ndarray matrix, ndarray char_to_index, object string, ndarray rval ):
    """
    Score each position in string `string` using the scoring matrix `matrix`.
    Characters in the string are mapped to columns in the matrix by `char_to_index`
    and the score for each position is stored in `rval`.
    
    matrix *must* be a 2d array of type float32
    char_to_index *must* be a 1d array of type int16
    rval *must* be a 1d array of type float32 and the same length as string
    """
    cdef char *buffer
    cdef Py_ssize_t len
    cdef float score
    cdef int i, j
    cdef int matrix_width = matrix.dimensions[0]
    cdef npy_int16 char_index
    # Get input string as character pointer
    if PY_MAJOR_VERSION >= 3:
        bytes_string = string.encode()
    else:
        bytes_string = string
    PyBytes_AsStringAndSize(bytes_string, &buffer, &len )
    # Loop over each position in the string 
    cdef int stop = len - matrix.dimensions[0] + 1
    for i from 0 <= i < stop:
        score = 0.0
        for j from 0 <= j < matrix_width:
            char_index = ( <npy_int16 *> ( char_to_index.data + buffer[i+j] * char_to_index.strides[0] ) )[0]
            if char_index < 0: 
                break
            score += ( <npy_float32*> ( matrix.data + j * matrix.strides[0] + char_index * matrix.strides[1] ) )[0]
        else:
            ( <npy_float32*> ( rval.data + i * rval.strides[0] ) )[0] = score
            
def score_string_with_gaps( ndarray matrix, ndarray char_to_index, object string, ndarray rval ):
    """
    Score each position in string `string` using the scoring matrix `matrix`.
    Characters in the string are mapped to columns in the matrix by `char_to_index`
    and the score for each position is stored in `rval`.

    matrix *must* be a 2d array of type float32
    char_to_index *must* be a 1d array of type int16
    rval *must* be a 1d array of type float32 and the same length as string
    """
    cdef char *buffer
    cdef Py_ssize_t len
    cdef float score
    cdef int i, j, string_pos
    cdef int matrix_width = matrix.dimensions[0]
    cdef npy_int16 char_index
    # Get input string as character pointer
    if PY_MAJOR_VERSION >= 3:
        bytes_string = string.encode()
    else:
        bytes_string = string
    PyBytes_AsStringAndSize(bytes_string, &buffer, &len )
    # Loop over each position in the string 
    cdef int stop = len - matrix.dimensions[0] + 1
    for i from 0 <= i < stop:
        if buffer[i] == '-':
            # Never start scoring at a gap
            continue
        score = 0.0
        string_pos = i
        for j from 0 <= j < matrix_width:
            # Advance to the next non-gap character
            while buffer[string_pos] == '-' and string_pos < len:
                string_pos += 1
            # Ran out of non-gap characters, no more scoring is possible
            if string_pos == len:
                return
            # Find character for position and score
            char_index = ( <npy_int16 *> ( char_to_index.data + buffer[string_pos] * char_to_index.strides[0] ) )[0]
            if char_index < 0: 
                break
            score += ( <npy_float32*> ( matrix.data + j * matrix.strides[0] + char_index * matrix.strides[1] ) )[0]
            # Matched a character, move forward
            string_pos += 1
        else:
            ( <npy_float32*> ( rval.data + i * rval.strides[0] ) )[0] = score