File: cython_needleman_wunsch.pyx

package info (click to toggle)
py-stringmatching 0.4.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 1,956 kB
  • sloc: python: 3,979; makefile: 174; sh: 7
file content (43 lines) | stat: -rw-r--r-- 1,518 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import cython
import numpy as np
cimport numpy as np

@cython.boundscheck(False)
@cython.wraparound(False)

def needleman_wunsch(unicode string1, unicode string2, float gap_cost,
                                                            sim_score):
    """ Computes Needleman-Wunsch measure raw score.
    Args:
        string1, string2 (unicode): Input unicode strings
        gap_cost (float): Cost of gap
        sim_score (sim function): Similarity function given by user if not use default sim ident function
    Returns:
        Returns Needleman-Wunsch similarity score (float)
    """

    cdef int i = 0, j = 0
    cdef double match = 0.0, delete = 0.0, insert = 0.0
    cdef double sim_func_score = 0.0
    cdef int len_s1 = len(string1), len_s2 = len(string2)
    cdef double[:,:] dist_mat = np.zeros((len(string1) + 1, len(string2) + 1), dtype=float)

    # DP initialization
    for i from 0 <= i < (len_s1 + 1):
        dist_mat[i, 0] = -(i * gap_cost)

    # DP initialization
    for j from 0 <= j < (len_s2 + 1):
        dist_mat[0, j] = -(j * gap_cost)


    # Needleman-Wunsch DP calculation
    for i from 1 <= i < (len_s1 + 1):
        for j from 1 <= j < (len_s2 + 1):
            sim_func_score = sim_score(string1[i - 1], string2[j - 1])
            match = dist_mat[i - 1, j - 1] + sim_func_score
            delete = dist_mat[i - 1, j] - gap_cost
            insert = dist_mat[i, j - 1] - gap_cost
            dist_mat[i, j] = max(match, delete, insert)

    return dist_mat[len_s1, len_s2]