File: string_dist_utils.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (67 lines) | stat: -rw-r--r-- 1,757 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# All Rights Reserved
# See file LICENSE for details.
############################################################################

__author__ = 'anton'
import sys

def calculate_dist_table(s1, s2):
    n1 = len(s1)
    n2 = len(s2)
    t = []
    t.append(range(n2 + 1))
    for i in range(1, n1 + 1):
        t_line = [i]
        for j in range(1, n2 + 1):
            if s1[i - 1] == s2[j - 1]:
                t_line.append(t[i - 1][j - 1])
            else:
                t_line.append(1 + min(t[i - 1][j - 1], t_line[j - 1], t[i - 1][j]))
        t.append(t_line)
    return t

def calculate_lcs_table(s1, s2):
    n1 = len(s1)
    n2 = len(s2)
    t = []
    t.append([0] * (n2 + 1))
    for i in range(1, n1 + 1):
        t_line = [0]
        for j in range(1, n2 + 1):
            if s1[i - 1] == s2[j - 1]:
                t_line.append(t[i - 1][j - 1] + 1)
            else:
                t_line.append(max(t[i - 1][j - 1], t_line[j - 1], t[i - 1][j]))
            t.append(t_line)
    return t

def lcs(s1, s2):
    t = calculate_dist_table(s1, s2)
    i = len(s1)
    j = len(s2)
    res = ""
    while i > 0 and j > 0:
        if t[i][j] == t[i - 1][j -1] + 1:
            i -= 1
            j -= 1
        elif t[i][j] == t[i - 1][j] + 1:
            i -= 1
        elif t[i][j] == t[i][j - 1] + 1:
            j -= 1
        else:
            i -= 1
            j -= 1
            res = s1[i] + res
    return res

def dist(s1, s2):
    return calculate_dist_table(s1, s2)[len(s1)][len(s2)]


def multi_lcs(ids):
    all_lcs = ids[0]
    for s in ids:
        all_lcs = lcs(all_lcs, s)
    return all_lcs