File: id_generation.py

package info (click to toggle)
spades 3.13.1+dfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 22,172 kB
  • sloc: cpp: 136,213; ansic: 48,218; python: 16,809; perl: 4,252; sh: 2,115; java: 890; makefile: 507; pascal: 348; xml: 303
file content (78 lines) | stat: -rw-r--r-- 2,193 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
############################################################################
# Copyright (c) 2015 Saint Petersburg State University
# All Rights Reserved
# See file LICENSE for details.
############################################################################

from string_dist_utils import multi_lcs

__author__ = 'anton'

import sys

def CommonPrefix(s1, s2):
    """Return the number of leading positions at which s1 and s2 agree."""
    matched = 0
    # zip stops at the shorter input, which mirrors the two length checks
    # of an index-based scan.
    for a, b in zip(s1, s2):
        if a != b:
            break
        matched += 1
    return matched

def CommonSuffix(s1, s2):
    """Return the number of trailing positions at which s1 and s2 agree."""
    matched = 0
    # Walk both sequences from their last element backwards; zip stops as
    # soon as the shorter one is exhausted.
    for a, b in zip(reversed(s1), reversed(s2)):
        if a != b:
            break
        matched += 1
    return matched

def FindCommon(lines):
    """Return (left, right) lengths of the prefix and suffix common to all lines.

    Every line is compared against lines[0]. The suffix length is clamped
    so that left + right never exceeds the length of the shortest line,
    i.e. the reported prefix and suffix never overlap.
    An empty input yields (0, 0).
    """
    if len(lines) == 0:
        return 0, 0
    reference = lines[0]
    # Comparing the reference with itself contributes len(reference), which
    # is the natural upper bound for both values.
    prefix_len = min([CommonPrefix(line, reference) for line in lines])
    suffix_len = min([CommonSuffix(line, reference) for line in lines])
    shortest = min([len(line) for line in lines])
    return prefix_len, min(suffix_len, shortest - prefix_len)

def generate_ids(lines):
    """Derive ids for the given lines.

    The prefix and suffix lengths reported by FindCommon are cut off every
    line, the trimmed lines are passed to generate_id_candidates, and the
    result of select_ids_from_candidates on those candidates is returned.
    """
    prefix_len, suffix_len = FindCommon(lines)
    trimmed = []
    for line in lines:
        # Keep only the middle part that is not shared by all lines.
        trimmed.append(line[prefix_len: len(line) - suffix_len])
    return select_ids_from_candidates(generate_id_candidates(trimmed))

def select_ids_from_candidates(id_candidates):
    """Pick the shortest distinguishing id for every entry.

    id_candidates holds one list of string pieces per entry. Pieces are
    appended to each entry's id one position at a time; as soon as all ids
    are pairwise different they are returned.

    Returns:
        [] for an empty input,
        [""] for a single entry (nothing to distinguish it from),
        a list of unique ids (same order as id_candidates) on success,
        None when the ids are still not unique after all pieces are used.
    """
    if not id_candidates:
        # Nothing to name; previously this crashed on id_candidates[0].
        return []
    if len(id_candidates) == 1:
        return [""]
    ids = [""] * len(id_candidates)
    # Entries may have different numbers of pieces. Go up to the longest
    # list so the outcome does not depend on which entry happens to be
    # first, and never index past the end of a shorter list.
    max_pieces = max([len(pieces) for pieces in id_candidates])
    for i in range(max_pieces):
        for bcid, pieces in enumerate(id_candidates):
            if i < len(pieces):
                ids[bcid] += pieces[i]
        if len(set(ids)) == len(ids):
            return ids
    return None


def generate_id_candidates(lines):
    """Return one list of candidate id pieces per line.

    multi_lcs(lines) is evaluated once and its result is handed, together
    with each line, to generate_id_candidates_for_barcode.
    NOTE(review): multi_lcs is presumably a common subsequence of all
    lines -- confirm against string_dist_utils.
    """
    shared = multi_lcs(lines)
    return [generate_id_candidates_for_barcode(shared, line) for line in lines]


def generate_id_candidates_for_barcode(all_lcs, line):
    """Split line into the runs of characters not matched against all_lcs.

    line is scanned left to right while all_lcs is consumed greedily: a
    character equal to the next unconsumed element of all_lcs is treated
    as matched and closes the current run. All other characters are
    accumulated. Only non-empty runs are returned, in order.
    """
    pieces = []
    pending = ""
    matched = 0
    lcs_len = len(all_lcs)
    for ch in line:
        if matched >= lcs_len or ch != all_lcs[matched]:
            # Not part of the shared subsequence: extend the current run.
            pending += ch
            continue
        matched += 1
        if pending:
            pieces.append(pending)
            pending = ""
    # Flush the trailing run, if any.
    if pending:
        pieces.append(pending)
    return pieces