File: kmer_utils.py

package info (click to toggle)
kmc 3.2.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,716 kB
  • sloc: cpp: 38,308; python: 664; makefile: 216; perl: 179; sh: 34
file content (48 lines) | stat: -rw-r--r-- 1,172 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3
''' Some helpful utility functions.'''


def rev_comp(kmer):
    ''' Return the reverse complement of a k-mer.

    The k-mer is read from its last symbol to its first and every symbol
    is replaced by its complement (A<->T, C<->G).

    Only the upper-case symbols 'A', 'C', 'G' and 'T' are supported; any
    other symbol raises KeyError.
    '''
    complement = {
        'A': 'T',
        'C': 'G',
        'G': 'C',
        'T': 'A',
    }
    # Join once instead of growing a string with += inside a loop.
    return ''.join(complement[symb] for symb in reversed(kmer))

def get_minimizer_no_canonical(kmer, minimizer_len):
    ''' Return the smallest window of length minimizer_len in a k-mer.

    Every contiguous window kmer[i:i + minimizer_len] is compared with
    the ordinary `<` ordering and the smallest one is returned. The
    reverse complement is not considered here.

    If the k-mer is shorter than minimizer_len, the only window examined
    is kmer[0:minimizer_len], i.e. the whole k-mer.
    '''
    # Always examine at least the window starting at offset 0.
    window_count = max(1, len(kmer) - minimizer_len + 1)
    return min(
        kmer[start:start + minimizer_len] for start in range(window_count)
    )

def get_minimizer(kmer, minimizer_len):
    ''' Return the canonical minimizer of a k-mer.

    The minimizer of the k-mer and the minimizer of its reverse
    complement are both computed with get_minimizer_no_canonical, and
    the smaller of the two is returned.
    '''
    # The tuple is built first, so rev_comp runs before any minimizer search.
    strands = (kmer, rev_comp(kmer))
    return min(
        get_minimizer_no_canonical(strand, minimizer_len)
        for strand in strands
    )

def kmer_to_uint(kmer):
    ''' Convert a k-mer string to its integer encoding.

    Each symbol contributes two bits (A=0, C=1, G=2, T=3), with the first
    symbol of the k-mer ending up in the most significant position. An
    empty k-mer gives 0; a symbol outside 'A', 'C', 'G', 'T' raises
    KeyError.
    '''
    codes = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
    value = 0
    for base in kmer:
        # Make room for two more bits, then drop the symbol code into them.
        value = (value << 2) | codes[base]
    return value