File: apred.py

package info (click to toggle)
dnapi 1.1-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 42,284 kB
  • sloc: python: 831; sh: 62; makefile: 13
file content (55 lines) | stat: -rw-r--r-- 1,861 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""Functions for adapter prediction.

"""

from operator import itemgetter

from dnapilib.io_utils import get_file_obj, fastq_sequence
from dnapilib.kmer import count_kmers, filter_kmers, assemble_kmers


def adapter_prediction(fastq, ratio, kmer_len, sample_num):
    """Return a list of predicted adapters.

       Predict 3' adapter sequence with a combination of k and R.
    """
    fq_obj = get_file_obj(fastq)
    fq_seq = fastq_sequence(fq_obj)
    freq = count_kmers(fq_seq, kmer_len, sample_num)
    clean = filter_kmers(freq, kmer_len, ratio)
    assembl = sorted(assemble_kmers(clean, kmer_len//2),
                     key=itemgetter(1), reverse=True)
    fq_obj.close()
    return assembl

def iterative_adapter_prediction(fastq, ratios, kmer_lens,
                                 sample_num, keep_len=12):
    """Return a list of predicted adapters.

       Iteratively predict 3' adapter sequence with different
       combinations of k and R.
    """
    fq_seq = []
    fq_obj = get_file_obj(fastq)
    for i, s in enumerate(fastq_sequence(fq_obj)):
        if i == sample_num:
            break
        fq_seq.append(s)
    fq_obj.close()

    collection = {}
    for kmer_len in kmer_lens:
        curated = {}
        freq = count_kmers(fq_seq, kmer_len, sample_num)
        for ratio in ratios:
            clean = filter_kmers(freq, kmer_len, ratio)
            assembl = assemble_kmers(clean, kmer_len//2)
            for s, c in assembl:
                key = s[:keep_len]
                curated[key] = max(curated.get(key,0), c)
        for s, c in curated.items():
            collection[s] = round(collection.get(s,0)+c, 4)
    asmbl_min_len = min(map(len, collection.keys()))
    assembl = sorted(assemble_kmers(list(collection.items()), asmbl_min_len//2),
                     key=itemgetter(1), reverse=True)
    return assembl