File: load_mCaller_data.py

package info (click to toggle)
mcaller 1.0.3+git20210624.b415090-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 14,920 kB
  • sloc: python: 878; sh: 79; makefile: 19
file content (18 lines) | stat: -rw-r--r-- 1,054 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from extract_contexts import base_models

def tsv2matrix(tsvname,base):
    """Load signal vectors and sequence contexts from a tab-separated file.

    Each non-blank line is split on tabs, and columns 3-6 (0-based) are read
    as the sequence context, a comma-separated list of signal values, the
    strand (read but not used here) and the label. The model for a row is
    looked up from the two characters of the context starting at its
    midpoint, ``context[len(context)//2 : len(context)//2 + 2]``.

    A row contributes data only if it has at least 6 signal values and none
    of them is the exact string "0". Blank lines are skipped.

    Args:
        tsvname: path of the TSV file to read.
        base: passed through unchanged to ``base_models(base, False)``.
            NOTE(review): ``base_models`` is imported from extract_contexts
            and is assumed to return a mapping from two-character context
            substrings to model names -- confirm against that module.

    Returns:
        A ``(signals, contexts)`` tuple of nested dicts keyed first by model
        and then by label. ``signals[model][label]`` is a list of lists of
        floats and ``contexts[model][label]`` is the list of the matching
        context strings, in the same order. A label that only occurs on
        rejected rows still gets an (empty) entry in both dicts.

    Raises:
        ValueError: if a non-blank line has fewer than 7 tab-separated
            columns, or a signal value of an accepted row is not a float.
        KeyError: if the two-character substring of a context is not a key
            of the mapping returned by ``base_models``.
    """
    base_model = base_models(base,False)
    signals = {bm:{} for bm in base_model.values()}
    contexts = {bm:{} for bm in base_model.values()}
    with open(tsvname,'r') as infi:
        for line in infi:
            # Tolerate blank lines (e.g. a trailing empty line at the end of
            # the file) instead of failing on the column unpack below.
            if not line.strip():
                continue
            #ecoli1  c183b422-5dda-4a23-b732-309e8f7f331f    1794509 ATGCGMTCCAG     1.49,1.93166666667,-0.385,5.615,5.36,-0.945,15.7357504216     -       m6A
            context,sigs,strand,label = line.split('\t')[3:7]
            label = label.strip()  # last column carries the line terminator
            mid = len(context)//2
            twobase_model = base_model[context[mid:mid+2]]
            # Register the label before filtering so that it is present in
            # the output even when every one of its rows is rejected.
            label_signals = signals[twobase_model].setdefault(label,[])
            label_contexts = contexts[twobase_model].setdefault(label,[])
            values = sigs.split(',')
            # Keep only rows with enough signal values and no literal "0".
            if len(values) >= 6 and "0" not in values:
                label_signals.append([float(s) for s in values])
                label_contexts.append(context)
    return signals, contexts