File: utils.py

package info (click to toggle)
python-pangolearn 2022-07-09%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 184,720 kB
  • sloc: python: 801; sh: 77; makefile: 16
file content (61 lines) | stat: -rw-r--r-- 1,645 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from Bio import SeqIO
import os
import csv
import sys

import collections
import hashlib
import collections
import csv

def version_from_init(init_file):
    """Extract the ``__version__`` value from a Python source file.

    The file is scanned line by line; the first line that starts with
    ``__version__`` and contains an ``=`` supplies the value.  Quotes
    (single or double) and spaces are removed from the value.

    Args:
        init_file: Path to the file to scan (typically an ``__init__.py``).

    Returns:
        The version string, or ``None`` if no assignment was found.
    """
    version = None
    with open(init_file, "r") as fr:
        for line in fr:
            if not line.startswith("__version__"):
                continue
            # partition keeps everything after the first '=', so a value that
            # itself contains '=' is not truncated.
            _, sep, value = line.rstrip("\n").partition("=")
            if not sep:
                # No assignment on this line (e.g. a bare annotation such as
                # ``__version__: str``); keep looking instead of failing.
                continue
            # Strip both quote styles; the version may be written with
            # either '...' or "...".
            version = value.replace('"', "").replace("'", "").replace(" ", "")
            break
    return version

def get_pango_version(pango_path):
    """Find the package version declared under ``pango_path``.

    Walks ``pango_path`` recursively and reads every ``__init__.py`` found
    with ``version_from_init``.  If several files declare a version, the one
    visited last by ``os.walk`` wins; files that declare no version are
    ignored and do not erase a version found earlier.

    The version found (possibly empty) is printed to stdout.

    Args:
        pango_path: Directory to search.

    Returns:
        The version string.

    Exits:
        Calls ``sys.exit(-1)`` after writing a message to stderr when no
        version could be found.
    """
    version = ""

    for root, _dirs, files in os.walk(pango_path):
        if "__init__.py" not in files:
            continue
        found = version_from_init(os.path.join(root, "__init__.py"))
        # Only accept real hits: a sub-package __init__.py without
        # __version__ must not overwrite a version already found.
        if found:
            version = found
    print("Pango version is:", version)

    if not version:
        sys.stderr.write("No version found at pango path")
        sys.exit(-1)
    return version

def get_hash_string(record):
    """Return the hex MD5 digest of a record's upper-cased sequence.

    Args:
        record: Object with a ``seq`` attribute; the attribute is converted
            with ``str()`` before hashing.

    Returns:
        The hexadecimal MD5 digest of the upper-cased, encoded sequence.
    """
    # Upper-casing first makes the digest independent of sequence case.
    normalised = str(record.seq).upper()
    return hashlib.md5(normalised.encode()).hexdigest()

def get_dict(in_csv,name_column,data_column):
    """Build a lookup from one CSV column to another.

    Args:
        in_csv: Path to a CSV file whose first row is the header.
        name_column: Header of the column providing the keys.
        data_column: Header of the column providing the values.

    Returns:
        A dict mapping each ``name_column`` value to the ``data_column``
        value of the same row.  When a key occurs more than once, the last
        row wins.

    Raises:
        KeyError: If either column name is missing from the header.
    """
    with open(in_csv,"r") as handle:
        return {
            entry[name_column]: entry[data_column]
            for entry in csv.DictReader(handle)
        }

def add_to_hash(seq_file):
    """Index the records of a FASTA file by sequence digest and by sequence.

    Args:
        seq_file: FASTA input passed to ``SeqIO.parse``.

    Returns:
        A tuple ``(hash_map, seq_hash)`` where ``hash_map`` maps the hex MD5
        digest of each upper-cased sequence to its record id, and
        ``seq_hash`` maps each sequence string exactly as read (case
        unchanged) to its record id.  For duplicate keys the last record
        wins.
    """
    id_by_digest = {}
    id_by_sequence = {}
    for entry in SeqIO.parse(seq_file, "fasta"):
        sequence = str(entry.seq)
        # The digest is taken over the upper-cased sequence, while the
        # second mapping is keyed on the sequence as it appears in the file.
        digest = hashlib.md5(sequence.upper().encode()).hexdigest()
        id_by_digest[digest] = entry.id
        id_by_sequence[sequence] = entry.id
    return id_by_digest, id_by_sequence