File: pdbCompoundLines.py

package info (click to toggle)
snpeff 5.2.f%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 701,384 kB
  • sloc: java: 62,547; perl: 2,279; sh: 1,185; python: 744; xml: 507; makefile: 50
file content (90 lines) | stat: -rwxr-xr-x 2,119 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python

import sys

#---
# Add items to dictionary
#---
def addToDict(vals, key, val):
    """Store one parsed sub-field in the dictionary `vals`.

    A 'MOL_ID' key starts a new entry: whatever has been collected so far
    is rendered with show() and `vals` is emptied before the new key is
    stored. If `key` is already present in `vals`, `val` is treated as a
    line continuation and concatenated to the stored text (no separator is
    inserted).

    Returns the rendered previous entry when `key` is 'MOL_ID' (an empty
    string if nothing had been collected), otherwise ''.
    """
    out = ''
    if key == 'MOL_ID':
        out = show(vals)    # Render the entry collected so far
        vals.clear()        # Reset for the next entry
    # Add key or append to previous key (line continuation)
    if key in vals:
        vals[key] += val
    else:
        vals[key] = val
    return out

#---
# Parse compound sub fields
#---
def parseKeyVal(keyPrev, line):
    """Split one record line into a (key, value) pair.

    The first 10 characters of `line` are skipped and the remainder is
    trimmed. If it contains a ':', the text before the first ':' is the
    key (trimmed) and the text after it is the value. Otherwise the line
    is taken as a continuation: the key is `keyPrev` and the whole
    remainder is the value.

    The value is trimmed, then a single trailing ';' is removed if present
    (whitespace that preceded the ';' is kept).
    """
    payload = line[10:].strip()
    name, colon, rest = payload.partition(':')
    if colon:
        key, val = name.strip(), rest
    else:
        # No 'KEY:' prefix, so this line continues the previous key
        key, val = keyPrev, payload
    val = val.strip()
    return (key, val[:-1] if val.endswith(';') else val)

#---
# Show dict entries as one string
#---
def show(vals):
    """Render the collected entries as one tab-separated string.

    Returns '' when `vals` has no 'MOL_ID' key. Otherwise returns the
    values for MOL_ID, MOLECULE, SYNONYM and CHAIN, in that order, joined
    by tabs; a key missing from `vals` contributes an empty column.
    """
    if 'MOL_ID' not in vals:
        return ''
    columns = ('MOL_ID', 'MOLECULE', 'SYNONYM', 'CHAIN')
    return '\t'.join(vals.get(col, '') for col in columns)

#-------------------------------------------------------------------------------
# Main
#-------------------------------------------------------------------------------

# Parse name from command line
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback
    sys.exit("Usage: pdbCompoundLines.py name < input")
name = sys.argv[1]

# Initialize
done = False            # Becomes True once a COMPND line has been seen
vals = dict()           # Sub-fields of the compound currently being read
compounds = list()      # Rendered compounds, one string per MOL_ID
orgs = list()           # Organism names collected from SOURCE lines
key = ''                # Last key seen; reused for continuation lines

for line in sys.stdin:
    line = line.rstrip()

    if line.startswith('COMPND'):
        done = True
        (key, val) = parseKeyVal(key, line)             # Parse compound sub fields
        outStr = addToDict(vals, key, val)
        # A non-empty string means a previous compound was just completed
        if outStr:
            compounds.append(outStr)
    elif line.startswith('SOURCE'):
        (key, val) = parseKeyVal(key, line)             # Parse source sub fields
        if key in ('ORGANISM_COMMON', 'ORGANISM_SCIENTIFIC'):
            orgs.append(val)
    elif done:
        # First line that is neither COMPND nor SOURCE after a COMPND line:
        # stop reading
        break

# Add last element
compounds.append(show(vals))

# Show results
# NOTE(review): zip() stops at the shorter of the two lists, so compounds or
# organisms beyond the shorter length are left out of the merged columns --
# confirm this is intended.
out = [i for tup in zip(compounds, orgs) for i in tup]     # Merge lists and create string
out = '\t'.join(out)

# Only inputs that yield more than one compound entry are reported
count = len(compounds)
if count > 1:
    organisms = ','.join(orgs)
    print(f"{name}\t{organisms}\t{count}\t{out}")