File: chemutils.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (164 lines) | stat: -rwxr-xr-x 4,901 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#
#  Copyright (C) 2000  greg Landrum
#
""" utility functions with "chemical know-how"

"""


import os
import re

from rdkit import RDConfig

# Default database handed to GetAtomicData: the 'atomdb.gdb' file inside
# RDConfig.RDDataDir, unless RDConfig.usePgSQL is set, in which case the
# "::RDData" name is used instead.
_atomDbName = ("::RDData" if RDConfig.usePgSQL
               else os.path.join(RDConfig.RDDataDir, 'atomdb.gdb'))


def GetAtomicData(atomDict, descriptorsDesired, dBase=_atomDbName, table='atomic_data', where='',
                  user='sysdba', password='masterkey', includeElCounts=0):
    """ pulls atomic data from a database

      **Arguments**

        - atomDict: the dictionary to populate.  It is updated in place with
           one entry per returned row, keyed by the row's NAME value; each
           entry is a dictionary mapping descriptor name -> value

        - descriptorsDesired: the descriptors to pull for each atom.  Names are
           upper-cased before use; the caller's sequence is not modified.
           'NAME' is always pulled, and the computed electron-count fields
           (NVAL, NVAL_NO_FULL_F, NVAL_NO_FULL_D, NVAL_NO_FULL) are never
           requested from the database

        - dBase: the DB to use

        - table: the DB table to use

        - where: the SQL where clause (appended verbatim to the query)

        - user: the user name to use with the DB

        - password: the password to use with the DB

        - includeElCounts: if nonzero, valence electron count fields are added to
           the _atomDict_ (this forces the 'CONFIG' descriptor to be pulled)

      **Returns**

        None.  If the query cannot be executed a message is printed and
        _atomDict_ is left untouched.

      **Notes**

        - the query text is assembled by plain string interpolation of the
          descriptor names, _table_ and _where_, so these must come from a
          trusted source.

    """
    # Computed locally from CONFIG rather than read from the database.
    extraFields = ('NVAL', 'NVAL_NO_FULL_F', 'NVAL_NO_FULL_D', 'NVAL_NO_FULL')
    from rdkit.Dbase import DbModule

    # Filtering (instead of list.remove) drops every occurrence of a computed field.
    columns = [s.upper() for s in descriptorsDesired]
    columns = [s for s in columns if s not in extraFields]
    if 'NAME' not in columns:
        columns.append('NAME')
    if includeElCounts and 'CONFIG' not in columns:
        columns.append('CONFIG')

    toPull = ','.join(columns)
    # Honour the caller-supplied table name instead of a hard-coded one.
    command = 'select %s from %s %s' % (toPull, table, where)

    cn = DbModule.connect(dBase, user, password)
    try:
        c = cn.cursor()
        try:
            c.execute(command)
        except Exception:
            # Best-effort by design: report the failing query and give up quietly.
            print('Problems executing command:', command)
            return
        res = c.fetchall()
    finally:
        # Rows are already in memory (or we are bailing out), so the
        # connection can be released on every path.
        cn.close()

    for row in res:
        tDict = dict(zip(columns, row))
        name = tDict['NAME']
        atomDict[name] = tDict
        if includeElCounts:
            config = tDict['CONFIG']
            tDict['NVAL'] = ConfigToNumElectrons(config)
            tDict['NVAL_NO_FULL_F'] = ConfigToNumElectrons(config, ignoreFullF=1)
            tDict['NVAL_NO_FULL_D'] = ConfigToNumElectrons(config, ignoreFullD=1)
            tDict['NVAL_NO_FULL'] = ConfigToNumElectrons(config, ignoreFullF=1, ignoreFullD=1)


def SplitComposition(compStr):
    """ Breaks a simple chemical composition string into (element, amount) pairs.

        e.g. 'Fe3Al' -> [('Fe', 3.0), ('Al', 1)]

        **Arguments**

         - compStr: the composition string to be processed

        **Returns**

         - a list of 2-tuples, one per element symbol found (in order of
           appearance).  An amount written in the string is converted with
           float(); an element with no amount gets the integer 1.

        **Note**

          - only a capital letter optionally followed by one lowercase letter,
            then an optional run of digits/periods, is recognised; there is
            no support for parentheses, charges, etc.

    """
    elementAndAmount = re.compile(r'([A-Z][a-z]?)([0-9\.]*)')
    return [(symbol, float(amount) if amount else 1)
            for symbol, amount in elementAndAmount.findall(compStr)]


def ConfigToNumElectrons(config, ignoreFullD=0, ignoreFullF=0):
    """ counts the number of electrons appearing in a configuration string

      **Arguments**

        - config: the configuration string (e.g. '[He] 2s^2 2p^4').  Tokens
           are separated by whitespace; the first token is always skipped
           (in the examples at the bottom of this file it is the core, such
           as '[Xe]') and every remaining token must look like
           '<shell>^<count>'

        - ignoreFullD: toggles not counting full d shells

        - ignoreFullF: toggles not counting full f shells

      **Returns**

        the number of valence electrons

      **Notes**

        - a full d or f shell is only ignored when at least one other shell
          follows the first token, so e.g. '[Xe] 5d^10' still counts 10.

        - leading, trailing and repeated whitespace in _config_ is tolerated.

    """
    # split() with no argument collapses runs of whitespace, so padded strings
    # do not yield empty tokens (which would fail below and would also
    # inflate the token count used for the "only shell" test).
    tokens = config.split()
    hasOtherShells = len(tokens) > 2

    nEl = 0
    for token in tokens[1:]:
        parts = token.split('^')
        shell = parts[0]
        incr = int(parts[1])
        if hasOtherShells:
            if ignoreFullF and incr == 14 and 'f' in shell:
                incr = 0
            if ignoreFullD and incr == 10 and 'd' in shell:
                incr = 0
        nEl += incr
    return nEl


if __name__ == '__main__':  # pragma: nocover
    # Ad-hoc demo: print the parsed form of a few composition strings ...
    for composition in ('Fe', 'Fe3Al', 'Fe99PdAl', 'TiNiSiSO12P'):
        print(SplitComposition(composition))

    # ... then the electron count of a few configurations under every
    # combination of the ignoreFullD / ignoreFullF switches.
    configs = [
        '[Xe] 4f^12 6s^2',
        '[Xe] 4f^14 5d^6 6s^2',
        '[Xe] 4f^14 5d^10 6s^2',
        '[Xe] 4f^14 5d^10 6s^2 6p^1',
        '[Xe] 5d^10',
    ]
    # (heading, ignoreFullD, ignoreFullF)
    scenarios = (
        ('ignore all', 1, 1),
        ('ignore d', 1, 0),
        ('ignore f', 0, 1),
        ('ignore None', 0, 0),
    )
    for heading, skipD, skipF in scenarios:
        print(heading)
        for cfg in configs:
            print(cfg, '\t\t\t\t', ConfigToNumElectrons(cfg, ignoreFullD=skipD, ignoreFullF=skipF))