File: pmi.py

package info (click to toggle)
rdkit 202503.1-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220,160 kB
  • sloc: cpp: 399,240; python: 77,453; ansic: 25,517; java: 8,173; javascript: 4,005; sql: 2,389; yacc: 1,565; lex: 1,263; cs: 1,081; makefile: 580; xml: 229; fortran: 183; sh: 105
file content (48 lines) | stat: -rw-r--r-- 1,453 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#
# Copyright (C) 2016 Greg Landrum
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.

import numpy as np

# generates reference data for the PMI descriptors
from rdkit import Chem
from rdkit.Chem import AllChem


def GetMoments(mol, includeWeights):
  conf = mol.GetConformer()
  if includeWeights:
    masses = np.array([x.GetMass() for x in mol.GetAtoms()])
  else:
    masses = [1.0] * mol.GetNumAtoms()

  ps = conf.GetPositions()
  mps = [x * y for x, y in zip(ps, masses)]
  centroid = np.sum(mps, axis=0) / sum(masses)
  cps = ps - centroid
  xx = xy = xz = yy = yz = zz = 0.0
  for m, p in zip(masses, cps):
    xx += m * (p[1] * p[1] + p[2] * p[2])
    yy += m * (p[0] * p[0] + p[2] * p[2])
    zz += m * (p[1] * p[1] + p[0] * p[0])
    xy -= m * p[0] * p[1]
    xz -= m * p[0] * p[2]
    yz -= m * p[1] * p[2]
  covm = np.array([[xx, xy, xz], [xy, yy, yz], [xz, yz, zz]])
  res = np.linalg.eigvals(covm)
  return sorted(res)


if __name__ == '__main__':
  suppl = Chem.SDMolSupplier('./PBF_egfr.sdf', removeHs=False)
  output = open('./PMI_egfr.out', 'w+')
  for m in suppl:
    i1, i2, i3 = GetMoments(m, True)
    mi1, mi2, mi3 = GetMoments(m, False)
    print("%s %.4f %.4f %.4f %.4f %.4f %.4f" % (m.GetProp("_Name"), i1, i2, i3, mi1, mi2, mi3),
          file=output)