File: chiral_embed.py

package info (click to toggle)
rdkit 202503.6-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 222,000 kB
  • sloc: cpp: 411,111; python: 78,482; ansic: 26,181; java: 8,285; javascript: 4,404; sql: 2,393; yacc: 1,626; lex: 1,267; cs: 1,090; makefile: 581; xml: 229; fortran: 183; sh: 121
file content (43 lines) | stat: -rw-r--r-- 1,332 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gzip

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.PyMol import MolViewer

# Optional PyMol viewer (MolViewer comes from rdkit.Chem.PyMol).  This is a
# best-effort hookup: if either constructing the viewer or the initial
# DeleteAll() call raises, `v` ends up as None and the rest of the script
# must check for that before using it.
try:
  v = MolViewer()
except Exception:
  v = None
else:
  try:
    # presumably clears whatever the viewer is currently showing -- confirm
    v.DeleteAll()
  except Exception:
    v = None

# Chirality round-trip scan.
#
# For every molecule in ../Data/chembl_20_chiral.smi.gz (one "<smiles> <name>"
# record per line) whose chiral centers are all assigned, try 100 embeddings
# with random seeds 1..100.  For each successful embedding the chiral tags are
# re-derived from the 3D structure and the resulting SMILES is compared with
# the SMILES of the input molecule:
#   * mismatches are printed and appended to chembl_20_chiral.problems.smi
#   * failed embeddings are printed and appended to chembl_20_chiral.noconfs.smi
# Both output files get lines of the form "<smiles> <name> <seed>".
#
# All three files are managed by a single `with` so they are flushed and
# closed even if the scan is interrupted by an exception.
with open('chembl_20_chiral.problems.smi', 'w+') as outf, \
     open('chembl_20_chiral.noconfs.smi', 'w+') as noconff, \
     gzip.open('../Data/chembl_20_chiral.smi.gz') as inf:
  for i, raw in enumerate(inf):
    raw = raw.strip()
    if not raw:
      # Blank line (e.g. a trailing newline at the end of the file): there is
      # no SMILES and no name field to read, so skip it.
      continue
    fields = raw.decode().split(' ')
    smiles = fields[0]
    mol = Chem.MolFromSmiles(smiles)
    if not mol:
      # SMILES could not be parsed
      continue
    cents = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    if any(tag == '?' for _, tag in cents):
      # at least one center is reported with the '?' label; only molecules
      # without such centers are tested
      continue
    nm = fields[1]
    # reference SMILES the embedded structures are compared against
    # (second positional argument is presumably isomericSmiles -- confirm)
    csmi = Chem.MolToSmiles(mol, True)
    for j in range(100):
      # A fresh H-added copy is built for every attempt: the copy is modified
      # in place below (embedding, chiral-tag reassignment), so reusing it
      # could let one attempt influence the next.
      mh = Chem.AddHs(mol)
      ok = AllChem.EmbedMolecule(mh, randomSeed=j + 1)
      if ok >= 0:
        # embedding succeeded: perceive chirality from the coordinates and
        # compare against the input
        Chem.AssignAtomChiralTagsFromStructure(mh)
        newm = Chem.RemoveHs(mh)
        smi = Chem.MolToSmiles(newm, True)
        if smi != csmi:
          print('%d %d %s:\n%s\n%s' % (i, j, nm, csmi, smi))
          print('%s %s %d' % (smiles, nm, j + 1), file=outf)

          if v is not None:
            v.ShowMol(mh, name='%s-%d' % (nm, j), showOnly=False)
            # NOTE(review): this break only fires when a viewer is attached;
            # without one, every failing seed is recorded.  The comment reads
            # as if it were meant to be unconditional -- confirm intent.
            break  # move immediately onto the next molecule
      else:
        # a negative return value is treated as "no conformer generated"
        print('noconf %d %d %s: %s' % (i, j, nm, smiles))
        print('%s %s %d' % (smiles, nm, j + 1), file=noconff)

    print('Done with mol %d' % i)