File: chiral_embed.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (43 lines) | stat: -rw-r--r-- 1,332 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

from rdkit import Chem
from rdkit.Chem.PyMol import MolViewer
from rdkit.Chem import AllChem
import gzip

def _connect_viewer():
  """Return a freshly cleared MolViewer, or None if one cannot be set up.

  Any exception raised while constructing the viewer or while clearing it
  is treated as "no viewer available"; visualisation is optional.
  """
  try:
    viewer = MolViewer()
    viewer.DeleteAll()
  except Exception:
    return None
  return viewer


# Optional viewer handle; None means run without visualisation.
v = _connect_viewer()

# Scan the chiral SMILES set and, for each molecule whose chiral centres all
# carry a label other than '?', try 100 embeddings (random seeds 1..100).
#   * chembl_20_chiral.problems.smi: one line per embedding whose canonical
#     SMILES, after re-deriving chiral tags from the 3D structure, differs
#     from the canonical SMILES of the input.
#   * chembl_20_chiral.noconfs.smi: one line per seed for which
#     EmbedMolecule returned a negative value (treated here as failure).
# Each output line is "<input smiles> <name> <seed>".
#
# All three files are managed by a single `with` so they are flushed and
# closed even if processing stops early with an exception.
with open('chembl_20_chiral.problems.smi', 'w+') as outf, \
     open('chembl_20_chiral.noconfs.smi', 'w+') as noconff, \
     gzip.open('../Data/chembl_20_chiral.smi.gz') as inf:
  for i, raw in enumerate(inf):
    # The archive is read in binary mode, so each record is decoded here.
    # The first two space-separated fields are used as SMILES and name.
    fields = raw.strip().decode().split(' ')
    smiles = fields[0]
    mol = Chem.MolFromSmiles(smiles)
    if not mol:
      continue

    # Skip molecules that have any chiral centre labelled '?'.
    cents = Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    if any(label == '?' for _, label in cents):
      continue

    nm = fields[1]
    csmi = Chem.MolToSmiles(mol, True)
    for j in range(100):
      # A fresh H-added copy is built for every attempt because the calls
      # below operate on `mh` itself.
      mh = Chem.AddHs(mol)
      ok = AllChem.EmbedMolecule(mh, randomSeed=j + 1)
      if ok >= 0:
        # Re-derive the chiral tags from the embedded structure and compare
        # canonical SMILES with the input's.
        Chem.AssignAtomChiralTagsFromStructure(mh)
        newm = Chem.RemoveHs(mh)
        smi = Chem.MolToSmiles(newm, True)
        if smi != csmi:
          print('%d %d %s:\n%s\n%s' % (i, j, nm, csmi, smi))
          print('%s %s %d' % (smiles, nm, j + 1), file=outf)

          if v is not None:
            v.ShowMol(mh, name='%s-%d' % (nm, j), showOnly=False)
            # NOTE(review): this break only runs when a viewer is attached;
            # without one, all 100 seeds are tried and every mismatch is
            # logged. Confirm whether the break was meant to sit one level
            # out.
            break  # move immediately onto the next molecule
      else:
        print('noconf %d %d %s: %s' % (i, j, nm, smiles))
        print('%s %s %d' % (smiles, nm, j + 1), file=noconff)

    print('Done with mol %d' % i)