File: mol_transform.py

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (123 lines) | stat: -rw-r--r-- 4,388 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Copyright (c) 2013, GlaxoSmithKline Research & Development Ltd.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
#       copyright notice, this list of conditions and the following
#       disclaimer in the documentation and/or other materials provided
#       with the distribution.
#     * Neither the name of GlaxoSmithKline Research & Development Ltd.
#       nor the names of its contributors may be used to endorse or promote
#       products derived from this software without specific prior written
#       permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Jameed Hussain, July 2013
from __future__ import print_function
import sys
import re
from optparse import OptionParser
from rdkit import Chem
from rdkit.Chem import AllChem


def smiles_to_smarts(smi):
  mol = Chem.MolFromSmiles(smi)

  if (mol is None):
    sys.stderr.write("Can't generate mol for: %s\n" % (smi))
    return None

  #change the isotope to 42
  for atom in mol.GetAtoms():
    atom.SetIsotope(42)

  #preint out the smiles - all the atom attributes will be fully specified
  smarts = Chem.MolToSmiles(mol, isomericSmiles=True)
  #remove the 42 isotope labels
  smarts = re.sub(r'\[42', "[", smarts)
  #now have a fully specified SMARTS - simples!

  return smarts


if __name__ == '__main__':

  parser = OptionParser(
    description="Program to apply transformations to a set of input molecules",
    epilog="Example command: mol_transform.py -t TRANSFORM_FILE <SMILES_FILE\t\t"
    "Format of smiles file: SMILES ID <space or comma separated>\t\t\t"
    "Format of transform file: transform <one per line>\t\t\t"
    "Output: SMILES,ID,Transfrom,Modified_SMILES")
  parser.add_option('-f', '--file', action='store', dest='transform_file', type='string',
                    help='The file containing the transforms to apply to your input SMILES')

  (options, args) = parser.parse_args()

  #print options.transform_file
  if (options.transform_file == None):
    print("Please specify the transform file.")
    sys.exit(1)

  smiles = []
  #read the STDIN
  for line in sys.stdin:
    line = line.rstrip()
    smi, id = re.split('\s|,', line)
    #print smiles,id
    smiles.append((smi, id))

  #read the transform file
  #all the transform must come from BioDig to guarantee they have been cansmirk'ed
  infile = open(options.transform_file, 'r')

  print("Input_SMILES,ID,RG-Transform,RG-transformedSMILES")
  for transform in infile:
    transform = transform.rstrip()

    #need to conver the smiles to smart to get rid of any potential issues
    lhs, rhs = transform.split(">>")

    if (lhs == "[*:1][H]"):
      lhs = "[*;!H0:1]"
    else:
      lhs = smiles_to_smarts(lhs)

    if (rhs == "[*:1][H]"):
      rhs = "[*:1]"
    else:
      rhs = smiles_to_smarts(rhs)

    rdkit_transform = "%s>>%s" % (lhs, rhs)
    rxn = AllChem.ReactionFromSmarts(rdkit_transform)
    #rxn = AllChem.ReactionFromSmarts(transform)

    for x in smiles:
      mol = Chem.MolFromSmiles(x[0])
      ps = rxn.RunReactants([mol])

      products = set()
      for y in range(len(ps)):
        for z in range(len(ps[y])):
          p = ps[y][z]
          Chem.SanitizeMol(p)
          products.add(Chem.MolToSmiles(p, isomericSmiles=True))

      for p in products:
        print("%s,%s,%s,%s" % (x[0], x[1], transform, p))