File: FeatFinderCLI.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (107 lines) | stat: -rw-r--r-- 3,328 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#
#  Copyright (C) 2005-2006 Rational Discovery LLC
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#


import argparse
import re
import os

from rdkit import Chem
from rdkit import RDLogger
from rdkit.Chem import ChemicalFeatures

logger = RDLogger.logger()
splitExpr = re.compile(r'[ \t,]')


def GetAtomFeatInfo(factory, mol):
  res = [None] * mol.GetNumAtoms()
  feats = factory.GetFeaturesForMol(mol)
  for feat in feats:
    ids = feat.GetAtomIds()
    feature = "%s-%s" % (feat.GetFamily(), feat.GetType())
    for id_ in ids:
      if res[id_] is None:
        res[id_] = []
      res[id_].append(feature)
  return res


def initParser():
  """ Initialize the parser """
  parser = argparse.ArgumentParser(description='Determine pharmacophore features of molecules',
                                   epilog=_splashMessage,
                                   formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('-r', dest='reverseIt', default=False, action='store_true',
                      help='Set to get atoms lists for each feature.')
  parser.add_argument('-n', dest='maxLines', default=-1, help=argparse.SUPPRESS, type=int)
  parser.add_argument('fdefFilename', type=existingFile,
                      help='Pharmacophore feature definition file')
  parser.add_argument('smilesFilename', type=existingFile,
                      help='The smiles file should have SMILES in the first column')
  return parser


_splashMessage = """
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
  FeatFinderCLI
  Part of the RDKit (http://www.rdkit.org)
-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
"""


def existingFile(filename):
  """ 'type' for argparse - check that filename exists """
  if not os.path.exists(filename):
    raise argparse.ArgumentTypeError("{0} does not exist".format(filename))
  return filename


def processArgs(args, parser):
  try:
    factory = ChemicalFeatures.BuildFeatureFactory(args.fdefFilename)
  except Exception:
    parser.error("Could not parse Fdef file {0.fdefFilename}.".format(args))

  with open(args.smilesFilename) as inF:
    for lineNo, line in enumerate(inF, 1):
      if lineNo == args.maxLines + 1:
        break
      smi = splitExpr.split(line.strip())[0].strip()
      mol = Chem.MolFromSmiles(smi)
      if mol is None:
        logger.warning("Could not process smiles '%s' on line %d." % (smi, lineNo))
        continue

      print('Mol-%d\t%s' % (lineNo, smi))
      if args.reverseIt:
        feats = factory.GetFeaturesForMol(mol)
        for feat in feats:
          print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()), end='')
          print(', '.join([str(x) for x in feat.GetAtomIds()]))
      else:
        featInfo = GetAtomFeatInfo(factory, mol)
        for i, v in enumerate(featInfo):
          print('\t% 2s(%d)' % (mol.GetAtomWithIdx(i).GetSymbol(), i + 1), end='')
          if v:
            print('\t', ', '.join(v))
          else:
            print()


def main():
  """ Main application """
  parser = initParser()
  args = parser.parse_args()
  processArgs(args, parser)


if __name__ == '__main__':
  main()