File: Fragments.py

package info (click to toggle)
rdkit 202009.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 129,624 kB
  • sloc: cpp: 288,030; python: 75,571; java: 6,999; ansic: 5,481; sql: 1,968; yacc: 1,842; lex: 1,254; makefile: 572; javascript: 461; xml: 229; fortran: 183; sh: 134; cs: 93
file content (58 lines) | stat: -rw-r--r-- 1,619 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# $Id$
#
# Copyright (C) 2002-2010 greg Landrum and Rational Discovery LLC
#
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#
""" functions to match a bunch of fragment descriptors from a file

No user-serviceable parts inside.  ;-)

"""
import os
from rdkit import RDConfig
from rdkit import Chem

# Default pattern file: 'FragmentDescriptors.csv' inside RDConfig.RDDataDir.
# _LoadPatterns() reads this file when it is called without a file name.
defaultPatternFileName = os.path.join(RDConfig.RDDataDir, 'FragmentDescriptors.csv')


def _CountMatches(mol, patt, unique=True):
  """ returns the number of substructure matches of patt found in mol

  unique is forwarded to mol.GetSubstructMatches() as its uniquify argument.

  """
  matches = mol.GetSubstructMatches(patt, uniquify=unique)
  return len(matches)


# Registry of (name, function) tuples; _LoadPatterns() appends one entry for
# every pattern it loads.
fns = []


def _LoadPatterns(fileName=None):
  """ loads fragment patterns from a file and registers a counting function for each

  Arguments:

    - fileName: path of the pattern file; if None, defaultPatternFileName is used

  Lines that are empty or start with '#' are skipped, as are lines with fewer
  than three tab-separated fields. For every other line the first three fields
  are taken as name, description and SMARTS. Double quotes are removed from the
  description and '=' and '-' in the name are replaced with '_'.

  For each accepted line a (name, function) tuple is appended to the
  module-level list fns. The function has the signature
  fn(mol, countUnique=True, pattern=<parsed SMARTS>), returns the result of
  _CountMatches() and carries the description as its docstring.

  Raises ImportError if Chem.MolFromSmarts() returns a false value or a pattern
  without atoms.

  An IOError raised while opening or reading the file is ignored; entries that
  were appended before the error remain in fns.

  """
  if fileName is None:
    fileName = defaultPatternFileName
  try:
    with open(fileName, 'r') as inF:
      # iterate the file lazily; there is no need to hold every line in memory
      for line in inF:
        # drop the line terminator so that it cannot end up in the last field
        # (and from there in the SMARTS or in the error message below)
        line = line.rstrip('\r\n')
        if not line or line[0] == '#':
          continue
        splitL = line.split('\t')
        if len(splitL) < 3:
          continue
        name, descr, sma = splitL[:3]
        descr = descr.replace('"', '')
        patt = Chem.MolFromSmarts(sma)
        if not patt or patt.GetNumAtoms() == 0:
          raise ImportError('Smarts %s could not be parsed' % (repr(sma)))
        # patt is bound as a default argument so that each function keeps its
        # own pattern instead of seeing the last one assigned in this loop
        fn = lambda mol, countUnique=True, pattern=patt: _CountMatches(mol, pattern, unique=countUnique)
        fn.__doc__ = descr
        name = name.replace('=', '_')
        name = name.replace('-', '_')
        fns.append((name, fn))
  except IOError:
    # NOTE(review): a missing or unreadable pattern file is tolerated silently,
    # presumably so that the module still imports without its data file - confirm
    pass


# Load the default patterns and expose every counting function as a
# module-level attribute named after its pattern.
_LoadPatterns()
for name, fn in fns:
  # Assign through globals() instead of exec() so that text read from the
  # pattern file is never executed as code. Names that could not be used as an
  # attribute are still rejected loudly.
  if not name.isidentifier():
    raise ImportError('Invalid fragment name %s' % (repr(name)))
  globals()[name] = fn
# NOTE(review): fn is reset, presumably so that the loop variable does not stay
# behind as a callable module attribute - confirm
fn = None