File: normalizations.in

package info (click to toggle)
rdkit 202209.3-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 203,880 kB
  • sloc: cpp: 334,239; python: 80,247; ansic: 24,579; java: 7,667; sql: 2,123; yacc: 1,884; javascript: 1,358; lex: 1,260; makefile: 576; xml: 229; fortran: 183; cs: 181; sh: 101
file content (69 lines) | stat: -rw-r--r-- 3,344 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
//
//  Copyright (C) 2021 Greg Landrum
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//

namespace defaults {
// Built-in table of normalization transforms.
//
// Every element is a {name, transform} pair of strings. The transform text has
// the shape "<match pattern>>><replacement pattern>"; each one below is written
// as two adjacent string literals (which the compiler concatenates), split at
// the ">>" separator so the two sides can be read independently.
//
// Several names intentionally appear more than once: they label different
// variants (e.g. aliphatic vs. aromatic atoms) of the same kind of transform.
//
// NOTE(review): the entry order is kept exactly as it was; whether consumers
// depend on that order is not visible from this fragment - confirm before
// reordering.
std::vector<std::pair<std::string, std::string>> defaultNormalizations = {
    // --- Functional group representations ---------------------------------
    // Possibly the opposite of #2.1 in the InChI technical manual. Also
    // covered by RDKit Sanitization.
    {"Nitro to N+(O-)=O",
     "[N,P,As,Sb;X3:1](=[O,S,Se,Te:2])=[O,S,Se,Te:3]"
     ">>[*+1:1]([*-1:2])=[*:3]"},
    {"Sulfone to S(=O)(=O)",
     "[S+2:1]([O-:2])([O-:3])"
     ">>[S+0:1](=[O-0:2])(=[O-0:3])"},
    {"Pyridine oxide to n+O-",
     "[nH0+0:1]=[OH0+0:2]"
     ">>[n+:1][O-:2]"},
    {"Azide to N=N+=N-",
     "[*:1][N:2]=[N:3]#[N:4]"
     ">>[*:1][N:2]=[N+:3]=[N-:4]"},
    {"Diazo/azo to =N+=N-",
     "[*:1]=[N:2]#[N:3]"
     ">>[*:1]=[N+:2]=[N-:3]"},
    {"Sulfoxide to -S+(O-)-",
     "[!O:1][S+0;X3:2](=[O:3])[!O:4]"
     ">>[*:1][S+1:2]([O-:3])[*:4]"},
    // Counterpart of #1.5 in the InChI technical manual.
    {"Phosphate to P(O-)=O",
     "[O,S,Se,Te;-1:1][P+;D4:2][O,S,Se,Te;-1:3]"
     ">>[*+0:1]=[P+0;D5:2][*-1:3]"},
    // Counterpart of #1.8 in the InChI technical manual.
    {"C/S+N to C/S=N+",
     "[C,S&!$([S+]-[O-]);X3+1:1]([NX3:2])[NX3!H0:3]"
     ">>[*+0:1]([N:2])=[N+:3]"},
    // Counterpart of #1.8 in the InChI technical manual.
    {"P+N to P=N+",
     "[P;X4+1:1]([NX3:2])[NX3!H0:3]"
     ">>[*+0:1]([N:2])=[N+:3]"},
    {"Normalize hydrazine-diazonium",
     "[CX4:1][NX3H:2]-[NX3H:3][CX4:4][NX2+:5]#[NX1:6]"
     ">>[CX4:1][NH0:2]=[NH+:3][C:4][N+0:5]=[NH:6]"},

    // --- Recombination of separated charges -------------------------------
    // Counterpart of #1.3 in the InChI technical manual.
    {"Recombine 1,3-separated charges",
     "[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[N,P,As,Sb,O,S,Se,Te;+1:3]"
     ">>[*-0:1]=[*:2]-[*+0:3]"},
    {"Recombine 1,3-separated charges",
     "[n,o,p,s;-1:1]:[a:2]=[N,O,P,S;+1:3]"
     ">>[*-0:1]:[*:2]-[*+0:3]"},
    {"Recombine 1,3-separated charges",
     "[N,O,P,S;-1:1]-[a:2]:[n,o,p,s;+1:3]"
     ">>[*-0:1]=[*:2]:[*+0:3]"},
    {"Recombine 1,5-separated charges",
     "[N,P,As,Sb,O,S,Se,Te;-1:1]-[A+0:2]=[A:3]-[A:4]=[N,P,As,Sb,O,S,Se,Te;+1:5]"
     ">>[*-0:1]=[*:2]-[*:3]=[*:4]-[*+0:5]"},
    {"Recombine 1,5-separated charges",
     "[n,o,p,s;-1:1]:[a:2]:[a:3]:[c:4]=[N,O,P,S;+1:5]"
     ">>[*-0:1]:[*:2]:[*:3]:[c:4]-[*+0:5]"},
    {"Recombine 1,5-separated charges",
     "[N,O,P,S;-1:1]-[c:2]:[a:3]:[a:4]:[n,o,p,s;+1:5]"
     ">>[*-0:1]=[c:2]:[*:3]:[*:4]:[*+0:5]"},

    // --- Conjugated cations -----------------------------------------------
    // These rules come from Francis Atkinson's standardiser; the ones that
    // can reduce aromaticity have been left out.
    {"Normalize 1,3 conjugated cation",
     "[N,O;+0!H0:1]-[A:2]=[N!$(*[O-]),O;+1H0:3]"
     ">>[*+1:1]=[*:2]-[*+0:3]"},
    {"Normalize 1,3 conjugated cation",
     "[n;+0!H0:1]:[c:2]=[N!$(*[O-]),O;+1H0:3]"
     ">>[*+1:1]:[*:2]-[*+0:3]"},
    {"Normalize 1,5 conjugated cation",
     "[N,O;+0!H0:1]-[A:2]=[A:3]-[A:4]=[N!$(*[O-]),O;+1H0:5]"
     ">>[*+1:1]=[*:2]-[*:3]=[*:4]-[*+0:5]"},
    {"Normalize 1,5 conjugated cation",
     "[n;+0!H0:1]:[a:2]:[a:3]:[c:4]=[N!$(*[O-]),O;+1H0:5]"
     ">>[n+1:1]:[*:2]:[*:3]:[*:4]-[*+0:5]"},

    // --- Remaining charge fixes -------------------------------------------
    // Counterpart of #1.6 in the InChI technical manual. RDKit Sanitization
    // already takes care of this for perchlorate.
    {"Charge normalization",
     "[F,Cl,Br,I,At;-1:1]=[O:2]"
     ">>[*-0:1][O-:2]"},
    {"Charge recombination",
     "[N,P,As,Sb;-1:1]=[C+;v3:2]"
     ">>[*+0:1]#[C+0:2]"},
};
}  // namespace defaults