File: testCharge.cpp

package info (click to toggle)
rdkit 201809.1%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 123,688 kB
  • sloc: cpp: 230,509; python: 70,501; java: 6,329; ansic: 5,427; sql: 1,899; yacc: 1,739; lex: 1,243; makefile: 445; xml: 229; fortran: 183; sh: 123; cs: 93
file content (160 lines) | stat: -rw-r--r-- 6,329 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
//
//  Copyright (C) 2018 Susan H. Leung
//
//   @@ All Rights Reserved @@
//  This file is part of the RDKit.
//  The contents are covered by the terms of the BSD license
//  which is included in the file license.txt, found at the root
//  of the RDKit source tree.
//
#include "MolStandardize.h"
#include <GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogParams.h>
#include <GraphMol/MolStandardize/AcidBaseCatalog/AcidBaseCatalogUtils.h>
#include "Charge.h"
#include <GraphMol/SmilesParse/SmilesParse.h>
#include <GraphMol/SmilesParse/SmilesWrite.h>

using namespace RDKit;
using namespace MolStandardize;

void testReionizer() {
  BOOST_LOG(rdInfoLog) << "-----------------------\n test reionizer"
                       << std::endl;

  std::string smi1, smi2, smi3, smi4, smi5, smi6, smi7;

  Reionizer reionizer;

  // Test table salt.
  smi1 = "[Na].[Cl]";
  std::shared_ptr<ROMol> m1(SmilesToMol(smi1));
  ROMOL_SPTR reionized(reionizer.reionize(*m1));
  TEST_ASSERT(MolToSmiles(*reionized) == "[Cl-].[Na+]");

  // Test forced charge correction maintaining overall neutral charge.
  smi2 = "[Na].O=C(O)c1ccccc1";
  std::shared_ptr<ROMol> m2(SmilesToMol(smi2));
  ROMOL_SPTR reionized2(reionizer.reionize(*m2));
  TEST_ASSERT(MolToSmiles(*reionized2) == "O=C([O-])c1ccccc1.[Na+]");

  // Test reionizer moves proton to weaker acid.
  smi3 = "C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O";
  std::shared_ptr<ROMol> m3(SmilesToMol(smi3));
  ROMOL_SPTR reionized3(reionizer.reionize(*m3));
  TEST_ASSERT(MolToSmiles(*reionized3) == "O=S(O)c1ccc(S(=O)(=O)[O-])cc1");

  // Test reionizer moves proton to weaker acid.
  smi5 = "C1=C(C=CC(=C1)[S]([O-])=O)[S](O)(=O)=O";
  std::shared_ptr<ROMol> m5(SmilesToMol(smi5));
  ROMOL_SPTR reionized5(reionizer.reionize(*m5));
  TEST_ASSERT(MolToSmiles(*reionized3) == "O=S(O)c1ccc(S(=O)(=O)[O-])cc1");

  // Test charged carbon doesn't get recognised as alpha-carbon-hydrogen-keto.
  smi6 = "CCOC(=O)C(=O)[CH-]C#N";
  std::shared_ptr<ROMol> m6(SmilesToMol(smi6));
  ROMOL_SPTR reionized6(reionizer.reionize(*m6));
  TEST_ASSERT(MolToSmiles(*reionized6) == "CCOC(=O)C(=O)[CH-]C#N");

  // TODO... can't make this work. Python SanitizeMol looks to correct...
  // what is different with MolOps::sanitizeMol?
  smi7 = "C[N+]1=C[CH-]N(C(=N)N)/C1=C/[N+](=O)[O-]";
  std::shared_ptr<ROMol> m7(SmilesToMol(smi7));
  ROMOL_SPTR reionized7(reionizer.reionize(*m7));
  TEST_ASSERT(MolToSmiles(*reionized7) ==
              "C[N+]1=CCN(C(=N)N)/C1=[C-]/[N+](=O)[O-]");
  BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}

void testChargeParent() {
  BOOST_LOG(rdInfoLog) << "-----------------------\n test charge parent"
                       << std::endl;
  std::string smi1, smi2, smi3, smi4, smi5, smi6, smi7, smi8, smi9, smi10,
      smi11, smi12;
  MolStandardize::CleanupParameters params;
  // initialize CleanupParameters with preferOrganic=true
  MolStandardize::CleanupParameters params_preferorg;
  params_preferorg.preferOrganic = true;

  // Test neutralization of ionized acids and bases.
  smi1 = "C(C(=O)[O-])(Cc1n[n-]nn1)(C[NH3+])(C[N+](=O)[O-])";
  std::unique_ptr<RWMol> m1(SmilesToMol(smi1));
  std::unique_ptr<RWMol> res1(MolStandardize::chargeParent(*m1, params));
  TEST_ASSERT(MolToSmiles(*res1) == "NCC(Cc1nn[nH]n1)(C[N+](=O)[O-])C(=O)O");

  // Test preservation of zwitterion.
  smi2 = "n(C)1cc[n+]2cccc([O-])c12";
  std::unique_ptr<RWMol> m2(SmilesToMol(smi2));
  std::unique_ptr<RWMol> res2(MolStandardize::chargeParent(*m2, params));
  TEST_ASSERT(MolToSmiles(*res2) == "Cn1cc[n+]2cccc([O-])c12");

  // Choline should be left with a positive charge.
  smi3 = "C[N+](C)(C)CCO";
  std::unique_ptr<RWMol> m3(SmilesToMol(smi3));
  std::unique_ptr<RWMol> res3(MolStandardize::chargeParent(*m3, params));
  TEST_ASSERT(MolToSmiles(*res3) == "C[N+](C)(C)CCO");

  // Hydrogen should be removed to give deanol as a charge parent.
  smi4 = "C[NH+](C)CCO";
  std::unique_ptr<RWMol> m4(SmilesToMol(smi4));
  std::unique_ptr<RWMol> res4(MolStandardize::chargeParent(*m4, params));
  TEST_ASSERT(MolToSmiles(*res4) == "CN(C)CCO");

  // Sodium benzoate to benzoic acid.
  smi5 = "[Na+].O=C([O-])c1ccccc1";
  std::unique_ptr<RWMol> m5(SmilesToMol(smi5));
  std::unique_ptr<RWMol> res5(MolStandardize::chargeParent(*m5, params));
  TEST_ASSERT(MolToSmiles(*res5) == "O=C(O)c1ccccc1");

  // Benzoate ion to benzoic acid.
  smi6 = "O=C([O-])c1ccccc1";
  std::unique_ptr<RWMol> m6(SmilesToMol(smi6));
  std::unique_ptr<RWMol> res6(MolStandardize::chargeParent(*m6, params));
  TEST_ASSERT(MolToSmiles(*res6) == "O=C(O)c1ccccc1");

  // Charges in histidine should be neutralized.
  smi7 = "[NH3+]C(Cc1cnc[nH]1)C(=O)[O-]";
  std::unique_ptr<RWMol> m7(SmilesToMol(smi7));
  std::unique_ptr<RWMol> res7(MolStandardize::chargeParent(*m7, params));
  TEST_ASSERT(MolToSmiles(*res7) == "NC(Cc1cnc[nH]1)C(=O)O");

  //
  smi8 = "C[NH+](C)(C).[Cl-]";
  std::unique_ptr<RWMol> m8(SmilesToMol(smi8));
  std::unique_ptr<RWMol> res8(MolStandardize::chargeParent(*m8, params));
  TEST_ASSERT(MolToSmiles(*res8) == "CN(C)C");

  // No organic fragments.
  smi9 = "[N+](=O)([O-])[O-]";
  std::unique_ptr<RWMol> m9(SmilesToMol(smi9));
  std::unique_ptr<RWMol> res9(MolStandardize::chargeParent(*m9, params));
  TEST_ASSERT(MolToSmiles(*res9) == "O=[N+]([O-])[O-]");

  // TODO switch prefer_organic=true
  // No organic fragments.
  smi10 = "[N+](=O)([O-])[O-]";
  std::unique_ptr<RWMol> m10(SmilesToMol(smi10));
  std::unique_ptr<RWMol> res10(
      MolStandardize::chargeParent(*m10, params_preferorg));
  TEST_ASSERT(MolToSmiles(*res10) == "O=[N+]([O-])[O-]");

  // Larger inorganic fragment should be chosen.
  smi11 = "[N+](=O)([O-])[O-].[CH2]";
  std::unique_ptr<RWMol> m11(SmilesToMol(smi11));
  std::unique_ptr<RWMol> res11(MolStandardize::chargeParent(*m11, params));
  TEST_ASSERT(MolToSmiles(*res11) == "O=[N+]([O-])[O-]");

  // TODO prefer_organic=true
  // Smaller organic fragment should be chosen over larger inorganic fragment.
  smi12 = "[N+](=O)([O-])[O-].[CH2]";
  std::unique_ptr<RWMol> m12(SmilesToMol(smi12));
  std::unique_ptr<RWMol> res12(
      MolStandardize::chargeParent(*m12, params_preferorg));
  TEST_ASSERT(MolToSmiles(*res12) == "[CH2]");
  BOOST_LOG(rdInfoLog) << "Finished" << std::endl;
}

// Test driver: runs the reionizer checks, then the charge-parent checks.
// Reaching the end of main is equivalent to returning 0.
int main() {
  testReionizer();
  testChargeParent();
}