1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
|
## -----------------------------------------------------------------------------
library(rcdk)
## -----------------------------------------------------------------------------
cdk.version()
## ----echo=FALSE---------------------------------------------------------------
data(bpdata)
str(bpdata)
## ---- eval=FALSE--------------------------------------------------------------
# mols <- load.molecules( c('data1.sdf', '/some/path/data2.sdf') )
## ----eval=FALSE---------------------------------------------------------------
# iter <- iload.molecules('verybig.sdf', type='sdf')
# while(hasNext(iter)) {
# mol <- nextElem(iter)
# print(get.property(mol, "cdk:Title"))
# }
## ----eval=FALSE---------------------------------------------------------------
# smile <- 'c1ccccc1CC(=O)C(N)CC1CCCCOC1'
# mol <- parse.smiles(smile)[[1]]
## ----eval=FALSE---------------------------------------------------------------
# options("java.parameters"=c("-Xmx4000m"))
# library(rcdk)
# for (smile in smiles) {
# m <- parse.smiles(smile)
# ## perform operations on this molecule
#
# jcall("java/lang/System","V","gc")
# gc()
# }
## ----eval=FALSE---------------------------------------------------------------
# write.molecules(mols, filename='mymols.sdf')
## ----eval=FALSE---------------------------------------------------------------
# write.molecules(mols, filename='mymols.sdf', together=FALSE)
## -----------------------------------------------------------------------------
smiles <- c('CCC', 'c1ccccc1', 'CCCC(C)(C)CC(=O)NC')
mols <- parse.smiles(smiles)
get.smiles(mols[[1]])
unlist(lapply(mols, get.smiles))
## -----------------------------------------------------------------------------
smiles <- c('CCC', 'c1ccccc1', 'CCc1ccccc1CC(C)(C)CC(=O)NC')
mols <- parse.smiles(smiles)
get.smiles(mols[[3]], smiles.flavors(c('UseAromaticSymbols')))
get.smiles(mols[[3]], smiles.flavors(c('Generic','CxSmiles')))
## -----------------------------------------------------------------------------
m <- parse.smiles('CCC')[[1]]
m <- generate.2d.coordinates(m)
m
get.smiles(m, smiles.flavors(c('CxSmiles')))
get.smiles(m, smiles.flavors(c('CxCoordinates')))
## ----eval=FALSE---------------------------------------------------------------
# smiles <- c('CCC', 'CCN', 'CCN(C)(C)',
# 'c1ccccc1Cc1ccccc1',
# 'C1CCC1CC(CN(C)(C))CC(=O)CC')
# mols <- parse.smiles(smiles)
# view.molecule.2d(mols[[1]])
# view.molecule.2d(mols)
## ----eval=FALSE---------------------------------------------------------------
# depictor <- get.depictor(style='cob', abbr='reagents', width=300, height=300)
# view.molecule.2d(mols[[5]], depictor=depictor)
## ----eval=FALSE---------------------------------------------------------------
# depictor <- get.depictor(style='cob', abbr='reagents', width=300, height=300)
# view.molecule.2d(mols[[5]], depictor=depictor)
# #depictor$setStyle('cow')
# #view.molecule.2d(mols[[5]], depictor=depictor)
## ----eval=FALSE---------------------------------------------------------------
# depictor <- get.depictor(style='cob', abbr='reagents', sma='N(C)(C)')
# view.molecule.2d(mols, depictor=depictor)
## ----eval=FALSE---------------------------------------------------------------
# smiles <- c('CCC', 'CCN', 'CCN(C)(C)','c1ccccc1Cc1ccccc1')
# mols <- parse.smiles(smiles)
# dframe <- data.frame(x = runif(4),
# toxicity = factor(c('Toxic', 'Toxic', 'Nontoxic', 'Nontoxic')),
# solubility = c('yes', 'yes', 'no', 'yes'))
# view.table(mols, dframe)
## -----------------------------------------------------------------------------
img <- view.image.2d(parse.smiles("B([C@H](CC(C)C)NC(=O)[C@H](CC1=CC=CC=C1)NC(=O)C2=NC=CN=C2)(O)O")[[1]])
plot(1:10, 1:10, pch=19)
rasterImage(img, 1,6, 5,10)
## -----------------------------------------------------------------------------
mol <- parse.smiles('c1ccccc1')[[1]]
set.property(mol, "title", "Molecule 1")
set.property(mol, "hvyAtomCount", 6)
get.property(mol, "title")
## -----------------------------------------------------------------------------
get.properties(mol)
## ----eval=FALSE---------------------------------------------------------------
# write.molecules(mol, 'tagged.sdf', write.props=TRUE)
## -----------------------------------------------------------------------------
mol <- parse.smiles('c1ccccc1C(Cl)(Br)c1ccccc1')[[1]]
atoms <- get.atoms(mol)
bonds <- get.bonds(mol)
cat('No. of atoms =', length(atoms), '\n')
cat('No. of bonds =', length(bonds), '\n')
## -----------------------------------------------------------------------------
unlist(lapply(atoms, get.symbol))
## -----------------------------------------------------------------------------
coords <- get.point3d(atoms[[1]])
## -----------------------------------------------------------------------------
coords <- do.call('rbind', lapply(atoms, get.point3d))
## -----------------------------------------------------------------------------
if ( any(apply(coords, 2, function(x) length(unique(x))) == 1) ) {
print("molecule is flat")
}
## -----------------------------------------------------------------------------
mols <- parse.smiles(c('CC(C)(C)C','c1ccc(Cl)cc1C(=O)O', 'CCC(N)(N)CC'))
query <- '[#6D2]'
matches(query, mols)
## -----------------------------------------------------------------------------
dc <- get.desc.categories()
dc
## -----------------------------------------------------------------------------
dn <- get.desc.names(dc[4])
dn
## ----eval=FALSE---------------------------------------------------------------
# aDesc <- eval.desc(mol, dn[4])
# allDescs <- eval.desc(mol, dn)
## -----------------------------------------------------------------------------
descNames <- unique(unlist(sapply(get.desc.categories(), get.desc.names)))
## -----------------------------------------------------------------------------
data(bpdata)
mols <- parse.smiles(bpdata[,1])
descNames <- c(
'org.openscience.cdk.qsar.descriptors.molecular.KierHallSmartsDescriptor',
'org.openscience.cdk.qsar.descriptors.molecular.APolDescriptor',
'org.openscience.cdk.qsar.descriptors.molecular.HBondDonorCountDescriptor')
descs <- eval.desc(mols, descNames)
class(descs)
dim(descs)
## -----------------------------------------------------------------------------
mol <- parse.smiles('CC(=O)CC(=O)NCN')[[1]]
convert.implicit.to.explicit(mol)
get.tpsa(mol)
get.xlogp(mol)
get.alogp(mol)
## -----------------------------------------------------------------------------
descs <- descs[, !apply(descs, 2, function(x) any(is.na(x)) )]
descs <- descs[, !apply( descs, 2, function(x) length(unique(x)) == 1 )]
r2 <- which(cor(descs)^2 > .6, arr.ind=TRUE)
r2 <- r2[ r2[,1] > r2[,2] , ]
descs <- descs[, -unique(r2[,2])]
## -----------------------------------------------------------------------------
model <- lm(BP ~ khs.sCH3 + khs.sF + apol + nHBDon, data.frame(bpdata, descs))
summary(model)
plot(bpdata$BP, predict(model, descs),
xlab="Observed BP", ylab="Predicted BP",
pch=19, xlim=c(100, 700), ylim=c(100, 700))
abline(0,1, col='red')
## -----------------------------------------------------------------------------
data(bpdata)
mols <- parse.smiles(bpdata[,1])
fps <- lapply(mols, get.fingerprint, type='circular')
## -----------------------------------------------------------------------------
fp.sim <- fingerprint::fp.sim.matrix(fps, method='tanimoto')
fp.dist <- 1 - fp.sim
## -----------------------------------------------------------------------------
cls <- hclust(as.dist(fp.dist))
plot(cls, main='A Clustering of the BP dataset', labels=FALSE)
## -----------------------------------------------------------------------------
query.mol <- parse.smiles('CC(=O)')[[1]]
target.mols <- parse.smiles(bpdata[,1])
query.fp <- get.fingerprint(query.mol, type='circular')
target.fps <- lapply(target.mols, get.fingerprint, type='circular')
sims <- data.frame(sim=do.call(rbind, lapply(target.fps,
fingerprint::distance,
fp2=query.fp, method='tanimoto')))
subset(sims, sim >= 0.3)
|