1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
# helper for fgam regression and classification
# @title Regression of functional data by Functional Generalized Additive Models.
#
# @description
# FGAM estimates a smooth function for scalar on functional regression.
# Where the target is a scalar and the covarite is functional covariate,
# and the regression is an integration with respect to a link function upon the conditional
# expectation. $g\{E(Y_i|X_i)\} = \theta_0 +\int_{\tau} F\{X_i(\tau), \tau \}d{\tau}$,
# where the smooth function $F\{X(t), t\}$(estimation surface which is fitted againt
# all X(t), t pair) is not binded to be linear with the functional predictor $X(t)$ and
# takes an additive form which could quantify how important each functional point is to the
# response. The basic form for the smooth function is penalized splines. Typical application
# for FGAM is Diffusion tensor imaging(DTI) parallel diffusivity on Corpus
# Callosum where signal is higher for voxels where diffusion is hindered for
# multiple sclerosis patient PASAT score( A cognitive measure). For each pixel,
# there could be a tensor of 3*3 symmetric matrix which results from applying gradient
# from several non-collinear directions. In FGAM, X(t) is transformed to be in the interval of
# [0,1] by cdf tranform to let the limited observation data to fill all space of the surface
# and invariant to tranformation of functional predictors. FGAM support multiple functional
# covariate. Currently, only splines are used as base learner.
fgam.ps = makeParamSet(
makeDiscreteLearnerParam(id = "basistype", values = c("te", "ti"), default = "te"),
# mgcv::te tensor(Kronecker) product smooths of X and T(mgcv::ti tensor product interaction plus marginal effect(called main effect in mgcv package))
makeIntegerVectorLearnerParam(id = "mgcv.te_ti.m", lower = 1L, special.vals = list(NA)),
# The order of the spline and its penalty (for smooth classes that use this) for each term. The original default is NA but mlr will generate warnings for this.
makeIntegerVectorLearnerParam(id = "mgcv.te_ti.k", lower = 1L, special.vals = list(NA)),
# the dimension(s) of the bases used to represent the smooth term. If not supplied then set to ?5^d?. The original default is NA but mlr will generate warnings for this.
# skipped: argvals(indices of evaluation of ?X?)
makeDiscreteLearnerParam(id = "integration", values = c("simpson", "trapezoidal", "riemann"), default = "simpson"),
# makeDiscreteLearnerParam(id = "presmooth", values = c("fpca.sc", "fpca.face", "fpca.ssvd", "fpca.bspline", "fpca.interpolate", NULL), default = NULL, special.vals = list(NULL)),
makeLogicalLearnerParam(id = "Qtransform", default = TRUE) # c.d.f transform
)
fgam.par.vals = list(basistype = "te", integration = "simpson", Qtransform = TRUE, mgcv.te_ti.m = NA, mgcv.te_ti.k = NA)
getFGAMFormulaMat = function(mdata, targetname, fns, parlist) {
formula.terms = namedList()
mat.list = namedList(fns)
# Treat functional covariates
if (hasFunctionalFeatures(mdata)) {
fdns = colnames(getFunctionalFeatures(mdata))
# later on, the grid elements in mat.list should have suffix ".grid"
fdg = namedList(fdns)
fd.grids = lapply(fdns, function(name) seq_len(ncol(mdata[, name])))
names(fd.grids) = fdns
fdg = setNames(fd.grids, stri_paste(fdns, ".grid"))
# setup mat.list: for each func covar we add its data matrix and its grid. and once the target col
# also setup character vector of formula terms for functional covariates
mat.list = namedList(fdns)
formula.terms = namedList(fdns)
# for each functional covariate
for (fdn in fdns) {
# ... create a corresponding grid name
gn = stri_paste(fdn, ".grid")
# ... extract the corresponding original data into a list of matrices
mat.list[[fdn]] = mdata[, fdn]
# ... create a formula item
# refund::af \int_{T}F(X_i(t),t)dt where refund::af means additive formula(FGAM),
# while refund::lf means linear Model (FLM)
fkm = sprintf("af(%s, basistype = '%s', Qtransform = %s, k=%s, m= %s, integration = '%s')",
fdn, parlist$basistype, parlist$Qtransform, parlist$mgcv.te_ti.k, parlist$mgcv.te_ti.m, parlist$integration)
fk = sprintf("af(%s, basistype = '%s', Qtransform = %s, k=%s, integration = '%s')",
fdn, parlist$basistype, parlist$Qtransform, parlist$mgcv.te_ti.k, parlist$integration)
fm = sprintf("af(%s, basistype = '%s', Qtransform = %s, m=%s, integration = '%s')",
fdn, parlist$basistype, parlist$Qtransform, parlist$mgcv.te_ti.m, parlist$integration)
f0 = sprintf("af(%s, basistype = '%s', Qtransform = %s, integration = '%s')",
fdn, parlist$basistype, parlist$Qtransform, parlist$integration)
mf = fkm
if (is.na(parlist$mgcv.te_ti.k)) mf = fm
if (is.na(parlist$mgcv.te_ti.m)) mf = fk
if (is.na(parlist$mgcv.te_ti.m) && is.na(parlist$mgcv.te_ti.k)) mf = f0
formula.terms[fdn] = mf
}
# add grid names
mat.list = c(mat.list, fdg)
} else {
stop("fgam does not support soley non-functional data")
}
# add target names
mat.list[[targetname]] = mdata[, targetname]
# Create the formula and train the model
form = as.formula(sprintf("%s~%s", targetname, collapse(unlist(formula.terms), "+")))
return(list(form = form, mat.list = mat.list))
}
getBinomialTarget = function(.task) {
vt = getTaskTargets(.task)
uvt = unique(vt)
dd = getTaskData(.task, target.extra = TRUE, functionals.as = "matrix")
newtarget = sapply(dd$target, function(x) {
if (x == uvt[1]) {
return(1)
}
return(0)
})
return(list(newtarget = newtarget, uvt = uvt))
}
getFDboostFormulaMat = function(.task, knots, df, bsignal.check.ident, degree, differences) {
tdata = getTaskData(.task, functionals.as = "matrix")
tn = getTaskTargetNames(.task)
formula.terms = namedList()
mat.list = namedList(getTaskFeatureNames(.task))
# Treat functional covariates
if (hasFunctionalFeatures(tdata)) {
fdns = colnames(getFunctionalFeatures(tdata))
# later on, the grid elements in mat.list should have suffix ".grid"
fdg = namedList(fdns)
fd.grids = lapply(fdns, function(name) seq_len(ncol(tdata[, name])))
names(fd.grids) = fdns
fdg = setNames(fd.grids, stri_paste(fdns, ".grid"))
# setup mat.list: for each func covar we add its data matrix and its grid. and once the target col
# also setup character vector of formula terms for functional covariates
mat.list = namedList(fdns)
formula.terms = namedList(fdns)
# for each functional covariate
for (fdn in fdns) {
# ... create a corresponding grid name
gn = stri_paste(fdn, ".grid")
# ... extract the corresponding original data into a list of matrices
mat.list[[fdn]] = tdata[, fdn]
# ... create a formula item
formula.terms[fdn] = sprintf("bsignal(%s, %s, knots = %i, df = %f, degree = %i,
differences = %i, check.ident = %s)",
fdn, gn, knots, df, degree, differences, bsignal.check.ident)
}
# add grid names
mat.list = c(mat.list, fdg)
} else {
fdns = NULL # no functional features
}
# Add formula to each scalar covariate, if there is no scalar covariate, this fd.scalars will be empty
for (fsn in setdiff(colnames(tdata), c(fdns, tn))) {
mat.list[[fsn]] = as.vector(as.matrix(tdata[, fsn, drop = FALSE]))
formula.terms[fsn] = sprintf("bbs(%s, knots = %i, df = %f, degree = %i, differences = %i)",
fsn, knots, df, degree, differences)
}
# add target names
mat.list[[tn]] = tdata[, tn]
# Create the formula and train the model
form = as.formula(sprintf("%s ~ %s", tn, collapse(unlist(formula.terms), "+")))
return(list(mat.list = mat.list, form = form))
}
|