File: singles.R

package info (click to toggle)
r-cran-plotmo 3.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,400 kB
  • sloc: sh: 13; makefile: 2
file content (187 lines) | stat: -rw-r--r-- 8,479 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# singles.R:  plotmo.singles and plotmo.pairs

#------------------------------------------------------------------------------
# Return a vector of indices of predictors for degree1 plots, e.g, c(1,3,4).
# The indices are col numbers in the x matrix.  The caller will sort the
# returned vector and remove duplicates.  The default method simply
# returns the indices of all predictors.  The object specific methods
# typically return only the predictors actually used in the model.
#
# Note on the x argument:
#   If the formula is     resp ~ num + sqrt(num) + bool + ord:num + fac
#   then colnames(x) is   num bool ord fac

plotmo.singles <- function(object, x, nresponse, trace, all1, ...)
{
    UseMethod("plotmo.singles")
}
plotmo.singles.default <- function(object, x, nresponse, trace, all1, ...)
{
    seq_len(NCOL(x))
}
#------------------------------------------------------------------------------
# Get the pairs of predictors to be displayed in degree2 plots.
# Each row of the returned pairs matrix is the indices of two predictors
# for a degree2 plot. Example (this was returned from plotmo.pairs.rpart):
#
#    1    2
#    1    2
#    2    1
#
# The indices are col numbers in the x matrix.  The caller will remove
# duplicated pairs and re-order the pairs on the order of the predictors
# in the original call to the model function.  The above example will
# become simply
#
#    1    2
#
# It is ok to return NULL or a matrix with zero rows.

plotmo.pairs <- function(object, x, nresponse=1, trace=0, all2=FALSE, ...)
{
    UseMethod("plotmo.pairs")
}
# Predictors x1 and x2 are considered paired if they appear in
# the formula in forms such as x1:x2 or I(x1*x2) or s(x1,x2)
#
# We use both formula(object) and attr(terms(object), "term.labels").
# formula(object) is necessary for gam formula like "s(x,v1)" because it
# appears in attr(terms,"term.labels") as "x" "v1" (i.e. as unpaired).
# But our rudimentary parsing of the formula is not reliable, so we also
# use the term.labels.  An lm formula like Volume~(Girth*Height2)-Height
# has term.labels "Girth" "Height2" "Girth:Height2"

plotmo.pairs.default <- function(object, x, nresponse, trace, all2, ...)
{
    formula.vars <- NULL
    formula <- try(formula(object), silent=trace < 2)
    if(is.try.err(formula) || is.null(formula))
        trace2(trace,
               "formula(object) failed for %s object in plotmo.pairs.default\n",
               class.as.char(object))
    else {
        trace2(trace, "formula(object) returned %s\n",
               paste.trunc(format(formula), maxlen=100))
        # Note that formula() returns a formula with "." expanded.
        # After as.character: [1] is "~", [2] is lhs, and [3] is rhs
        rhs <- as.character(formula(object))[3] # rhs of formula

        # Sep 2020: removed code below because a `var` may have a "-" in its name
        # if(grepl("\\-", rhs)) { # "-" in formula?
        #     # formula() gives "(Girth + Height)-Height" for Volume~.-Height
        #     rhs <- sub("\\-.*", "", rhs)    # delete "-" and all after
        #     rhs <- gsub("\\(|\\)", "", rhs) # delete ( and )
        # }

        formula.vars <- unlist(strsplit(rhs, "+", fixed=TRUE))
        formula.vars <- gsub("^ +| +$", "", formula.vars) # trim leading and trailing spaces
        trace2(trace, "formula.vars %s\n", quotify.trunc(formula.vars))
    }
    term.labels <- NULL
    terms <- try(terms(object), silent=trace < 2)
    if(is.try.err(terms) || is.null(terms))
        trace2(trace,
               "terms(object) failed for %s object in plotmo.pairs.default\n",
               class.as.char(object))
    else {
        term.labels <- attr(terms, "term.labels")
        if(is.null(term.labels))
            trace2(trace,
                   "attr(terms,\"term.labels\") is NULL in plotmo.pairs.default\n")
        else
            trace2(trace, "term.labels %s\n", quotify.trunc(term.labels, maxlen=100))
    }
    if(is.null(formula.vars) && is.null(term.labels))
        return(NULL)
    plotmo_pairs_from_term_labels(c(formula.vars, term.labels), colnames(x), trace)
}
get.all.pairs.from.singles <- function(object, x, trace, all2)
{
    singles <- plotmo.singles(object, x, nresponse=1, trace, all1=TRUE)
    if(length(singles) == 0)
        return(NULL)    # no pairs (must be an intercept only model)
    if(any(is.na(singles))) {
        # We already issued warning0("NA in singles, will plot all variables")
        singles <- seq_len(NCOL(x)) # plot all pairs
    }
    singles <- unique(singles)
    if(all2 >= 2) {
        max <- 20      # note that 20 * 19 / 2 is 120 plots
        if(length(singles) > max) {
            warning0("too many predictors to plot all pairs,\n         ",
                     "so plotting degree2 plots for just the first ",
                     max, " predictors.")
            singles <- singles[1:max]
        }
    } else {
        max <- 7        # note that  7 * 6 / 2 is 21 plots
        if(all2 && length(singles) > max) {
            warning0("too many predictors to plot all pairs,\n         ",
                     "so plotting degree2 plots for just the first ",
                     max, " predictors.\n         ",
"Call plotmo with all2=2 to plot degree2 plots for up to 20 predictors.")
            singles <- singles[1:max]
        }
    }
    form.pairs(singles)
}
form.pairs <- function(varnames) # return a two column matrix, each row is a pair
{
    col1 <- rep(varnames, times=length(varnames))
    col2 <- rep(varnames, each=length(varnames))
    pairs <- cbind(col1, col2)
    pairs[col1 != col2, , drop=FALSE]
}
# Given the term.labels, return a npairs x 2 matrix specifying which predictors
# are paired. The elements in the returned matrix are column indices of x.
#
# This routine is not infallible but works for the commonly used formulas.
# It works by extracting substrings in each term.label that looks like a
# predictor pair.  The following combos of x1 and x2 for example are
# considered pairs: x1*x2, x1:x2, s(x1,x2), and similar.

plotmo_pairs_from_term_labels <- function(term.labels, pred.names, trace, ...)
{
    trace2(trace, "plotmo_pairs_from_term_labels\n")
    trace2(trace, "term.labels: %s\n", quotify.trunc(term.labels, maxlen=100))
    trace2(trace, "pred.names:  %s\n", quotify.trunc(pred.names, maxlen=100))
    pairs <- matrix(0, nrow=0, ncol=2)          # no pairs initially
    for(i in 1:length(term.labels)) {
        untouchable <- get.untouchable.for.naken(term.labels[i])
        if(NROW(untouchable$replacements)) {
            # weird variable name (backquoted in formula handling) e.g. `sexmale*h(16-age)`
            # the gregexpr below won't work because of spaces etc. in the variable name
            warnf("Cannot determine which variables to plot in degree2 plots (use all2=TRUE?)\n         Confused by variable name %s",
                  quotify.trunc(term.labels[i])[1])
            return(pairs)
        }
        s <- strip.space(term.labels[i])
        s <- gsub("[+*/,]", ":", s)             # replace + * / , with :
        s <- gsub("=[^,)]+", "", s)             # delete "=any"

        # get the indices of expressions of the form "ident1:ident2"
        igrep <- gregexpr("[._$[:alnum:]]+:[._$[:alnum:]]+", s)[[1]]

        trace2(trace, "considering %s", s)

        if(igrep[1] > 0) for(i in seq_along(igrep)) {
            # extract the i'th "ident1:ident2" into pair
            start <- igrep[i]
            stop <- start + attr(igrep, "match.length")[i] - 1
            pair <- substr(s, start=start, stop=stop)
            pair <- strsplit(pair, ":")[[1]]    # pair is now c("ident1","ident2")
            # are the variables in the candidate pair in pred.names?
            ipred1 <- which(pred.names == pair[1])
            ipred2 <- which(pred.names == pair[2])
            trace2(trace, " ->%s%s",
                if(length(ipred1))
                    sprint(" %g=%s", ipred1, pred.names[ipred1]) else "",
                if(length(ipred2))
                    sprint(" %g=%s", ipred2, pred.names[ipred2]) else "")
            if(length(ipred1) == 1 && length(ipred2) == 1 && pair[1] != pair[2])
                pairs <- c(pairs, ipred1, ipred2)
        }
        trace2(trace, "\n")
    }
    matrix(pairs, ncol=2, byrow=TRUE)
}