1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
|
# naken.R:
# Like naken.collapse but don't collapse a vector of strings into a single string.
#
# e.g. c("num","sqrt(num)","ord","offset(off)")
# becomes c("num","num" "ord", "off")
naken <- function(s)
{
naked <- character(length(s))
for(i in seq_along(s))
naked[i] <- naken.collapse(s[i])
naked
}
# Collapse s to s single string and then "naken" it
# (i.e. return only the variables in the string, separated by "+").
#
# e.g. "x1" becomes "x1"
# "sqrt(x1)" becomes "x1"
# "s(x1,x4,df=4)" becomes "x1+x4"
# "sqrt(x1) as.numeric(x4)" becomes "x1"
# c("sqrt(x1)", "as.numeric(x4)") becomes "x1"
# `x 3` becomes "`x 3`" (variables in backquotes unchanged)
naken.collapse <- function(s, warn.if.minus=FALSE)
{
s <- paste.collapse(s)
s.org <- s
untouchable <- get.untouchable.for.naken(s)
s <- strip.space(untouchable$s) # strip space from everything except untouchables
# for "ident" gsubs below
if(grepl("--", s, fixed=TRUE)) # '--'causes problems because '-' gets turned to '+' below
warning0("Consecutive '-' in formula may cause problems\n Formula:", s.org)
# # check for "- ident" in formula (but -1 is ok)
#
# # commented out because this is invisible to the user, because
# # plotmo does not plot the -ident variable
#
# if(warn.if.minus && grepl("\\- *[._[:alpha:]]", s)[1])
# warnf("plotmo will include the variable prefixed by \"-\" in the formula\n Formula: %s", s)
# TODO we can't ignore "-" below because of the paste0(collapse=" + ") later below
s <- gsub("[-*/:]", "+", s) # replace - / * : with +
# next two gsubs allow us to retain "x=x1" but drop "df=99" from "bs(x=x1, df=99)"
s <- gsub("\\(._$[[:alnum:]]+=", "(", s) # replace "(ident=" with "("
s <- gsub("[._$[:alnum:]]+=[^,)]+", "", s) # delete "ident=any"
# replace ",ident" with ")+f(ident", thus "s(x0,x1)" becomes "s(x0)f(x1)"
s <- gsub(",([._$[:alpha:]])", ")+f(\\1", s)
regex <- "[._$[:alnum:]]*\\("
if(grepl(regex, s)) {
s <- gsub(regex, "", s) # replace ident(
s <- gsub("[,)][^+-]*", "", s) # remove remaining ",arg1,arg2)"
}
# s is now something like x1+x2, split it on "+" for further processing
s <- strsplit(s, "+", fixed=TRUE)[[1]]
s <- unique(s) # remove duplicates
# remove numbers e.g. sin(x1*x2/12) is nakened to x1+x1+12, we don't want the 12
is.num <- sapply(s, function(x) grepl("^([0-9]|\\.[0-9])", x))
# but keep the intercept if there is one
which1 <- which(s == "1")
is.num[which1] <- FALSE
s <- paste0(s[!is.num], collapse=" + ")
replace.untouchable.for.naken(s, untouchable$replacements)
}
# In the function naken.collapse(), terms such as [string] and `string`
# must remain the same (regardless of the enclosed string).
# That is, strings in brackets or backquotes must remain untouched.
#
# This function searches for such terms, replaces them with dummies, and
# remembers where they were in the original string (for re-replacement later).
#
# For example, if s = "x1 + x[,2] + `x 3`" we return:
#
# out: "x1 + x!00000! + !00001!" # note the dummies !00000! and !00001!
#
# replacements:
# replacement original
# "[00000]" "[,2]"
# "[00001]" "`x 3`"
get.untouchable.for.naken <- function(s) # utility for naken
{
# for efficiency, check for most common case (no [ or ` in s)
if(!grepl("[\\[\`]", s)[1])
return(list(s=s, replacements=NULL)) # no [ or ` in s
stopifnot(length(s) == 1)
# out and untouchables will be the returned string and table of untouchables
# for simplicity, create untouchables as a vec and convert to a mat at the end
out <- ""
untouchables <- NULL
cs <- strsplit(s, split="")[[1]] # split into individual chars for loop efficiency
len <- length(cs)
i <- 1
while(i <= len) {
c <- cs[i]
# i==len below is for malformed strings with extra [ or ` on end
if((c != "[" && c != "\`") || i == len) # normal character
out <- paste0(out, c)
else { # char is [ or `, skip to matching ] or `
istart <- i
nestdepth <- 0
endchar <- if(c == "[") "]" else "\`"
for(i in (istart+1):len) {
if(c == "[" && cs[i] == "[")
nestdepth <- nestdepth + 1 # nested brackets
if(cs[i] == endchar) {
if(nestdepth <= 0)
break
else
nestdepth <- nestdepth - 1
}
}
replacement <- sprint("!%05.5g!", length(untouchables) / 2)
out <- paste0(out, replacement)
untouchables <- c(untouchables, replacement, substr(s, istart, i))
}
i <- i + 1
}
if(length(untouchables)== 0) # malformed s="[" or s="`"
return(list(s=s, replacements=NULL))
replacements <- matrix(untouchables, byrow=TRUE,
ncol=2, nrow=length(untouchables) / 2)
colnames(replacements) <- c("replacement", "original")
list(s=out, replacements=replacements)
}
# undo the effect of get.untouchable.for.naken
replace.untouchable.for.naken <- function(s, replacements)
{
for(i in seq_len(NROW(replacements)))
s <- gsub(replacements[i, 1], replacements[i, 2], s, fixed=TRUE)
s
}
|