File: naken.R

package info (click to toggle)
r-cran-plotmo 3.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,400 kB
  • sloc: sh: 13; makefile: 2
file content (147 lines) | stat: -rw-r--r-- 5,813 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# naken.R:

# Like naken.collapse but don't collapse a vector of strings into a single string.
#
# e.g.    c("num","sqrt(num)","ord","offset(off)")
# becomes c("num","num"       "ord",        "off")

naken <- function(s)
{
    naked <- character(length(s))
    for(i in seq_along(s))
        naked[i] <- naken.collapse(s[i])
    naked
}
# Collapse s to s single string and then "naken" it
# (i.e. return only the variables in the string, separated by "+").
#
# e.g. "x1"                            becomes "x1"
#      "sqrt(x1)"                      becomes "x1"
#      "s(x1,x4,df=4)"                 becomes "x1+x4"
#      "sqrt(x1) as.numeric(x4)"       becomes "x1"
#      c("sqrt(x1)", "as.numeric(x4)") becomes "x1"
#      `x 3`                           becomes "`x 3`" (variables in backquotes unchanged)

naken.collapse <- function(s, warn.if.minus=FALSE)
{
    s <- paste.collapse(s)
    s.org <- s
    untouchable <- get.untouchable.for.naken(s)
    s <- strip.space(untouchable$s) # strip space from everything except untouchables
                                    # for "ident" gsubs below

    if(grepl("--", s, fixed=TRUE)) # '--'causes problems because '-' gets turned to '+' below
        warning0("Consecutive '-' in formula may cause problems\n         Formula:", s.org)

    # # check for "- ident" in formula (but -1 is ok)
    #
    # # commented out because this is invisible to the user, because
    # # plotmo does not plot the -ident variable
    #
    # if(warn.if.minus && grepl("\\- *[._[:alpha:]]", s)[1])
    #     warnf("plotmo will include the variable prefixed by \"-\" in the formula\n         Formula: %s", s)

    # TODO we can't ignore "-" below because of the paste0(collapse=" + ") later below
    s <- gsub("[-*/:]", "+", s)                 # replace - / * : with +

    # next two gsubs allow us to retain "x=x1" but drop "df=99" from "bs(x=x1, df=99)"

    s <- gsub("\\(._$[[:alnum:]]+=", "(", s)    # replace "(ident=" with "("
    s <- gsub("[._$[:alnum:]]+=[^,)]+", "", s)  # delete "ident=any"

    # replace ",ident" with ")+f(ident", thus "s(x0,x1)" becomes "s(x0)f(x1)"
    s <- gsub(",([._$[:alpha:]])", ")+f(\\1", s)

    regex <- "[._$[:alnum:]]*\\("
    if(grepl(regex, s)) {
        s <- gsub(regex, "", s)                 # replace ident(
        s <- gsub("[,)][^+-]*", "", s)          # remove remaining ",arg1,arg2)"
    }
    # s is now something like x1+x2, split it on "+" for further processing
    s <- strsplit(s, "+", fixed=TRUE)[[1]]

    s <- unique(s) # remove duplicates
    # remove numbers e.g. sin(x1*x2/12) is nakened to x1+x1+12, we don't want the 12
    is.num <- sapply(s, function(x) grepl("^([0-9]|\\.[0-9])", x))
    # but keep the intercept if there is one
    which1 <- which(s == "1")
    is.num[which1] <- FALSE
    s <- paste0(s[!is.num], collapse=" + ")

    replace.untouchable.for.naken(s, untouchable$replacements)
}
# In the function naken.collapse(), terms such as [string] and `string`
# must remain the same (regardless of the enclosed string).
# That is, strings in brackets or backquotes must remain untouched.
#
# This function searches for such terms, replaces them with dummies, and
# remembers where they were in  the original string (for re-replacement later).
#
# For example, if s = "x1 + x[,2] + `x 3`" we return:
#
#     out: "x1 + x!00000! + !00001!"    # note the dummies !00000! and !00001!
#
#     replacements:
#         replacement   original
#           "[00000]"     "[,2]"
#           "[00001]"    "`x 3`"

get.untouchable.for.naken <- function(s) # utility for naken
{
    # for efficiency, check for most common case (no [ or ` in s)
    if(!grepl("[\\[\`]", s)[1])
        return(list(s=s, replacements=NULL)) # no [ or ` in s

    stopifnot(length(s) == 1)

    # out and untouchables will be the returned string and table of untouchables
    # for simplicity, create untouchables as a vec and convert to a mat at the end
    out <- ""
    untouchables <- NULL

    cs <- strsplit(s, split="")[[1]] # split into individual chars for loop efficiency
    len <- length(cs)
    i <- 1
    while(i <= len) {
        c <- cs[i]
        # i==len below is for malformed strings with extra [ or ` on end
        if((c != "[" && c != "\`") || i == len) # normal character
            out <- paste0(out, c)
        else {                                  # char is [ or `, skip to matching ] or `
            istart <- i
            nestdepth <- 0
            endchar <- if(c == "[") "]" else "\`"
            for(i in (istart+1):len) {
                if(c == "[" && cs[i] == "[")
                    nestdepth <- nestdepth + 1 # nested brackets
                if(cs[i] == endchar) {
                    if(nestdepth <= 0)
                        break
                    else
                        nestdepth <- nestdepth - 1
                }
            }
            replacement <- sprint("!%05.5g!", length(untouchables) / 2)
            out <- paste0(out, replacement)
            untouchables <- c(untouchables, replacement, substr(s, istart, i))
        }
        i <- i + 1
    }
    if(length(untouchables)== 0) # malformed s="[" or s="`"
        return(list(s=s, replacements=NULL))

    replacements <- matrix(untouchables, byrow=TRUE,
                           ncol=2, nrow=length(untouchables) / 2)

    colnames(replacements) <- c("replacement", "original")

    list(s=out, replacements=replacements)
}
# undo the effect of get.untouchable.for.naken

replace.untouchable.for.naken <- function(s, replacements)
{
    for(i in seq_len(NROW(replacements)))
        s <- gsub(replacements[i, 1], replacements[i, 2], s, fixed=TRUE)
    s
}