File: Groups.Rd

package info (click to toggle)
r-cran-memisc 0.99.31.8.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 2,136 kB
  • sloc: ansic: 5,117; makefile: 2
file content (171 lines) | stat: -rw-r--r-- 5,933 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
\name{Groups}
\alias{Groups}
\alias{Groups.data.set}
\alias{Groups.data.frame}
\alias{grouped.data}
\alias{Groups.grouped.data}
\alias{with.grouped.data}
\alias{within.grouped.data}
\alias{recombine}
\alias{recombine.grouped.data.frame}
\alias{recombine.grouped.data.set}
\alias{as.data.frame.grouped.data}
\alias{as.data.set,grouped.data.frame-method}
\alias{as.data.set,grouped.data.set-method}
\alias{withGroups}
\alias{withinGroups}
\title{Operate on grouped data in data frames and data sets}
\description{\code{Groups} creates a grouped variant of an object of
  class "data.frame" or of class "data.set", for which methods for
  \code{with} and \code{within} are defined, so that these well-known
  functions can be applied "groupwise".
}
\usage{
# Create an object of class "grouped.data" from a
# data frame or a data set.
Groups(data,by,\dots)
\method{Groups}{data.frame}(data,by,\dots)
\method{Groups}{data.set}(data,by,\dots)
\method{Groups}{grouped.data}(data,by,\dots)

# Recombine grouped data into a data frame or a data set
recombine(x,\dots)
\method{recombine}{grouped.data.frame}(x,\dots)
\method{recombine}{grouped.data.set}(x,\dots)

# Recombine grouped data and coerce the result appropriately:
\method{as.data.frame}{grouped.data}(x,\dots)
\S4method{as.data.set}{grouped.data.frame}(x,row.names=NULL,\dots)
\S4method{as.data.set}{grouped.data.set}(x,row.names=NULL,\dots)

# Methods of the generics "with" and "within" for grouped data
\method{with}{grouped.data}(data,expr,\dots)
\method{within}{grouped.data}(data,expr,recombine=FALSE,\dots)

# This is equivalent to with(Groups(data,by),expr,...)
withGroups(data,by,expr,\dots)
# This is equivalent to within(Groups(data,by),expr,recombine,...)
withinGroups(data,by,expr,recombine=TRUE,\dots)
}

\arguments{
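  \item{data}{an object of class "data.frame" or "data.set" if an
    argument to \code{Groups}, \code{withGroups}, or \code{withinGroups};
    an object of class "grouped.data" if an argument to \code{with},
    \code{within}, or the "grouped.data" method of \code{Groups}.
  }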
  \item{by}{a formula with the factors the levels of which define the
    groups.}
  \item{expr}{an expression, or several expressions enclosed in curly
    braces.}
  \item{recombine}{a logical value; should the resulting grouped
    data be recombined?}
  \item{x}{an object of class "grouped.data".}
  \item{row.names}{an optional character vector with row names.}
  \item{\dots}{other arguments, ignored.}
}

\details{
    When applied to a data frame, \code{Groups} returns an object with class attributes
    "grouped.data.frame", "grouped.data", and "data.frame"; when applied to an object with class
    "data.set", it returns an object with class attributes "grouped.data.set",
    "grouped.data", and "data.set".

    When applied to objects with class attribute
    "grouped.data", both the functions \code{with()} and \code{within()}
    evaluate \code{expr} separately for each group defined by
    \code{Groups}. \code{with()} returns an array composed of the results
    of \code{expr}, while \code{within()} returns a modified copy of its
    \code{data} argument, which will be a "grouped.data" object
    ("grouped.data.frame" or "grouped.data.set"), unless the argument
    \code{recombine=TRUE} is set.

    The expression \code{expr} may contain references to the variables
    \code{n_}, \code{N_}, and \code{i_}. \code{n_} is equal to the size of
    the respective group (the number of rows belonging to it), while
    \code{N_} is equal to the total number of observations in all
    groups. The variable \code{i_} contains the indices of the rows
    belonging to the respective group of observations.
}

\examples{
some.data <- data.frame(x=rnorm(n=100))
some.data <- within(some.data,{
    f <- factor(rep(1:4,each=25),labels=letters[1:4])
    g <- factor(rep(1:5,each=4,5),labels=LETTERS[1:5])
    y <- x + rep(1:4,each=25) +  0.75*rep(1:5,each=4,5)
})

# For demonstration purposes, we create an
# 'empty' group:
some.data <- subset(some.data,
                       f!="a" | g!="C")

some.grouped.data <- Groups(some.data,
                           ~f+g)    

# Computing the means of y for each combination of f and g
group.means <- with(some.grouped.data,
                    mean(y))
group.means

# Obtaining a groupwise centered variant of y
some.grouped.data <- within(some.grouped.data,{
    y.cent <- y - mean(y)
},recombine=FALSE)

# The groupwise centered variable should have zero mean
# within each group
group.means <- with(some.grouped.data,
                    round(mean(y.cent),15))
group.means

# The following demonstrates the use of n_, N_, and i_
# An external copy of y
y1 <- some.data$y
group.means.n <- with(some.grouped.data,
                      c(mean(y),  # Group means for y
                        n_,       # Group sizes
                        sum(y)/n_,# Group means for y
                        n_/N_,    # Relative group sizes
                        sum(y1)/N_,# NOT the grand mean
                        sum(y1[i_])/n_)) # Group mean for y1
group.means.n

# Names can be attached to the groupwise results
with(some.grouped.data,
     c(Centered=round(mean(y.cent),15),
       Uncentered=mean(y)))

some.data.ungrouped <- recombine(some.grouped.data)
str(some.data.ungrouped)

# It all works with "data.set" objects
some.dataset <- as.data.set(some.data)
some.grouped.dataset <- Groups(some.dataset,~f+g)

with(some.grouped.dataset,
     c(Mean=mean(y),
       Variance=var(y)))

# The following two expressions are equivalent:
with(Groups(some.data,~f+g),mean(y))
withGroups(some.data,~f+g,mean(y))

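# The following two expressions are equivalent, except that within()
# defaults to recombine=FALSE whereas withinGroups() defaults to recombine=TRUE: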
some.data <- within(Groups(some.data,~f+g),{
    y.cent <- y - mean(y)
    y.cent.1 <- y - sum(y)/n_
})

some.data <- withinGroups(some.data,~f+g,{
    y.cent <- y - mean(y)
    y.cent.1 <- y - sum(y)/n_
})

# Both variants of groupwise centered variables should
# have zero groupwise means:
withGroups(some.data,~f+g,{
    c(round(mean(y.cent),15),
      round(mean(y.cent.1),15))
})

}