File: dplyr.Rout.save

package info (click to toggle)
r-cran-sf 0.9-7%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 6,796 kB
  • sloc: cpp: 5,333; sh: 18; makefile: 2
file content (287 lines) | stat: -rw-r--r-- 12,793 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287

R version 4.0.2 (2020-06-22) -- "Taking Off Again"
Copyright (C) 2020 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> suppressPackageStartupMessages(library(sf))
> 
> library(dplyr)

Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

> options(dplyr.summarise.inform=FALSE)
> read_sf(system.file("shape/nc.shp", package="sf"), quiet = TRUE) %>%
+ 	st_transform(3857) -> nc
> nc %>% filter(AREA > .1) %>% plot()
Warning message:
plotting the first 10 out of 14 attributes; use max.plot = 14 to plot all 
> 
> # plot 10 smallest counties in grey:
> nc %>% 
+   select(BIR74, geometry) %>% 
+   plot()
> 
> nc %>% 
+   select(AREA, geometry) %>% 
+   arrange(AREA) %>% 
+   slice(1:10) %>% 
+   plot(add = TRUE, col = 'grey', main ="")
> 
> # select: check both when geometry is part of the selection, and when not:
> nc %>% select(SID74, SID79) %>% names()
[1] "SID74"    "SID79"    "geometry"
> nc %>% select(SID74, SID79, geometry) %>% names()
[1] "SID74"    "SID79"    "geometry"
> nc %>% select(SID74, SID79) %>% class()
[1] "sf"         "tbl_df"     "tbl"        "data.frame"
> nc %>% select(SID74, SID79, geometry) %>% class()
[1] "sf"         "tbl_df"     "tbl"        "data.frame"
> 
> # group_by:
> nc$area_cl = cut(nc$AREA, c(0, .1, .12, .15, .25))
> nc %>% group_by(area_cl) %>% class()
[1] "sf"         "grouped_df" "tbl_df"     "tbl"        "data.frame"
> 
> # mutate:
> nc2 <- nc %>% mutate(area10 = AREA/10)
> 
> # transmute:
> nc %>% transmute(AREA = AREA/10, geometry = geometry) %>% class()
[1] "sf"         "tbl_df"     "tbl"        "data.frame"
> nc %>% transmute(AREA = AREA/10) %>% class()
[1] "sf"         "tbl_df"     "tbl"        "data.frame"
> 
> # rename:
> nc2 <- nc %>% rename(area = AREA)
> 
> # distinct:
> nc[c(1:100,1:10),] %>% distinct() %>% nrow()
[1] 100
> 
> # summarize:
> nc$area_cl = cut(nc$AREA, c(0, .1, .12, .15, .25))
> nc.g <- nc %>% group_by(area_cl)
> nc.g %>% summarise(mean(AREA))
Simple feature collection with 4 features and 2 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -9386880 ymin: 4012991 xmax: -8399788 ymax: 4382079
projected CRS:  WGS 84 / Pseudo-Mercator
# A tibble: 4 x 3
  area_cl    `mean(AREA)`                                               geometry
  <fct>             <dbl>                                     <MULTIPOLYGON [m]>
1 (0,0.1]          0.0760 (((-8678517 4054264, -8679088 4061405, -8680136 40615…
2 (0.1,0.12]       0.112  (((-9383227 4192541, -9375980 4199500, -9370835 41966…
3 (0.12,0.1…       0.134  (((-8520830 4108031, -8522040 4111119, -8520104 41115…
4 (0.15,0.2…       0.190  (((-8685774 4073056, -8684147 4070879, -8683670 40671…
> nc.g %>% summarize(mean(AREA)) %>% plot(col = 3:6/7)
> 
> library(tidyr)
> 
> # time-wide to long table, using tidyr::gather
> # stack the two SID columns for the July 1, 1974 - June 30, 1978 and July 1, 1979 - June 30, 1984 periods
> # (see https://cran.r-project.org/web/packages/spdep/vignettes/sids.pdf)
> nc %>% select(SID74, SID79, geometry) %>% gather("VAR", "SID", -geometry) %>% summary()
          geometry       VAR                 SID        
 MULTIPOLYGON :200   Length:200         Min.   : 0.000  
 epsg:3857    :  0   Class :character   1st Qu.: 2.000  
 +proj=merc...:  0   Mode  :character   Median : 5.000  
                                        Mean   : 7.515  
                                        3rd Qu.: 9.000  
                                        Max.   :57.000  
> 
> # spread:
> nc$row = 1:100
> nc.g <- nc %>% select(SID74, SID79, row) %>% gather("VAR", "SID", -row, -geometry)
> nc.g %>% tail()
Simple feature collection with 6 features and 3 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -8802506 ymin: 4012991 xmax: -8492268 ymax: 4166167
projected CRS:  WGS 84 / Pseudo-Mercator
# A tibble: 6 x 4
    row                                                     geometry VAR     SID
  <int>                                           <MULTIPOLYGON [m]> <chr> <dbl>
1    95 (((-8588146 4131923, -8589850 4133303, -8589356 4135198, -8… SID79     4
2    96 (((-8711999 4081959, -8719511 4077863, -8731642 4078864, -8… SID79     5
3    97 (((-8685774 4073056, -8697387 4077823, -8700120 4077570, -8… SID79     3
4    98 (((-8755885 4021935, -8802506 4069795, -8798771 4071779, -8… SID79    17
5    99 (((-8678517 4054264, -8679088 4061405, -8680136 4061550, -8… SID79     9
6   100 (((-8755885 4021935, -8753548 4025868, -8753052 4030195, -8… SID79     6
> nc.g %>% spread(VAR, SID) %>% head()
Simple feature collection with 6 features and 3 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -9099356 ymin: 4310668 xmax: -8434988 ymax: 4382079
projected CRS:  WGS 84 / Pseudo-Mercator
# A tibble: 6 x 4
    row                                                     geometry SID74 SID79
  <int>                                           <MULTIPOLYGON [m]> <dbl> <dbl>
1     1 (((-9069486 4332934, -9077066 4338201, -9079419 4338351, -9…     1     0
2     2 (((-9043562 4351030, -9043652 4352973, -9046117 4356516, -9…     0     3
3     3 (((-8956335 4334068, -8958566 4335747, -8965300 4336025, -8…     5     6
4     4 (((-8461241 4344709, -8462173 4347214, -8463902 4346972, -8…     1     2
5     5 (((-8595797 4333852, -8597683 4330212, -8604808 4329788, -8…     9     3
6     6 (((-8543185 4332878, -8569416 4332369, -8570981 4333107, -8…     7     5
> nc %>% select(SID74, SID79, geometry, row) %>% gather("VAR", "SID", -geometry, -row) %>% spread(VAR, SID) %>% head()
Simple feature collection with 6 features and 3 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -9099356 ymin: 4310668 xmax: -8434988 ymax: 4382079
projected CRS:  WGS 84 / Pseudo-Mercator
# A tibble: 6 x 4
                                                      geometry   row SID74 SID79
                                            <MULTIPOLYGON [m]> <int> <dbl> <dbl>
1 (((-9069486 4332934, -9077066 4338201, -9079419 4338351, -9…     1     1     0
2 (((-9043562 4351030, -9043652 4352973, -9046117 4356516, -9…     2     0     3
3 (((-8956335 4334068, -8958566 4335747, -8965300 4336025, -8…     3     5     6
4 (((-8461241 4344709, -8462173 4347214, -8463902 4346972, -8…     4     1     2
5 (((-8595797 4333852, -8597683 4330212, -8604808 4329788, -8…     5     9     3
6 (((-8543185 4332878, -8569416 4332369, -8570981 4333107, -8…     6     7     5
> 
> # test st_set_crs in pipe:
> sfc = st_sfc(st_point(c(0,0)), st_point(c(1,1)))
> x <- sfc %>% st_set_crs(4326) %>% st_transform(3857)
> x
Geometry set for 2 features 
geometry type:  POINT
dimension:      XY
bbox:           xmin: 0 ymin: 0 xmax: 111319.5 ymax: 111325.1
projected CRS:  WGS 84 / Pseudo-Mercator
POINT (0 0)
POINT (111319.5 111325.1)
> 
> read_sf(system.file("shape/nc.shp", package="sf"), quiet = TRUE) %>%
+ 	st_transform(3857) -> nc
> nc.merc <- st_transform(nc, 32119) # NC State Plane
> suppressPackageStartupMessages(library(units))
> install_symbolic_unit("person")
> person = as_units("person")
> nc.merc <- nc.merc %>% mutate(area = st_area(nc.merc), dens = BIR74 * person / area)
> 
> # summary(nc.merc$dens) # requires units 0.4-2
> nc.merc$area_cl <- cut(nc$AREA, c(0, .1, .12, .15, .25))
> nc.grp <- nc.merc %>% group_by(area_cl)
> 
> out <- nc.grp %>% summarise(A = sum(area), pop = sum(dens * area), 
+ 	new_dens = sum(dens * area)/sum(area)) 
> 
> # mean densities depend on grouping:
> nc.merc %>% summarize(mean(dens))
Simple feature collection with 1 feature and 1 field
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
projected CRS:  NAD83 / North Carolina
# A tibble: 1 x 2
  `mean(dens)`                                                          geometry
  [person/m^2]                                                <MULTIPOLYGON [m]>
1 2.593234e-06 (((705429.2 49248.34, 705861.5 27435.66, 698897.7 18679.88, 6485…
> out %>% summarise(mean(new_dens))
Simple feature collection with 1 feature and 1 field
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
projected CRS:  NAD83 / North Carolina
# A tibble: 1 x 2
  `mean(new_dens)`                                                      geometry
      [person/m^2]                                            <MULTIPOLYGON [m]>
1     2.589362e-06 (((724644.4 62316.58, 714305.9 49733.25, 711692 35996.55, 70…
> 
> # total densities don't:
> nc.merc %>% summarise(sum(area * dens))
Simple feature collection with 1 feature and 1 field
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
projected CRS:  NAD83 / North Carolina
# A tibble: 1 x 2
  `sum(area * dens)`                                                    geometry
            [person]                                          <MULTIPOLYGON [m]>
1             329962 (((705429.2 49248.34, 705861.5 27435.66, 698897.7 18679.88…
> out %>% summarise(sum(A * new_dens))
Simple feature collection with 1 feature and 1 field
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: 123829 ymin: 14744.69 xmax: 930521.8 ymax: 318259.9
projected CRS:  NAD83 / North Carolina
# A tibble: 1 x 2
  `sum(A * new_dens…                                                    geometry
            [person]                                          <MULTIPOLYGON [m]>
1             329962 (((724644.4 62316.58, 714305.9 49733.25, 711692 35996.55, …
> 
> conn = system.file("gpkg/nc.gpkg", package = "sf")
> 
> library(DBI)
> library(RSQLite)
> con = dbConnect(SQLite(), dbname = system.file("gpkg/nc.gpkg", package = "sf"))
> dbReadTable(con, "nc.gpkg") %>% filter(AREA > 0.2) %>% collect %>% st_sf
Simple feature collection with 11 features and 15 fields
geometry type:  MULTIPOLYGON
dimension:      XY
bbox:           xmin: -80.06441 ymin: 33.88199 xmax: -76.49254 ymax: 36.06665
CRS:            NA
First 10 features:
   fid  AREA PERIMETER CNTY_ CNTY_ID     NAME  FIPS FIPSNO CRESS_ID BIR74 SID74
1   37 0.219     2.130  1938    1938     Wake 37183  37183       92 14484    16
2   47 0.201     1.805  1968    1968 Randolph 37151  37151       76  4456     7
3   54 0.207     1.851  1989    1989 Johnston 37101  37101       51  3999     6
4   57 0.203     3.197  2004    2004 Beaufort 37013  37013        7  2692     7
5   79 0.241     2.214  2083    2083  Sampson 37163  37163       82  3025     4
6   88 0.204     1.871  2100    2100   Duplin 37061  37061       31  2483     4
7   94 0.240     2.004  2150    2150  Robeson 37155  37155       78  7889    31
8   96 0.225     2.107  2162    2162   Bladen 37017  37017        9  1782     8
9   97 0.214     2.152  2185    2185   Pender 37141  37141       71  1228     4
10  98 0.240     2.365  2232    2232 Columbus 37047  37047       24  3350    15
   NWBIR74 BIR79 SID79 NWBIR79                           geom
1     4397 20857    31    6221 MULTIPOLYGON (((-78.92107 3...
2      384  5711    12     483 MULTIPOLYGON (((-79.76499 3...
3     1165  4780    13    1349 MULTIPOLYGON (((-78.53874 3...
4     1131  2909     4    1163 MULTIPOLYGON (((-77.10377 3...
5     1396  3447     4    1524 MULTIPOLYGON (((-78.11377 3...
6     1061  2777     7    1227 MULTIPOLYGON (((-77.68983 3...
7     5904  9087    26    6899 MULTIPOLYGON (((-78.86451 3...
8      818  2052     5    1023 MULTIPOLYGON (((-78.2615 34...
9      580  1602     3     763 MULTIPOLYGON (((-78.02592 3...
10    1431  4144    17    1832 MULTIPOLYGON (((-78.65572 3...
> 
> # nest:
> storms.sf = st_as_sf(storms, coords = c("long", "lat"), crs = 4326)
> x <- storms.sf %>% group_by(name, year) %>% nest
> 
> nrow(distinct(nc[c(1,1,1,2,2,3:100),]))
[1] 100
> 
> # set.seed(1331)
> nc$gp <- sample(10, 100, replace=TRUE)
> # Get centroid of each group of polygons; https://github.com/r-spatial/sf/issues/969
> nc_gp_cent <- nc %>%
+                 group_by(gp) %>%
+                 group_map(st_area)
> 
> nc %>% st_filter(nc[1,]) %>% nrow
[1] 4
> 
> proc.time()
   user  system elapsed 
  3.233   0.065   3.286