File: programming.html

package info (click to toggle)
r-cran-dplyr 1.1.4-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,292 kB
  • sloc: cpp: 1,403; sh: 17; makefile: 7
file content (812 lines) | stat: -rw-r--r-- 50,610 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
<!DOCTYPE html>

<html lang="en">

<head>

<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />

<meta name="viewport" content="width=device-width, initial-scale=1" />



<title>Programming with dplyr</title>

<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  // First child element of every section wrapper (div.section with a "level" class).
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var headingTag = /^h[1-6]$/i;
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are stripped; any other leading element is left untouched.
    if (!headingTag.test(node.tagName)) return;
    // The attribute collection shrinks as entries are removed, so keep
    // removing the first entry until nothing is left.
    var attrs = node.attributes;
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
</script>

<style type="text/css">
/* Helper classes: wrapping inline code, small caps, underline, half-width
   inline-block columns, hanging indents, and task lists without bullets. */
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>



<style type="text/css">
/* Keep whitespace in code exactly as written (no wrapping) and let
   .sourceCode content overflow its box visibly instead of being clipped. */
code {
white-space: pre;
}
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
/* NOTE(review): empty rule. Presumably the slot where a highlighting theme's
   color/background-color would be emitted; the script following this
   stylesheet looks for a div.sourceCode rule that sets either property and
   re-targets it to pre.sourceCode. Confirm against the pandoc template. */
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } 
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } 
code span.at { color: #7d9029; } 
code span.bn { color: #40a070; } 
code span.bu { color: #008000; } 
code span.cf { color: #007020; font-weight: bold; } 
code span.ch { color: #4070a0; } 
code span.cn { color: #880000; } 
code span.co { color: #60a0b0; font-style: italic; } 
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } 
code span.do { color: #ba2121; font-style: italic; } 
code span.dt { color: #902000; } 
code span.dv { color: #40a070; } 
code span.er { color: #ff0000; font-weight: bold; } 
code span.ex { } 
code span.fl { color: #40a070; } 
code span.fu { color: #06287e; } 
code span.im { color: #008000; font-weight: bold; } 
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } 
code span.kw { color: #007020; font-weight: bold; } 
code span.op { color: #666666; } 
code span.ot { color: #007020; } 
code span.pp { color: #bc7a00; } 
code span.sc { color: #4070a0; } 
code span.ss { color: #bb6688; } 
code span.st { color: #4070a0; } 
code span.va { color: #19177c; } 
code span.vs { color: #4070a0; } 
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } 
</style>
<script>
// Apply pandoc's div.sourceCode color/background styling to pre.sourceCode instead.
// Runs once when the script is parsed and only touches stylesheets whose owner
// element carries data-origin="pandoc".
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    var sheet = sheets[i];
    var owner = sheet.ownerNode;
    // A sheet may have no owner node, or an owner without `dataset`; skip it
    // rather than throwing and leaving the remaining sheets unprocessed.
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    var rules;
    // Sheets whose rule list cannot be read are skipped.
    try { rules = sheet.cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      // Only plain style rules whose selector is exactly div.sourceCode qualify.
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      // Leave the rule alone unless it sets color or background-color.
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      // Swap the rule for a pre.sourceCode rule at the same index. j is not
      // advanced here: the next pass sees the new selector and moves on.
      sheet.deleteRule(j);
      sheet.insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>




<style type="text/css">body {
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
/* Horizontal rule: a single 1px top line; every other border is cleared.
   (A border-style declaration placed before the `border` shorthand would be
   reset by it, so none is needed.) */
hr {
border: none;
border-top: 1px solid #777;
margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap; 
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
/* Images: white mat with a thin rounded grey frame and horizontal spacing.
   Only one `border` declaration is kept; an earlier duplicate would be
   overridden by the later one in the same rule. */
img {
background-color: #FFFFFF;
padding: 2px;
border-radius: 3px;
border: 1px solid #CCCCCC;
margin: 0 5px;
}
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }

code > span.kw { color: #555; font-weight: bold; } 
code > span.dt { color: #902000; } 
code > span.dv { color: #40a070; } 
code > span.bn { color: #d14; } 
code > span.fl { color: #d14; } 
code > span.ch { color: #d14; } 
code > span.st { color: #d14; } 
code > span.co { color: #888888; font-style: italic; } 
code > span.ot { color: #007020; } 
code > span.al { color: #ff0000; font-weight: bold; } 
code > span.fu { color: #900; font-weight: bold; } 
code > span.er { color: #a61717; background-color: #e3d2d2; } 
</style>




</head>

<body>




<h1 class="title toc-ignore">Programming with dplyr</h1>



<div id="introduction" class="section level2">
<h2>Introduction</h2>
<p>Most dplyr verbs use <strong>tidy evaluation</strong> in some way.
Tidy evaluation is a special type of non-standard evaluation used
throughout the tidyverse. There are two basic forms found in dplyr:</p>
<ul>
<li><p><code>arrange()</code>, <code>count()</code>,
<code>filter()</code>, <code>group_by()</code>, <code>mutate()</code>,
and <code>summarise()</code> use <strong>data masking</strong> so that
you can use data variables as if they were variables in the environment
(i.e. you write <code>my_variable</code> not
<code>df$my_variable</code>).</p></li>
<li><p><code>across()</code>, <code>relocate()</code>,
<code>rename()</code>, <code>select()</code>, and <code>pull()</code>
use <strong>tidy selection</strong> so you can easily choose variables
based on their position, name, or type
(e.g. <code>starts_with(&quot;x&quot;)</code> or
<code>where(is.numeric)</code>).</p></li>
</ul>
<p>To determine whether a function argument uses data masking or tidy
selection, look at the documentation: in the arguments list, you’ll see
<code>&lt;data-masking&gt;</code> or
<code>&lt;tidy-select&gt;</code>.</p>
<p>Data masking and tidy selection make interactive data exploration
fast and fluid, but they add some new challenges when you attempt to use
them indirectly such as in a for loop or a function. This vignette shows
you how to overcome those challenges. We’ll first go over the basics of
data masking and tidy selection, talk about how to use them indirectly,
and then show you a number of recipes to solve common problems.</p>
<p>This vignette will give you the minimum knowledge you need to be an
effective programmer with tidy evaluation. If you’d like to learn more
about the underlying theory, or precisely how it’s different from
non-standard evaluation, we recommend that you read the Metaprogramming
chapters in <a href="https://adv-r.hadley.nz"><em>Advanced
R</em></a>.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span></code></pre></div>
</div>
<div id="data-masking" class="section level2">
<h2>Data masking</h2>
<p>Data masking makes data manipulation faster because it requires less
typing. In most (but not all<a href="#fn1" class="footnote-ref" id="fnref1"><sup>1</sup></a>) base R functions you need to refer to
variables with <code>$</code>, leading to code that repeats the name of
the data frame many times:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" tabindex="-1"></a>starwars[starwars<span class="sc">$</span>homeworld <span class="sc">==</span> <span class="st">&quot;Naboo&quot;</span> <span class="sc">&amp;</span> starwars<span class="sc">$</span>species <span class="sc">==</span> <span class="st">&quot;Human&quot;</span>, ]</span></code></pre></div>
<p>The dplyr equivalent of this code is more concise because data
masking means you only need to type <code>starwars</code> once:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" tabindex="-1"></a>starwars <span class="sc">%&gt;%</span> <span class="fu">filter</span>(homeworld <span class="sc">==</span> <span class="st">&quot;Naboo&quot;</span>, species <span class="sc">==</span> <span class="st">&quot;Human&quot;</span>)</span></code></pre></div>
<div id="data--and-env-variables" class="section level3">
<h3>Data- and env-variables</h3>
<p>The key idea behind data masking is that it blurs the line between
the two different meanings of the word “variable”:</p>
<ul>
<li><p><strong>env-variables</strong> are “programming” variables that
live in an environment. They are usually created with
<code>&lt;-</code>.</p></li>
<li><p><strong>data-variables</strong> are “statistical” variables that
live in a data frame. They usually come from data files
(e.g. <code>.csv</code>, <code>.xls</code>), or are created manipulating
existing variables.</p></li>
</ul>
<p>To make those definitions a little more concrete, take this piece of
code:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" tabindex="-1"></a>df <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">x =</span> <span class="fu">runif</span>(<span class="dv">3</span>), <span class="at">y =</span> <span class="fu">runif</span>(<span class="dv">3</span>))</span>
<span id="cb4-2"><a href="#cb4-2" tabindex="-1"></a>df<span class="sc">$</span>x</span>
<span id="cb4-3"><a href="#cb4-3" tabindex="-1"></a><span class="co">#&gt; [1] 0.08075014 0.83433304 0.60076089</span></span></code></pre></div>
<p>It creates an env-variable, <code>df</code>, that contains two
data-variables, <code>x</code> and <code>y</code>. Then it extracts the
data-variable <code>x</code> out of the env-variable <code>df</code>
using <code>$</code>.</p>
<p>I think this blurring of the meaning of “variable” is a really nice
feature for interactive data analysis because it allows you to refer to
data-vars as is, without any prefix. And this seems to be fairly
intuitive since many newer R users will attempt to write
<code>diamonds[x == 0 | y == 0, ]</code>.</p>
<p>Unfortunately, this benefit does not come for free. When you start to
program with these tools, you’re going to have to grapple with the
distinction. This will be hard because you’ve never had to think about
it before, so it’ll take a while for your brain to learn these new
concepts and categories. However, once you’ve teased apart the idea of
“variable” into data-variable and env-variable, I think you’ll find it
fairly straightforward to use.</p>
</div>
<div id="indirection" class="section level3">
<h3>Indirection</h3>
<p>The main challenge of programming with functions that use data
masking arises when you introduce some indirection, i.e. when you want
to get the data-variable from an env-variable instead of directly typing
the data-variable’s name. There are two main cases:</p>
<ul>
<li><p>When you have the data-variable in a function argument (i.e. an
env-variable that holds a promise<a href="#fn2" class="footnote-ref" id="fnref2"><sup>2</sup></a>), you need to <strong>embrace</strong> the
argument by surrounding it in doubled braces, like
<code>filter(df, {{ var }})</code>.</p>
<p>The following function uses embracing to create a wrapper around
<code>summarise()</code> that computes the minimum and maximum values of
a variable, as well as the number of observations that were
summarised:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" tabindex="-1"></a>var_summary <span class="ot">&lt;-</span> <span class="cf">function</span>(data, var) {</span>
<span id="cb5-2"><a href="#cb5-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span></span>
<span id="cb5-3"><a href="#cb5-3" tabindex="-1"></a>    <span class="fu">summarise</span>(<span class="at">n =</span> <span class="fu">n</span>(), <span class="at">min =</span> <span class="fu">min</span>({{ var }}), <span class="at">max =</span> <span class="fu">max</span>({{ var }}))</span>
<span id="cb5-4"><a href="#cb5-4" tabindex="-1"></a>}</span>
<span id="cb5-5"><a href="#cb5-5" tabindex="-1"></a>mtcars <span class="sc">%&gt;%</span> </span>
<span id="cb5-6"><a href="#cb5-6" tabindex="-1"></a>  <span class="fu">group_by</span>(cyl) <span class="sc">%&gt;%</span> </span>
<span id="cb5-7"><a href="#cb5-7" tabindex="-1"></a>  <span class="fu">var_summary</span>(mpg)</span></code></pre></div></li>
<li><p>When you have an env-variable that is a character vector, you
need to index into the <code>.data</code> pronoun with <code>[[</code>,
like <code>summarise(df, mean = mean(.data[[var]]))</code>.</p>
<p>The following example uses <code>.data</code> to count the number of
unique values in each variable of <code>mtcars</code>:</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" tabindex="-1"></a><span class="cf">for</span> (var <span class="cf">in</span> <span class="fu">names</span>(mtcars)) {</span>
<span id="cb6-2"><a href="#cb6-2" tabindex="-1"></a>  mtcars <span class="sc">%&gt;%</span> <span class="fu">count</span>(.data[[var]]) <span class="sc">%&gt;%</span> <span class="fu">print</span>()</span>
<span id="cb6-3"><a href="#cb6-3" tabindex="-1"></a>}</span></code></pre></div>
<p>Note that <code>.data</code> is not a data frame; it’s a special
construct, a pronoun, that allows you to access the current variables
either directly, with <code>.data$x</code> or indirectly with
<code>.data[[var]]</code>. Don’t expect other functions to work with
it.</p></li>
</ul>
</div>
<div id="name-injection" class="section level3">
<h3>Name injection</h3>
<p>Many data masking functions also use dynamic dots, which gives you
another useful feature: generating names programmatically by using
<code>:=</code> instead of <code>=</code>. There are two basic forms,
as illustrated below with <code>tibble()</code>:</p>
<ul>
<li><p>If you have the name in an env-variable, you can use glue syntax
to interpolate it in:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" tabindex="-1"></a>name <span class="ot">&lt;-</span> <span class="st">&quot;susan&quot;</span></span>
<span id="cb7-2"><a href="#cb7-2" tabindex="-1"></a><span class="fu">tibble</span>(<span class="st">&quot;{name}&quot;</span> <span class="sc">:=</span> <span class="dv">2</span>)</span>
<span id="cb7-3"><a href="#cb7-3" tabindex="-1"></a><span class="co">#&gt; # A tibble: 1 × 1</span></span>
<span id="cb7-4"><a href="#cb7-4" tabindex="-1"></a><span class="co">#&gt;   susan</span></span>
<span id="cb7-5"><a href="#cb7-5" tabindex="-1"></a><span class="co">#&gt;   &lt;dbl&gt;</span></span>
<span id="cb7-6"><a href="#cb7-6" tabindex="-1"></a><span class="co">#&gt; 1     2</span></span></code></pre></div></li>
<li><p>If the name should be derived from a data-variable in an
argument, you can use embracing syntax:</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" tabindex="-1"></a>my_df <span class="ot">&lt;-</span> <span class="cf">function</span>(x) {</span>
<span id="cb8-2"><a href="#cb8-2" tabindex="-1"></a>  <span class="fu">tibble</span>(<span class="st">&quot;{{x}}_2&quot;</span> <span class="sc">:=</span> x <span class="sc">*</span> <span class="dv">2</span>)</span>
<span id="cb8-3"><a href="#cb8-3" tabindex="-1"></a>}</span>
<span id="cb8-4"><a href="#cb8-4" tabindex="-1"></a>my_var <span class="ot">&lt;-</span> <span class="dv">10</span></span>
<span id="cb8-5"><a href="#cb8-5" tabindex="-1"></a><span class="fu">my_df</span>(my_var)</span>
<span id="cb8-6"><a href="#cb8-6" tabindex="-1"></a><span class="co">#&gt; # A tibble: 1 × 1</span></span>
<span id="cb8-7"><a href="#cb8-7" tabindex="-1"></a><span class="co">#&gt;   my_var_2</span></span>
<span id="cb8-8"><a href="#cb8-8" tabindex="-1"></a><span class="co">#&gt;      &lt;dbl&gt;</span></span>
<span id="cb8-9"><a href="#cb8-9" tabindex="-1"></a><span class="co">#&gt; 1       20</span></span></code></pre></div></li>
</ul>
<p>Learn more in <code>?rlang::`dyn-dots`</code>.</p>
</div>
</div>
<div id="tidy-selection" class="section level2">
<h2>Tidy selection</h2>
<p>Data masking makes it easy to compute on values within a dataset.
Tidy selection is a complementary tool that makes it easy to work with
the columns of a dataset.</p>
<div id="the-tidyselect-dsl" class="section level3">
<h3>The tidyselect DSL</h3>
<p>Underneath all functions that use tidy selection is the <a href="https://tidyselect.r-lib.org/">tidyselect</a> package. It provides
a miniature domain specific language that makes it easy to select
columns by name, position, or type. For example:</p>
<ul>
<li><p><code>select(df, 1)</code> selects the first column;
<code>select(df, last_col())</code> selects the last column.</p></li>
<li><p><code>select(df, c(a, b, c))</code> selects columns
<code>a</code>, <code>b</code>, and <code>c</code>.</p></li>
<li><p><code>select(df, starts_with(&quot;a&quot;))</code> selects all columns
whose name starts with “a”; <code>select(df, ends_with(&quot;z&quot;))</code>
selects all columns whose name ends with “z”.</p></li>
<li><p><code>select(df, where(is.numeric))</code> selects all numeric
columns.</p></li>
</ul>
<p>You can see more details in <code>?dplyr_tidy_select</code>.</p>
</div>
<div id="indirection-1" class="section level3">
<h3>Indirection</h3>
<p>As with data masking, tidy selection makes a common task easier at
the cost of making a less common task harder. When you want to use tidy
select indirectly with the column specification stored in an
intermediate variable, you’ll need to learn some new tools. Again, there
are two forms of indirection:</p>
<ul>
<li><p>When you have the data-variable in an env-variable that is a
function argument, you use the same technique as data masking: you
<strong>embrace</strong> the argument by surrounding it in doubled
braces.</p>
<p>The following function summarises a data frame by computing the mean
of all variables selected by the user:</p>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" tabindex="-1"></a>summarise_mean <span class="ot">&lt;-</span> <span class="cf">function</span>(data, vars) {</span>
<span id="cb9-2"><a href="#cb9-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span> <span class="fu">summarise</span>(<span class="at">n =</span> <span class="fu">n</span>(), <span class="fu">across</span>({{ vars }}, mean))</span>
<span id="cb9-3"><a href="#cb9-3" tabindex="-1"></a>}</span>
<span id="cb9-4"><a href="#cb9-4" tabindex="-1"></a>mtcars <span class="sc">%&gt;%</span> </span>
<span id="cb9-5"><a href="#cb9-5" tabindex="-1"></a>  <span class="fu">group_by</span>(cyl) <span class="sc">%&gt;%</span> </span>
<span id="cb9-6"><a href="#cb9-6" tabindex="-1"></a>  <span class="fu">summarise_mean</span>(<span class="fu">where</span>(is.numeric))</span></code></pre></div></li>
<li><p>When you have an env-variable that is a character vector, you
need to use <code>all_of()</code> or <code>any_of()</code> depending on
whether you want the function to error if a variable is not found.</p>
<p>The following code uses <code>all_of()</code> to select all of the
variables found in a character vector; then <code>!</code> plus
<code>all_of()</code> to select all of the variables <em>not</em> found
in a character vector:</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" tabindex="-1"></a>vars <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">&quot;mpg&quot;</span>, <span class="st">&quot;vs&quot;</span>)</span>
<span id="cb10-2"><a href="#cb10-2" tabindex="-1"></a>mtcars <span class="sc">%&gt;%</span> <span class="fu">select</span>(<span class="fu">all_of</span>(vars))</span>
<span id="cb10-3"><a href="#cb10-3" tabindex="-1"></a>mtcars <span class="sc">%&gt;%</span> <span class="fu">select</span>(<span class="sc">!</span><span class="fu">all_of</span>(vars))</span></code></pre></div></li>
</ul>
</div>
</div>
<div id="how-tos" class="section level2">
<h2>How-tos</h2>
<p>The following examples solve a grab bag of common problems. We show
you the minimum amount of code so that you can get the basic idea; most
real problems will require more code or combining multiple
techniques.</p>
<div id="user-supplied-data" class="section level3">
<h3>User-supplied data</h3>
<p>If you check the documentation, you’ll see that the <code>.data</code>
argument never uses data masking or tidy select. That means you don’t need to do
anything special in your function:</p>
<div class="sourceCode" id="cb11"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" tabindex="-1"></a>mutate_y <span class="ot">&lt;-</span> <span class="cf">function</span>(data) {</span>
<span id="cb11-2"><a href="#cb11-2" tabindex="-1"></a>  <span class="fu">mutate</span>(data, <span class="at">y =</span> a <span class="sc">+</span> x)</span>
<span id="cb11-3"><a href="#cb11-3" tabindex="-1"></a>}</span></code></pre></div>
</div>
<div id="one-or-more-user-supplied-expressions" class="section level3">
<h3>One or more user-supplied expressions</h3>
<p>If you want the user to supply an expression that’s passed onto an
argument which uses data masking or tidy select, embrace the
argument:</p>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" tabindex="-1"></a>my_summarise <span class="ot">&lt;-</span> <span class="cf">function</span>(data, group_var) {</span>
<span id="cb12-2"><a href="#cb12-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span></span>
<span id="cb12-3"><a href="#cb12-3" tabindex="-1"></a>    <span class="fu">group_by</span>({{ group_var }}) <span class="sc">%&gt;%</span></span>
<span id="cb12-4"><a href="#cb12-4" tabindex="-1"></a>    <span class="fu">summarise</span>(<span class="at">mean =</span> <span class="fu">mean</span>(mass))</span>
<span id="cb12-5"><a href="#cb12-5" tabindex="-1"></a>}</span></code></pre></div>
<p>This generalises in a straightforward way if you want to use one
user-supplied expression in multiple places:</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" tabindex="-1"></a>my_summarise2 <span class="ot">&lt;-</span> <span class="cf">function</span>(data, expr) {</span>
<span id="cb13-2"><a href="#cb13-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span> <span class="fu">summarise</span>(</span>
<span id="cb13-3"><a href="#cb13-3" tabindex="-1"></a>    <span class="at">mean =</span> <span class="fu">mean</span>({{ expr }}),</span>
<span id="cb13-4"><a href="#cb13-4" tabindex="-1"></a>    <span class="at">sum =</span> <span class="fu">sum</span>({{ expr }}),</span>
<span id="cb13-5"><a href="#cb13-5" tabindex="-1"></a>    <span class="at">n =</span> <span class="fu">n</span>()</span>
<span id="cb13-6"><a href="#cb13-6" tabindex="-1"></a>  )</span>
<span id="cb13-7"><a href="#cb13-7" tabindex="-1"></a>}</span></code></pre></div>
<p>If you want the user to provide multiple expressions, embrace each of
them:</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" tabindex="-1"></a>my_summarise3 <span class="ot">&lt;-</span> <span class="cf">function</span>(data, mean_var, sd_var) {</span>
<span id="cb14-2"><a href="#cb14-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span> </span>
<span id="cb14-3"><a href="#cb14-3" tabindex="-1"></a>    <span class="fu">summarise</span>(<span class="at">mean =</span> <span class="fu">mean</span>({{ mean_var }}), <span class="at">sd =</span> <span class="fu">sd</span>({{ sd_var }}))</span>
<span id="cb14-4"><a href="#cb14-4" tabindex="-1"></a>}</span></code></pre></div>
<p>If you want to use the name of a variable in the output, you can
embrace the variable name on the left-hand side of <code>:=</code> with
<code>{{</code>:</p>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" tabindex="-1"></a>my_summarise4 <span class="ot">&lt;-</span> <span class="cf">function</span>(data, expr) {</span>
<span id="cb15-2"><a href="#cb15-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span> <span class="fu">summarise</span>(</span>
<span id="cb15-3"><a href="#cb15-3" tabindex="-1"></a>    <span class="st">&quot;mean_{{expr}}&quot;</span> <span class="sc">:=</span> <span class="fu">mean</span>({{ expr }}),</span>
<span id="cb15-4"><a href="#cb15-4" tabindex="-1"></a>    <span class="st">&quot;sum_{{expr}}&quot;</span> <span class="sc">:=</span> <span class="fu">sum</span>({{ expr }}),</span>
<span id="cb15-5"><a href="#cb15-5" tabindex="-1"></a>    <span class="st">&quot;n_{{expr}}&quot;</span> <span class="sc">:=</span> <span class="fu">n</span>()</span>
<span id="cb15-6"><a href="#cb15-6" tabindex="-1"></a>  )</span>
<span id="cb15-7"><a href="#cb15-7" tabindex="-1"></a>}</span>
<span id="cb15-8"><a href="#cb15-8" tabindex="-1"></a>my_summarise5 <span class="ot">&lt;-</span> <span class="cf">function</span>(data, mean_var, sd_var) {</span>
<span id="cb15-9"><a href="#cb15-9" tabindex="-1"></a>  data <span class="sc">%&gt;%</span> </span>
<span id="cb15-10"><a href="#cb15-10" tabindex="-1"></a>    <span class="fu">summarise</span>(</span>
<span id="cb15-11"><a href="#cb15-11" tabindex="-1"></a>      <span class="st">&quot;mean_{{mean_var}}&quot;</span> <span class="sc">:=</span> <span class="fu">mean</span>({{ mean_var }}), </span>
<span id="cb15-12"><a href="#cb15-12" tabindex="-1"></a>      <span class="st">&quot;sd_{{sd_var}}&quot;</span> <span class="sc">:=</span> <span class="fu">sd</span>({{ sd_var }})</span>
<span id="cb15-13"><a href="#cb15-13" tabindex="-1"></a>    )</span>
<span id="cb15-14"><a href="#cb15-14" tabindex="-1"></a>}</span></code></pre></div>
</div>
<div id="any-number-of-user-supplied-expressions" class="section level3">
<h3>Any number of user-supplied expressions</h3>
<p>If you want to take an arbitrary number of user-supplied expressions,
use <code>...</code>. This is most often useful when you want to give
the user full control over a single part of the pipeline, like a
<code>group_by()</code> or a <code>mutate()</code>.</p>
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" tabindex="-1"></a>my_summarise <span class="ot">&lt;-</span> <span class="cf">function</span>(.data, ...) {</span>
<span id="cb16-2"><a href="#cb16-2" tabindex="-1"></a>  .data <span class="sc">%&gt;%</span></span>
<span id="cb16-3"><a href="#cb16-3" tabindex="-1"></a>    <span class="fu">group_by</span>(...) <span class="sc">%&gt;%</span></span>
<span id="cb16-4"><a href="#cb16-4" tabindex="-1"></a>    <span class="fu">summarise</span>(<span class="at">mass =</span> <span class="fu">mean</span>(mass, <span class="at">na.rm =</span> <span class="cn">TRUE</span>), <span class="at">height =</span> <span class="fu">mean</span>(height, <span class="at">na.rm =</span> <span class="cn">TRUE</span>))</span>
<span id="cb16-5"><a href="#cb16-5" tabindex="-1"></a>}</span>
<span id="cb16-6"><a href="#cb16-6" tabindex="-1"></a></span>
<span id="cb16-7"><a href="#cb16-7" tabindex="-1"></a>starwars <span class="sc">%&gt;%</span> <span class="fu">my_summarise</span>(homeworld)</span>
<span id="cb16-8"><a href="#cb16-8" tabindex="-1"></a><span class="co">#&gt; # A tibble: 49 × 3</span></span>
<span id="cb16-9"><a href="#cb16-9" tabindex="-1"></a><span class="co">#&gt;   homeworld    mass height</span></span>
<span id="cb16-10"><a href="#cb16-10" tabindex="-1"></a><span class="co">#&gt;   &lt;chr&gt;       &lt;dbl&gt;  &lt;dbl&gt;</span></span>
<span id="cb16-11"><a href="#cb16-11" tabindex="-1"></a><span class="co">#&gt; 1 Alderaan       64   176.</span></span>
<span id="cb16-12"><a href="#cb16-12" tabindex="-1"></a><span class="co">#&gt; 2 Aleen Minor    15    79 </span></span>
<span id="cb16-13"><a href="#cb16-13" tabindex="-1"></a><span class="co">#&gt; 3 Bespin         79   175 </span></span>
<span id="cb16-14"><a href="#cb16-14" tabindex="-1"></a><span class="co">#&gt; 4 Bestine IV    110   180 </span></span>
<span id="cb16-15"><a href="#cb16-15" tabindex="-1"></a><span class="co">#&gt; # ℹ 45 more rows</span></span>
<span id="cb16-16"><a href="#cb16-16" tabindex="-1"></a>starwars <span class="sc">%&gt;%</span> <span class="fu">my_summarise</span>(sex, gender)</span>
<span id="cb16-17"><a href="#cb16-17" tabindex="-1"></a><span class="co">#&gt; `summarise()` has grouped output by &#39;sex&#39;. You can override using the `.groups`</span></span>
<span id="cb16-18"><a href="#cb16-18" tabindex="-1"></a><span class="co">#&gt; argument.</span></span>
<span id="cb16-19"><a href="#cb16-19" tabindex="-1"></a><span class="co">#&gt; # A tibble: 6 × 4</span></span>
<span id="cb16-20"><a href="#cb16-20" tabindex="-1"></a><span class="co">#&gt; # Groups:   sex [5]</span></span>
<span id="cb16-21"><a href="#cb16-21" tabindex="-1"></a><span class="co">#&gt;   sex            gender      mass height</span></span>
<span id="cb16-22"><a href="#cb16-22" tabindex="-1"></a><span class="co">#&gt;   &lt;chr&gt;          &lt;chr&gt;      &lt;dbl&gt;  &lt;dbl&gt;</span></span>
<span id="cb16-23"><a href="#cb16-23" tabindex="-1"></a><span class="co">#&gt; 1 female         feminine    54.7   172.</span></span>
<span id="cb16-24"><a href="#cb16-24" tabindex="-1"></a><span class="co">#&gt; 2 hermaphroditic masculine 1358     175 </span></span>
<span id="cb16-25"><a href="#cb16-25" tabindex="-1"></a><span class="co">#&gt; 3 male           masculine   80.2   179.</span></span>
<span id="cb16-26"><a href="#cb16-26" tabindex="-1"></a><span class="co">#&gt; 4 none           feminine   NaN      96 </span></span>
<span id="cb16-27"><a href="#cb16-27" tabindex="-1"></a><span class="co">#&gt; # ℹ 2 more rows</span></span></code></pre></div>
<p>When you use <code>...</code> in this way, make sure that any other
arguments start with <code>.</code> to reduce the chances of argument
clashes; see <a href="https://design.tidyverse.org/dots-prefix.html" class="uri">https://design.tidyverse.org/dots-prefix.html</a> for more
details.</p>
</div>
<div id="creating-multiple-columns" class="section level3">
<h3>Creating multiple columns</h3>
<p>Sometimes it can be useful for a single expression to return multiple
columns. You can do this by returning an unnamed data frame:</p>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" tabindex="-1"></a>quantile_df <span class="ot">&lt;-</span> <span class="cf">function</span>(x, <span class="at">probs =</span> <span class="fu">c</span>(<span class="fl">0.25</span>, <span class="fl">0.5</span>, <span class="fl">0.75</span>)) {</span>
<span id="cb17-2"><a href="#cb17-2" tabindex="-1"></a>  <span class="fu">tibble</span>(</span>
<span id="cb17-3"><a href="#cb17-3" tabindex="-1"></a>    <span class="at">val =</span> <span class="fu">quantile</span>(x, probs),</span>
<span id="cb17-4"><a href="#cb17-4" tabindex="-1"></a>    <span class="at">quant =</span> probs</span>
<span id="cb17-5"><a href="#cb17-5" tabindex="-1"></a>  )</span>
<span id="cb17-6"><a href="#cb17-6" tabindex="-1"></a>}</span>
<span id="cb17-7"><a href="#cb17-7" tabindex="-1"></a></span>
<span id="cb17-8"><a href="#cb17-8" tabindex="-1"></a>x <span class="ot">&lt;-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span></span>
<span id="cb17-9"><a href="#cb17-9" tabindex="-1"></a><span class="fu">quantile_df</span>(x)</span>
<span id="cb17-10"><a href="#cb17-10" tabindex="-1"></a><span class="co">#&gt; # A tibble: 3 × 2</span></span>
<span id="cb17-11"><a href="#cb17-11" tabindex="-1"></a><span class="co">#&gt;     val quant</span></span>
<span id="cb17-12"><a href="#cb17-12" tabindex="-1"></a><span class="co">#&gt;   &lt;dbl&gt; &lt;dbl&gt;</span></span>
<span id="cb17-13"><a href="#cb17-13" tabindex="-1"></a><span class="co">#&gt; 1     2  0.25</span></span>
<span id="cb17-14"><a href="#cb17-14" tabindex="-1"></a><span class="co">#&gt; 2     3  0.5 </span></span>
<span id="cb17-15"><a href="#cb17-15" tabindex="-1"></a><span class="co">#&gt; 3     4  0.75</span></span></code></pre></div>
<p>This sort of function is useful inside <code>summarise()</code> and
<code>mutate()</code>, which allow you to add multiple columns by
returning a data frame:</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" tabindex="-1"></a>df <span class="ot">&lt;-</span> <span class="fu">tibble</span>(</span>
<span id="cb18-2"><a href="#cb18-2" tabindex="-1"></a>  <span class="at">grp =</span> <span class="fu">rep</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>, <span class="at">each =</span> <span class="dv">10</span>),</span>
<span id="cb18-3"><a href="#cb18-3" tabindex="-1"></a>  <span class="at">x =</span> <span class="fu">runif</span>(<span class="dv">30</span>),</span>
<span id="cb18-4"><a href="#cb18-4" tabindex="-1"></a>  <span class="at">y =</span> <span class="fu">rnorm</span>(<span class="dv">30</span>)</span>
<span id="cb18-5"><a href="#cb18-5" tabindex="-1"></a>)</span>
<span id="cb18-6"><a href="#cb18-6" tabindex="-1"></a></span>
<span id="cb18-7"><a href="#cb18-7" tabindex="-1"></a>df <span class="sc">%&gt;%</span></span>
<span id="cb18-8"><a href="#cb18-8" tabindex="-1"></a>  <span class="fu">group_by</span>(grp) <span class="sc">%&gt;%</span></span>
<span id="cb18-9"><a href="#cb18-9" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="fu">quantile_df</span>(x, <span class="at">probs =</span> .<span class="dv">5</span>))</span>
<span id="cb18-10"><a href="#cb18-10" tabindex="-1"></a><span class="co">#&gt; # A tibble: 3 × 3</span></span>
<span id="cb18-11"><a href="#cb18-11" tabindex="-1"></a><span class="co">#&gt;     grp   val quant</span></span>
<span id="cb18-12"><a href="#cb18-12" tabindex="-1"></a><span class="co">#&gt;   &lt;int&gt; &lt;dbl&gt; &lt;dbl&gt;</span></span>
<span id="cb18-13"><a href="#cb18-13" tabindex="-1"></a><span class="co">#&gt; 1     1 0.361   0.5</span></span>
<span id="cb18-14"><a href="#cb18-14" tabindex="-1"></a><span class="co">#&gt; 2     2 0.541   0.5</span></span>
<span id="cb18-15"><a href="#cb18-15" tabindex="-1"></a><span class="co">#&gt; 3     3 0.456   0.5</span></span>
<span id="cb18-16"><a href="#cb18-16" tabindex="-1"></a></span>
<span id="cb18-17"><a href="#cb18-17" tabindex="-1"></a>df <span class="sc">%&gt;%</span></span>
<span id="cb18-18"><a href="#cb18-18" tabindex="-1"></a>  <span class="fu">group_by</span>(grp) <span class="sc">%&gt;%</span></span>
<span id="cb18-19"><a href="#cb18-19" tabindex="-1"></a>  <span class="fu">summarise</span>(<span class="fu">across</span>(x<span class="sc">:</span>y, <span class="sc">~</span> <span class="fu">quantile_df</span>(.x, <span class="at">probs =</span> .<span class="dv">5</span>), <span class="at">.unpack =</span> <span class="cn">TRUE</span>))</span>
<span id="cb18-20"><a href="#cb18-20" tabindex="-1"></a><span class="co">#&gt; # A tibble: 3 × 5</span></span>
<span id="cb18-21"><a href="#cb18-21" tabindex="-1"></a><span class="co">#&gt;     grp x_val x_quant   y_val y_quant</span></span>
<span id="cb18-22"><a href="#cb18-22" tabindex="-1"></a><span class="co">#&gt;   &lt;int&gt; &lt;dbl&gt;   &lt;dbl&gt;   &lt;dbl&gt;   &lt;dbl&gt;</span></span>
<span id="cb18-23"><a href="#cb18-23" tabindex="-1"></a><span class="co">#&gt; 1     1 0.361     0.5  0.174      0.5</span></span>
<span id="cb18-24"><a href="#cb18-24" tabindex="-1"></a><span class="co">#&gt; 2     2 0.541     0.5 -0.0110     0.5</span></span>
<span id="cb18-25"><a href="#cb18-25" tabindex="-1"></a><span class="co">#&gt; 3     3 0.456     0.5  0.0583     0.5</span></span></code></pre></div>
<p>Notice that we set <code>.unpack = TRUE</code> inside
<code>across()</code>. This tells <code>across()</code> to
<em>unpack</em> the data frame returned by <code>quantile_df()</code>
into its respective columns, combining the column names of the original
columns (<code>x</code> and <code>y</code>) with the column names
returned from the function (<code>val</code> and
<code>quant</code>).</p>
<p>If your function returns multiple <em>rows</em> per group, then
you’ll need to switch from <code>summarise()</code> to
<code>reframe()</code>. <code>summarise()</code> is restricted to
returning 1 row summaries per group, but <code>reframe()</code> lifts
this restriction:</p>
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" tabindex="-1"></a>df <span class="sc">%&gt;%</span></span>
<span id="cb19-2"><a href="#cb19-2" tabindex="-1"></a>  <span class="fu">group_by</span>(grp) <span class="sc">%&gt;%</span></span>
<span id="cb19-3"><a href="#cb19-3" tabindex="-1"></a>  <span class="fu">reframe</span>(<span class="fu">across</span>(x<span class="sc">:</span>y, quantile_df, <span class="at">.unpack =</span> <span class="cn">TRUE</span>))</span>
<span id="cb19-4"><a href="#cb19-4" tabindex="-1"></a><span class="co">#&gt; # A tibble: 9 × 5</span></span>
<span id="cb19-5"><a href="#cb19-5" tabindex="-1"></a><span class="co">#&gt;     grp x_val x_quant  y_val y_quant</span></span>
<span id="cb19-6"><a href="#cb19-6" tabindex="-1"></a><span class="co">#&gt;   &lt;int&gt; &lt;dbl&gt;   &lt;dbl&gt;  &lt;dbl&gt;   &lt;dbl&gt;</span></span>
<span id="cb19-7"><a href="#cb19-7" tabindex="-1"></a><span class="co">#&gt; 1     1 0.219    0.25 -0.710    0.25</span></span>
<span id="cb19-8"><a href="#cb19-8" tabindex="-1"></a><span class="co">#&gt; 2     1 0.361    0.5   0.174    0.5 </span></span>
<span id="cb19-9"><a href="#cb19-9" tabindex="-1"></a><span class="co">#&gt; 3     1 0.674    0.75  0.524    0.75</span></span>
<span id="cb19-10"><a href="#cb19-10" tabindex="-1"></a><span class="co">#&gt; 4     2 0.315    0.25 -0.690    0.25</span></span>
<span id="cb19-11"><a href="#cb19-11" tabindex="-1"></a><span class="co">#&gt; # ℹ 5 more rows</span></span></code></pre></div>
</div>
<div id="transforming-user-supplied-variables" class="section level3">
<h3>Transforming user-supplied variables</h3>
<p>If you want the user to provide a set of data-variables that are then
transformed, use <code>across()</code> and <code>pick()</code>:</p>
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" tabindex="-1"></a>my_summarise <span class="ot">&lt;-</span> <span class="cf">function</span>(data, summary_vars) {</span>
<span id="cb20-2"><a href="#cb20-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span></span>
<span id="cb20-3"><a href="#cb20-3" tabindex="-1"></a>    <span class="fu">summarise</span>(<span class="fu">across</span>({{ summary_vars }}, <span class="sc">~</span> <span class="fu">mean</span>(., <span class="at">na.rm =</span> <span class="cn">TRUE</span>)))</span>
<span id="cb20-4"><a href="#cb20-4" tabindex="-1"></a>}</span>
<span id="cb20-5"><a href="#cb20-5" tabindex="-1"></a>starwars <span class="sc">%&gt;%</span> </span>
<span id="cb20-6"><a href="#cb20-6" tabindex="-1"></a>  <span class="fu">group_by</span>(species) <span class="sc">%&gt;%</span> </span>
<span id="cb20-7"><a href="#cb20-7" tabindex="-1"></a>  <span class="fu">my_summarise</span>(<span class="fu">c</span>(mass, height))</span>
<span id="cb20-8"><a href="#cb20-8" tabindex="-1"></a><span class="co">#&gt; # A tibble: 38 × 3</span></span>
<span id="cb20-9"><a href="#cb20-9" tabindex="-1"></a><span class="co">#&gt;   species   mass height</span></span>
<span id="cb20-10"><a href="#cb20-10" tabindex="-1"></a><span class="co">#&gt;   &lt;chr&gt;    &lt;dbl&gt;  &lt;dbl&gt;</span></span>
<span id="cb20-11"><a href="#cb20-11" tabindex="-1"></a><span class="co">#&gt; 1 Aleena      15     79</span></span>
<span id="cb20-12"><a href="#cb20-12" tabindex="-1"></a><span class="co">#&gt; 2 Besalisk   102    198</span></span>
<span id="cb20-13"><a href="#cb20-13" tabindex="-1"></a><span class="co">#&gt; 3 Cerean      82    198</span></span>
<span id="cb20-14"><a href="#cb20-14" tabindex="-1"></a><span class="co">#&gt; 4 Chagrian   NaN    196</span></span>
<span id="cb20-15"><a href="#cb20-15" tabindex="-1"></a><span class="co">#&gt; # ℹ 34 more rows</span></span></code></pre></div>
<p>You can use this same idea for multiple sets of input
data-variables:</p>
<div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" tabindex="-1"></a>my_summarise <span class="ot">&lt;-</span> <span class="cf">function</span>(data, group_var, summarise_var) {</span>
<span id="cb21-2"><a href="#cb21-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span></span>
<span id="cb21-3"><a href="#cb21-3" tabindex="-1"></a>    <span class="fu">group_by</span>(<span class="fu">pick</span>({{ group_var }})) <span class="sc">%&gt;%</span> </span>
<span id="cb21-4"><a href="#cb21-4" tabindex="-1"></a>    <span class="fu">summarise</span>(<span class="fu">across</span>({{ summarise_var }}, mean))</span>
<span id="cb21-5"><a href="#cb21-5" tabindex="-1"></a>}</span></code></pre></div>
<p>Use the <code>.names</code> argument to <code>across()</code> to
control the names of the output.</p>
<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" tabindex="-1"></a>my_summarise <span class="ot">&lt;-</span> <span class="cf">function</span>(data, group_var, summarise_var) {</span>
<span id="cb22-2"><a href="#cb22-2" tabindex="-1"></a>  data <span class="sc">%&gt;%</span></span>
<span id="cb22-3"><a href="#cb22-3" tabindex="-1"></a>    <span class="fu">group_by</span>(<span class="fu">pick</span>({{ group_var }})) <span class="sc">%&gt;%</span> </span>
<span id="cb22-4"><a href="#cb22-4" tabindex="-1"></a>    <span class="fu">summarise</span>(<span class="fu">across</span>({{ summarise_var }}, mean, <span class="at">.names =</span> <span class="st">&quot;mean_{.col}&quot;</span>))</span>
<span id="cb22-5"><a href="#cb22-5" tabindex="-1"></a>}</span></code></pre></div>
</div>
<div id="loop-over-multiple-variables" class="section level3">
<h3>Loop over multiple variables</h3>
<p>If you have a character vector of variable names, and want to operate
on them with a for loop, index into the special <code>.data</code>
pronoun:</p>
<div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" tabindex="-1"></a><span class="cf">for</span> (var <span class="cf">in</span> <span class="fu">names</span>(mtcars)) {</span>
<span id="cb23-2"><a href="#cb23-2" tabindex="-1"></a>  mtcars <span class="sc">%&gt;%</span> <span class="fu">count</span>(.data[[var]]) <span class="sc">%&gt;%</span> <span class="fu">print</span>()</span>
<span id="cb23-3"><a href="#cb23-3" tabindex="-1"></a>}</span></code></pre></div>
<p>This same technique works with for loop alternatives like the base R
<code>apply()</code> family and the purrr <code>map()</code> family:</p>
<div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" tabindex="-1"></a>mtcars <span class="sc">%&gt;%</span> </span>
<span id="cb24-2"><a href="#cb24-2" tabindex="-1"></a>  <span class="fu">names</span>() <span class="sc">%&gt;%</span> </span>
<span id="cb24-3"><a href="#cb24-3" tabindex="-1"></a>  purrr<span class="sc">::</span><span class="fu">map</span>(<span class="sc">~</span> <span class="fu">count</span>(mtcars, .data[[.x]]))</span></code></pre></div>
<p>(Note that the <code>x</code> in <code>.data[[x]]</code> is always
treated as an env-variable; it will never come from the data.)</p>
</div>
<div id="use-a-variable-from-an-shiny-input" class="section level3">
<h3>Use a variable from a Shiny input</h3>
<p>Many Shiny input controls return character vectors, so you can use
the same approach as above: <code>.data[[input$var]]</code>.</p>
<div class="sourceCode" id="cb25"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" tabindex="-1"></a><span class="fu">library</span>(shiny)</span>
<span id="cb25-2"><a href="#cb25-2" tabindex="-1"></a>ui <span class="ot">&lt;-</span> <span class="fu">fluidPage</span>(</span>
<span id="cb25-3"><a href="#cb25-3" tabindex="-1"></a>  <span class="fu">selectInput</span>(<span class="st">&quot;var&quot;</span>, <span class="st">&quot;Variable&quot;</span>, <span class="at">choices =</span> <span class="fu">names</span>(diamonds)),</span>
<span id="cb25-4"><a href="#cb25-4" tabindex="-1"></a>  <span class="fu">tableOutput</span>(<span class="st">&quot;output&quot;</span>)</span>
<span id="cb25-5"><a href="#cb25-5" tabindex="-1"></a>)</span>
<span id="cb25-6"><a href="#cb25-6" tabindex="-1"></a>server <span class="ot">&lt;-</span> <span class="cf">function</span>(input, output, session) {</span>
<span id="cb25-7"><a href="#cb25-7" tabindex="-1"></a>  data <span class="ot">&lt;-</span> <span class="fu">reactive</span>(<span class="fu">filter</span>(diamonds, .data[[input<span class="sc">$</span>var]] <span class="sc">&gt;</span> <span class="dv">0</span>))</span>
<span id="cb25-8"><a href="#cb25-8" tabindex="-1"></a>  output<span class="sc">$</span>output <span class="ot">&lt;-</span> <span class="fu">renderTable</span>(<span class="fu">head</span>(<span class="fu">data</span>()))</span>
<span id="cb25-9"><a href="#cb25-9" tabindex="-1"></a>}</span></code></pre></div>
<p>See <a href="https://mastering-shiny.org/action-tidy.html" class="uri">https://mastering-shiny.org/action-tidy.html</a> for more
details and case studies.</p>
</div>
</div>
<div class="footnotes footnotes-end-of-document">
<hr />
<ol>
<li id="fn1"><p>dplyr’s <code>filter()</code> is inspired by base R’s
<code>subset()</code>. <code>subset()</code> provides data masking, but
not with tidy evaluation, so the techniques described in this chapter
don’t apply to it.<a href="#fnref1" class="footnote-back">↩︎</a></p></li>
<li id="fn2"><p>In R, arguments are lazily evaluated, which means that
until you attempt to use them, they don’t hold a value, just a
<strong>promise</strong> that describes how to compute the value. You
can learn more at <a href="https://adv-r.hadley.nz/functions.html#lazy-evaluation" class="uri">https://adv-r.hadley.nz/functions.html#lazy-evaluation</a><a href="#fnref2" class="footnote-back">↩︎</a></p></li>
</ol>
</div>



<!-- code folding -->


<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    // Create the MathJax <script> element at runtime instead of shipping a
    // static tag, for compatibility with self-contained documents.
    var mathjaxUrl = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    var loader = document.createElement("script");
    loader.type = "text/javascript";
    loader.src = mathjaxUrl;
    // Append the new element to the document's first <head>.
    var headElement = document.getElementsByTagName("head")[0];
    headElement.appendChild(loader);
  })();
</script>

</body>
</html>