1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/udf.R
\name{register_scalar_function}
\alias{register_scalar_function}
\title{Register user-defined functions}
\usage{
register_scalar_function(name, fun, in_type, out_type, auto_convert = FALSE)
}
\arguments{
\item{name}{The function name to be used in the dplyr bindings}
\item{fun}{An R function or rlang-style lambda expression. The function
will be called with a first argument \code{context} which is a \code{list()}
with elements \code{batch_size} (the expected length of the output) and
\code{output_type} (the required \link{DataType} of the output) that may be used
to ensure that the output has the correct type and length. Subsequent
arguments are passed by position as specified by \code{in_type}. If
\code{auto_convert} is \code{TRUE}, subsequent arguments are converted to
R vectors before being passed to \code{fun} and the output is automatically
constructed with the expected output type via \code{\link[=as_arrow_array]{as_arrow_array()}}.}
\item{in_type}{A \link{DataType} of the input type or a \code{\link[=schema]{schema()}}
for functions with more than one argument. This signature will be used
to determine if this function is appropriate for a given set of arguments.
If this function is appropriate for more than one signature, pass a
\code{list()} of the above.}
\item{out_type}{A \link{DataType} of the output type or a function accepting
a single argument (\code{types}), which is a \code{list()} of \link{DataType}s. If
\code{out_type} is a function, it must return a \link{DataType}.}
\item{auto_convert}{Use \code{TRUE} to convert inputs before passing to \code{fun}
and construct an Array of the correct type from the output. Use this
option to write functions of R objects as opposed to functions of
Arrow R6 objects.}
}
\value{
\code{NULL}, invisibly
}
\description{
These functions support calling R code from query engine execution
(i.e., a \code{\link[dplyr:mutate]{dplyr::mutate()}} or \code{\link[dplyr:filter]{dplyr::filter()}} on a \link{Table} or \link{Dataset}).
Use \code{\link[=register_scalar_function]{register_scalar_function()}} to attach Arrow input and output types to an
R function and make it available for use in the dplyr interface and/or
\code{\link[=call_function]{call_function()}}. Scalar functions are currently the only type of
user-defined function supported. In Arrow, scalar functions must be
stateless and return output with the same shape (i.e., the same number
of rows) as the input.
}
\examples{
\dontshow{if (arrow_with_dataset() && identical(Sys.getenv("NOT_CRAN"), "true")) withAutoprint(\{ # examplesIf}
library(dplyr, warn.conflicts = FALSE)
# Fit a linear model in plain R; the function registered below uses it
# to produce predictions.
some_model <- lm(mpg ~ disp + cyl, data = mtcars)
# Register an R function under the name mtcars_predict_mpg so that it can
# be called from the dplyr bindings.
register_scalar_function(
"mtcars_predict_mpg",
# The first argument is always the context list; disp and cyl are then
# passed by position, in the order given by in_type.
function(context, disp, cyl) {
predict(some_model, newdata = data.frame(disp, cyl))
},
# A schema is used for in_type because the function takes more than one
# argument.
in_type = schema(disp = float64(), cyl = float64()),
out_type = float64(),
# With auto_convert = TRUE the inputs are converted to R vectors before
# the function is called, and the result is converted to out_type.
auto_convert = TRUE
)
# Call the registered function by name inside a dplyr verb on an Arrow Table.
as_arrow_table(mtcars) |>
transmute(mpg, mpg_predicted = mtcars_predict_mpg(disp, cyl)) |>
collect() |>
head()
\dontshow{\}) # examplesIf}
}
|