File: data_partition.Rd

package info (click to toggle)
r-cran-datawizard 1.0.1%2Bdfsg-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 2,300 kB
sloc: sh: 13; makefile: 2
file content (84 lines) | stat: -rw-r--r-- 3,519 bytes
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_partition.R
\name{data_partition}
\alias{data_partition}
\title{Partition data}
\usage{
data_partition(
  data,
  proportion = 0.7,
  by = NULL,
  seed = NULL,
  row_id = ".row_id",
  verbose = TRUE,
  ...
)
}
\arguments{
\item{data}{A data frame.}

\item{proportion}{Scalar (between 0 and 1) or numeric vector, indicating the
proportion(s) of the training set(s). The sum of \code{proportion} must not be
greater than 1. The remaining part will be used for the test set.}

\item{by}{A character vector indicating the name(s) of the column(s) used
for stratified partitioning.}

\item{seed}{A random number generator seed. Enter an integer (e.g. 123) so
that the random sampling will be the same each time you run the function.}

\item{row_id}{Character string, indicating the name of the column that
contains the row IDs.}

\item{verbose}{Toggle messages and warnings.}

\item{...}{Other arguments passed to or from other functions.}
}
\value{
A list of data frames. The list includes one training set per given
proportion and the remaining data as the test set. List elements of training
sets are named after the given proportions (e.g., \verb{$p_0.7}); the test set
is named \verb{$test}.
}
\description{
Creates data partitions (for instance, a training and a test set) from a
data frame. Partitions can optionally be stratified (i.e., the levels of a
given factor are spread evenly across partitions) using the \code{by} argument.
}
\examples{
data(iris)
out <- data_partition(iris, proportion = 0.9)
out$test
nrow(out$p_0.9)

# Stratify by group (equal proportions of each species)
out <- data_partition(iris, proportion = 0.9, by = "Species")
out$test

# Create multiple partitions
out <- data_partition(iris, proportion = c(0.3, 0.3))
lapply(out, head)

# Create multiple partitions, stratified by group: 30\% of each species is
# sampled into the first training set, 50\% into the second training set, and
# the remaining 20\% of each species goes into the test set.
out <- data_partition(iris, proportion = c(0.3, 0.5), by = "Species")
lapply(out, function(i) table(i$Species))

}
\seealso{
\itemize{
\item Add a prefix or suffix to column names: \code{\link[=data_addprefix]{data_addprefix()}}, \code{\link[=data_addsuffix]{data_addsuffix()}}
\item Functions to reorder or remove columns: \code{\link[=data_reorder]{data_reorder()}}, \code{\link[=data_relocate]{data_relocate()}},
\code{\link[=data_remove]{data_remove()}}
\item Functions to reshape, pivot or rotate data frames: \code{\link[=data_to_long]{data_to_long()}},
\code{\link[=data_to_wide]{data_to_wide()}}, \code{\link[=data_rotate]{data_rotate()}}
\item Functions to recode data: \code{\link[=rescale]{rescale()}}, \code{\link[=reverse]{reverse()}}, \code{\link[=categorize]{categorize()}},
\code{\link[=recode_values]{recode_values()}}, \code{\link[=slide]{slide()}}
\item Functions to standardize, normalize, rank-transform: \code{\link[=center]{center()}}, \code{\link[=standardize]{standardize()}},
\code{\link[=normalize]{normalize()}}, \code{\link[=ranktransform]{ranktransform()}}, \code{\link[=winsorize]{winsorize()}}
\item Split and merge data frames: \code{\link[=data_partition]{data_partition()}}, \code{\link[=data_merge]{data_merge()}}
\item Functions to find or select columns: \code{\link[=data_select]{data_select()}}, \code{\link[=extract_column_names]{extract_column_names()}}
\item Functions to filter rows: \code{\link[=data_match]{data_match()}}, \code{\link[=data_filter]{data_filter()}}
}
}