1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{deduper}
\alias{deduper}
\title{Removes redundant words from beginnings of character strings}
\usage{
deduper(x, sep = ",_\\\\s-", n = NULL)
}
\arguments{
\item{x}{Character vector}
\item{sep}{Delimiter. A regular expression indicating the point at
which to split the strings before checking for
duplicates. Default will look for repeats separated by a comma,
underscore, whitespace character, or hyphen.}
\item{n}{Limit on number of duplicates to remove. Default, NULL,
means delete all duplicates at the beginning of a string.}
}
\value{
Cleaned up vector.
}
\description{
In Qualtrics data, we sometimes find repeated words in column
names. For whatever reason, the variable names have repeated words
like "Philadelphia_Philadelphia_3". This function changes a
vector c("Philadelphia_Philadelphia_3", "Denver_Denver_4") to
c("Philadelphia_3", "Denver_4"). It is non-destructive, so that
values without repeated words will not be altered.
}
\details{
See \url{https://stackoverflow.com/questions/43711240/r-regular-expression-match-omit-several-repeats}
}
\examples{
x <- c("Philadelphia_Philadelphia_3", "Denver_Denver_4",
"Den_Den_Den_Den_Den_Den_Den_5")
deduper(x)
deduper(x, n = 2)
deduper(x, n = 3)
deduper(x, n = 4)
x <- c("Philadelphia,Philadelphia_3", "Denver Denver_4")
## Shows comma also detected by default
deduper(x)
## Works even if delimiter is inside matched string,
## or separators vary
x <- c("Den_5_Den_5_Den_5,Den_5 Den_5")
deduper(x)
## generate vector
x <- replicate(10, paste(sample(letters, 5), collapse = ""))
n <- c(paste0("_", sample(1:10, 5)), rep("", 5))
x <- paste0(x, "_", x, n, n)
x
deduper(x)
}
\author{
Paul Johnson <pauljohn@ku.edu>
}
|