File: utils.R

package info (click to toggle)
r-cran-tokenizers 0.3.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 824 kB
  • sloc: cpp: 143; sh: 13; makefile: 2
file content (25 lines) | stat: -rw-r--r-- 692 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Optionally unwrap a one-element container.
#
# x:        a list (or vector) of results.
# simplify: logical flag; when TRUE and `x` holds exactly one element, that
#           element is returned on its own via `[[`.
#
# Returns `x[[1]]` when simplification applies, otherwise `x` unchanged.
simplify_list <- function(x, simplify) {
  stopifnot(is.logical(simplify))
  # Guard clause: leave `x` alone unless simplification was requested AND
  # there is exactly one element to unwrap.
  if (!simplify || length(x) != 1) {
    return(x)
  }
  x[[1]]
}

# Validate that `x` is an acceptable input.
#
# Accepted inputs are:
#   * a character vector of any length, or
#   * a list in which every element is a character vector of length 1.
#
# Anything else raises an error. When the input is acceptable the function
# returns NULL invisibly; it is called for its side effect only.
check_input <- function(x) {
  # Scalar, short-circuiting checks: the per-element tests run only when `x`
  # is a list, and the length test runs only when every element is character.
  # Note that an empty list passes, because all() of an empty vector is TRUE.
  is_character_list <- is.list(x) &&
    all(vapply(x, is.character, logical(1))) &&
    all(vapply(x, length, integer(1)) == 1L)

  if (!(is.character(x) || is_character_list)) {
    stop("Input must be a character vector of any length or a list of character\n",
         "  vectors, each of which has a length of 1.")
  }
  invisible(NULL)
}

# Drop every element of `x` that appears in `stopwords`.
#
# x:         vector of tokens to filter.
# stopwords: vector of values to remove from `x`.
#
# Returns the surviving elements of `x` in their original order, or a single
# NA_character_ when nothing survives (including when `x` is empty).
remove_stopwords <- function(x, stopwords) {
  # match() yields 0 for elements with no counterpart in `stopwords`;
  # those are the ones to keep.
  is_kept <- match(x, stopwords, nomatch = 0L) == 0L
  kept <- x[is_kept]
  if (length(kept) == 0L) NA_character_ else kept
}