1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
|
## -----------------------------------------------------------------------------
# Attach stringr, then set the knitr chunk options used for rendered output.
library(stringr)
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
## -----------------------------------------------------------------------------
# Length of one literal string
str_length(string = "abc")
## -----------------------------------------------------------------------------
x <- c("abcdef", "ghifjk")
# Character at position 3 of each element
str_sub(x, start = 3, end = 3)
# From position 2 through the 2nd-to-last position (negative `end`)
str_sub(x, start = 2, end = -2)
## -----------------------------------------------------------------------------
# Assign "X" into position 3 of each element, then show the updated vector
str_sub(x, start = 3, end = 3) <- "X"
x
## -----------------------------------------------------------------------------
# Duplicate, passing one `times` value per element of x
str_dup(x, times = c(2, 3))
## -----------------------------------------------------------------------------
x <- c("abc", "defghi")
# No `side` supplied: the default pads on the left
str_pad(x, width = 10)
# Ask for padding on both sides
str_pad(x, width = 10, side = "both")
## -----------------------------------------------------------------------------
# Requested width (4) is smaller than the 6-character element "defghi"
str_pad(x, width = 4)
## -----------------------------------------------------------------------------
x <- c("Short", "This is a long string")
# Truncate to width 10 first, then pad that result on the right to width 10
str_pad(str_trunc(x, width = 10), width = 10, side = "right")
## -----------------------------------------------------------------------------
x <- c(" a ", "b ", " c")
# Trim with no `side` argument
str_trim(string = x)
# Trim the left side only
str_trim(string = x, side = "left")
## -----------------------------------------------------------------------------
# Build one string from four fragments with str_c()
jabberwocky <- str_c(
  "`Twas brillig, and the slithy toves ",
  "did gyre and gimble in the wabe: ",
  "All mimsy were the borogoves, ",
  "and the mome raths outgrabe. "
)
# Wrap at width 40 and write the wrapped text with cat()
cat(str_wrap(string = jabberwocky, width = 40))
## -----------------------------------------------------------------------------
x <- "I like horses."
# Upper, title and lower case without an explicit locale
str_to_upper(string = x)
str_to_title(string = x)
str_to_lower(string = x)
# Turkish distinguishes a dotted and a dotless i, so pass the "tr" locale
str_to_lower(string = x, locale = "tr")
## -----------------------------------------------------------------------------
x <- c("y", "i", "k")
# Ordering and sorted values without an explicit locale
str_order(x = x)
str_sort(x = x)
# Lithuanian places y between i and k
str_sort(x = x, locale = "lt")
## -----------------------------------------------------------------------------
# Sample inputs: one entry with no digits, two bare numbers, and one entry
# holding two labelled numbers
strings <- c(
  "apple",
  "219 733 8965",
  "329-293-8753",
  "Work: 579-499-7527; Home: 543.355.3679"
)

# Three capture groups (3, 3 and 4 digits; the first digit must be 2-9),
# each pair separated by a dash, a space or a dot
phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
## -----------------------------------------------------------------------------
# Does each string contain a phone number?
str_detect(string = strings, pattern = phone)
# Only the strings that do
str_subset(string = strings, pattern = phone)
## -----------------------------------------------------------------------------
# Number of phone numbers per string
str_count(string = strings, pattern = phone)
## -----------------------------------------------------------------------------
# Position of the phone number within each string; keep it in `loc` and show it
loc <- str_locate(string = strings, pattern = phone)
loc
# The *_all variant of the same lookup
str_locate_all(string = strings, pattern = phone)
## -----------------------------------------------------------------------------
# The phone numbers themselves
str_extract(string = strings, pattern = phone)
str_extract_all(string = strings, pattern = phone)
str_extract_all(string = strings, pattern = phone, simplify = TRUE)
## -----------------------------------------------------------------------------
# The three parenthesised components of the match
str_match(string = strings, pattern = phone)
str_match_all(string = strings, pattern = phone)
## -----------------------------------------------------------------------------
# Mask phone numbers with a placeholder: plain and *_all variants
str_replace(string = strings, pattern = phone, replacement = "XXX-XXX-XXXX")
str_replace_all(string = strings, pattern = phone, replacement = "XXX-XXX-XXXX")
## -----------------------------------------------------------------------------
# Split on "-" with no limit, then with the fixed variant and n = 2
str_split(string = "a-b-c", pattern = "-")
str_split_fixed(string = "a-b-c", pattern = "-", n = 2)
## -----------------------------------------------------------------------------
# Two spellings of a-with-acute
a1 <- "\u00e1"   # the single code point U+00E1
a2 <- "a\u0301"  # "a" followed by the code point U+0301
c(a1, a2)
# Compare the two with `==`
a1 == a2
## -----------------------------------------------------------------------------
# Look for a2 inside a1, first with the fixed() modifier and then with coll()
str_detect(string = a1, pattern = fixed(a2))
str_detect(string = a1, pattern = coll(a2))
## -----------------------------------------------------------------------------
# The four "i" letters used for the Turkish-locale examples. The two non-ASCII
# ones are written as \u escapes so the script is pure ASCII and yields the
# same strings regardless of the encoding the file is read with:
#   "I"       U+0049 LATIN CAPITAL LETTER I
#   "\u0130"  U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
#   "i"       U+0069 LATIN SMALL LETTER I
#   "\u0131"  U+0131 LATIN SMALL LETTER DOTLESS I
i <- c("I", "\u0130", "i", "\u0131")
i
# Case-insensitive coll() match for "i" with no locale argument
str_subset(string = i, pattern = coll("i", ignore_case = TRUE))
# The same query with the Turkish ("tr") locale
str_subset(string = i, pattern = coll("i", ignore_case = TRUE, locale = "tr"))
## -----------------------------------------------------------------------------
x <- "This is a sentence."
# Use a "word" boundary as the pattern for splitting, counting and extracting
str_split(string = x, pattern = boundary(type = "word"))
str_count(string = x, pattern = boundary(type = "word"))
str_extract_all(string = x, pattern = boundary(type = "word"))
## -----------------------------------------------------------------------------
# Use the empty string as the pattern for splitting and counting
str_split(string = x, pattern = "")
str_count(string = x, pattern = "")
|