1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
# R prefers to specify start / stop or start / end
# databases usually specify start / length
# https://www.postgresql.org/docs/current/functions-string.html
#' @export
#' @rdname sql_variant
sql_substr <- function(f = "SUBSTR") {
function(x, start, stop) {
start <- max(cast_number_whole(start), 1L)
stop <- max(cast_number_whole(stop), 1L)
length <- max(stop - start + 1L, 0L)
sql_call2(f, x, start, length)
}
}
cast_number_whole <- function(x, arg = caller_arg(x), call = caller_env()) {
check_number_whole(x, arg = arg, call = call)
vctrs::vec_cast(x, integer(), x_arg = arg)
}
# str_sub(x, start, end) - start and end can be negative
# SUBSTR(string, start, length) - start can be negative
#' @export
#' @rdname sql_variant
sql_str_sub <- function(
subset_f = "SUBSTR",
length_f = "LENGTH",
optional_length = TRUE
) {
function(string, start = 1L, end = -1L) {
start <- cast_number_whole(start)
end <- cast_number_whole(end)
start_sql <- start_pos(string, start, length_f)
if (optional_length && end == -1L) {
sql_call2(subset_f, string, start_sql)
} else {
if (end == 0L) {
length_sql <- 0L
} else if(start > 0 && end < 0) {
n <- start - end - 2L
if (n == 0) {
length_sql <- sql_call2(length_f, string)
} else {
length_sql <- sql_expr(!!sql_call2(length_f, string) - !!n)
}
} else {
length_sql <- pmax(end - start + 1L, 0L)
}
sql_call2(subset_f, string, start_sql, length_sql)
}
}
}
start_pos <- function(string, start, length_f) {
if (start == -1) {
sql_call2(length_f, string)
} else if (start < 0) {
sql_expr(!!sql_call2(length_f, string) - !!abs(start + 1L))
} else {
start
}
}
sql_str_trim <- function(string, side = c("both", "left", "right")) {
side <- match.arg(side)
switch(side,
left = sql_expr(ltrim(!!string)),
right = sql_expr(rtrim(!!string)),
both = sql_expr(ltrim(rtrim(!!string))),
)
}
sql_str_pattern_switch <- function(string,
pattern,
negate = FALSE,
f_fixed = NULL,
f_regex = NULL,
error_call = caller_env()) {
pattern_quo <- enquo(pattern)
is_fixed <- quo_is_call(pattern_quo, "fixed") || inherits(pattern, "stringr_fixed")
if (is_fixed) {
f_fixed(string, pattern, negate)
} else {
if (is_null(f_regex)) {
cli_abort("Only fixed patterns are supported on database backends.", call = error_call)
} else {
f_regex(string, pattern, negate)
}
}
}
# INSTR
# * SQLite https://www.sqlitetutorial.net/sqlite-functions/sqlite-instr/
# * MySQL https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_instr
# * Oracle https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/INSTR.html#GUID-47E3A7C4-ED72-458D-A1FA-25A9AD3BE113
# * Teradata https://docs.teradata.com/r/Teradata-VantageTM-SQL-Functions-Expressions-and-Predicates/March-2019/String-Operators-and-Functions/INSTR
# * Access https://support.microsoft.com/de-de/office/instr-funktion-85d3392c-3b1c-4232-bb18-77cd0cb8a55b
# * Hana https://help.sap.com/docs/SAP_HANA_PLATFORM/e8e6c8142e60469bb401de5fdb6f7c00/f5a9ca3718354a499a98ba61ae3da170.html
# * Hive https://www.revisitclass.com/hadoop/instr-function-in-hive-with-examples/
# * Impala https://impala.apache.org/docs/build/html/topics/impala_string_functions.html#string_functions__instr
# POSITION
# * Snowflake https://docs.snowflake.com/en/sql-reference/functions/position
sql_str_detect_fixed_instr <- function(type = c("detect", "start", "end")) {
type <- arg_match(type)
function(string, pattern, negate = FALSE) {
con <- sql_current_con()
pattern <- unclass(pattern)
index_sql <- glue_sql2(con, "INSTR({.val string}, {.val pattern})")
if (negate) {
switch(type,
detect = translate_sql(!!index_sql == 0L, con = con),
start = translate_sql(!!index_sql != 1L, con = con),
end = translate_sql(!!index_sql != nchar(!!string) - nchar(!!pattern) + 1L, con = con)
)
} else {
switch(type,
detect = translate_sql(!!index_sql > 0L, con = con),
start = translate_sql(!!index_sql == 1L, con = con),
end = translate_sql(!!index_sql == nchar(!!string) - nchar(!!pattern) + 1L, con = con)
)
}
}
}
sql_str_detect_fixed_position <- function(type = c("detect", "start", "end")) {
type <- arg_match(type)
function(string, pattern, negate = FALSE) {
con <- sql_current_con()
pattern <- unclass(pattern)
index_sql <- glue_sql2(con, "POSITION({.val pattern} in {.val string})")
if (negate) {
switch(type,
detect = translate_sql(!!index_sql == 0L, con = con),
start = translate_sql(!!index_sql != 1L, con = con),
end = translate_sql(!!index_sql != nchar(!!string) - nchar(!!pattern) + 1L, con = con)
)
} else {
switch(type,
detect = translate_sql(!!index_sql > 0L, con = con),
start = translate_sql(!!index_sql == 1L, con = con),
end = translate_sql(!!index_sql == nchar(!!string) - nchar(!!pattern) + 1L, con = con)
)
}
}
}
utils::globalVariables(c("ltrim", "rtrim"))
|