File: translate-sql-string.R

package info (click to toggle)
r-cran-dbplyr 2.5.0%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 2,644 kB
  • sloc: sh: 13; makefile: 2
file content (157 lines) | stat: -rw-r--r-- 5,412 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# R prefers to specify start / stop or start / end
# databases usually specify start / length
# https://www.postgresql.org/docs/current/functions-string.html
#' @export
#' @rdname sql_variant
sql_substr <- function(f = "SUBSTR") {
  function(x, start, stop) {
    start <- max(cast_number_whole(start), 1L)
    stop <- max(cast_number_whole(stop), 1L)
    length <- max(stop - start + 1L, 0L)

    sql_call2(f, x, start, length)
  }
}

cast_number_whole <- function(x, arg = caller_arg(x), call = caller_env()) {
  check_number_whole(x, arg = arg, call = call)
  vctrs::vec_cast(x, integer(), x_arg = arg)
}

# str_sub(x, start, end) - start and end can be negative
# SUBSTR(string, start, length) - start can be negative

#' @export
#' @rdname sql_variant
sql_str_sub <- function(
                        subset_f = "SUBSTR",
                        length_f = "LENGTH",
                        optional_length = TRUE
  ) {
  function(string, start = 1L, end = -1L) {
    start <- cast_number_whole(start)
    end <- cast_number_whole(end)

    start_sql <- start_pos(string, start, length_f)

    if (optional_length && end == -1L) {
      sql_call2(subset_f, string, start_sql)
    } else {
      if (end == 0L) {
        length_sql <- 0L
      } else if(start > 0 && end < 0) {
        n <- start - end - 2L
        if (n == 0) {
          length_sql <- sql_call2(length_f, string)
        } else {
          length_sql <- sql_expr(!!sql_call2(length_f, string) - !!n)
        }
      } else {
        length_sql <- pmax(end - start + 1L, 0L)
      }
      sql_call2(subset_f, string, start_sql, length_sql)
    }
  }
}

start_pos <- function(string, start, length_f) {
  if (start == -1) {
    sql_call2(length_f, string)
  } else if (start < 0) {
    sql_expr(!!sql_call2(length_f, string) - !!abs(start + 1L))
  } else {
    start
  }
}

sql_str_trim <- function(string, side = c("both", "left", "right")) {
  side <- match.arg(side)
  switch(side,
    left = sql_expr(ltrim(!!string)),
    right = sql_expr(rtrim(!!string)),
    both = sql_expr(ltrim(rtrim(!!string))),
  )
}



sql_str_pattern_switch <- function(string,
                                   pattern,
                                   negate = FALSE,
                                   f_fixed = NULL,
                                   f_regex = NULL,
                                   error_call = caller_env()) {
  pattern_quo <- enquo(pattern)
  is_fixed <- quo_is_call(pattern_quo, "fixed") || inherits(pattern, "stringr_fixed")

  if (is_fixed) {
    f_fixed(string, pattern, negate)
  } else {
    if (is_null(f_regex)) {
      cli_abort("Only fixed patterns are supported on database backends.", call = error_call)
    } else {
      f_regex(string, pattern, negate)
    }
  }
}

# INSTR
# * SQLite https://www.sqlitetutorial.net/sqlite-functions/sqlite-instr/
# * MySQL https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_instr
# * Oracle https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/INSTR.html#GUID-47E3A7C4-ED72-458D-A1FA-25A9AD3BE113
# * Teradata https://docs.teradata.com/r/Teradata-VantageTM-SQL-Functions-Expressions-and-Predicates/March-2019/String-Operators-and-Functions/INSTR
# * Access https://support.microsoft.com/de-de/office/instr-funktion-85d3392c-3b1c-4232-bb18-77cd0cb8a55b
# * Hana https://help.sap.com/docs/SAP_HANA_PLATFORM/e8e6c8142e60469bb401de5fdb6f7c00/f5a9ca3718354a499a98ba61ae3da170.html
# * Hive https://www.revisitclass.com/hadoop/instr-function-in-hive-with-examples/
# * Impala https://impala.apache.org/docs/build/html/topics/impala_string_functions.html#string_functions__instr
# POSITION
# * Snowflake https://docs.snowflake.com/en/sql-reference/functions/position
sql_str_detect_fixed_instr <- function(type = c("detect", "start", "end")) {
  type <- arg_match(type)

  function(string, pattern, negate = FALSE) {
    con <- sql_current_con()
    pattern <- unclass(pattern)
    index_sql <- glue_sql2(con, "INSTR({.val string}, {.val pattern})")

    if (negate) {
      switch(type,
        detect = translate_sql(!!index_sql == 0L, con = con),
        start = translate_sql(!!index_sql != 1L, con = con),
        end = translate_sql(!!index_sql != nchar(!!string) - nchar(!!pattern) + 1L, con = con)
      )
    } else {
      switch(type,
        detect = translate_sql(!!index_sql > 0L, con = con),
        start = translate_sql(!!index_sql == 1L, con = con),
        end = translate_sql(!!index_sql == nchar(!!string) - nchar(!!pattern) + 1L, con = con)
      )
    }
  }
}

sql_str_detect_fixed_position <- function(type = c("detect", "start", "end")) {
  type <- arg_match(type)

  function(string, pattern, negate = FALSE) {
    con <- sql_current_con()
    pattern <- unclass(pattern)
    index_sql <- glue_sql2(con, "POSITION({.val pattern} in {.val string})")

    if (negate) {
      switch(type,
        detect = translate_sql(!!index_sql == 0L, con = con),
        start = translate_sql(!!index_sql != 1L, con = con),
        end = translate_sql(!!index_sql != nchar(!!string) - nchar(!!pattern) + 1L, con = con)
      )
    } else {
      switch(type,
        detect = translate_sql(!!index_sql > 0L, con = con),
        start = translate_sql(!!index_sql == 1L, con = con),
        end = translate_sql(!!index_sql == nchar(!!string) - nchar(!!pattern) + 1L, con = con)
      )
    }
  }
}

utils::globalVariables(c("ltrim", "rtrim"))