File: unicode.R

package info (click to toggle)
r-cran-cli 3.6.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,288 kB
  • sloc: ansic: 16,412; cpp: 37; sh: 13; makefile: 2
file content (30 lines) | stat: -rw-r--r-- 860 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# To regenerate the `wide_chars` table stored in R/sysdata.rda, run
# update_wide_unicode_data():

# Rebuild the `wide_chars` table from a Unicode EastAsianWidth.txt file and
# store it in an .rda file next to whatever that file already contains.
#
# Every entry whose second field is "W" becomes one row of a data frame with
# two character columns:
#   * test:  the first code point of the entry, as a one-character string
#   * regex: that character alone, or "<first>-<last>" (both as literal
#            characters) when the entry is a `XXXX..YYYY` range; presumably
#            meant to be spliced into a regex character class -- confirm
#            against the code that consumes `wide_chars`.
#
# Arguments:
#   url      URL or local path of the EastAsianWidth.txt file to read.
#   sysdata  Path of the .rda file to update. The default is relative to the
#            current working directory.
#
# Returns NULL invisibly; called for the side effect of rewriting `sysdata`.
# Stops with an error if `sysdata` does not exist, or if the input yields no
# wide entries or entries that are not upper-case hex code points / ranges,
# so that a format change upstream cannot silently wipe the stored table.
update_wide_unicode_data <- function(
    url = "https://unicode.org/Public/UNIDATA/EastAsianWidth.txt",
    sysdata = "R/sysdata.rda") {
  # Fail before fetching anything if there is no file to update.
  if (!file.exists(sysdata)) {
    stop("File not found: '", sysdata, "'", call. = FALSE)
  }

  tab <- read.delim(
    url,
    header = FALSE,
    sep = ";",
    comment.char = "#",
    strip.white = TRUE,
    # Code points such as "0020" must stay text, never be type-converted.
    colClasses = "character",
    stringsAsFactors = FALSE
  )

  # Keep wide ones. `%in%` treats a missing property as "not wide", whereas
  # `==` would propagate NA into the selection.
  wide <- tab$V1[tab$V2 %in% "W"]

  well_formed <- grepl("^[0-9A-F]+(\\.\\.[0-9A-F]+)?$", wide)
  if (length(wide) == 0L || !all(well_formed)) {
    stop(
      "Unexpected EastAsianWidth data: no wide entries, or entries that are ",
      "not hex code points or ranges",
      call. = FALSE
    )
  }

  # Convert hex code points ("1F300") straight to characters instead of
  # generating "\\U..." source text and evaluating it.
  hex_to_char <- function(hex) {
    intToUtf8(strtoi(hex, base = 16L), multiple = TRUE)
  }

  is_range <- grepl("..", wide, fixed = TRUE)
  first_chr <- hex_to_char(sub("\\.\\..*$", "", wide))
  last_chr <- hex_to_char(sub("^.*\\.\\.", "", wide))

  # Single code points map to themselves; ranges become "first-last".
  regex <- first_chr
  regex[is_range] <- paste0(first_chr[is_range], "-", last_chr[is_range])

  wide_chars <- data.frame(
    stringsAsFactors = FALSE,
    test = first_chr,
    regex = regex
  )

  # Load the existing objects into a scratch environment, replace only
  # `wide_chars`, and write everything back.
  env <- new.env(parent = emptyenv())
  load(sysdata, envir = env)
  env$wide_chars <- wide_chars

  # `all.names = TRUE` so that dot-prefixed objects already in the file are
  # not dropped on re-save.
  save(
    list = ls(env, all.names = TRUE),
    file = sysdata,
    envir = env,
    version = 2
  )
  invisible(NULL)
}