File: unicode.lua

package info (click to toggle)
awesome-extra 2023010601
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,340 kB
  • sloc: cpp: 112; sh: 84; makefile: 25; python: 11
file content (94 lines) | stat: -rw-r--r-- 1,589 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
-- Forward declarations; both are assigned further down, once it is known
-- that the pure-Lua implementation is needed.
local utf8length, utf8positions

-- Try the compiled implementation first. When it loads, it becomes this
-- module's value and nothing below this point runs.
local native_ok, native = pcall(require, 'obvious.lib.unicode.native')
if native_ok then
  return native
end

-- The native library could not be loaded: tell the user that the pure-Lua
-- fallback is in use.
local naughty = require('naughty')

naughty.notify({
  title   = 'Obvious',
  text    = 'Not using native Unicode library for obvious - it is highly recommended to compile the native library for accuracy',
  timeout = 0,
})

local sbyte = string.byte

-- Iterate over the byte offsets at which each character of `s` starts.
--
-- The step size is derived from the lead byte alone: >= 0xf0 advances by 4,
-- >= 0xe0 by 3, >= 0xc0 by 2, and anything else (ASCII as well as stray
-- continuation bytes) by 1. No validation of the sequence is performed.
--
-- This iterator is defined unconditionally: utf8sub relies on it even when
-- the built-in `utf8` library is present, and leaving it nil in that case
-- made utf8sub fail with "attempt to call a nil value".
--
-- @param s  string to walk
-- @return   iterator triple for a generic `for`, yielding 1-based byte
--           offsets; yields nothing for the empty string
function utf8positions(s)
  local function iter(str, pos)
    -- First call of the generic `for`: the control variable is still nil.
    if not pos then
      return 1
    end

    local byte = sbyte(str, pos)

    if byte >= 0xf0 then
      pos = pos + 4
    elseif byte >= 0xe0 then
      pos = pos + 3
    elseif byte >= 0xc0 then
      pos = pos + 2
    else
      pos = pos + 1
    end

    -- Stepping past the last byte ends the iteration.
    if pos > #str then
      return nil
    end

    return pos
  end

  -- The empty string has no first character, so `iter` (which always
  -- yields offset 1 first) must not be used for it.
  if s == '' then
    return function() end, s, nil
  end

  return iter, s, nil
end

if utf8 then
  -- Lua 5.3+: use the built-in character counter. Per the Lua reference
  -- manual it returns a false value plus a byte position when `s` contains
  -- an invalid sequence.
  utf8length = utf8.len
else
  -- Count the characters of `s` by walking its character start offsets.
  --
  -- @param s  string to measure
  -- @return   number of characters found by utf8positions
  function utf8length(s)
    local count = 0

    for _ in utf8positions(s) do
      count = count + 1
    end

    return count
  end
end

-- Extract a substring of `s` addressed by character index rather than by
-- byte offset.
--
-- @param s       source string
-- @param start   1-based index of the first character to include
-- @param finish  optional 1-based index of the last character to include;
--                when omitted the result runs to the end of `s`
-- @return        the selected substring, or '' when `start` does not
--                correspond to any character reached by the scan (for
--                example past the end of `s`, below 1, or with
--                finish < start - 1)
local function utf8sub(s, start, finish)
  local start_byte
  -- Default end: last byte of the string, used when `finish` is absent or
  -- lies at/after the final character.
  local end_byte = #s

  local charno = 1
  for pos in utf8positions(s) do
    if charno == start then
      start_byte = pos

      -- Without an explicit end there is nothing left to locate.
      if not finish then
        break
      end
    end

    -- The character after `finish` starts at `pos`, so the slice ends on
    -- the byte just before it.
    if finish and charno == finish + 1 then
      end_byte = pos - 1
      break
    end

    charno = charno + 1
  end

  -- `start` was never reached: string.sub would raise on a nil index, so
  -- return the empty slice instead, as string.sub does for out-of-range
  -- bounds.
  if not start_byte then
    return ''
  end

  return string.sub(s, start_byte, end_byte)
end

-- Public interface of the pure-Lua fallback: character count and
-- character-indexed substring.
local exports = {}

exports.length = utf8length
exports.sub    = utf8sub

return exports