File: language.lua

package info (click to toggle)
lua-lpeg-patterns 0.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220 kB
  • sloc: makefile: 6
file content (82 lines) | stat: -rw-r--r-- 2,215 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
-- RFC 5646 Section 2.1

local lpeg = require "lpeg"
local core = require "lpeg_patterns.core"

local C = lpeg.C
local P = lpeg.P
local R = lpeg.R
local Cg = lpeg.Cg
local Ct = lpeg.Ct
local Cmt = lpeg.Cmt

-- Module table: the exported patterns are attached to it and it is returned
-- at the end of the file.
local M = {}

-- RFC 5646: alphanum = (ALPHA / DIGIT)
-- Built from the ALPHA and DIGIT patterns supplied by lpeg_patterns.core.
local alphanum = core.ALPHA + core.DIGIT

-- RFC 5646: extlang = 3ALPHA *2("-" 3ALPHA)
-- One three-letter subtag, optionally followed by up to two more.
-- Each subtag must not be followed by another letter or digit, so the first
-- three characters of a longer subtag are never taken as an extlang.
local extlang
do
	local alpha3 = core.ALPHA * core.ALPHA * core.ALPHA
	local subtag = alpha3 * -alphanum
	extlang = subtag * (P"-" * subtag)^-2
end

-- RFC 5646: language = 2*3ALPHA ["-" extlang] / 4ALPHA / 5*8ALPHA
-- Produces the named group capture "language" and, when an extended language
-- subtag follows a 2-3 letter language, the named group capture "extlang".
-- The alternatives are listed longest first because LPeg's ordered choice
-- commits to the first alternative that matches.
local language
do
	local ALPHA = core.ALPHA
	local two = ALPHA * ALPHA
	local four = two * two

	-- Wrap a pattern so that its match is stored under the "language" key.
	local function as_language(patt)
		return Cg(patt, "language")
	end

	local optional_extlang = (P"-" * Cg(extlang, "extlang"))^-1

	language = as_language(four * ALPHA * ALPHA^-3)
		+ as_language(four)
		+ as_language(two * ALPHA^-1) * optional_extlang
end

-- RFC 5646: script = 4ALPHA
local script
do
	local ALPHA = core.ALPHA
	local four_alpha = ALPHA * ALPHA * ALPHA * ALPHA
	-- The lookahead prevents interpretation of the start of a 'variant'
	-- (5-8 alphanumerics) as a script subtag.
	script = four_alpha * -alphanum
end

-- RFC 5646: region = 2ALPHA / 3DIGIT
local region
do
	local two_alpha = core.ALPHA * core.ALPHA
	local three_digit = core.DIGIT * core.DIGIT * core.DIGIT
	-- The lookahead prevents interpretation of the start of a 'variant'
	-- as a region subtag.
	region = (two_alpha + three_digit) * -alphanum
end

-- RFC 5646: variant = 5*8alphanum / (DIGIT 3alphanum)
-- The 5-8 character form must be tried first. LPeg's ordered choice commits
-- to the first alternative that matches, so trying "DIGIT 3alphanum" first
-- would cut a longer variant that starts with a digit (e.g. "1606nict") down
-- to its first four characters and leave the remainder unparsed.
-- The four-character form is only reached when fewer than five alphanumerics
-- are available, in which case it must start with a digit.
local variant = alphanum * alphanum * alphanum * alphanum * alphanum * alphanum^-3
	+ core.DIGIT * alphanum * alphanum * alphanum

-- RFC 5646: singleton = DIGIT / %x41-57 / %x59-5A / %x61-77 / %x79-7A
-- i.e. a digit or any ASCII letter other than "x"/"X".
local singleton = core.DIGIT + R"AW" + R"YZ" + R"aw" + R"yz"

-- RFC 5646: extension = singleton 1*("-" (2*8alphanum))
-- Produces two captures: the singleton as written, then a table holding the
-- extension's subtags, each passed through string.lower.
local extension
do
	local subtag = (alphanum * alphanum * alphanum^-6) / string.lower
	local subtag_list = Ct((P"-" * subtag)^1)
	extension = C(singleton) * subtag_list
end

-- RFC 5646: privateuse = "x" 1*("-" (1*8alphanum))
-- Language tags are case-insensitive (RFC 5646 Section 2.1.1), so the marker
-- is accepted as either "x" or "X". The singleton pattern already excludes
-- both cases, so an uppercase marker could not be matched by anything else.
-- Produces one capture: a table holding the private use subtags as written.
M.privateuse = (P"x" + P"X") * Ct((P"-" * C(alphanum*alphanum^-7))^1)

-- RFC 5646:
--   langtag = language ["-" script] ["-" region] *("-" variant)
--             *("-" extension) ["-" privateuse]
-- The decomposition is exposed through named group captures:
--   language, extlang  set by the `language` pattern
--   script, region     the matched subtag text
--   variant            table (sequence) of variant subtags
--   extension          table mapping each singleton (as written) to the
--                      table of its lowercased subtags
--   privateuse         table (sequence) of private use subtags
-- If an extension singleton is repeated, the extension part does not match.
M.langtag = language
	* (P"-" * Cg(script, "script"))^-1
	* (P"-" * Cg(region, "region"))^-1
	* Cg(Ct((P"-" * C(variant))^1), "variant")^-1
	* Cg(Cmt(Ct((P"-" * Ct(extension))^1), function(_, _, c)
		-- Can't use a fold with rawset as we want the pattern to not match if there is a duplicate extension
		local r = {}
		-- Singletons already encountered, keyed by their lowercase form:
		-- tags are case-insensitive, so "a" and "A" are the same singleton.
		local seen = {}
		for _, v in ipairs(c) do
			local a, b = v[1], v[2]
			local key = a:lower()
			if seen[key] then
				-- duplicate extension
				return false
			end
			seen[key] = true
			-- The key keeps the singleton's original spelling
			r[a] = b
		end
		return true, r
	end), "extension")^-1
	* (P"-" * Cg(M.privateuse, "privateuse"))^-1

-- RFC 5646 'irregular' grandfathered tags: complete tags that do not fit the
-- langtag production.
-- Language tags are case-insensitive (RFC 5646 Section 2.1.1), so every tag
-- is matched without regard to ASCII letter case.
local irregular
do
	-- Build a pattern that matches `tag` ignoring the case of its letters.
	local function caseless(tag)
		local patt = P(true)
		for ch in tag:gmatch(".") do
			local lower, upper = ch:lower(), ch:upper()
			if lower == upper then
				-- not a letter (here: "-")
				patt = patt * P(ch)
			else
				patt = patt * (P(lower) + P(upper))
			end
		end
		return patt
	end

	local tags = {
		"en-GB-oed",
		"i-ami",
		"i-bnn",
		"i-default",
		"i-enochian",
		"i-hak",
		"i-klingon",
		"i-lux",
		"i-mingo",
		"i-navajo",
		"i-pwn",
		"i-tao",
		"i-tay",
		"i-tsu",
		"sgn-BE-FR",
		"sgn-BE-NL",
		"sgn-CH-DE",
	}

	-- Ordered choice over all tags, in the order listed above
	irregular = P(false)
	for _, tag in ipairs(tags) do
		irregular = irregular + caseless(tag)
	end
end

-- RFC 5646: Language-Tag = langtag / privateuse / grandfathered
-- Produces a single capture: the text of the whole tag. The empty function
-- capture throws away the decomposition produced by the sub-patterns.
--
-- A complete irregular tag is tried first. LPeg's ordered choice commits to
-- the first alternative that matches, and M.langtag matches the leading
-- "en-GB" / "sgn-BE" / "sgn-CH" of the irregular tags, so with M.langtag first
-- those tags could never be matched in full.
-- The `-alphanum` guard keeps longer well-formed tags (e.g. an "oed..."
-- variant after "en-GB") going through M.langtag as before.
-- The unguarded `irregular` alternative at the end is kept as a fallback so
-- that every input that matched previously still matches.
M.Language_Tag = C(((irregular * -alphanum)
	+ M.langtag
	+ M.privateuse
	+ irregular) / function() end)

return M