File: uri.lua

package info (click to toggle)
lua-lpeg-patterns 0.5-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 220 kB
  • sloc: makefile: 6
file content (143 lines) | stat: -rw-r--r-- 4,261 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
-- URI
-- RFC 3986

local lpeg = require "lpeg"
local P = lpeg.P
local S = lpeg.S
local C = lpeg.C
local Cc = lpeg.Cc
local Cg = lpeg.Cg
local Cs = lpeg.Cs
local Ct = lpeg.Ct

local util = require "lpeg_patterns.util"

local core = require "lpeg_patterns.core"
local ALPHA = core.ALPHA
local DIGIT = core.DIGIT
local HEXDIG = core.HEXDIG

local IPv4address = require "lpeg_patterns.IPv4".IPv4address
local IPv6address = require "lpeg_patterns.IPv6".IPv6address

local _M = {}

-- 2.2: sub-delims
_M.sub_delims = S"!$&'()*+,;="
-- 2.3: unreserved characters
local unreserved = ALPHA + DIGIT + S"-._~"

-- Canonical text for one decoded percent-escape.
-- `octet` is whatever util.read_hex produced from the two hex digits; it is
-- fed to string.char, so it is expected to be a byte value.
-- Returns the literal character when that character is unreserved, otherwise
-- the escape written back out with upper-case hex digits.
local function canonical_escape(octet)
	local char = string.char(octet)
	if not unreserved:match(char) then
		-- normalise to upper-case (6.2.2.1)
		return string.format("%%%02X", octet)
	end
	-- always decode unreserved characters (2.3)
	return char
end

-- 2.1: pct-encoded = "%" HEXDIG HEXDIG
-- The capture value is the canonical form computed by canonical_escape; inside
-- a substitution capture (Cs) that value replaces the matched text.
_M.pct_encoded = P"%" * (HEXDIG * HEXDIG / util.read_hex) / canonical_escape

-- 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-- The whole match is handed to string.lower, so the capture is lower-case.
local scheme_tail = (ALPHA + DIGIT + S"+-.")^0
_M.scheme = (ALPHA * scheme_tail) / string.lower

-- 3.2.1: userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
-- Captured via substitution so percent-escapes come out in canonical form.
-- May match the empty string.
local userinfo_char = unreserved + _M.pct_encoded + _M.sub_delims + P":"
_M.userinfo = Cs(userinfo_char^0)

-- Host 3.2.2

-- Metatable shared by every IPvFuture address object.
local IPvFuture_mt = {
	__name = "lpeg_patterns.IPvFuture";
	-- Renders the address as "v<version in lower-case hex>.<string>".
	__tostring = function(addr)
		local version, rest = addr.version, addr.string
		return string.format("v%x.%s", version, rest)
	end;
}

-- Build an IPvFuture address object.
-- `version` is stored in the `version` field and `address` in the `string`
-- field; both are read back by __tostring.
local function new_IPvFuture(version, address)
	local obj = {version = version, string = address}
	return setmetatable(obj, IPvFuture_mt)
end
-- IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
-- The hex version (converted by util.read_hex) and the text after the dot are
-- passed, in that order, to new_IPvFuture.
local IPvFuture_version = HEXDIG^1 / util.read_hex
local IPvFuture_body = C((unreserved + _M.sub_delims + P":")^1)
local IPvFuture = (S"vV" * IPvFuture_version * P"." * IPvFuture_body) / new_IPvFuture

-- RFC 6874
local ZoneID = Cs((unreserved + _M.pct_encoded)^1)

-- Store the zone id on the captured IPv6 address and hand the address back.
-- `zoneid` is nil when the optional "%25" ZoneID suffix was absent.
local function attach_zoneid(address, zoneid)
	address:setzoneid(zoneid)
	return address
end
local IPv6addrz = (IPv6address * (P"%25" * ZoneID)^-1) / attach_zoneid

-- "[" ( IPv6 address with optional zone / IPvFuture ) "]"
_M.IP_literal = P"[" * (IPv6addrz + IPvFuture) * P"]"
-- Either kind of IP host; the captured value is passed through tostring.
local IP_host = (_M.IP_literal + IPv4address) / tostring
-- Lower-case a decoded character; leave a still-escaped "%XX" sequence alone
-- so its upper-case hex digits survive.
local function lower_unless_escaped(s)
	if s:sub(1, 1) == "%" then
		return s
	end
	return string.lower(s)
end

-- One character of a registered name, normalised to lower case.
local reg_name_char = unreserved / string.lower
	+ _M.pct_encoded / lower_unless_escaped
	+ _M.sub_delims

-- A non-empty registered name as a substituted string, or nil for an empty host.
local reg_name = Cs(reg_name_char^1) + Cc(nil)

-- IP hosts are tried first; anything else falls back to a registered name.
_M.host = IP_host + reg_name

-- 3.2.3: port = *DIGIT, converted with tonumber.
-- An empty port matches too, in which case tonumber("") gives nil.
_M.port = (DIGIT^0) / tonumber

-- Path 3.3
local pchar = unreserved + _M.pct_encoded + _M.sub_delims + S":@"
local segment = pchar^0                 -- segment: may be empty
local segment_nz = pchar^1              -- segment-nz: at least one pchar
local segment_nz_nc = (pchar - P":")^1  -- segment-nz-nc: non-empty, no ":"
_M.segment = Cs(segment)

-- *( "/" segment ): the tail shared by the path productions below
local more_segments = (P"/" * segment)^0

-- an empty path is nil instead of the empty string
local path_empty = Cc(nil)
-- path-abempty: one or more "/" segment, otherwise the empty path
local path_abempty = Cs(P"/" * segment * more_segments) + path_empty
-- path-rootless: begins with a non-empty segment
local path_rootless = Cs(segment_nz * more_segments)
-- path-noscheme: as rootless, but the first segment contains no ":"
local path_noscheme = Cs(segment_nz_nc * more_segments)
-- path-absolute: "/" optionally followed by a rootless path
local path_absolute = Cs(P"/" * (segment_nz * more_segments)^-1)

-- 3.4: query = *( pchar / "/" / "?" )
local query_char = pchar + S"/?"
_M.query = Cs(query_char^0)
-- 3.5: a fragment is matched by the same pattern as a query
_M.fragment = _M.query

-- Put together with named captures
-- authority = [ userinfo "@" ] host [ ":" port ]
-- Each component is a named group ("userinfo", "host", "port") so that an
-- enclosing table capture collects it under that key.
local userinfo_prefix = (Cg(_M.userinfo, "userinfo") * P"@")^-1
local port_suffix = (P":" * Cg(_M.port, "port"))^-1
_M.authority = userinfo_prefix * Cg(_M.host, "host") * port_suffix

-- Building blocks shared by the patterns below; each stores its component
-- under a named group for the enclosing table capture.
local scheme_prefix = Cg(_M.scheme, "scheme") * P":"
local query_suffix = (P"?" * Cg(_M.query, "query"))^-1
local fragment_suffix = (P"#" * Cg(_M.fragment, "fragment"))^-1

-- hier-part: "//" authority path-abempty, else a path without authority
local hier_with_authority = P"//" * _M.authority * Cg(path_abempty, "path")
local hier_without_authority = Cg(path_absolute + path_rootless + path_empty, "path")
local hier_part = hier_with_authority + hier_without_authority

-- 4.3: absolute-URI = scheme ":" hier-part [ "?" query ]   (no fragment)
_M.absolute_uri = Ct(scheme_prefix * hier_part * query_suffix)

-- 3: URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
_M.uri = Ct(scheme_prefix * hier_part * query_suffix * fragment_suffix)

-- 4.2: relative-part; like hier-part, but a path without authority may not
-- have a ":" in its first segment (path-noscheme instead of path-rootless)
_M.relative_part = P"//" * _M.authority * Cg(path_abempty, "path")
	+ Cg(path_absolute + path_noscheme + path_empty, "path")

-- 4.2: relative-ref = relative-part [ "?" query ] [ "#" fragment ]
local relative_ref = Ct(_M.relative_part * query_suffix * fragment_suffix)

-- 4.1: URI-reference; a full URI is tried before a relative reference
_M.uri_reference = _M.uri + relative_ref

-- Any of the five path productions of RFC 3986 section 3.3. The capture is
-- the path as a substituted string, or nil for the empty path.
--
-- LPeg's `+` is ordered choice, and path_abempty already ends in the
-- always-matching empty alternative. Listing it first made every later
-- alternative unreachable, so a rootless path such as "a/b" captured nil.
-- The alternatives are therefore tried from most to least greedy:
--   * one or more "/" segment (the non-empty half of path-abempty; this
--     also covers everything path_absolute can match)
--   * path_rootless before path_noscheme, because the latter stops at the
--     first ":" in the leading segment
--   * path_empty last
-- path_absolute and path_noscheme are kept so the list mirrors the RFC, even
-- though the alternatives before them already subsume them.
_M.path = Cs((P"/" * segment)^1)
	+ path_absolute
	+ path_rootless
	+ path_noscheme
	+ path_empty

-- Create a slightly more sane host pattern
--   * the scheme is optional
--   * the "//" isn't required
--   * an authority is always required
--   * the host must be an IP host, or a dotted name with at least one "."
--     whose final label starts with two or more alpha characters
--     (this applies whether or not the "//" is present)
local sane_host_char = unreserved / string.lower
-- one label: a run of unreserved characters other than "."
local hostsegment = (sane_host_char - P".")^1
-- One or more "label." groups followed by the final alpha label.
-- The final label is lower-cased like the labels before it, so the captured
-- host is normalised as a whole (6.2.2.1), not just its leading labels.
-- The parentheses matter: `*` and `/` share a precedence level in Lua.
local dns_entry   = Cs ( ( hostsegment * P"." )^1 * (ALPHA^2 / string.lower) )
_M.sane_host = IP_host + dns_entry

-- Same shape as the regular authority, but using the stricter host pattern.
_M.sane_authority = ( Cg(_M.userinfo, "userinfo") * P"@" )^-1
	* Cg(_M.sane_host, "host")
	* ( P":" * Cg(_M.port, "port") )^-1

-- Optional "//", mandatory authority, then an absolute or empty path.
local sane_hier_part = (P"//")^-1 * _M.sane_authority * Cg(path_absolute + path_empty, "path")

-- Table capture with the named fields "scheme", "userinfo", "host", "port",
-- "path", "query" and "fragment" (those that are present in the input).
_M.sane_uri = Ct (
	( Cg(_M.scheme, "scheme") * P":" )^-1
	* sane_hier_part
	* ( P"?" * Cg(_M.query, "query"))^-1
	* ( P"#" * Cg(_M.fragment, "fragment"))^-1
)

return _M