1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
-- URI
-- RFC 3986
local lpeg = require "lpeg"
local P = lpeg.P
local S = lpeg.S
local C = lpeg.C
local Cc = lpeg.Cc
local Cg = lpeg.Cg
local Cs = lpeg.Cs
local Ct = lpeg.Ct
local util = require "lpeg_patterns.util"
local core = require "lpeg_patterns.core"
local ALPHA = core.ALPHA
local DIGIT = core.DIGIT
local HEXDIG = core.HEXDIG
local IPv4address = require "lpeg_patterns.IPv4".IPv4address
local IPv6address = require "lpeg_patterns.IPv6".IPv6address
-- Module table returned at the end of the file; it starts out holding the
-- sub-delims character set (2.2).
local _M = {
	sub_delims = S"!$&'()*+,;=";
}
-- unreserved characters (2.3): letters, digits and "-", ".", "_", "~"
local unreserved = ALPHA + DIGIT + S"-._~"
-- Percent-encoded octet (2.1).
-- The value produced by util.read_hex for the two hex digits is used as the
-- octet's numeric value; the helper below picks the normalised representation
-- that ends up as the capture.
local function normalise_pct_octet(octet)
	local char = string.char(octet)
	if not unreserved:match(char) then
		-- stays encoded, normalised to upper-case hex digits (6.2.2.1)
		return string.format("%%%02X", octet)
	end
	-- unreserved characters are always decoded (2.3)
	return char
end
local pct_octet = HEXDIG * HEXDIG / util.read_hex
_M.pct_encoded = P"%" * pct_octet / normalise_pct_octet
-- scheme (3.1): a letter followed by letters, digits, "+", "-" or ".";
-- the capture is the matched text in lower case
local scheme_char = ALPHA + DIGIT + S"+-."
_M.scheme = (ALPHA * scheme_char^0) / string.lower
-- userinfo (3.2.1): may be empty; Cs is used so that the replacement values
-- produced by pct_encoded appear in the captured string
local userinfo_char = unreserved + _M.pct_encoded + _M.sub_delims + P":"
_M.userinfo = Cs(userinfo_char^0)
-- Host 3.2.2
-- Objects produced for "IPvFuture" literals: a table with the fields
-- `version` and `string`, printed back as "v<hex version>.<string>".
local IPvFuture_mt = { __name = "lpeg_patterns.IPvFuture" }
IPvFuture_mt.__tostring = function(self)
	return ("v%x.%s"):format(self.version, self.string)
end
-- Build an IPvFuture object from the two captures of the pattern below.
local function new_IPvFuture(version, address)
	local obj = {
		version = version;
		string = address;
	}
	return setmetatable(obj, IPvFuture_mt)
end
-- "v" or "V", hex digits (converted by util.read_hex), ".", then one or more
-- unreserved / sub-delims / ":" characters captured verbatim
local IPvFuture_version = HEXDIG^1 / util.read_hex
local IPvFuture_address = C((unreserved + _M.sub_delims + P":")^1)
local IPvFuture = S"vV" * IPvFuture_version * P"." * IPvFuture_address / new_IPvFuture
-- RFC 6874
-- Zone identifier: one or more unreserved or percent-encoded characters,
-- captured with pct_encoded's normalisation applied.
local ZoneID = Cs((unreserved + _M.pct_encoded)^1)
-- Store the zone id on the address object and hand the object back as the
-- capture. `zone` is nil when the optional "%25" suffix did not match.
-- NOTE(review): relies on setzoneid accepting nil - confirm in lpeg_patterns.IPv6.
local function attach_zoneid(address, zone)
	address:setzoneid(zone)
	return address
end
-- IPv6 address optionally followed by "%25" and a zone id
local zone_suffix = P"%25" * ZoneID
local IPv6addrz = IPv6address * zone_suffix^-1 / attach_zoneid
-- Bracketed literal: "[" IPv6 (with optional zone) or IPvFuture "]"
_M.IP_literal = P"[" * (IPv6addrz + IPvFuture) * P"]"
-- IP hosts are captured as the tostring() of the matched address object
local IP_host = (_M.IP_literal + IPv4address) / tostring
-- pct_encoded yields either a still-encoded "%XX" triplet or a decoded
-- character; only the decoded character is lower-cased.
local function lower_unless_encoded(s)
	if s:sub(1, 1) == "%" then
		return s
	end
	return string.lower(s)
end
local reg_name_char = unreserved / string.lower
	+ _M.pct_encoded / lower_unless_encoded
	+ _M.sub_delims
-- an empty reg-name is captured as nil instead of the empty string
local reg_name = Cs(reg_name_char^1) + Cc(nil)
-- IP hosts are tried before registered names
_M.host = IP_host + reg_name
-- port (3.2.3): zero or more digits, captured as a number.
-- tonumber returns nil for the empty string, so an empty port captures nil.
local function port_to_number(digits)
	return tonumber(digits)
end
_M.port = DIGIT^0 / port_to_number
-- Path 3.3
-- pchar: any character allowed inside a path segment
local pchar = unreserved + _M.pct_encoded + _M.sub_delims + S":@"
-- segment: possibly empty run of pchar; exported with a substitution capture
local segment = pchar^0
_M.segment = Cs(segment)
-- non-empty segment
local segment_nz = pchar^1
-- non-empty segment that contains no ":"
local segment_nz_nc = (pchar - P":")^1
-- a "/" followed by a (possibly empty) segment
local slash_segment = P"/" * segment
-- the empty path is captured as nil rather than ""
local path_empty = Cc(nil)
-- begins with "/" or is empty
local path_abempty = Cs(slash_segment^1) + path_empty
-- begins with a non-empty segment
local path_rootless = Cs(segment_nz * slash_segment^0)
-- begins with a non-empty segment that has no ":"
local path_noscheme = Cs(segment_nz_nc * slash_segment^0)
-- begins with "/" but not "//"
local path_absolute = Cs(P"/" * (segment_nz * slash_segment^0)^-1)
-- query (3.4): pchar plus "/" and "?"; may be empty
local query_char = pchar + S"/?"
_M.query = Cs(query_char^0)
-- fragment (3.5) uses the very same pattern as query
_M.fragment = _M.query
-- Put together with named captures
-- authority: optional "userinfo@", host, optional ":port".
-- The pieces are named groups so an enclosing Ct collects them as the table
-- fields "userinfo", "host" and "port".
local authority_userinfo = Cg(_M.userinfo, "userinfo") * P"@"
local authority_port = P":" * Cg(_M.port, "port")
_M.authority = authority_userinfo^-1 * Cg(_M.host, "host") * authority_port^-1
-- hier-part: either "//" authority path-abempty, or a path without authority;
-- in both cases the path lands in the "path" field
local hier_with_authority = P"//" * _M.authority * Cg(path_abempty, "path")
local hier_without_authority = Cg(path_absolute + path_rootless + path_empty, "path")
local hier_part = hier_with_authority + hier_without_authority
-- Shared body of both patterns: mandatory "scheme:", hier-part, optional "?query"
local uri_scheme = Cg(_M.scheme, "scheme") * P":"
local uri_query = (P"?" * Cg(_M.query, "query"))^-1
local uri_body = uri_scheme * hier_part * uri_query
-- absolute URI: no fragment allowed; captures a table of the named parts
_M.absolute_uri = Ct(uri_body)
-- URI: as above plus an optional "#fragment"
local uri_fragment = (P"#" * Cg(_M.fragment, "fragment"))^-1
_M.uri = Ct(uri_body * uri_fragment)
-- relative-part: like hier-part, but without an authority the first segment
-- may not contain ":" (path_noscheme instead of path_rootless)
local relative_with_authority = P"//" * _M.authority * Cg(path_abempty, "path")
local relative_without_authority = Cg(path_absolute + path_noscheme + path_empty, "path")
_M.relative_part = relative_with_authority + relative_without_authority
-- relative reference: relative-part, optional "?query", optional "#fragment",
-- collected into a table of named fields
local relative_query = (P"?" * Cg(_M.query, "query"))^-1
local relative_fragment = (P"#" * Cg(_M.fragment, "fragment"))^-1
local relative_ref = Ct(_M.relative_part * relative_query * relative_fragment)
-- a full URI is tried first, then a relative reference
_M.uri_reference = _M.uri + relative_ref
-- Any path (3.3).
-- LPeg choice is ordered, and path_abempty ends in an alternative that always
-- succeeds (the empty path), so listing it first made every later alternative
-- unreachable: a rootless path such as "foo/bar" matched nothing and captured
-- nil. The alternatives are therefore ordered so that the empty path is only
-- the final fallback:
--   * input starting with "/": path_abempty (the lookahead guarantees its
--     non-empty branch matches; that branch also covers every path_absolute)
--   * otherwise: path_rootless (which also covers every path_noscheme)
--   * otherwise: the empty path, captured as nil
_M.path = #P"/" * path_abempty + path_rootless + path_empty
-- Create a slightly more sane URI pattern:
-- the scheme is optional
-- the "//" isn't required
-- the host must be an IP host, or a name that contains a "." and ends in at least two alpha characters
-- an authority is always required
-- Host characters are captured lower-cased
local sane_host_char = unreserved / string.lower
-- One dot-free label
local hostsegment = (sane_host_char - P".")^1
-- One or more "label." groups followed by a final label of at least two
-- letters. The final label is matched by ALPHA rather than sane_host_char, so
-- it needs its own lower-casing capture; without it "EXAMPLE.COM" was
-- normalised to "example.COM".
local dns_entry = Cs ( ( hostsegment * P"." )^1 * (ALPHA^2 / string.lower) )
-- IP hosts are tried before DNS-style names
_M.sane_host = IP_host + dns_entry
-- Same shape as the regular authority, but built on sane_host
local sane_userinfo = Cg(_M.userinfo, "userinfo") * P"@"
local sane_port = P":" * Cg(_M.port, "port")
_M.sane_authority = sane_userinfo^-1 * Cg(_M.sane_host, "host") * sane_port^-1
-- optional "//", mandatory authority, then an absolute or empty path
local sane_path = Cg(path_absolute + path_empty, "path")
local sane_hier_part = (P"//")^-1 * _M.sane_authority * sane_path
-- optional "scheme:", the hier-part above, optional "?query" and "#fragment";
-- captures a table of the named parts
local sane_scheme = (Cg(_M.scheme, "scheme") * P":")^-1
local sane_query = (P"?" * Cg(_M.query, "query"))^-1
local sane_fragment = (P"#" * Cg(_M.fragment, "fragment"))^-1
_M.sane_uri = Ct(sane_scheme * sane_hier_part * sane_query * sane_fragment)
return _M
|