1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
-- URL parser tests
context("URL check functions", function()
-- Rspamd Lua modules used by the tests in this context.
local mpool = require("rspamd_mempool")
local url = require("rspamd_url")
local logger = require("rspamd_logger")
local test_helper = require("rspamd_test_helper")
-- LuaJIT FFI is used to call a C function directly (declared in the cdef below).
local ffi = require("ffi")
-- Declare the C prototype so the function can be invoked through ffi.C.
ffi.cdef[[
void rspamd_http_normalize_path_inplace(char *path, size_t len, size_t *nlen);
]]
-- NOTE(review): presumably prepares the URL parser used by url.create;
-- confirm against rspamd_test_helper.
test_helper.init_url_parser()
-- Memory pool passed to every url.create() call in this context.
local pool = mpool.create()
-- Host/user extraction cases.
-- Each entry is {input text, {expected host, expected user}}.
-- A nil expected user means the user component is not checked for that input.
local cases = {
  {"test.com", {"test.com", nil}},
  {" test.com", {"test.com", nil}},
  {"<test.com> text", {"test.com", nil}},
  {"test.com. text", {"test.com", nil}},
  {"mailto:A.User@example.com text", {"example.com", "A.User"}},
  {"http://Тест.Рф:18 text", {"тест.рф", nil}},
  {"http://user:password@тест2.РФ:18 text", {"тест2.рф", "user"}},
  {"somebody@example.com", {"example.com", "somebody"}},
  {"https://127.0.0.1/abc text", {"127.0.0.1", nil}},
  {"https://127.0.0.1 text", {"127.0.0.1", nil}},
  {"https://[::1]:1", {"::1", nil}},
  {"https://user:password@[::1]:1", {"::1", nil}},
  {"https://user:password@[::1]", {"::1", nil}},
  {"https://user:password@[::1]/1", {"::1", nil}},
}
for i, c in ipairs(cases) do
  -- The URL is created while the context is being defined; the test closure
  -- only inspects the result.
  local res = url.create(pool, c[1])
  test("Extract urls from text" .. i, function()
    local expected_host, expected_user = c[2][1], c[2][2]

    assert_not_nil(res, "cannot parse " .. c[1])
    local t = res:to_table()
    assert_not_nil(t, "cannot convert to table " .. c[1])

    assert_equal(expected_host, t['host'],
      logger.slog('expected host "%s", but got "%s" in url %s => %s',
        expected_host, t['host'], c[1], t))

    if expected_user then
      -- Compare the user component itself; the previous version re-checked
      -- the host here, so a wrong user could never fail the test.
      assert_equal(expected_user, t['user'],
        logger.slog('expected user "%s", but got "%s" in url %s => %s',
          expected_user, t['user'], c[1], t))
    end
  end)
end
-- Full parse cases.
-- Each entry is {input, should_parse, expected fields}; the third element is
-- only present when should_parse is true.
-- Some cases from https://code.google.com/p/google-url/source/browse/trunk/src/url_canon_unittest.cc
cases = {
  {"http://%30%78%63%30%2e%30%32%35%30.01", true, { --0xc0.0250.01
    host = '192.168.0.1',
  }},
  {"http://www.google.com/foo?bar=baz#", true, {
    host = 'www.google.com', path = 'foo', query = 'bar=baz', tld = 'google.com'
  }},
  {"http://[www.google.com]/", false},
  {"<test.com", true, {
    host = 'test.com', tld = 'test.com',
  }},
  {"test.com>", false},
  {",test.com text", false},
  {"ht\ttp:@www.google.com:80/;p?#", false},
  {"http://user:pass@/", false},
  {"http://foo:-80/", false},
  {"http:////////user:@google.com:99?foo", true, {
    host = 'google.com', user = 'user', port = 99, query = 'foo'
  }},
  {"http://%25DOMAIN:foobar@foodomain.com/", true, {
    host = 'foodomain.com', user = '%25DOMAIN'
  }},
  {"http://0.0xFFFFFF", true, {
    host = '0.255.255.255'
  }},
  {"http://030052000001", true, {
    host = '192.168.0.1'
  }},
  {"http://0xc0.052000001", true, {
    host = '192.168.0.1'
  }},
  {"http://192.168.0.1.", true, {
    host = '192.168.0.1'
  }},
  {"http://[::eeee:192.168.0.1]", true, {
    host = '::eeee:c0a8:1'
  }},
  {"http://twitter.com#test", true, {
    host = 'twitter.com', fragment = 'test'
  }},
  {"http:www.twitter.com#test", true, {
    host = 'www.twitter.com', fragment = 'test'
  }},
}
for i, c in ipairs(cases) do
  -- The URL is created while the context is being defined; the test closure
  -- only inspects the result.
  local res = url.create(pool, c[1])
  test("Parse urls " .. i, function()
    if not c[2] then
      -- Negative case: the input must be rejected by the parser.
      assert_nil(res, "should not parse " .. c[1] .. ' parsed to: ' .. tostring(res))
      return
    end

    assert_not_nil(res, "cannot parse " .. c[1])
    local uf = res:to_table()
    assert_not_nil(uf, "cannot convert to table " .. c[1])

    -- Every expected field must be present with the expected value.
    for k, v in pairs(c[3]) do
      assert_not_nil(uf[k], k .. ' is missing in url, must be ' .. tostring(v))
      assert_equal(v, uf[k], logger.slog('expected "%s", for %s, but got "%s" in url %s => %s',
        v, k, uf[k], c[1], uf))
    end

    -- No unexpected fields may appear; 'url', 'protocol' and 'tld' are exempt.
    for k, v in pairs(uf) do
      if k ~= 'url' and k ~= 'protocol' and k ~= 'tld' then
        -- tostring() keeps the eagerly built message from raising a concat
        -- error when the value is not a string or a number.
        assert_not_nil(c[3][k], k .. ' should be absent but it is ' .. tostring(v) .. ' in: ' .. c[1])
      end
    end
  end)
end
-- Path normalisation cases: {input path, expected normalised path}.
cases = {
  {"/././foo", "/foo"},
  {"/a/b/c/./../../g", "/a/g"},
  {"/./.foo", "/.foo"},
  {"/foo/.", "/foo"},
  {"/foo/./", "/foo"},
  {"/foo/bar/..", "/foo"},
  {"/foo/bar/../", "/foo/"},
  {"/foo/..bar", "/foo/..bar"},
  {"/foo/bar/../ton", "/foo/ton"},
  {"/foo/bar/../ton/../../a", "/a"},
  {"/foo/../../..", "/"},
  {"/foo/../../../ton", "/ton"},
  {"////../..", "/"},
  {"./", ""},
  {"/./", "/"},
  {"/./././././././", "/"},
  {"/", "/"},
  {"/a/b", "/a/b"},
  {"/a/b/", "/a/b/"},
  {"..", "/"},
  {"/../", "/"},
  {"../", "/"},
}
for idx, case in ipairs(cases) do
  test("Normalize paths " .. idx, function()
    local input, expected = case[1], case[2]
    local input_len = #input

    -- Copy the input into a mutable byte buffer: the C function rewrites the
    -- buffer it is given and stores the resulting length in out_len[0].
    local out_len = ffi.new("size_t[1]")
    local path_buf = ffi.new("uint8_t[?]", input_len)
    ffi.copy(path_buf, input, input_len)

    ffi.C.rspamd_http_normalize_path_inplace(path_buf, input_len, out_len)

    local normalized = ffi.string(path_buf, tonumber(out_len[0]))
    assert_equal(expected, normalized,
      'expected ' .. expected .. ' but got ' .. normalized .. ' in path ' .. input)
  end)
end
end)
|