1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
--[[
Copyright (c) 2025, Vsevolod Stakhov <vsevolod@rspamd.com>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--
---@module lua_url_filter
-- Fast URL validation during parsing - called from C
-- URLs passed as rspamd_text for efficient processing
local exports = {}
local rspamd_util = require "rspamd_util"
-- Filter result constants
exports.ACCEPT = 0
exports.SUSPICIOUS = 1
exports.REJECT = 2
-- Custom filters (user can add their own)
local custom_filters = {}
---
-- Register a custom URL filter
-- @param filter_func function(url_text, flags) -> result
function exports.register_filter(filter_func)
table.insert(custom_filters, filter_func)
end
---
-- Clear all custom filters (mainly for testing)
function exports.clear_filters()
custom_filters = {}
end
---
-- Main entry point called from C during URL parsing
-- @param url_text rspamd_text - URL string as text object
-- @param flags number - URL parsing flags
-- @return number - ACCEPT/SUSPICIOUS/REJECT
function exports.filter_url_string(url_text, flags)
-- Sanity check: URL length
local url_len = url_text:len()
if url_len > 2048 then
return exports.REJECT -- Overly long URL
end
-- Build control character set: 0x00-0x08, 0x0B-0x1F, 0x7F
-- (excluding \t=0x09 and \n=0x0A)
local control_chars = "\000\001\002\003\004\005\006\007\008" .. -- 0x00-0x08
"\011\012\013\014\015\016\017\018\019\020" .. -- 0x0B-0x14
"\021\022\023\024\025\026\027\028\029\030\031" .. -- 0x15-0x1F
"\127" -- 0x7F (DEL)
-- Check for control characters using memcspn
local span = url_text:memcspn(control_chars)
if span < url_len then
return exports.REJECT -- Control character found
end
-- UTF-8 validation (rspamd_util.is_valid_utf8 accepts both text and string)
if not rspamd_util.is_valid_utf8(url_text) then
return exports.REJECT -- Invalid UTF-8
end
-- Count @ signs and check user field using rspamd_text methods only
local at_count = 0
local first_at_pos = nil
local search_from = 1
-- Count @ signs using memchr
while search_from <= url_len do
local substr = url_text:sub(search_from)
local found = substr:memchr(string.byte('@'), false)
if not found or found == -1 then
break
end
at_count = at_count + 1
-- Adjust found position to be relative to start of url_text
local absolute_pos = search_from + found - 1
-- Defensive check: ensure position is within bounds (should always be true due to url_len check)
if absolute_pos > url_len then
return exports.REJECT -- Position overflow, should not happen
end
if at_count == 1 then
first_at_pos = absolute_pos
end
search_from = absolute_pos + 1 -- Move past the @ we just found
if at_count > 20 then
return exports.REJECT -- Way too many @ signs
end
end
-- Check user field length (if @ present)
if first_at_pos then
-- Find :// to determine start of user field
local schema_pos = url_text:find("://")
local user_start = schema_pos and (schema_pos + 3) or 1
local user_len = first_at_pos - user_start
if user_len > 512 then
return exports.REJECT -- Extremely long user field
elseif user_len > 64 then
return exports.SUSPICIOUS -- Long user field, mark for inspection
end
-- Multiple @ signs is suspicious
if at_count > 1 then
return exports.SUSPICIOUS
end
end
-- Run custom filters
for _, filter in ipairs(custom_filters) do
local result = filter(url_text, flags)
if result == exports.REJECT then
return exports.REJECT -- First filter to reject wins
end
-- Note: SUSPICIOUS results don't immediately return; we continue checking
-- other filters as one might REJECT (upgrade), but we won't downgrade to ACCEPT
end
return exports.ACCEPT
end
---
-- Filter URL object (called from Lua plugin context)
-- @param url userdata - URL object
-- @return number - ACCEPT/SUSPICIOUS/REJECT
function exports.filter_url(url)
if not url then
return exports.ACCEPT
end
-- Get URL as rspamd_text (pass true to get_text)
local url_text = url:get_text(true)
if not url_text then
return exports.ACCEPT
end
-- Get flags directly from URL object (no table conversion)
local flags = url:get_flags_num() or 0
return exports.filter_url_string(url_text, flags)
end
return exports
|