1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
|
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <stdatomic.h>
#include "daemon/ratelimiting.h"
#include "lib/kru.h"
#include "lib/mmapped.h"
#include "lib/utils.h"
#include "lib/resolve.h"
#define V4_PREFIXES (uint8_t[]) { 18, 20, 24, 32 }
#define V4_RATE_MULT (kru_price_t[]) { 768, 256, 32, 1 }
#define V6_PREFIXES (uint8_t[]) { 32, 48, 56, 64, 128 }
#define V6_RATE_MULT (kru_price_t[]) { 64, 4, 3, 2, 1 }
#define V4_PREFIXES_CNT (sizeof(V4_PREFIXES) / sizeof(*V4_PREFIXES))
#define V6_PREFIXES_CNT (sizeof(V6_PREFIXES) / sizeof(*V6_PREFIXES))
#define MAX_PREFIXES_CNT ((V4_PREFIXES_CNT > V6_PREFIXES_CNT) ? V4_PREFIXES_CNT : V6_PREFIXES_CNT)
/* Header of the shared rate-limiting memory, followed by the KRU table.
 * The leading fields up to and including .using_avx2 form the configuration
 * header that is compared against an existing mmapped file on startup
 * (see ratelimiting_init); a static_assert there requires no padding
 * among them. */
struct ratelimiting {
	size_t capacity;         // configured number of tracked entities (rounded up to 2^capacity_log)
	uint32_t instant_limit;  // max instantaneous burst; defines base_price = KRU_LIMIT / instant_limit
	uint32_t rate_limit;     // sustained limit; scaled by 1/1000 against instant_limit in max_decay
	uint32_t log_period;     // min interval between log messages (kr_now() units); 0 disables logging
	uint16_t slip;           // every slip-th limited answer is truncated (TC=1) instead of dropped
	bool dry_run;            // log limiting decisions but do not enforce them
	bool using_avx2;         // which KRU implementation built the table (AVX2 vs generic)
	_Atomic uint32_t log_time;               // kr_now() timestamp of the last log message
	kru_price_t v4_prices[V4_PREFIXES_CNT];  // per-prefix base prices for IPv4 (base_price / V4_RATE_MULT)
	kru_price_t v6_prices[V6_PREFIXES_CNT];  // per-prefix base prices for IPv6 (base_price / V6_RATE_MULT)
	_Alignas(64) uint8_t kru[];              // KRU table, cache-line aligned (flexible array member)
};
/* Pointer into the shared mapping below; NULL until ratelimiting_init() succeeds. */
struct ratelimiting *ratelimiting = NULL;
/* The mmapped file backing the shared rate-limiting data. */
struct mmapped ratelimiting_mmapped = {0};
/**
 * Initialize shared rate-limiting data: create the mmapped file, or attach
 * to an existing one whose configuration header matches the parameters.
 *
 * @param mmap_file     path of the file backing the shared memory
 * @param capacity      number of tracked entities; rounded up to a power of two
 * @param instant_limit maximum instantaneous burst per entity; must be > 0
 * @param rate_limit    sustained limit (scaled by 1/1000 against instant_limit)
 * @param slip          every slip-th limited answer is truncated instead of dropped
 * @param log_period    minimum delay between rate-limiting log messages; 0 disables
 * @param dry_run       log limiting decisions without enforcing them
 * @return 0 on success, negative kr_error code otherwise
 */
int ratelimiting_init(const char *mmap_file, size_t capacity, uint32_t instant_limit,
		uint32_t rate_limit, uint16_t slip, uint32_t log_period, bool dry_run)
{
	int ret;

	// Reject parameters that would cause undefined behavior below:
	// capacity == 0 would underflow (capacity - 1), yielding a huge
	// capacity_log, and instant_limit == 0 would divide by zero when
	// computing base_price.
	if (capacity == 0 || instant_limit == 0) {
		ret = kr_error(EINVAL);
		goto fail;
	}

	size_t capacity_log = 0;  // ceil(log2(capacity))
	for (size_t c = capacity - 1; c > 0; c >>= 1) capacity_log++;

	size_t size = offsetof(struct ratelimiting, kru) + KRU.get_size(capacity_log);

	struct ratelimiting header = {
		.capacity = capacity,
		.instant_limit = instant_limit,
		.rate_limit = rate_limit,
		.log_period = log_period,
		.slip = slip,
		.dry_run = dry_run,
		.using_avx2 = kru_using_avx2()
	};

	// Only the leading fields up to .using_avx2 are compared byte-wise
	// against an existing file, so there must be no padding among them.
	size_t header_size = offsetof(struct ratelimiting, using_avx2) + sizeof(header.using_avx2);
	static_assert( // no padding up to .using_avx2
		offsetof(struct ratelimiting, using_avx2) ==
			sizeof(header.capacity) +
			sizeof(header.instant_limit) +
			sizeof(header.rate_limit) +
			sizeof(header.log_period) +
			sizeof(header.slip) +
			sizeof(header.dry_run),
		"detected padding with undefined data inside mmapped header");

	ret = mmapped_init(&ratelimiting_mmapped, mmap_file, size, &header, header_size, false);
	if (ret == MMAPPED_PENDING) {
		// We are the first process; initialize the KRU table.
		kr_log_info(SYSTEM, "Initializing rate-limiting...\n");

		ratelimiting = ratelimiting_mmapped.mem;

		const kru_price_t base_price = KRU_LIMIT / instant_limit;
		const kru_price_t max_decay = rate_limit > 1000ll * instant_limit ? base_price :
				(uint64_t) base_price * rate_limit / 1000;

		bool succ = KRU.initialize((struct kru *)ratelimiting->kru, capacity_log, max_decay);
		if (!succ) {
			ratelimiting = NULL;
			ret = kr_error(EINVAL);
			goto fail;
		}

		// Pretend the last log message is one period old, so logging may start immediately.
		ratelimiting->log_time = kr_now() - log_period;

		for (size_t i = 0; i < V4_PREFIXES_CNT; i++) {
			ratelimiting->v4_prices[i] = base_price / V4_RATE_MULT[i];
		}
		for (size_t i = 0; i < V6_PREFIXES_CNT; i++) {
			ratelimiting->v6_prices[i] = base_price / V6_RATE_MULT[i];
		}

		ret = mmapped_init_finish(&ratelimiting_mmapped);
		if (ret != 0) goto fail;

		kr_log_info(SYSTEM, "Rate-limiting initialized (%s).\n", (ratelimiting->using_avx2 ? "AVX2" : "generic"));
		return 0;
	} else if (ret == MMAPPED_EXISTING) {
		// Another process already initialized matching data; just attach.
		ratelimiting = ratelimiting_mmapped.mem;
		kr_log_info(SYSTEM, "Using existing rate-limiting data (%s).\n", (ratelimiting->using_avx2 ? "AVX2" : "generic"));
		return 0;
	} else {
		kr_assert(ret < 0); // no other combinations of mmapped state flags are allowed in non-persistent case
		// fall through to fail
	}

fail:
	kr_log_crit(SYSTEM, "Initialization of shared rate-limiting data failed.\n");
	return ret;
}
/* Detach from the shared rate-limiting data and forget the global pointer. */
void ratelimiting_deinit(void)
{
	// Clear the pointer first so nothing can dereference memory
	// that is about to be unmapped.
	ratelimiting = NULL;
	mmapped_deinit(&ratelimiting_mmapped);
}
/**
 * Decide whether to rate-limit an incoming request.
 *
 * Charges the client's address (per-prefix, scaled by price_factor16)
 * against the shared KRU table; when over the limit, either answers with
 * TC=1 to force retry over TCP (source-IP validation) or drops the query,
 * depending on the configured slip.
 *
 * @param req the incoming request; req->state and req->ratelimited may be set
 * @return true iff the request was actually limited (false on dry_run,
 *         even when the limit was exceeded)
 */
bool ratelimiting_request_begin(struct kr_request *req)
{
	if (!ratelimiting) return false;
	if (!req->qsource.addr)
		return false; // don't consider internal requests
	if (req->qsource.price_factor16 == 0)
		return false; // whitelisted

	// We only do this on pure UDP. (also TODO if cookies get implemented)
	const bool ip_validated = req->qsource.flags.tcp || req->qsource.flags.tls;
	if (ip_validated) return false;

	const uint32_t time_now = kr_now();

	// classify
	_Alignas(16) uint8_t key[16] = {0, };
	uint8_t limited_prefix;
	if (req->qsource.addr->sa_family == AF_INET6) {
		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)req->qsource.addr;
		memcpy(key, &ipv6->sin6_addr, 16);

		// compute adjusted prices, using standard rounding
		kru_price_t prices[V6_PREFIXES_CNT];
		for (size_t i = 0; i < V6_PREFIXES_CNT; ++i) {
			prices[i] = (req->qsource.price_factor16
					* (uint64_t)ratelimiting->v6_prices[i] + (1<<15)) >> 16;
		}

		limited_prefix = KRU.limited_multi_prefix_or((struct kru *)ratelimiting->kru, time_now,
				1, key, V6_PREFIXES, prices, V6_PREFIXES_CNT, NULL);
	} else {
		struct sockaddr_in *ipv4 = (struct sockaddr_in *)req->qsource.addr;
		memcpy(key, &ipv4->sin_addr, 4); // TODO append port?

		// compute adjusted prices, using standard rounding
		kru_price_t prices[V4_PREFIXES_CNT];
		for (size_t i = 0; i < V4_PREFIXES_CNT; ++i) {
			prices[i] = (req->qsource.price_factor16
					* (uint64_t)ratelimiting->v4_prices[i] + (1<<15)) >> 16;
		}

		limited_prefix = KRU.limited_multi_prefix_or((struct kru *)ratelimiting->kru, time_now,
				0, key, V4_PREFIXES, prices, V4_PREFIXES_CNT, NULL);
	}
	if (!limited_prefix) return false; // not limited

	// slip: truncating vs dropping
	// NOTE(review): kr_rand_bytes(1) appears to yield a single byte (0..255),
	// so for slip > 256 the truncation probability would flatten at 1/256 —
	// confirm the configured slip range fits in one byte.
	bool tc =
		(ratelimiting->slip > 1) ?
			((kr_rand_bytes(1) % ratelimiting->slip == 0) ? true : false) :
			((ratelimiting->slip == 1) ? true : false);

	// logging; the +1024 on both sides keeps the unsigned comparison sane
	// when log_time is slightly (< ~1 s) ahead of time_now due to concurrent updates
	uint32_t log_time_orig = atomic_load_explicit(&ratelimiting->log_time, memory_order_relaxed);
	if (ratelimiting->log_period) {
		while (time_now - log_time_orig + 1024 >= ratelimiting->log_period + 1024) {
			// CAS loop: only one thread/process per period wins and logs
			if (atomic_compare_exchange_weak_explicit(&ratelimiting->log_time, &log_time_orig, time_now,
					memory_order_relaxed, memory_order_relaxed)) {
				kr_log_notice(SYSTEM, "address %s rate-limited on /%d (%s%s)\n",
						kr_straddr(req->qsource.addr), limited_prefix,
						ratelimiting->dry_run ? "dry-run, " : "",
						tc ? "truncated" : "dropped");
				break;
			}
		}
	}

	req->ratelimited = true; // we set this even on dry_run

	if (ratelimiting->dry_run) return false;

	// perform limiting
	if (tc) { // TC=1: return truncated reply to force source IP validation
		knot_pkt_t *answer = kr_request_ensure_answer(req);
		if (!answer) { // something bad; TODO: perhaps improve recovery from this
			kr_assert(false);
			return true;
		}
		// at this point the packet should be pretty clear

		// The TC=1 answer is not perfect, as the right RCODE might differ
		// in some cases, but @vcunat thinks that NOERROR isn't really risky here.
		knot_wire_set_tc(answer->wire);
		knot_wire_clear_ad(answer->wire);
		req->state = KR_STATE_DONE;
	} else {
		// no answer
		req->options.NO_ANSWER = true;
		req->state = KR_STATE_FAIL;
	}

	return true;
}
|