1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465
|
// SPDX-License-Identifier: GPL-2.0
/*
* Test for sockmap/sockhash redirection.
*
* BPF_MAP_TYPE_SOCKMAP
* BPF_MAP_TYPE_SOCKHASH
* x
* sk_msg-to-egress
* sk_msg-to-ingress
* sk_skb-to-egress
* sk_skb-to-ingress
* x
* AF_INET, SOCK_STREAM
* AF_INET6, SOCK_STREAM
* AF_INET, SOCK_DGRAM
* AF_INET6, SOCK_DGRAM
* AF_UNIX, SOCK_STREAM
* AF_UNIX, SOCK_DGRAM
* AF_VSOCK, SOCK_STREAM
* AF_VSOCK, SOCK_SEQPACKET
*/
#include <errno.h>
#include <error.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <linux/string.h>
#include <linux/vm_sockets.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "linux/const.h"
#include "test_progs.h"
#include "sockmap_helpers.h"
#include "test_sockmap_redir.skel.h"
/* The meaning of SUPPORTED is "will redirect packet as expected".
*
* Bit 0 of the status mask produced by is_redir_supported() and
* get_support_status(). When the bit is clear and send() succeeded,
* test_send_redir_recv() delegates the checks to handle_unsupported() instead
* of expecting the packet on the receiving socket.
*/
#define SUPPORTED _BITUL(0)
/* Note on sk_skb-to-ingress ->af_vsock:
*
* Peer socket may receive the packet some time after the return from sendmsg().
* In a typical usage scenario, recvmsg() will block until the redirected packet
* appears in the destination queue, or timeout if the packet was dropped. By
* that point, the verdict map has already been updated to reflect what has
* happened.
*
* But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
* takes place. Which means we may race the execution of the verdict logic and
* read map_verd before it has been updated, i.e. we might observe
* map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
*
* This confuses the selftest logic: if there was no packet dropped, where's the
* packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
* (which implies the verdict program has not been run) just re-read the verdict
* map again.
*
* Bit 1 of the status mask: set by get_support_status() for SK_SKB_INGRESS
* when the destination socket kind starts with "v_", and consumed by
* handle_unsupported().
*/
#define UNSUPPORTED_RACY_VERD _BITUL(1)
/* Redirect variant under test. get_redir_params() maps each value to a verdict
* program (sk_msg vs. sk_skb) and to the redirect flags (BPF_F_INGRESS for the
* *_INGRESS variants, 0 for the *_EGRESS ones).
*/
enum prog_type {
SK_MSG_EGRESS,
SK_MSG_INGRESS,
SK_SKB_EGRESS,
SK_SKB_INGRESS,
};
/* Values for redir_spec.idx_send: index into socket_spec.in[] of the socket
* that calls send(). Index 0 is the socket test_socket() passes on as the one
* to insert into maps.in; index 1 is the other element of in[].
*/
enum {
SEND_INNER = 0,
SEND_OUTER,
};
/* Values for redir_spec.idx_recv: index into socket_spec.out[] of the socket
* expected to receive the redirected data. Index 0 is the socket test_socket()
* passes on as the one to insert into maps.out; index 1 is the other element
* of out[].
*/
enum {
RECV_INNER = 0,
RECV_OUTER,
};
/* File descriptors of the BPF maps used by one test_map() iteration. */
struct maps {
/* Map the verdict program is attached to; receives sd_in at key 0 */
int in;
/* Map that receives sd_out (the redirect destination) at key 0 */
int out;
/* Verdict counters, looked up by SK_DROP / SK_PASS */
int verd;
};
/* One row of the "supported redirect" table in is_redir_supported(). */
struct combo_spec {
enum prog_type prog_type;
/* Socket kind prefixes matched with strstarts(); "any" is a wildcard */
const char *in, *out;
};
/* Describes one redirect variant exercised by test_map(). */
struct redir_spec {
/* Label used when building the subtest name */
const char *name;
/* SEND_INNER or SEND_OUTER: which socket_spec.in[] element sends */
int idx_send;
/* RECV_INNER or RECV_OUTER: which socket_spec.out[] element receives */
int idx_recv;
enum prog_type prog_type;
};
/* A socket flavour under test together with the descriptors created for it. */
struct socket_spec {
/* Address family and socket type handed to create_socket_pairs() */
int family;
int sotype;
/* Flags passed to send(), e.g. MSG_OOB; 0 when not given in the initializer */
int send_flags;
/* Descriptors filled in by socket_spec_pairs(); in[] is the sending side */
int in[2];
/* Descriptors filled in by socket_spec_pairs(); out[] is the receiving side */
int out[2];
};
/*
 * Fill s->in[] and s->out[] by calling create_socket_pairs() with the
 * family and type recorded in @s.
 *
 * Returns the result of create_socket_pairs(); the caller treats any
 * non-zero value as failure.
 */
static int socket_spec_pairs(struct socket_spec *s)
{
	int *in = s->in;
	int *out = s->out;

	return create_socket_pairs(s->family, s->sotype,
				   &in[0], &out[0],
				   &in[1], &out[1]);
}
/* Close all four descriptors of @s: both in[] entries first, then out[]. */
static void socket_spec_close(struct socket_spec *s)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(s->in); i++)
		xclose(s->in[i]);

	for (i = 0; i < ARRAY_SIZE(s->out); i++)
		xclose(s->out[i]);
}
static void get_redir_params(struct redir_spec *redir,
struct test_sockmap_redir *skel, int *prog_fd,
enum bpf_attach_type *attach_type,
int *redirect_flags)
{
enum prog_type type = redir->prog_type;
struct bpf_program *prog;
bool sk_msg;
sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
*prog_fd = bpf_program__fd(prog);
*attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;
if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
*redirect_flags = BPF_F_INGRESS;
else
*redirect_flags = 0;
}
/*
 * Attempt to read a single byte from @fd and check the outcome.
 *
 * @prefix:         label prepended to any failure message
 * @fd:             socket to read from
 * @flags:          recv() flags, e.g. MSG_DONTWAIT and/or MSG_OOB
 * @expect_success: true if recv() must not fail; false if it must fail
 *
 * When failure is expected, any non-negative return value is reported: both
 * 0 and an actual byte read (n > 0) mean recv() did not fail. A check against
 * n == 0 alone would let stray data sitting in the queue go unnoticed.
 */
static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
{
	ssize_t n;
	char buf;

	/* Clear errno so that FAIL_ERRNO() reports this call's error only */
	errno = 0;
	n = recv(fd, &buf, 1, flags);

	if (n < 0 && expect_success)
		FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);

	if (n >= 0 && !expect_success)
		FAIL("%s: expected failure: retval=%zd", prefix, n);
}
/*
 * Check the aftermath of a successful send() on an unsupported redirect combo.
 *
 * Expectations: the verdict program never passed the packet; if it did not
 * drop it either, the packet must be readable on @sd_peer; and every other
 * socket involved must fail a non-blocking recv().
 *
 * With UNSUPPORTED_RACY_VERD set in @status, an all-zero verdict map is taken
 * to mean the verdict program has not run yet, and the map is re-read after
 * yielding the CPU. The retry is unbounded.
 */
static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
			       int sd_recv, int map_verd, int status)
{
	const bool racy = status & UNSUPPORTED_RACY_VERD;
	unsigned int n_drop, n_pass;
	ssize_t ret;
	char byte;

	for (;;) {
		if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &n_drop) ||
		    xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &n_pass))
			return;

		/* Done unless this is the racy case with no verdict recorded */
		if (!racy || n_pass != 0 || n_drop != 0)
			break;

		sched_yield();
	}

	if (n_pass != 0) {
		FAIL("unsupported: wanted verdict pass 0, have %u", n_pass);
		return;
	}

	/* If nothing was dropped, packet should have reached the peer */
	if (n_drop == 0) {
		errno = 0;
		ret = recv_timeout(sd_peer, &byte, 1, 0, IO_TIMEOUT_SEC);
		if (ret != 1)
			FAIL_ERRNO("unsupported: packet missing, retval=%zd", ret);
	}

	/* Ensure queues are empty; skip descriptors that were already probed */
	try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
	if (sd_in != sd_send)
		try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);

	try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
	if (sd_recv != sd_out)
		try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
}
/*
 * Run one send -> redirect -> receive round and verify the result.
 *
 * @sd_send:    socket that calls send()
 * @send_flags: flags for send(); MSG_OOB adds a second (out-of-band) byte
 * @sd_peer:    the other socket of the sending pair, checked when the combo
 *              is unsupported
 * @sd_in:      socket inserted into maps->in at key 0
 * @sd_out:     socket inserted into maps->out at key 0
 * @sd_recv:    socket expected to receive the redirected byte
 * @maps:       map descriptors; maps->verd is zeroed before sending
 * @status:     SUPPORTED / UNSUPPORTED_RACY_VERD bits for this combo
 *
 * Both map entries are removed again before returning.
 */
static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
				 int sd_in, int sd_out, int sd_recv,
				 struct maps *maps, int status)
{
	const bool supported = status & SUPPORTED;
	const bool oob = send_flags & MSG_OOB;
	unsigned int n_drop, n_pass;
	char *payload = "ab";
	ssize_t ret, to_send;
	char rcvd = '\0';

	/* Zero out the verdict map */
	if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY))
		return;
	if (xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
		return;

	if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
		return;

	if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
		goto del_in;

	/* Last byte is OOB data when send_flags has MSG_OOB bit set */
	to_send = oob ? 2 : 1;

	ret = send(sd_send, payload, to_send, send_flags);
	if (ret < 0) {
		/* sk_msg redirect combo not supported? */
		if (supported || errno != EACCES)
			FAIL_ERRNO("send");
		goto out;
	}
	if (ret < to_send)
		FAIL("incomplete send");

	if (!supported) {
		handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
				   maps->verd, status);
		goto out;
	}

	errno = 0;
	ret = recv_timeout(sd_recv, &rcvd, 1, 0, IO_TIMEOUT_SEC);
	if (ret != 1) {
		FAIL_ERRNO("recv_timeout()");
		goto out;
	}

	/* Check verdict _after_ recv(); af_vsock may need time to catch up */
	if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &n_drop) ||
	    xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &n_pass))
		goto out;

	if (!(n_drop == 0 && n_pass == 1))
		FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
		     n_drop, n_pass);

	if (rcvd != payload[0])
		FAIL("recv(): payload check, %02x != %02x", rcvd, payload[0]);

	if (!oob)
		goto out;

	/* Fail reading OOB while in sockmap */
	try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
		 MSG_OOB | MSG_DONTWAIT, false);

	/* Remove sd_out from sockmap */
	xbpf_map_delete_elem(maps->out, &u32(0));

	/* Check that OOB was dropped on redirect */
	try_recv("recv(sd_out, MSG_OOB)", sd_out,
		 MSG_OOB | MSG_DONTWAIT, false);

	/* maps->out entry is already gone, only maps->in is left */
	goto del_in;

out:
	xbpf_map_delete_elem(maps->out, &u32(0));
del_in:
	xbpf_map_delete_elem(maps->in, &u32(0));
}
static int is_redir_supported(enum prog_type type, const char *in,
const char *out)
{
/* Matching based on strings returned by socket_kind_to_str():
* tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
* Plus a wildcard: any
* Not in use: u_seq, v_dgr
*/
struct combo_spec *c, combos[] = {
/* Send to local: TCP -> any, but vsock */
{ SK_MSG_INGRESS, "tcp", "tcp" },
{ SK_MSG_INGRESS, "tcp", "udp" },
{ SK_MSG_INGRESS, "tcp", "u_str" },
{ SK_MSG_INGRESS, "tcp", "u_dgr" },
/* Send to egress: TCP -> TCP */
{ SK_MSG_EGRESS, "tcp", "tcp" },
/* Ingress to egress: any -> any */
{ SK_SKB_EGRESS, "any", "any" },
/* Ingress to local: any -> any, but vsock */
{ SK_SKB_INGRESS, "any", "tcp" },
{ SK_SKB_INGRESS, "any", "udp" },
{ SK_SKB_INGRESS, "any", "u_str" },
{ SK_SKB_INGRESS, "any", "u_dgr" },
};
for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
if (c->prog_type == type &&
(!strcmp(c->in, "any") || strstarts(in, c->in)) &&
(!strcmp(c->out, "any") || strstarts(out, c->out)))
return SUPPORTED;
}
return 0;
}
/*
 * Build the status mask for a redirect combo: the SUPPORTED bit as decided by
 * is_redir_supported(), plus UNSUPPORTED_RACY_VERD for SK_SKB_INGRESS when the
 * destination kind starts with "v_".
 */
static int get_support_status(enum prog_type type, const char *in,
			      const char *out)
{
	int mask = is_redir_supported(type, in, out);
	bool racy = type == SK_SKB_INGRESS && strstarts(out, "v_");

	if (!racy)
		return mask;

	return mask | UNSUPPORTED_RACY_VERD;
}
/*
 * Run one subtest: redirect from the sockets of @s_in to the sockets of
 * @s_out using the variant described by @redir.
 *
 * Picks the descriptors to use from both specs, derives the expected support
 * status, builds the subtest name and, if the subtest is selected, calls
 * test_send_redir_recv().
 */
static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
			struct maps *maps, struct socket_spec *s_in,
			struct socket_spec *s_out)
{
	const int send_idx = redir->idx_send;
	const int sd_in = s_in->in[0];
	const int sd_out = s_out->out[0];
	const int sd_send = s_in->in[send_idx];
	const int sd_peer = s_in->in[send_idx ^ 1];	/* other element of in[] */
	const int sd_recv = s_out->out[redir->idx_recv];
	const int send_flags = s_in->send_flags;
	const char *kind_in, *kind_out, *map_str, *arrow, *oob_str;
	char name[MAX_TEST_NAME];
	int status;

	kind_in = socket_kind_to_str(sd_in);
	kind_out = socket_kind_to_str(sd_out);
	status = get_support_status(redir->prog_type, kind_in, kind_out);

	map_str = type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash";
	arrow = (status & SUPPORTED) ? "→" : " ";
	oob_str = (send_flags & MSG_OOB) ? "(OOB)" : "";

	snprintf(name, sizeof(name),
		 "%-4s %-17s %-5s %s %-5s%6s",
		 /* hash sk_skb-to-ingress u_str → v_str (OOB) */
		 map_str, redir->name, kind_in, arrow, kind_out, oob_str);

	if (!test__start_subtest(name))
		return;

	test_send_redir_recv(sd_send, send_flags, sd_peer, sd_in, sd_out,
			     sd_recv, maps, status);
}
/*
 * Exercise one redirect variant over every socket flavour combination.
 *
 * Creates the sockets for each entry of the local table, runs the variant
 * within each flavour (intra-proto) and then across flavours (cross-proto),
 * and finally closes whatever sockets were set up. If setting up an entry
 * fails, no subtest is run and the entries set up so far are closed.
 */
static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
		       struct maps *maps)
{
	struct socket_spec *s, sockets[] = {
		{ AF_INET, SOCK_STREAM },
		// { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
		{ AF_INET6, SOCK_STREAM },
		{ AF_INET, SOCK_DGRAM },
		{ AF_INET6, SOCK_DGRAM },
		{ AF_UNIX, SOCK_STREAM },
		{ AF_UNIX, SOCK_STREAM, MSG_OOB },
		{ AF_UNIX, SOCK_DGRAM },
		// { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */
		{ AF_VSOCK, SOCK_STREAM },
		// { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */
		{ AF_VSOCK, SOCK_SEQPACKET },
	};
	size_t i, j;

	/* On failure, s is left pointing at the entry that was not set up */
	for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
		if (socket_spec_pairs(s))
			goto out;

	/* Intra-proto */
	for (i = 0; i < ARRAY_SIZE(sockets); i++)
		test_socket(type, redir, maps, &sockets[i], &sockets[i]);

	/* Cross-proto */
	for (i = 0; i < ARRAY_SIZE(sockets); i++) {
		for (j = 0; j < ARRAY_SIZE(sockets); j++) {
			struct socket_spec *out = &sockets[j];
			struct socket_spec *in = &sockets[i];

			/* Skip intra-proto and between variants */
			if (out->send_flags ||
			    (in->family == out->family &&
			     in->sotype == out->sotype))
				continue;

			test_socket(type, redir, maps, in, out);
		}
	}
out:
	/*
	 * Close entries [sockets, s) in reverse order. Decrement only after
	 * checking the bound, so that no pointer before the start of the
	 * array is ever formed (that would be undefined behaviour).
	 */
	while (s > sockets)
		socket_spec_close(--s);
}
/*
 * Run every redirect variant against one map type.
 *
 * For each variant a fresh skeleton is loaded, the matching verdict program
 * is attached to the input map, test_redir() is run, and the program is
 * detached again. The skeleton is destroyed on every path once it has been
 * loaded, including the error paths. Any failure stops the remaining
 * variants.
 *
 * @type: BPF_MAP_TYPE_SOCKMAP or BPF_MAP_TYPE_SOCKHASH
 */
static void test_map(enum bpf_map_type type)
{
	struct redir_spec *r, redirs[] = {
		{ "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
		{ "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
		{ "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
		{ "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
	};

	for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
		enum bpf_attach_type attach_type;
		struct test_sockmap_redir *skel;
		struct maps maps;
		int prog_fd;
		int err;

		skel = test_sockmap_redir__open_and_load();
		if (!skel) {
			FAIL("open_and_load");
			return;
		}

		switch (type) {
		case BPF_MAP_TYPE_SOCKMAP:
			maps.in = bpf_map__fd(skel->maps.nop_map);
			maps.out = bpf_map__fd(skel->maps.sock_map);
			break;
		case BPF_MAP_TYPE_SOCKHASH:
			maps.in = bpf_map__fd(skel->maps.nop_hash);
			maps.out = bpf_map__fd(skel->maps.sock_hash);
			break;
		default:
			FAIL("Unsupported bpf_map_type");
			test_sockmap_redir__destroy(skel);
			return;
		}

		/* Tell the BPF side which map type and direction to use */
		skel->bss->redirect_type = type;
		maps.verd = bpf_map__fd(skel->maps.verdict_map);
		get_redir_params(r, skel, &prog_fd, &attach_type,
				 &skel->bss->redirect_flags);

		err = xbpf_prog_attach(prog_fd, maps.in, attach_type, 0);
		if (!err) {
			test_redir(type, r, &maps);
			err = xbpf_prog_detach2(prog_fd, maps.in, attach_type);
		}

		/* Release the skeleton whether or not attach/detach worked */
		test_sockmap_redir__destroy(skel);
		if (err)
			return;
	}
}
void serial_test_sockmap_redir(void)
{
test_map(BPF_MAP_TYPE_SOCKMAP);
test_map(BPF_MAP_TYPE_SOCKHASH);
}
|