/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright Red Hat
* Author: David Gibson <david@gibson.dropbear.id.au>
*
* UDP flow tracking functions
*/
#include <errno.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <netinet/udp.h>
#include "util.h"
#include "passt.h"
#include "flow_table.h"
#include "udp_internal.h"
/* Inactivity limit, in seconds: udp_flow_timer() closes a flow once more than
 * this long has passed since the flow's timestamp was last refreshed.
 */
#define UDP_CONN_TIMEOUT 180 /* s, timeout for ephemeral or local bind */
/**
 * udp_at_sidx() - Look up the UDP flow a flow/side index refers to
 * @sidx:	Flow and side to retrieve
 *
 * Return: UDP specific flow at @sidx, or NULL if @sidx is invalid. Asserts if
 *         the flow at @sidx is not FLOW_UDP.
 */
struct udp_flow *udp_at_sidx(flow_sidx_t sidx)
{
	union flow *entry = flow_at_sidx(sidx);

	if (entry) {
		/* Callers must only hand us indices of UDP flows */
		ASSERT(entry->f.type == FLOW_UDP);
		return &entry->udp;
	}

	return NULL;
}
/**
 * udp_flow_close() - Close and clean up UDP flow
 * @c:		Execution context
 * @uflow:	UDP flow
 *
 * For each side of the flow, drop its hash table entry and, if the side has a
 * socket, remove it from epoll and close it. The flow is then marked closed;
 * calling this again on an already closed flow does nothing.
 */
void udp_flow_close(const struct ctx *c, struct udp_flow *uflow)
{
	unsigned side;

	if (uflow->closed)
		return;	/* Already torn down */

	flow_foreach_sidei(side) {
		flow_hash_remove(c, FLOW_SIDX(uflow, side));

		if (uflow->s[side] >= 0) {
			int fd = uflow->s[side];

			epoll_del(c, fd);
			close(fd);

			/* Mark the slot as having no socket */
			uflow->s[side] = -1;
		}
	}

	uflow->closed = true;
}
/**
 * udp_flow_sock() - Create, bind and connect a flow specific UDP socket
 * @c:		Execution context
 * @uflow:	UDP flow to open socket for
 * @sidei:	Side of @uflow to open socket for
 *
 * On success, the flush flag for @sidei (flush0 or flush1) is set on @uflow so
 * that udp_flow_defer() drains the new socket once.
 *
 * Return: fd of new socket on success, -ve error code on failure
 */
static int udp_flow_sock(const struct ctx *c,
			 struct udp_flow *uflow, unsigned sidei)
{
	const struct flowside *side = &uflow->f.side[sidei];
	uint8_t pif = uflow->f.pif[sidei];
	/* View the flow/side index as the 32-bit value handed to
	 * flowside_sock_l4() along with the socket
	 */
	union {
		flow_sidx_t sidx;
		uint32_t data;
	} fref = { .sidx = FLOW_SIDX(uflow, sidei) };
	int s;

	s = flowside_sock_l4(c, EPOLL_TYPE_UDP, pif, side, fref.data);
	if (s < 0) {
		flow_dbg_perror(uflow, "Couldn't open flow specific socket");
		return s;
	}

	if (flowside_connect(c, s, pif, side) < 0) {
		/* Save errno immediately, and report the failure before
		 * cleaning up: close() (and possibly epoll_del()) may change
		 * errno, which would make the logged error misleading.
		 */
		int rc = -errno;

		flow_dbg_perror(uflow, "Couldn't connect flow socket");

		epoll_del(c, s);
		close(s);
		return rc;
	}

	/* It's possible, if unlikely, that we could receive some packets in
	 * between the bind() and connect() which may or may not be for this
	 * flow.  Being UDP we could just discard them, but it's not ideal.
	 *
	 * There's also a tricky case if a bunch of datagrams for a new flow
	 * arrive in rapid succession, the first going to the original listening
	 * socket and later ones going to this new socket.  If we forwarded the
	 * datagrams from the new socket immediately here they would go before
	 * the datagram which established the flow.  Again, not strictly wrong
	 * for UDP, but not ideal.
	 *
	 * So, we flag that the new socket is in a transient state where it
	 * might have datagrams for a different flow queued.  Before the next
	 * epoll cycle, udp_flow_defer() will flush out any such datagrams, and
	 * thereafter everything on the new socket should be strictly for this
	 * flow.
	 */
	if (sidei)
		uflow->flush1 = true;
	else
		uflow->flush0 = true;

	return s;
}
/**
 * udp_flow_new() - Common setup for a new UDP flow
 * @c:		Execution context
 * @flow:	Initiated flow
 * @now:	Timestamp
 *
 * Return: sidx for the target side of the new UDP flow on success,
 *         FLOW_SIDX_NONE on failure (the flow allocation is then cancelled)
 *
 * #syscalls getsockname
 */
static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
				const struct timespec *now)
{
	struct udp_flow *uflow = NULL;
	const struct flowside *tgt;
	unsigned side;

	tgt = flow_target(c, flow, IPPROTO_UDP);
	if (!tgt)
		goto cancel;

	uflow = FLOW_SET_TYPE(flow, FLOW_UDP, udp);
	uflow->ts = now->tv_sec;

	/* No sockets yet, and no TTL recorded, on either side */
	uflow->s[TGTSIDE] = -1;
	uflow->s[INISIDE] = -1;
	uflow->ttl[TGTSIDE] = 0;
	uflow->ttl[INISIDE] = 0;

	/* Every side living on a socket interface gets its own socket */
	flow_foreach_sidei(side) {
		if (pif_is_socket(uflow->f.pif[side])) {
			uflow->s[side] = udp_flow_sock(c, uflow, side);
			if (uflow->s[side] < 0)
				goto cancel;
		}
	}

	if (uflow->s[TGTSIDE] >= 0 && inany_is_unspecified(&tgt->oaddr)) {
		/* When we target a socket, we connect() it, but might not
		 * always bind(), leaving the kernel to pick our address.  In
		 * that case connect() will implicitly bind() the socket, but we
		 * need to determine its local address so that we can match
		 * reply packets back to the correct flow.  Update the flow with
		 * the information from getsockname()
		 */
		union sockaddr_inany local;
		socklen_t local_len = sizeof(local);
		in_port_t local_port;

		if (getsockname(uflow->s[TGTSIDE], &local.sa, &local_len) < 0 ||
		    inany_from_sockaddr(&uflow->f.side[TGTSIDE].oaddr,
					&local_port, &local) < 0) {
			flow_perror(uflow, "Unable to determine local address");
			goto cancel;
		}

		/* Only the address was left open, the port must not change */
		if (local_port != tgt->oport) {
			flow_err(uflow, "Unexpected local port");
			goto cancel;
		}
	}

	/* Tap sides always need to be looked up by hash.  Socket sides don't
	 * always, but sometimes do (receiving packets on a socket not specific
	 * to one flow).  Unconditionally hash both sides so all our bases are
	 * covered
	 */
	flow_foreach_sidei(side)
		flow_hash_insert(c, FLOW_SIDX(uflow, side));

	FLOW_ACTIVATE(uflow);

	return FLOW_SIDX(uflow, TGTSIDE);

cancel:
	/* Undo whatever was set up; uflow is still NULL if we failed before
	 * the flow was typed as UDP
	 */
	if (uflow)
		udp_flow_close(c, uflow);
	flow_alloc_cancel(flow);
	return FLOW_SIDX_NONE;
}
/**
 * udp_flow_from_sock() - Find or create UDP flow for incoming datagram
 * @c:		Execution context
 * @pif:	Interface the datagram is arriving from
 * @dst:	Our (local) address to which the datagram is arriving
 * @port:	Our (local) port number to which the datagram is arriving
 * @s_in:	Source socket address, filled in by recvmmsg()
 * @now:	Timestamp
 *
 * An existing flow matching the datagram has its timestamp refreshed;
 * otherwise a new flow is allocated and set up.
 *
 * #syscalls fcntl arm:fcntl64 ppc64:fcntl64|fcntl i686:fcntl64
 *
 * Return: sidx for the destination side of the flow for this packet, or
 *         FLOW_SIDX_NONE if we couldn't find or create a flow.
 */
flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
			       const union inany_addr *dst, in_port_t port,
			       const union sockaddr_inany *s_in,
			       const struct timespec *now)
{
	const struct flowside *ini;
	struct udp_flow *known;
	union flow *fresh;
	flow_sidx_t match;

	/* Fast path: the datagram belongs to a flow we already track */
	match = flow_lookup_sa(c, IPPROTO_UDP, pif, s_in, dst, port);
	known = udp_at_sidx(match);
	if (known) {
		known->ts = now->tv_sec;
		return flow_sidx_opposite(match);
	}

	fresh = flow_alloc();
	if (!fresh) {
		char sastr[SOCKADDR_STRLEN];

		debug("Couldn't allocate flow for UDP datagram from %s %s",
		      pif_name(pif), sockaddr_ntop(s_in, sastr, sizeof(sastr)));
		return FLOW_SIDX_NONE;
	}

	ini = flow_initiate_sa(fresh, pif, s_in, dst, port);

	if (!inany_is_unicast(&ini->eaddr) || !ini->eport || !ini->oport) {
		/* In principle ini->oaddr also must be specified, but when
		 * we've been initiated from a socket bound to 0.0.0.0 or ::, we
		 * don't know our address, so we have to leave it unpopulated.
		 */
		flow_err(fresh, "Invalid endpoint on UDP recvfrom()");
		flow_alloc_cancel(fresh);
		return FLOW_SIDX_NONE;
	}

	return udp_flow_new(c, fresh, now);
}
/**
 * udp_flow_from_tap() - Find or create UDP flow for tap packets
 * @c:		Execution context
 * @pif:	pif on which the packet is arriving (must be PIF_TAP)
 * @af:		Address family, AF_INET or AF_INET6
 * @saddr:	Source address on guest side
 * @daddr:	Destination address guest side
 * @srcport:	Source port on guest side
 * @dstport:	Destination port on guest side
 * @now:	Timestamp
 *
 * An existing flow matching the packet has its timestamp refreshed; otherwise
 * a new flow is allocated and set up.
 *
 * Return: sidx for the destination side of the flow for this packet, or
 *         FLOW_SIDX_NONE if we couldn't find or create a flow.
 */
flow_sidx_t udp_flow_from_tap(const struct ctx *c,
			      uint8_t pif, sa_family_t af,
			      const void *saddr, const void *daddr,
			      in_port_t srcport, in_port_t dstport,
			      const struct timespec *now)
{
	const struct flowside *ini;
	struct udp_flow *known;
	union flow *fresh;
	flow_sidx_t match;

	ASSERT(pif == PIF_TAP);

	/* Fast path: the packet belongs to a flow we already track */
	match = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr,
			       srcport, dstport);
	known = udp_at_sidx(match);
	if (known) {
		known->ts = now->tv_sec;
		return flow_sidx_opposite(match);
	}

	fresh = flow_alloc();
	if (!fresh) {
		char sstr[INET6_ADDRSTRLEN], dstr[INET6_ADDRSTRLEN];

		debug("Couldn't allocate flow for UDP datagram from %s %s:%hu -> %s:%hu",
		      pif_name(pif),
		      inet_ntop(af, saddr, sstr, sizeof(sstr)), srcport,
		      inet_ntop(af, daddr, dstr, sizeof(dstr)), dstport);
		return FLOW_SIDX_NONE;
	}

	ini = flow_initiate_af(fresh, PIF_TAP, af, saddr, srcport,
			       daddr, dstport);

	/* Both endpoints need a specified address and a non-zero port */
	if (inany_is_unspecified(&ini->eaddr) || !ini->eport ||
	    inany_is_unspecified(&ini->oaddr) || !ini->oport) {
		flow_dbg(fresh, "Invalid endpoint on UDP packet");
		flow_alloc_cancel(fresh);
		return FLOW_SIDX_NONE;
	}

	return udp_flow_new(c, fresh, now);
}
/**
* udp_flush_flow() - Flush datagrams that might not be for this flow
* @c: Execution context
* @uflow: Flow to handle
* @sidei: Side of the flow to flush
* @now: Current timestamp
*/
static void udp_flush_flow(const struct ctx *c,
const struct udp_flow *uflow, unsigned sidei,
const struct timespec *now)
{
/* We don't know exactly where the datagrams will come from, but we know
* they'll have an interface and oport matching this flow */
udp_sock_fwd(c, uflow->s[sidei], uflow->f.pif[sidei],
uflow->f.side[sidei].oport, now);
}
/**
 * udp_flow_defer() - Deferred per-flow handling (flush new sockets, report
 *                    closed flows)
 * @c:		Execution context
 * @uflow:	Flow to handle
 * @now:	Current timestamp
 *
 * Any side whose flush flag is set has its socket flushed once, then both
 * flags are left clear.
 *
 * Return: true if the connection is ready to free, false otherwise
 */
bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow,
		    const struct timespec *now)
{
	/* Clearing a flag that was never set is harmless, so the resets below
	 * don't need to be conditional
	 */
	if (uflow->flush0)
		udp_flush_flow(c, uflow, INISIDE, now);
	uflow->flush0 = false;

	if (uflow->flush1)
		udp_flush_flow(c, uflow, TGTSIDE, now);
	uflow->flush1 = false;

	/* A closed flow has nothing left to do and can be freed */
	return uflow->closed;
}
/**
 * udp_flow_timer() - Handler for timed events related to a given flow
 * @c:		Execution context
 * @uflow:	UDP flow
 * @now:	Current timestamp
 *
 * Closes the flow once more than UDP_CONN_TIMEOUT seconds have passed since
 * its timestamp was last updated.
 *
 * Return: true if the flow is ready to free, false otherwise
 */
bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
		    const struct timespec *now)
{
	bool expired = now->tv_sec - uflow->ts > UDP_CONN_TIMEOUT;

	if (expired)
		udp_flow_close(c, uflow);

	return expired;
}