1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
|
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */
#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag/lag.h"
#include "lag/mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
return ldev->mode == MLX5_LAG_MODE_MULTIPATH;
}
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
if (!mlx5_lag_is_ready(ldev))
return false;
if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
return false;
return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
ldev->pf[MLX5_LAG_P2].dev);
}
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
bool res;
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_multipath(ldev);
return res;
}
/**
* mlx5_lag_set_port_affinity
*
* @ldev: lag device
* @port:
* 0 - set normal affinity.
* 1 - set affinity to port 1.
* 2 - set affinity to port 2.
*
**/
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
enum mlx5_lag_port_affinity port)
{
struct lag_tracker tracker = {};
if (!__mlx5_lag_is_multipath(ldev))
return;
switch (port) {
case MLX5_LAG_NORMAL_AFFINITY:
tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
tracker.netdev_state[MLX5_LAG_P1].link_up = true;
tracker.netdev_state[MLX5_LAG_P2].link_up = true;
break;
case MLX5_LAG_P1_AFFINITY:
tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
tracker.netdev_state[MLX5_LAG_P1].link_up = true;
tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
tracker.netdev_state[MLX5_LAG_P2].link_up = false;
break;
case MLX5_LAG_P2_AFFINITY:
tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
tracker.netdev_state[MLX5_LAG_P1].link_up = false;
tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
tracker.netdev_state[MLX5_LAG_P2].link_up = true;
break;
default:
mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
"Invalid affinity port %d", port);
return;
}
if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
MLX5_DEV_EVENT_PORT_AFFINITY,
(void *)0);
if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
MLX5_DEV_EVENT_PORT_AFFINITY,
(void *)0);
mlx5_modify_lag(ldev, &tracker);
}
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
flush_workqueue(mp->wq);
}
static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len)
{
mp->fib.mfi = fi;
mp->fib.priority = fi->fib_priority;
mp->fib.dst = dst;
mp->fib.dst_len = dst_len;
}
struct mlx5_fib_event_work {
struct work_struct work;
struct mlx5_lag *ldev;
unsigned long event;
union {
struct fib_entry_notifier_info fen_info;
struct fib_nh_notifier_info fnh_info;
};
};
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event,
struct fib_entry_notifier_info *fen_info)
{
struct fib_info *fi = fen_info->fi;
struct lag_mp *mp = &ldev->lag_mp;
struct fib_nh *fib_nh0, *fib_nh1;
unsigned int nhs;
/* Handle delete event */
if (event == FIB_EVENT_ENTRY_DEL) {
/* stop track */
if (mp->fib.mfi == fi)
mp->fib.mfi = NULL;
return;
}
/* Handle multipath entry with lower priority value */
if (mp->fib.mfi && mp->fib.mfi != fi &&
(mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) &&
fi->fib_priority >= mp->fib.priority)
return;
/* Handle add/replace event */
nhs = fib_info_num_path(fi);
if (nhs == 1) {
if (__mlx5_lag_is_active(ldev)) {
struct fib_nh *nh = fib_info_nh(fi, 0);
struct net_device *nh_dev = nh->fib_nh_dev;
int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
if (i < 0)
return;
i++;
mlx5_lag_set_port_affinity(ldev, i);
mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
return;
}
if (nhs != 2)
return;
/* Verify next hops are ports of the same hca */
fib_nh0 = fib_info_nh(fi, 0);
fib_nh1 = fib_info_nh(fi, 1);
if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
"Multipath offload require two ports of the same HCA\n");
return;
}
/* First time we see multipath route */
if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) {
struct lag_tracker tracker;
tracker = ldev->tracker;
mlx5_activate_lag(ldev, &tracker, MLX5_LAG_MODE_MULTIPATH, false);
}
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len);
}
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
unsigned long event,
struct fib_nh *fib_nh,
struct fib_info *fi)
{
struct lag_mp *mp = &ldev->lag_mp;
/* Check the nh event is related to the route */
if (!mp->fib.mfi || mp->fib.mfi != fi)
return;
/* nh added/removed */
if (event == FIB_EVENT_NH_DEL) {
int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);
if (i >= 0) {
i = (i + 1) % 2 + 1; /* peer port */
mlx5_lag_set_port_affinity(ldev, i);
}
} else if (event == FIB_EVENT_NH_ADD &&
fib_info_num_path(fi) == 2) {
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
}
}
static void mlx5_lag_fib_update(struct work_struct *work)
{
struct mlx5_fib_event_work *fib_work =
container_of(work, struct mlx5_fib_event_work, work);
struct mlx5_lag *ldev = fib_work->ldev;
struct fib_nh *fib_nh;
/* Protect internal structures from changes */
rtnl_lock();
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_DEL:
mlx5_lag_fib_route_event(ldev, fib_work->event,
&fib_work->fen_info);
fib_info_put(fib_work->fen_info.fi);
break;
case FIB_EVENT_NH_ADD:
case FIB_EVENT_NH_DEL:
fib_nh = fib_work->fnh_info.fib_nh;
mlx5_lag_fib_nexthop_event(ldev,
fib_work->event,
fib_work->fnh_info.fib_nh,
fib_nh->nh_parent);
fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
break;
}
rtnl_unlock();
kfree(fib_work);
}
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
struct mlx5_fib_event_work *fib_work;
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
if (WARN_ON(!fib_work))
return NULL;
INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
fib_work->ldev = ldev;
fib_work->event = event;
return fib_work;
}
static int mlx5_lag_fib_event(struct notifier_block *nb,
unsigned long event,
void *ptr)
{
struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
struct fib_notifier_info *info = ptr;
struct mlx5_fib_event_work *fib_work;
struct fib_entry_notifier_info *fen_info;
struct fib_nh_notifier_info *fnh_info;
struct net_device *fib_dev;
struct fib_info *fi;
if (info->family != AF_INET)
return NOTIFY_DONE;
if (!mlx5_lag_multipath_check_prereq(ldev))
return NOTIFY_DONE;
switch (event) {
case FIB_EVENT_ENTRY_REPLACE:
case FIB_EVENT_ENTRY_DEL:
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
fi = fen_info->fi;
if (fi->nh)
return NOTIFY_DONE;
fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
return NOTIFY_DONE;
}
fib_work = mlx5_lag_init_fib_work(ldev, event);
if (!fib_work)
return NOTIFY_DONE;
fib_work->fen_info = *fen_info;
/* Take reference on fib_info to prevent it from being
* freed while work is queued. Release it afterwards.
*/
fib_info_hold(fib_work->fen_info.fi);
break;
case FIB_EVENT_NH_ADD:
case FIB_EVENT_NH_DEL:
fnh_info = container_of(info, struct fib_nh_notifier_info,
info);
fib_work = mlx5_lag_init_fib_work(ldev, event);
if (!fib_work)
return NOTIFY_DONE;
fib_work->fnh_info = *fnh_info;
fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
break;
default:
return NOTIFY_DONE;
}
queue_work(mp->wq, &fib_work->work);
return NOTIFY_DONE;
}
void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
/* Clear mfi, as it might become stale when a route delete event
* has been missed, see mlx5_lag_fib_route_event().
*/
ldev->lag_mp.fib.mfi = NULL;
}
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
struct lag_mp *mp = &ldev->lag_mp;
int err;
/* always clear mfi, as it might become stale when a route delete event
* has been missed
*/
mp->fib.mfi = NULL;
if (mp->fib_nb.notifier_call)
return 0;
mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
if (!mp->wq)
return -ENOMEM;
mp->fib_nb.notifier_call = mlx5_lag_fib_event;
err = register_fib_notifier(&init_net, &mp->fib_nb,
mlx5_lag_fib_event_flush, NULL);
if (err) {
destroy_workqueue(mp->wq);
mp->fib_nb.notifier_call = NULL;
}
return err;
}
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
struct lag_mp *mp = &ldev->lag_mp;
if (!mp->fib_nb.notifier_call)
return;
unregister_fib_notifier(&init_net, &mp->fib_nb);
destroy_workqueue(mp->wq);
mp->fib_nb.notifier_call = NULL;
mp->fib.mfi = NULL;
}
|