1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
|
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Author: Breno Leitao <leitao@debian.org>
"""
This test aims to evaluate the netpoll polling mechanism (as in
netpoll_poll_dev()). It presents a complex scenario where the network
attempts to send a packet but fails, prompting it to poll the NIC from within
the netpoll TX side.
This has been a crucial path in netpoll that was previously untested. Jakub
suggested using a single RX/TX queue, pushing traffic to the NIC, and then
sending netpoll messages (via netconsole) to trigger the poll.
In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
so, the test passes; otherwise it is reported as an expected failure (xfail).
This test is very dependent on the driver and environment, given we are trying
to trigger a tricky scenario.
"""
import errno
import logging
import os
import random
import string
import threading
import time
from typing import Optional
from lib.py import (
bpftrace,
CmdExitFailure,
defer,
ethtool,
GenerateTraffic,
ksft_exit,
ksft_pr,
ksft_run,
KsftFailEx,
KsftSkipEx,
NetDrvEpEnv,
KsftXfailEx,
)
# Configure logging: records are timestamped and only INFO and above are
# emitted, so the logging.debug() calls in this file stay silent unless the
# level is lowered here.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
# configfs directory under which netconsole dynamic targets are created
NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
# UDP ports written to the target's "remote_port" / "local_port" attributes
NETCONS_REMOTE_PORT: int = 6666
NETCONS_LOCAL_PORT: int = 1514
# Max number of iterations. Each iteration writes MAX_WRITES messages to
# /dev/kmsg and then deletes and recreates the netconsole target
ITERATIONS: int = 20
# Number of writes to /dev/kmsg per iteration
MAX_WRITES: int = 40
# MAPS contains the information coming from bpftrace. It will have only one
# key: "hits", which tells the number of times netpoll_poll_dev() was called
MAPS: dict[str, int] = {}
# Thread to run bpftrace in parallel; None until bpftrace_start() is called
BPF_THREAD: Optional[threading.Thread] = None
# Time bpftrace will be running in parallel (passed as the helper's
# `timeout` argument; presumably seconds -- confirm against the helper).
BPFTRACE_TIMEOUT: int = 10
def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
    """
    Read the current RX/TX ring sizes of an interface using ethtool.

    The caller uses the returned values to restore the ring sizes after the
    test has shrunk them.

    Args:
        interface_name: name of the network interface to query.

    Returns:
        An (rx, tx) tuple taken from the "rx" and "tx" fields of the first
        entry of the JSON output of `ethtool -g`.

    Raises:
        KsftSkipEx: if the ethtool command fails or its output does not
            contain the expected fields. Ring sizes that cannot be read
            back cannot be restored, so the test is skipped instead.
    """
    try:
        ethtool_result = ethtool(f"-g {interface_name}", json=True)[0]
        rxs = ethtool_result["rx"]
        txs = ethtool_result["tx"]
    except (CmdExitFailure, KeyError, IndexError) as exception:
        # CmdExitFailure covers ethtool itself exiting with an error (the
        # same failure the other ethtool helpers in this file handle);
        # KeyError/IndexError cover output without the expected fields.
        raise KsftSkipEx(
            f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
        ) from exception
    return rxs, txs
def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
    """
    Try to set the RX and TX ring sizes of an interface.

    ring_size is an (rx, tx) pair. Returns True when ethtool accepted the
    values and False when the command failed, so the caller can decide
    whether to try a different size.
    """
    rx_size, tx_size = ring_size[0], ring_size[1]
    logging.debug("Setting ring size to %d/%d", rx_size, tx_size)
    try:
        ethtool(f"-G {interface_name} rx {rx_size} tx {tx_size}")
    except CmdExitFailure:
        # A real device may reject the requested size. Do not treat it as
        # an error: the caller may retry with a higher value or, worst
        # case, keep the ring as it is.
        return False
    else:
        return True
def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
    """
    Read the number of RX, TX and combined queues using ethtool.

    Args:
        interface_name: name of the network interface to query.

    Returns:
        An (rx, tx, combined) tuple taken from the first entry of the JSON
        output of `ethtool -l`. A field that is absent from the output is
        reported as -1.

    Raises:
        KsftSkipEx: if the ethtool command fails or returns no entries.
            Queue counts that cannot be read back cannot be restored, so
            the test is skipped instead.
    """
    try:
        ethtool_result = ethtool(f"-l {interface_name}", json=True)[0]
    except (CmdExitFailure, IndexError) as exception:
        # CmdExitFailure covers ethtool itself exiting with an error, the
        # same failure ethtool_set_queues_cnt() turns into a skip.
        raise KsftSkipEx(
            f"Failed to read queues numbers: {exception}. Not going to mess with them."
        ) from exception

    rxq = ethtool_result.get("rx", -1)
    txq = ethtool_result.get("tx", -1)
    combined = ethtool_result.get("combined", -1)
    return rxq, txq, combined
def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
    """
    Set the number of RX, TX and combined queues using ethtool.

    queues is an (rx, tx, combined) triple. An entry equal to -1 is left
    out of the `ethtool -L` command line. Raises KsftSkipEx if the ethtool
    command fails.
    """
    rxq, txq, combined = queues
    requested = (("rx", rxq), ("tx", txq), ("combined", combined))
    # Only mention the queue types that carry a real value (not -1)
    options = "".join(
        f" {kind} {count}" for kind, count in requested if count != -1
    )
    cmdline = f"-L {interface_name}" + options
    logging.debug("calling: ethtool %s", cmdline)
    try:
        ethtool(cmdline)
    except CmdExitFailure as exception:
        raise KsftSkipEx(
            f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
        ) from exception
def netcons_generate_random_target_name() -> str:
    """Return 'netcons_' followed by 8 random lowercase letters or digits"""
    alphabet = string.ascii_lowercase + string.digits
    suffix_chars = random.choices(alphabet, k=8)
    return "netcons_" + "".join(suffix_chars)
def netcons_create_target(
    config_data: dict[str, str],
    target_name: str,
) -> None:
    """
    Create a netconsole dynamic target and write its configuration.

    A directory called target_name is created under
    NETCONSOLE_CONFIGFS_PATH. Each key of config_data is then written, in
    dictionary order, to the file of the same name inside that directory
    (values are converted with str()). Afterwards every attribute is read
    back and logged at debug level.

    Raises KsftFailEx if the directory cannot be created or if writing or
    reading back any attribute fails.
    """
    logging.debug("Using netconsole name: %s", target_name)
    target_dir = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}"

    try:
        os.makedirs(target_dir, exist_ok=True)
        logging.debug(
            "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
        )
    except OSError as exception:
        # An already existing target is fine; anything else is fatal
        if exception.errno != errno.EEXIST:
            raise KsftFailEx(
                f"Failed to create netconsole target directory: {exception}"
            ) from exception

    try:
        # First pass: push every setting into the target
        for key, value in config_data.items():
            attr_path = f"{target_dir}/{key}"
            logging.debug("Writing %s to %s", key, attr_path)
            with open(attr_path, "w", encoding="utf-8") as attr_file:
                # The file only takes text, whatever the value type is
                attr_file.write(str(value))

        # Second pass: dump what the kernel reports back, for debugging
        for debug_key in config_data:
            attr_path = f"{target_dir}/{debug_key}"
            with open(attr_path, "r", encoding="utf-8") as attr_file:
                content = attr_file.read()
                logging.debug(
                    "%s/%s/%s : %s",
                    NETCONSOLE_CONFIGFS_PATH,
                    target_name,
                    debug_key,
                    content.strip(),
                )
    except Exception as exception:
        raise KsftFailEx(
            f"Failed to configure netconsole target: {exception}"
        ) from exception
def netcons_configure_target(
    cfg: NetDrvEpEnv, interface_name: str, target_name: str
) -> None:
    """
    Create and enable a netconsole target bound to interface_name.

    Local and remote addresses come from cfg.addr and cfg.remote_addr; the
    ports come from NETCONS_LOCAL_PORT and NETCONS_REMOTE_PORT.
    """
    # netcons_create_target() writes the attributes in dictionary order, so
    # "enabled" is written last (presumably the target has to be fully
    # configured before it is switched on -- confirm before reordering).
    target_settings = {
        "extended": "1",
        "dev_name": interface_name,
        "local_port": NETCONS_LOCAL_PORT,
        "remote_port": NETCONS_REMOTE_PORT,
        "local_ip": cfg.addr,
        "remote_ip": cfg.remote_addr,
        "remote_mac": "00:00:00:00:00:00",  # Not important for this test
        "enabled": "1",
    }
    netcons_create_target(target_settings, target_name)
    logging.debug(
        "Created netconsole target: %s on interface %s", target_name, interface_name
    )
def netcons_delete_target(name: str) -> None:
    """
    Delete the netconsole dynamic target called name.

    Nothing happens when the target directory does not exist. Raises
    KsftFailEx if removing the directory fails.
    """
    target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
    try:
        if not os.path.exists(target_path):
            # No such target, nothing to remove
            return
        os.rmdir(target_path)
    except OSError as exception:
        raise KsftFailEx(
            f"Failed to delete netconsole target: {exception}"
        ) from exception
def netcons_load_module() -> None:
    """Best-effort attempt to load the netconsole kernel module"""
    command = "modprobe netconsole"
    # The exit status is not checked here
    os.system(command)
def bpftrace_call() -> None:
    """
    Run bpftrace to count how many times netpoll_poll_dev() is called.

    The maps returned by the bpftrace helper are stored in the global
    variable `MAPS`, where the main thread reads them.
    """
    # The result is published through the module-level MAPS
    global MAPS  # pylint: disable=W0603

    # Program handed to bpftrace, as in: bpftrace -e "expr"
    expr = "kprobe:netpoll_poll_dev { @hits = count(); }"
    result = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True)
    MAPS = result
    logging.debug("BPFtrace output: %s", MAPS)
def bpftrace_start():
    """
    Start `bpftrace_call` in a parallel thread.

    The thread is stored in the global `BPF_THREAD`. Raises KsftSkipEx if
    the thread is not alive right after being started.
    """
    global BPF_THREAD  # pylint: disable=W0603

    worker = threading.Thread(target=bpftrace_call)
    BPF_THREAD = worker
    worker.start()
    if worker.is_alive():
        return
    raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
def bpftrace_stop() -> None:
    """Wait for the bpftrace thread to finish, if one was started"""
    if not BPF_THREAD:
        # bpftrace_start() was never called, nothing to wait for
        return
    BPF_THREAD.join()
def bpftrace_any_hit(join: bool) -> bool:
    """
    Tell whether bpftrace saw netpoll_poll_dev() being called.

    The answer comes from the "hits" entry of the global `MAPS`. While the
    bpftrace thread is still running, the function either waits for it
    (join=True) or returns False right away (join=False).

    Raises KsftFailEx if the bpftrace thread was never started or if
    `MAPS` has no "hits" entry once the thread is done.
    """
    if not BPF_THREAD:
        raise KsftFailEx("BPFtrace didn't start")

    if BPF_THREAD.is_alive():
        if not join:
            # Still collecting and the caller does not want to block:
            # there is no result to look at yet
            return False
        # Block until bpftrace is done
        BPF_THREAD.join()

    logging.debug("MAPS coming from bpftrace = %s", MAPS)
    if "hits" not in MAPS:
        raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")

    logging.debug("Got a total of %d hits", MAPS["hits"])
    return MAPS["hits"] > 0
def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Send netconsole messages while bpftrace watches netpoll_poll_dev().

    Raises KsftXfailEx if bpftrace did not record any call to
    netpoll_poll_dev() by the time it finished.
    """
    # bpftrace has to be running before the first message goes out, so it
    # is started first and only then the messages are generated
    bpftrace_start()
    defer(bpftrace_stop)

    do_netpoll_flush(cfg, ifname, target_name)

    if not bpftrace_any_hit(join=True):
        raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")
    ksft_pr("netpoll_poll_dev() was called. Success")
def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Print messages to the console, trying to trigger a netpoll poll.

    The netconsole target is configured, then up to ITERATIONS rounds are
    run. Each round writes MAX_WRITES messages to /dev/kmsg, deletes and
    recreates the target, and sleeps briefly. The loop stops early once the
    bpftrace thread has finished or has already reported a hit.

    Must be called after bpftrace_start(), since it reads BPF_THREAD.

    Args:
        cfg: test environment, forwarded to netcons_configure_target().
        ifname: interface the netconsole target is bound to.
        target_name: name of the netconsole dynamic target.

    Raises:
        KsftFailEx: if writing to /dev/kmsg fails 5 times in total, or if
            the target cannot be deleted or recreated.
    """
    netcons_configure_target(cfg, ifname, target_name)
    # Total number of failed writes, counted across all iterations
    retry = 0

    for i in range(int(ITERATIONS)):
        if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
            # bpftrace is done, stop sending messages
            break

        msg = f"netcons test #{i}"
        # Open unbuffered: with the default buffered text mode, Python
        # would coalesce all the lines into a single write() issued when
        # the file is closed. That would produce one log record instead of
        # MAX_WRITES, and any OSError would surface at close time, outside
        # the try block below.
        with open("/dev/kmsg", "wb", buffering=0) as kmsg:
            for j in range(MAX_WRITES):
                try:
                    kmsg.write(f"{msg}-{j}\n".encode("utf-8"))
                except OSError as exception:
                    # in some cases, kmsg can be busy, so, back off and
                    # carry on with the next message a few times before
                    # giving up (the failed message is not sent again)
                    time.sleep(1)
                    retry += 1
                    if retry < 5:
                        logging.info("Failed to write to kmsg. Retrying")
                        continue
                    raise KsftFailEx(
                        f"Failed to write to kmsg: {exception}"
                    ) from exception

        netcons_delete_target(target_name)
        netcons_configure_target(cfg, ifname, target_name)
        # If we sleep here, we will have a better chance of triggering
        # This number is based on a few tests I ran while developing this test
        time.sleep(0.4)
def configure_network(ifname: str) -> None:
    """
    Shrink the queue count and ring sizes of ifname for the test.

    The previous queue counts and ring sizes are read first, and calls
    that set them back are registered with defer().
    """
    # A single queue of each kind makes congestion more likely
    prev_queues = ethtool_get_queues_cnt(ifname)
    logging.debug("RX/TX/combined queues: %s", prev_queues)
    # Queue kinds the device does not have (count <= 0) are passed through
    # unchanged; only the existing ones are reduced to 1
    single_queue = tuple(1 if count > 0 else count for count in prev_queues)
    ethtool_set_queues_cnt(ifname, single_queue)
    defer(ethtool_set_queues_cnt, ifname, prev_queues)

    # Go for the smallest ring size the hardware is willing to take. It is
    # not an error if none of the candidates is accepted.
    prev_ring_size = ethtool_get_ringsize(ifname)
    for candidate in ((1, 1), (128, 128), (256, 256)):
        if not ethtool_set_ringsize(ifname, candidate):
            continue
        # hardware accepted this candidate, no need to try bigger ones
        logging.debug("Set RX/TX ringsize to: %s from %s", candidate, prev_ring_size)
        break
    defer(ethtool_set_ringsize, ifname, prev_ring_size)
def test_netpoll(cfg: NetDrvEpEnv) -> None:
    """
    Test netpoll by sending traffic to the interface and then sending
    netconsole messages to trigger a poll.

    The interface is first reconfigured by configure_network(), which
    registers its own restore calls. Background traffic is then generated
    while do_netpoll_flush_monitored() sends the netconsole messages.

    Args:
        cfg: test environment providing the interface name and addresses.

    Raises:
        Whatever configure_network(), do_netpoll_flush_monitored() or the
        cleanup steps raise (KsftSkipEx, KsftXfailEx, KsftFailEx, ...).
    """
    ifname = cfg.ifname
    configure_network(ifname)
    target_name = netcons_generate_random_target_name()
    traffic = None

    try:
        traffic = GenerateTraffic(cfg)
        do_netpoll_flush_monitored(cfg, ifname, target_name)
    finally:
        try:
            if traffic is not None:
                traffic.stop()
        finally:
            # Remove the netconsole target even if stopping the traffic
            # generator raised, so no target is left behind.
            netcons_delete_target(target_name)
def test_check_dependencies() -> None:
    """Skip the test (KsftSkipEx) when the netconsole configfs directory is missing"""
    if os.path.exists(NETCONSOLE_CONFIGFS_PATH):
        return
    raise KsftSkipEx(
        f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set."  # pylint: disable=C0301
    )
def main() -> None:
    """
    Entry point: load netconsole, check dependencies and run the test.

    test_netpoll is run through ksft_run() inside a NetDrvEpEnv context,
    and ksft_exit() is called once that context has been left.
    """
    netcons_load_module()
    test_check_dependencies()

    test_cases = [test_netpoll]
    with NetDrvEpEnv(__file__) as cfg:
        ksft_run(test_cases, args=(cfg,))
    ksft_exit()


if __name__ == "__main__":
    main()
|