1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
|
#!/usr/bin/python3
#
# Copyright (C) 2020-2022 Canonical, Ltd.
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
# Author: Lukas Märdian <slyon@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import json
import logging
import os
import subprocess
import typing
from typing import Dict, List, Optional, Set
from . import utils
from ..configmanager import ConfigurationError
import netplan
# PCIDevice class originates from mlnx_switchdev_mode/sriovify.py
# Copyright 2019 Canonical Ltd, Apache License, Version 2.0
# https://github.com/openstack-charmers/mlnx-switchdev-mode
class PCIDevice(object):
    """Thin wrapper around one PCI device as seen through sysfs and devlink"""

    def __init__(self, pci_addr: str):
        """Create a handler for a single PCI address

        :param pci_addr: PCI address of the device
        :type: str
        """
        self.pci_addr = pci_addr

    def _has_entry(self, name: str) -> bool:
        """Tell whether an entry exists below the device directory

        :param name: entry name relative to the device directory
        :type: str
        :return: result of os.path.exists() on that entry
        :rtype: bool
        """
        return os.path.exists(self.subpath(name))

    @property
    def sys(self) -> str:
        """Root of the sysfs tree (can be overridden for testing)

        :return: full path to the /sys filesystem
        :rtype: str
        """
        return "/sys"

    @property
    def path(self) -> str:
        """Directory of this PCI device inside sysfs

        :return: <sys>/bus/pci/devices/<pci_addr>
        :rtype: str
        """
        return os.path.join(self.sys, "bus/pci/devices", self.pci_addr)

    def subpath(self, subpath: str) -> str:
        """Build a path below the device directory

        :param subpath: path component(s) to append to the device directory
        :type: str
        :return: self.path joined with subpath
        :rtype: str
        """
        return os.path.join(self.path, subpath)

    @property
    def driver(self) -> str:
        """Name of the kernel driver the device is attached to

        :return: basename of the "driver" link target, or '' when the
                 "driver" entry does not exist
        :rtype: str
        """
        driver_link = self.subpath("driver")
        if not os.path.exists(driver_link):
            return ''
        return os.path.basename(os.readlink(driver_link))

    @property
    def bound(self) -> bool:
        """Whether the device is bound to a kernel driver

        :return: True when the "driver" entry exists
        :rtype: bool
        """
        return self._has_entry("driver")

    @property
    def is_pf(self) -> bool:
        """Whether the device is an SR-IOV Physical Function

        :return: True when the "sriov_numvfs" entry exists
        :rtype: bool
        """
        return self._has_entry("sriov_numvfs")

    @property
    def is_vf(self) -> bool:
        """Whether the device is an SR-IOV Virtual Function

        :return: True when the "physfn" entry exists
        :rtype: bool
        """
        return self._has_entry("physfn")

    @property
    def vf_addrs(self) -> list:
        """PCI addresses of the Virtual Functions of this Physical Function

        The virtfn0, virtfn1, ... links are followed in ascending order
        until the first index that does not exist.

        :return: List of PCI addresses of Virtual Functions
        :rtype: list[str]
        """
        addresses = []
        index = 0
        while True:
            try:
                target = os.readlink(self.subpath("virtfn{}".format(index)))
            except FileNotFoundError:
                # first gap in the numbering ends the enumeration
                return addresses
            addresses.append(os.path.basename(target))
            index += 1

    @property
    def vfs(self) -> list:
        """Virtual Functions of this Physical Function as device objects

        :return: one PCIDevice per entry of vf_addrs
        :rtype: list[PCIDevice]
        """
        return list(map(PCIDevice, self.vf_addrs))

    def devlink_set(self, obj_name: str, prop: str, value: str):
        """Set a devlink option on the PCI device

        Runs "/sbin/devlink dev <obj_name> set pci/<addr> <prop> <value>";
        a non-zero exit status surfaces as subprocess.CalledProcessError.

        :param obj_name: devlink object to set options on
        :type: str
        :param prop: property to set
        :type: str
        :param value: value to set for property
        :type: str
        """
        command = [
            "/sbin/devlink",
            "dev",
            obj_name,
            "set",
            "pci/{}".format(self.pci_addr),
            prop,
            value,
        ]
        subprocess.check_call(command)

    def devlink_eswitch_mode(self) -> str:
        """Ask devlink for the current eswitch mode of the PCI device

        :return: the eswitch mode or '__undetermined' if it can't be retrieved
        :rtype: str
        """
        fallback = '__undetermined'
        pci = f"pci/{self.pci_addr}"
        command = ["/sbin/devlink", "-j", "dev", "eswitch", "show", pci]
        try:
            raw = subprocess.check_output(command, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return fallback

        # The JSON document looks like this when the 'mode' is available:
        # {"dev":{"pci/0000:03:00.0":{"mode":"switchdev"}}}
        # and like this when it's not available
        # {"dev":{}}
        devices = json.loads(raw).get("dev", {})
        return devices.get(pci, {}).get('mode', fallback)

    def __str__(self) -> str:
        """String representation of the object

        :return: PCI address of the device
        :rtype: str
        """
        return self.pci_addr
def bind_vfs(vfs: typing.Iterable[PCIDevice], driver):
    """Bind every VF that currently has no driver to the given driver.

    The PCI address of each unbound VF is written to the driver's sysfs
    "bind" file. VFs that are already bound are left untouched.

    :param vfs: VF devices to consider
    :param driver: name of the kernel driver directory under /sys/bus/pci/drivers
    :return: list of the VFs a bind request was written for
    """
    bind_file = "/sys/bus/pci/drivers/{}/bind".format(driver)
    newly_bound = []
    for device in vfs:
        if device.bound:
            continue
        with open(bind_file, "wt") as handle:
            handle.write(device.pci_addr)
            newly_bound.append(device)
    return newly_bound
def unbind_vfs(vfs: typing.Iterable[PCIDevice], driver) -> typing.Iterable[PCIDevice]:
    """Detach every VF that currently has a driver from the given driver.

    The PCI address of each bound VF is written to the driver's sysfs
    "unbind" file. VFs without a driver are left untouched.

    :param vfs: VF devices to consider
    :param driver: name of the kernel driver directory under /sys/bus/pci/drivers
    :return: list of the VFs an unbind request was written for
    """
    unbind_file = "/sys/bus/pci/drivers/{}/unbind".format(driver)
    released = []
    for device in vfs:
        if not device.bound:
            continue
        with open(unbind_file, "wt") as handle:
            handle.write(device.pci_addr)
            released.append(device)
    return released
def _interface_matches(netdef: netplan.NetDefinition, interface: str) -> bool:
    """
    Check a system interface against a netdef by delegating to
    netdef._match_interface() with the interface's name, driver name and
    MAC address (the latter two are looked up through utils).
    """
    driver_name = utils.get_interface_driver_name(interface)
    mac_address = utils.get_interface_macaddress(interface)
    return netdef._match_interface(
        iface_name=interface,
        iface_driver=driver_name,
        iface_mac=mac_address,
    )
def _get_interface_name_for_netdef(netdef: netplan.NetDefinition) -> Optional[str]:
    """
    Try to match a netdef with the real system network interface.

    Resolution order:
    1. no match: stanza - the netdef ID itself, if such an interface exists
    2. match: plus set-name:, and an interface with that name exists - the
       set-name (the interface is assumed to be renamed already)
    3. otherwise - the single system interface satisfying the match: stanza

    Returns None when nothing matches.
    Throws ConfigurationError if there is more than one match.
    """
    interfaces: List[str] = utils.get_interfaces()

    if not netdef._has_match:
        # no match field, assume entry name is the interface name
        return netdef.id if netdef.id in interfaces else None

    # now here it's a bit tricky
    set_name: str = netdef.set_name
    if set_name and set_name in interfaces:
        # if we had a match: stanza and set-name: this means we should
        # assume that, if found, the interface has already been
        # renamed - use the new name
        return set_name

    # we walk through all the system interfaces to determine if there is
    # more than one matched interface
    matched: Optional[str] = None
    for interface in interfaces:
        if not _interface_matches(netdef, interface):
            continue
        # we have a matching PF
        # error out as soon as a second, different interface matches; the
        # check has to happen before the new match is recorded, otherwise
        # two matches would slip through and one would be picked arbitrarily
        if matched is not None and interface != matched:
            raise ConfigurationError('matched more than one interface for a PF device: %s' % netdef.id)
        matched = interface

    return matched
def _get_pci_slot_name(netdev):
    """
    Look up the PCI slot name of a network interface.

    Scans /sys/class/net/<netdev>/device/uevent for the first line starting
    with 'PCI_SLOT_NAME=' and returns the text after the '='. Returns None
    when the file has no such line and raises RuntimeError when the file
    cannot be read.
    """
    marker = 'PCI_SLOT_NAME='
    uevent_file = os.path.join('/sys/class/net', netdev, 'device/uevent')
    try:
        with open(uevent_file) as handle:
            for raw_line in handle.readlines():
                entry = raw_line.strip()
                if entry.startswith(marker):
                    return entry.split('=', 2)[1]
    except IOError as e:
        raise RuntimeError('failed parsing PCI slot name for %s: %s' % (netdev, str(e)))
    return None
def _get_physical_functions(np_state: netplan.State) -> Dict[str, str]:
    """
    Walk the netplan ethernet definitions and collect the ones acting as
    SR-IOV PFs, keyed by netdef ID and mapped to the matching system
    interface name. PFs without a matching system interface are left out.
    """
    pf_ifaces = {}
    for netdef in np_state.ethernets.values():
        sriov_link = netdef.links.get('sriov')
        if sriov_link:
            # An sriov link marks this netdef as a VF; the link target is its PF
            parent_iface = _get_interface_name_for_netdef(np_state[sriov_link.id])
            if parent_iface:
                pf_ifaces[sriov_link.id] = parent_iface
            continue

        # A netdef with the embedded_switch_mode key is treated as a PF, so
        # that the eswitch mode can be changed even when the PF has no VFs.
        if netdef._embedded_switch_mode:
            iface = _get_interface_name_for_netdef(netdef)
            if iface:
                pf_ifaces[netdef.id] = iface

        # Any positive VF count also marks the netdef as a PF
        try:
            vf_total = netdef._vf_count
        except netplan.NetplanException as e:
            raise ConfigurationError(str(e))
        if vf_total > 0:
            iface = _get_interface_name_for_netdef(netdef)
            if iface:
                pf_ifaces[netdef.id] = iface
    return pf_ifaces
def _get_vf_number_per_pf(np_state: netplan.State) -> Dict[str, int]:
    """
    Walk the netplan ethernet definitions and record, per matching system
    interface name, how many VFs the definition asks for. Definitions with
    a VF count of zero or without a matching interface are skipped.

    NOTE: per the original author's note, netdef._vf_count is backed by
    libnetplan's _netplan_state_get_vf_count_for_def, which yields
    MAX(sriov_explicit_vf_count, number of VF netdefs) - not verifiable
    from this file.
    """
    counts_by_iface = {}
    for netdef in np_state.ethernets.values():
        try:
            vf_total = netdef._vf_count
        except netplan.NetplanException as e:
            raise ConfigurationError(str(e))
        if vf_total <= 0:
            continue
        iface = _get_interface_name_for_netdef(netdef)
        if iface:
            counts_by_iface[iface] = vf_total
    return counts_by_iface
def _get_virtual_functions(np_state: netplan.State) -> Set[str]:
    """
    Walk the netplan ethernet definitions and return the IDs of those that
    are virtual functions, i.e. that carry an sriov link whose PF resolves
    to an interface present on the system.
    """
    return {
        netdef.id
        for netdef in np_state.ethernets.values()
        if (link := netdef.links.get('sriov'))
        and _get_interface_name_for_netdef(np_state[link.id])
    }
def set_numvfs_for_pf(pf, vf_count):
    """
    Allocate the requested number of VFs on the given PF by writing to its
    sriov_numvfs sysfs file.

    Raises ConfigurationError when vf_count exceeds 256 or the value read
    from sriov_totalvfs, and RuntimeError when sriov_totalvfs cannot be
    read/parsed or sriov_numvfs cannot be written. Returns True otherwise.
    """
    if vf_count > 256:
        raise ConfigurationError(
            'cannot allocate more VFs for PF %s than the SR-IOV maximum: %s > 256' % (pf, vf_count))

    device_dir = os.path.join('/sys/class/net', pf, 'device')
    totalvfs_file = os.path.join(device_dir, 'sriov_totalvfs')
    numvfs_file = os.path.join(device_dir, 'sriov_numvfs')

    try:
        with open(totalvfs_file) as handle:
            supported = int(handle.read().strip())
    except IOError as e:
        raise RuntimeError('failed parsing sriov_totalvfs for %s: %s' % (pf, str(e)))
    except ValueError:
        raise RuntimeError('invalid sriov_totalvfs value for %s' % pf)

    if vf_count > supported:
        raise ConfigurationError(
            'cannot allocate more VFs for PF %s than supported: %s > %s (sriov_totalvfs)' % (pf, vf_count, supported))

    requested = str(vf_count)
    try:
        with open(numvfs_file, 'w') as handle:
            handle.write(requested)
    except IOError as e:
        failure = e
        recovered = False
        if e.errno == 16:  # device or resource busy
            logging.warning('device or resource busy while setting sriov_numvfs for %s, trying workaround' % pf)
            try:
                # reset to 0 first, then write the real value; two separate
                # open/close cycles keep this as close as possible to what
                # writing via the shell would do
                with open(numvfs_file, 'w') as handle:
                    handle.write('0')
                with open(numvfs_file, 'w') as handle:
                    handle.write(requested)
            except IOError as e_inner:
                failure = e_inner
            else:
                recovered = True
        if not recovered:
            raise RuntimeError('failed setting sriov_numvfs to %s for %s: %s' % (vf_count, pf, str(failure)))

    return True
def perform_hardware_specific_quirks(pf):
    """
    Perform any hardware-specific quirks for the given SR-IOV device to make
    sure all the VF-count changes are applied.

    The PCI vendor and device IDs are read from the 'vendor' and 'device'
    files in /sys/class/net/<pf>/device (the leading two characters, i.e.
    the '0x' prefix, are dropped) and combined as '<vendor>:<device>'.
    That ID is looked up in the (currently empty) quirk table.

    Raises RuntimeError when either sysfs file cannot be read.
    """
    devdir = os.path.join('/sys/class/net', pf, 'device')
    try:
        # each ID must come from the sysfs file of the same name, otherwise
        # combined_id ends up as '<device>:<vendor>'
        with open(os.path.join(devdir, 'vendor')) as f:
            vendor_id = f.read().strip()[2:]
        with open(os.path.join(devdir, 'device')) as f:
            device_id = f.read().strip()[2:]
    except IOError as e:
        raise RuntimeError('could not determine vendor and device ID of %s: %s' % (pf, str(e)))

    combined_id = ':'.join([vendor_id, device_id])
    quirk_devices = ()  # TODO: add entries to the list
    if combined_id in quirk_devices:  # pragma: nocover (empty quirk_devices)
        # some devices need special handling, so this is the place

        # Currently this part is empty, but has been added as a preemptive
        # measure, as apparently a lot of SR-IOV cards have issues with
        # dynamically allocating VFs. Some cards seem to require a full
        # kernel module reload cycle after changing the sriov_numvfs value
        # for the changes to come into effect.
        # Any identified card/vendor can then be special-cased here, if
        # needed.
        pass
def apply_vlan_filter_for_vf(pf, vf, vlan_name, vlan_id, prefix='/'):
    """
    Install the hardware VLAN filter for one VF of a PF.

    The VF index needed by 'ip link set ... vf <index>' is derived by
    comparing the VF's device link target with the targets of the PF's
    virtfn* links. Raises RuntimeError when the index cannot be found or
    when the 'ip link set' command fails.
    """
    # the prefix argument is here only for unit testing purposes
    net_root = os.path.join(prefix, 'sys/class/net')
    vf_devdir = os.path.join(net_root, vf, 'device')
    wanted_dev_id = os.path.basename(os.readlink(vf_devdir))
    pf_devdir = os.path.join(net_root, pf, 'device')

    # knowing the VF interface name is not enough, the index is the part of
    # the virtfnN entry name after the 'virtfn' text; take the first entry
    # whose link target is the VF's own device
    vf_index = next(
        (
            entry[6:]
            for entry in os.listdir(pf_devdir)
            if 'virtfn' in entry
            and os.path.basename(os.readlink(os.path.join(pf_devdir, entry))) == wanted_dev_id
        ),
        None,
    )
    if not vf_index:
        raise RuntimeError(
            'could not determine the VF index for %s while configuring vlan %s' % (vf, vlan_name))

    # now, create the VLAN filter
    # TODO: would be best if we did this directly via python, without calling
    # the iproute tooling
    command = ['ip', 'link', 'set',
               'dev', pf,
               'vf', vf_index,
               'vlan', str(vlan_id)]
    try:
        subprocess.check_call(command,
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError:
        raise RuntimeError(
            'failed setting SR-IOV VLAN filter for vlan %s (ip link set command failed)' % vlan_name)
def apply_sriov_config(config_manager, rootdir='/'):
    """
    Go through all interfaces, identify which ones are SR-IOV VFs, create
    them and perform all other necessary setup.

    The work happens in this order:
    1. parse the configuration and collect VF counts, VF netdefs and PFs
    2. write the VF count of every PF (set_numvfs_for_pf) and run the
       hardware quirks hook for the PFs that were touched
    3. map every VF netdef to a system interface
    4. for PFs requesting the 'switchdev' or 'legacy' eswitch mode, switch
       the mode via devlink when it differs from the current one,
       unbinding the VFs before and (unless delayed) rebinding them after
    5. apply the hardware VLAN filter for every vlan flagged for it

    :param config_manager: object offering parse() and, afterwards, the
        parsed state in its np_state attribute
    :param rootdir: not referenced anywhere in this function body
    :raises ConfigurationError: when a VF netdef matches more than one
        interface, or a VF interface would get a second SR-IOV vlan; the
        helpers called from here raise further ConfigurationError and
        RuntimeError conditions of their own
    """
    config_manager.parse()
    interfaces = utils.get_interfaces()
    np_state = config_manager.np_state

    # for sr-iov devices, we identify VFs by them having a link: field
    # pointing to an PF. So let's browse through all ethernet devices,
    # find all that are VFs and count how many of those are linked to
    # particular PFs, as we need to then set the numvfs for each.
    # (vf_counts is keyed by PF *interface name*)
    vf_counts = _get_vf_number_per_pf(np_state)
    # we also store all matches between VF/PF netplan entry names and
    # interface that they're currently matching to
    # (vfs_set holds VF netdef IDs, pfs maps PF netdef ID -> interface name)
    vfs_set = _get_virtual_functions(np_state)
    pfs = _get_physical_functions(np_state)

    # setup the required number of VFs per PF
    # at the same time store which PFs got changed in case the NICs
    # require some special quirks for the VF number to change
    vf_count_changed = []
    if vf_counts:
        for pf, vf_count in vf_counts.items():
            # NOTE: set_numvfs_for_pf as defined in this file either returns
            # True or raises, so the 'continue' branch is not taken today
            if not set_numvfs_for_pf(pf, vf_count):
                continue

            vf_count_changed.append(pf)

    if vf_count_changed:
        # some cards need special treatment when we want to change the
        # number of enabled VFs
        for pf in vf_count_changed:
            perform_hardware_specific_quirks(pf)

        # also, since the VF number changed, the interfaces list also
        # changed, so we need to refresh it
        interfaces = utils.get_interfaces()

    # now in theory we should have all the new VFs set up and existing;
    # this is needed because we will have to now match the defined VF
    # entries to existing interfaces, otherwise we won't be able to set
    # filtered VLANs for those.
    # XXX: does matching those even make sense?
    # (vfs maps VF netdef ID -> matching interface name)
    vfs = {}
    for vf in vfs_set:
        netdef = np_state[vf]
        if netdef._has_match:
            # right now we only match by name, as I don't think matching per
            # driver and/or macaddress makes sense
            # TODO: print warning if other matches are provided
            for interface in interfaces:
                if netdef._match_interface(iface_name=interface):
                    # a second hit for the same VF netdef is a config error
                    if vf in vfs and vfs[vf]:
                        raise ConfigurationError('matched more than one interface for a VF device: %s' % vf)
                    vfs[vf] = interface
        else:
            # no match: stanza, the netdef ID is taken as the interface name
            if vf in interfaces:
                vfs[vf] = vf

    # Walk the SR-IOV PFs and check if we need to change the eswitch mode
    for netdef_id, iface in pfs.items():
        netdef = np_state[netdef_id]
        eswitch_mode = netdef._embedded_switch_mode
        # any other value (including an unset one) leaves the PF untouched
        if eswitch_mode in ['switchdev', 'legacy']:
            pci_addr = _get_pci_slot_name(iface)
            pcidev = PCIDevice(pci_addr)
            # '__undetermined' when devlink cannot report a mode, which
            # never equals a requested mode and so triggers a change attempt
            current_eswitch_mode_system = pcidev.devlink_eswitch_mode()
            if eswitch_mode != current_eswitch_mode_system:
                if pcidev.is_pf:
                    logging.debug("Found VFs of {}: {}".format(pcidev, pcidev.vf_addrs))
                    if pcidev.vfs:
                        # unbinding is best effort: a failure is logged and
                        # the mode change is attempted anyway
                        try:
                            unbind_vfs(pcidev.vfs, pcidev.driver)
                        except Exception as e:
                            logging.warning(f'Unbinding of VFs for {netdef_id} failed: {str(e)}')

                    logging.debug(f'Changing eswitch mode from {current_eswitch_mode_system} to {eswitch_mode} for: {netdef_id}')
                    pcidev.devlink_set('eswitch', 'mode', eswitch_mode)
                    if pcidev.vfs:
                        # rebinding is skipped when the netdef asks to delay it
                        if not netdef._delay_virtual_functions_rebind:
                            bind_vfs(pcidev.vfs, pcidev.driver)

    # interfaces (not netdef IDs) that already received a VLAN filter
    filtered_vlans_set = set()
    for vlan, netdef in np_state.vlans.items():
        # there is a special sriov vlan renderer that one can use to mark
        # a selected vlan to be done in hardware (VLAN filtering)
        if netdef._has_sriov_vlan_filter:
            # this only works for SR-IOV VF interfaces
            link = netdef.links.get('vlan')
            vlan_id = netdef._vlan_id

            vf = vfs.get(link.id)
            if not vf:
                # it is possible this is not an error, for instance when
                # the configuration has been defined 'for the future'
                # XXX: but maybe we should error out here as well?
                logging.warning(
                    'SR-IOV vlan defined for %s but link %s is either not a VF or has no matches' % (vlan, link.id))
                continue

            # get the parent pf interface
            # first we fetch the related vf netplan entry
            # and finally, get the matched pf interface
            pf = pfs.get(link.links.get('sriov').id)

            if vf in filtered_vlans_set:
                raise ConfigurationError(
                    'interface %s for netplan device %s (%s) already has an SR-IOV vlan defined' % (vf, link.id, vlan))

            # TODO: make sure that we don't apply the filter twice
            apply_vlan_filter_for_vf(pf, vf, vlan, vlan_id)
            filtered_vlans_set.add(vf)
|