File: conftest.py

package info (click to toggle)
intel-gpu-tools 2.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 63,360 kB
  • sloc: xml: 781,458; ansic: 360,567; python: 8,336; yacc: 2,781; perl: 1,196; sh: 1,177; lex: 487; asm: 227; lisp: 35; makefile: 30
file content (307 lines) | stat: -rw-r--r-- 12,191 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# SPDX-License-Identifier: MIT
# Copyright © 2024 Intel Corporation

import json
import logging
import re
import typing

from dataclasses import dataclass
from pathlib import Path

import pytest

from bench import exceptions
from bench.helpers.helpers import (modprobe_driver, modprobe_driver_check)
from bench.helpers.log import HOST_DMESG_FILE
from bench.configurators.vgpu_profile_config import VgpuProfileConfigurator, VfSchedulingMode
from bench.configurators.vgpu_profile import VgpuProfile
from bench.configurators.vmtb_config import VmtbConfigurator
from bench.machines.host import Host, Device
from bench.machines.virtual.vm import VirtualMachine


# Module-level logger used by the fixtures, hooks and helper classes defined in this conftest
logger = logging.getLogger('Conftest')


def pytest_addoption(parser):
    """Register the VMTB command line options with pytest.

    Two options are added, each storing the value given on the command line:
    '--vm-image' (guest OS image) and '--card' (device card index).
    """
    option_help = (
        ('--vm-image', 'OS image to boot on VM'),
        ('--card', 'Device card index for test execution'),
    )
    for flag, description in option_help:
        parser.addoption(flag, action='store', help=description)


@dataclass
class VmmTestingConfig:
    """Test configuration consumed by the VM setup fixture.

    Fields:
    - num_vfs: number of VFs requested to be enabled
    - max_num_vms: upper limit of VMs to create (may differ from the number of enabled VFs)
    - scheduling_mode: requested vGPU scheduling profile (infinite maps to default 0's)
    - auto_poweron_vm: let the setup fixture assign VFs and power on the VMs
    - auto_probe_vm_driver: let the setup fixture probe the guest DRM driver (VM must be powered on)
    - unload_host_drivers_on_teardown: unload host DRM drivers during teardown
    - wa_reduce_vf_lmem: workaround shrinking VF LMEM (speeds up save-restore/migration tests)
    """
    num_vfs: int = 1
    max_num_vms: int = 2
    scheduling_mode: VfSchedulingMode = VfSchedulingMode.INFINITE

    auto_poweron_vm: bool = True
    auto_probe_vm_driver: bool = True
    unload_host_drivers_on_teardown: bool = False
    # Temporary W/A: shrink LMEM assigned to VFs so that a VF state save-restore takes less time
    wa_reduce_vf_lmem: bool = False

    def __str__(self) -> str:
        """Return the short '<num_vfs>VF' form of the config."""
        return '{}VF'.format(self.num_vfs)

    def __repr__(self) -> str:
        """Return a multi-line, human readable dump of all settings (starts with a newline)."""
        report_lines = (
            '',
            'VmmTestingConfig:',
            f'Num VFs = {self.num_vfs} / max num VMs = {self.max_num_vms}',
            f'VF scheduling mode = {self.scheduling_mode}',
            'Setup flags:',
            f'\tVM - auto power-on = {self.auto_poweron_vm}',
            f'\tVM - auto DRM driver probe = {self.auto_probe_vm_driver}',
            f'\tHost - unload drivers on teardown = {self.unload_host_drivers_on_teardown}',
            f'\tW/A - reduce VF LMEM (improves migration time) = {self.wa_reduce_vf_lmem}',
        )
        return '\n'.join(report_lines)


class VmmTestingSetup:
    """Runtime state of one VMM test setup: the host, the device under test and the VMs.

    Creating an instance already acts on the host: host drivers are loaded, GPU devices are
    discovered, a vGPU profile matching the testing config is selected and the VirtualMachine
    objects are instantiated (they are not powered on here).
    """

    def __init__(self, vmtb_config: VmtbConfigurator, cmdline_config, host, testing_config):
        """Prepare the host and create VM objects according to the given configuration.

        Args:
            vmtb_config: VMTB configuration (host/guest settings, vGPU profiles location).
            cmdline_config: mapping with 'card_index' and 'vm_image' keys; a value other than
                None overrides the corresponding VMTB config setting.
            host: Host instance to operate on.
            testing_config: VmmTestingConfig describing the requested test setup.

        Raises:
            exceptions.VmtbConfigError: the selected card index is not among discovered devices.
            exceptions.VgpuProfileError: no vGPU profile matches the testing config.
        """
        self.testing_config: VmmTestingConfig = testing_config
        self.host: Host = host

        # Command line values take precedence over the VMTB config file
        if cmdline_config['card_index'] is None:
            self.dut_index = vmtb_config.get_host_config().card_index
        else:
            self.dut_index = int(cmdline_config['card_index'])

        if cmdline_config['vm_image'] is None:
            self.guest_os_image = vmtb_config.get_guest_config().os_image_path
        else:
            self.guest_os_image = cmdline_config['vm_image']

        # vGPU profiles path is taken relative to the directory of the VMTB config file
        self.vgpu_profiles_dir = vmtb_config.vmtb_config_file.parent / vmtb_config.config.vgpu_profiles_path

        self.host.dut_index = self.dut_index
        self.host.drm_driver_name = vmtb_config.get_host_config().driver
        self.host.igt_config = vmtb_config.get_host_config().igt_config

        self.host.load_drivers()
        self.host.discover_devices()

        dut = self.get_dut()
        logger.info("\nDUT info:"
                    "\n\tCard index: %s"
                    "\n\tPCI BDF: %s "
                    "\n\tDevice ID: %s (%s)"
                    "\n\tHost DRM driver: %s",
                    self.host.dut_index,
                    dut.pci_info.bdf,
                    dut.pci_info.devid, dut.gpu_model,
                    dut.driver.get_name())

        self.vgpu_profile: VgpuProfile = self.get_vgpu_profile()

        # Start maximum requested number of VMs, but not more than VFs supported by the given vGPU profile
        num_vms = min(self.vgpu_profile.num_vfs, self.testing_config.max_num_vms)
        self.vms: typing.List[VirtualMachine] = [
            VirtualMachine(vm_idx, self.guest_os_image,
                           vmtb_config.get_guest_config().driver,
                           vmtb_config.get_guest_config().igt_config)
            for vm_idx in range(num_vms)]

    def get_vgpu_profile(self) -> VgpuProfile:
        """Look up the vGPU profile for the requested number of VFs and scheduling mode.

        The profile parameters are printed before the profile is returned.

        Raises:
            exceptions.VgpuProfileError: no suitable profile was found (the original
                lookup error is attached as the cause).
        """
        configurator = VgpuProfileConfigurator(self.vgpu_profiles_dir, self.get_dut().gpu_model)
        try:
            vgpu_profile = configurator.get_vgpu_profile(self.testing_config.num_vfs,
                                                         self.testing_config.scheduling_mode)
        except exceptions.VgpuProfileError as exc:
            logger.error("Suitable vGPU profile not found: %s", exc)
            raise exceptions.VgpuProfileError('Invalid test setup - vGPU profile not found!') from exc

        vgpu_profile.print_parameters()

        return vgpu_profile

    def get_dut(self) -> Device:
        """Return the device under test, i.e. the host GPU device at index `dut_index`.

        Raises:
            exceptions.VmtbConfigError: `dut_index` is out of range of the discovered devices.
        """
        try:
            return self.host.gpu_devices[self.dut_index]
        except IndexError as exc:
            logger.error("Invalid VMTB config - device card index = %s not available", self.dut_index)
            raise exceptions.VmtbConfigError(f'Device card index = {self.dut_index} not available') from exc

    @property
    def get_vm(self):
        """List of all VirtualMachine objects of this setup (a property, despite the getter-like name)."""
        return self.vms

    def get_num_vms(self) -> int:
        """Return the number of VirtualMachine objects created for this setup."""
        return len(self.vms)

    def poweron_vms(self):
        """Power on every VM of this setup, in list order."""
        for vm in self.vms:
            vm.poweron()

    def poweroff_vms(self):
        """Power off all running VMs, continuing with the remaining VMs after a failure.

        A failed poweroff is logged and forces `unload_host_drivers_on_teardown` to True,
        so that the following teardown also unloads the host drivers.

        Raises:
            exceptions.GuestError: at least one VM failed to power off.
        """
        # Track failures separately from the config flag: the flag may have been requested
        # by the test config itself, which must not be reported as a poweroff problem.
        poweroff_failed = False
        for vm in self.vms:
            if vm.is_running():
                try:
                    vm.poweroff()
                except Exception as exc:
                    poweroff_failed = True
                    self.testing_config.unload_host_drivers_on_teardown = True
                    logger.warning("Error on VM%s poweroff (%s)", vm.vmnum, exc)

        if poweroff_failed:
            raise exceptions.GuestError('VM poweroff issue - cleanup on test teardown')

    def teardown(self):
        """Clean up the test environment: power off VMs, remove VFs and reset provisioning.

        Errors from VM poweroff are logged and do not prevent the device cleanup.
        Host drivers are unloaded when `unload_host_drivers_on_teardown` is set
        (either by the test config or by a failed VM poweroff).
        """
        try:
            self.poweroff_vms()
        except Exception as exc:
            logger.error("Error on test teardown (%s)", exc)
        finally:
            dut = self.get_dut()
            # Number of VFs must be read before they are removed - it is needed to reset provisioning
            num_vfs = dut.get_current_vfs()
            dut.remove_vfs()
            dut.reset_provisioning(num_vfs)
            dut.cancel_work()

            if self.testing_config.unload_host_drivers_on_teardown:
                self.host.unload_drivers()


@pytest.fixture(scope='session', name='get_vmtb_config')
def fixture_get_vmtb_config(create_host_log, pytestconfig):
    """Build the session-wide VmtbConfigurator from 'vmtb_config.json' in the pytest rootpath.

    The 'create_host_log' fixture is requested only so that it runs first; its value is unused.
    """
    config_file_name = 'vmtb_config.json'
    # Pytest Config.rootpath points to the VMTB base directory
    return VmtbConfigurator(pytestconfig.rootpath / config_file_name)


@pytest.fixture(scope='session', name='create_host_log')
def fixture_create_host_log():
    """Start the session with an empty host dmesg log file (an existing file is deleted first)."""
    dmesg_log = HOST_DMESG_FILE
    if dmesg_log.exists():
        dmesg_log.unlink()
    dmesg_log.touch()


@pytest.fixture(scope='session', name='get_cmdline_config')
def fixture_get_cmdline_config(request):
    """Collect the VMTB command line options into a dict.

    Returns a mapping with keys 'vm_image' (value of --vm-image) and
    'card_index' (value of --card).
    """
    get_option = request.config.getoption
    return {
        'vm_image': get_option('--vm-image'),
        'card_index': get_option('--card'),
    }


@pytest.fixture(scope='session', name='get_host')
def fixture_get_host():
    """Create the Host object; session scope makes it shared by all tests of the run."""
    session_host = Host()
    return session_host


@pytest.fixture(scope='class', name='setup_vms')
def fixture_setup_vms(get_vmtb_config, get_cmdline_config, get_host, request):
    """Arrange VM environment for the VMM Flows test execution.

    VM setup steps follow the configuration provided as VmmTestingConfig parameter, including:
    host drivers probe (DRM and VFIO), provision and enable VFs, boot VMs and load guest DRM driver.
    Tear-down phase covers test environment cleanup:
    shutdown VMs, reset provisioning, disable VFs and optional host drivers unload.
    The tear-down phase is executed also when one of the setup steps fails after the
    VmmTestingSetup has been created, so a broken setup does not leave VMs running or VFs enabled.

    The fixture is designed for test parametrization, as the input to the following test class decorator:
    @pytest.mark.parametrize('setup_vms', set_test_config(max_vms=N), ids=idfn_test_config, indirect=['setup_vms'])
    where 'set_test_config' provides request parameter with a VmmTestingConfig (usually list of configs).

    Yields:
        VmmTestingSetup: the prepared test setup (host, device under test and VMs).
    """
    tc: VmmTestingConfig = request.param
    logger.debug(repr(tc))

    host: Host = get_host
    ts: VmmTestingSetup = VmmTestingSetup(get_vmtb_config, get_cmdline_config, host, tc)

    device: Device = ts.get_dut()
    num_vfs = ts.vgpu_profile.num_vfs
    num_vms = ts.get_num_vms()

    logger.info('[Test setup: %sVF-%sVM]', num_vfs, num_vms)

    # Set only when the LMEM workaround was really applied, so teardown restores exactly what was changed
    vf_lmem_reduced = False
    org_vgpu_profile_vfLmem = None

    # Code placed after 'yield' is skipped by pytest when the fixture raises before yielding,
    # hence try/finally guarantees the cleanup also for a failed setup.
    try:
        # XXX: VF migration on discrete devices (with LMEM) is currently quite slow.
        # As a temporary workaround, reduce size of LMEM assigned to VFs to speed up a state save/load process.
        if tc.wa_reduce_vf_lmem and device.has_lmem():
            logger.debug("W/A: reduce VFs LMEM quota to accelerate state save/restore")
            org_vgpu_profile_vfLmem = ts.vgpu_profile.resources.vfLmem
            # Assign half of the profile value, but max 512 MB (536870912 B) to VF
            ts.vgpu_profile.resources.vfLmem = min(org_vgpu_profile_vfLmem // 2, 536870912)
            vf_lmem_reduced = True

        device.provision(ts.vgpu_profile)

        assert device.create_vf(num_vfs) == num_vfs

        if tc.auto_poweron_vm:
            # VF numbering starts from 1: VM0 gets VF1, VM1 gets VF2, etc.
            bdf_list = [device.get_vf_bdf(vf) for vf in range(1, num_vms + 1)]
            for vm, bdf in zip(ts.get_vm, bdf_list):
                vm.assign_vf(bdf)

            ts.poweron_vms()

            if tc.auto_probe_vm_driver:
                # Issue modprobe on all VMs first, then verify the results one by one
                modprobe_cmds = [modprobe_driver(vm) for vm in ts.get_vm]
                for i, (vm, cmd) in enumerate(zip(ts.get_vm, modprobe_cmds)):
                    assert modprobe_driver_check(vm, cmd), f'modprobe failed on VM{i}'

        logger.info('[Test execution: %sVF-%sVM]', num_vfs, num_vms)
        yield ts
    finally:
        logger.info('[Test teardown: %sVF-%sVM]', num_vfs, num_vms)
        # XXX: cleanup counterpart for VFs LMEM quota workaround - restore original value
        if vf_lmem_reduced:
            ts.vgpu_profile.resources.vfLmem = org_vgpu_profile_vfLmem

        ts.teardown()


def idfn_test_config(test_config: VmmTestingConfig):
    """Provide the test config ID for parametrized tests (e.g. test_something[4VF]).

    The ID is the string form of the config, i.e. whatever VmmTestingConfig.__str__ returns.
    Usage: @pytest.mark.parametrize([...], ids=idfn_test_config, [...])
    """
    config_id = str(test_config)
    return config_id


# JSON results file, relative to the current working directory
RESULTS_FILE = Path("results.json")
# Results accumulated over the session: pytest_report_teststatus fills in "name" and "tests",
# pytest_sessionfinish writes the whole structure to RESULTS_FILE
results = dict(
    results_version=10,
    name="results",
    tests={},
)


@pytest.hookimpl(hookwrapper=True)
def pytest_report_teststatus(report):
    """Record the outcome of a test phase, together with the host dmesg, in `results`.

    Runs after the other implementations of the hook (code placed after `yield`).
    A record is stored for:
    - the 'call' phase: result "pass", "fail" or "skip",
    - a failed 'setup' phase: result "crash".
    Whenever a record is stored, the host dmesg file is emptied, so the following record
    carries only the messages logged since then. Other reports leave the file untouched.
    """
    yield
    with open(HOST_DMESG_FILE, 'r+', encoding='utf-8') as dmesg_file:
        dmesg = dmesg_file.read()

        # Strip the leading directories: keep '<file>::<test path>' part of the node ID.
        # Fall back to the full node ID when it has no '::' separator, instead of
        # raising IndexError from within the reporting hook.
        nodeid_match = re.search('[A-Za-z_.]*::.*', report.nodeid)
        test_string = nodeid_match.group(0) if nodeid_match else report.nodeid
        results["name"] = f"vmtb_{test_string}"
        test_name = f"vmtb@{test_string}"

        status = None
        out = report.capstdout
        if report.when == 'call':
            if report.passed:
                status = "pass"
                # Captured stdout of a passed test is replaced with a short note
                out = f"{test_name} passed"
            elif report.failed:
                status = "fail"
            else:
                status = "skip"
        elif report.when == 'setup' and report.failed:
            status = "crash"

        if status is not None:
            results["tests"][test_name] = {
                "out": out, "result": status, "time": {"start": 0, "end": report.duration},
                "err": report.longreprtext, "dmesg": dmesg}
            dmesg_file.truncate(0)


@pytest.hookimpl()
def pytest_sessionfinish():
    """Write the collected `results` to RESULTS_FILE as JSON (2-space indent) at session end.

    A results file left from a previous run is removed and recreated before writing.
    """
    if RESULTS_FILE.exists():
        RESULTS_FILE.unlink()
    RESULTS_FILE.touch()

    serialized_results = json.dumps(results, indent=2)
    with RESULTS_FILE.open('w', encoding='utf-8') as results_file:
        results_file.write(serialized_results)