// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023-2024 Intel Corporation
 */

#include <drm/drm_managed.h>
#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_sriov_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc_ct.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
#include "xe_tile_sriov_vf.h"
/**
 * DOC: VF restore procedure in PF KMD and VF KMD
 *
 * Restoring a previously saved state of a VF is one of the core features
 * of SR-IOV. All major VM management applications allow saving and
 * restoring the VM state, and doing that with a VM which uses an SR-IOV
 * VF as one of its accessible devices requires support from KMD on both
 * the PF and the VF side. The VMM initiates all required operations
 * through the VFIO module, which then translates them into PF KMD calls.
 * This description focuses on those calls, leaving out the module which
 * initiates them (VFIO).
 *
 * In order to start the restore procedure, GuC needs to keep the VF in
 * a proper state. The PF driver can ensure GuC sets it to the VF_READY
 * state by provisioning the VF, which in turn can be done after a
 * Function Level Reset (FLR) of said VF (or after it was freshly created -
 * in that case FLR is not needed). The FLR procedure ends with GuC
 * sending the `GUC_PF_NOTIFY_VF_FLR_DONE` message, and then the
 * provisioning data is sent to GuC. After the provisioning is completed,
 * the VF needs to be paused, and at that point the actual restore can
 * begin.
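 *
 * A PF-side sketch of these preconditions, with hypothetical helper names
 * (only the GuC message and the VF states come from the actual flow)::
 *
 *	// FLR has completed: GuC sent GUC_PF_NOTIFY_VF_FLR_DONE
 *	pf_provision_vf(pf, vfid);	// GuC moves the VF to VF_READY
 *	pf_pause_vf(pf, vfid);		// PF2GUC_VF_CONTROL(pause)
 *	// the actual restore can begin now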
 *
 * During VF Restore, the state of several resources is restored. These
 * may include local memory content (system memory is restored by the VMM
 * itself), values of MMIO registers, stateless compression metadata and
 * others. The final resource which also needs restoring is the state of
 * the VF submission maintained within GuC. For that, the
 * `GUC_PF_OPCODE_VF_RESTORE` message is used, with a reference to the
 * state blob to be consumed by GuC.
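 *
 * A sketch of that restore sequence, again with hypothetical helpers
 * (only the `GUC_PF_OPCODE_VF_RESTORE` message is from the actual flow)::
 *
 *	pf_restore_vf_lmem(pf, vfid, img);	// local memory content
 *	pf_restore_vf_mmio(pf, vfid, img);	// MMIO register values
 *	// finally, hand the GuC state blob from the image over to GuC
 *	pf_send_vf_restore(pf, vfid, img->guc_state_blob);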
 *
 * Next, when VFIO is asked to set the VM into running state, the PF
 * driver sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after a
 * restore, this changes the VF state within GuC to `VF_RESFIX_BLOCKED`
 * rather than the usual `VF_RUNNING`. At this point GuC triggers an
 * interrupt to inform the VF KMD within the VM that it was migrated.
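 *
 * That hand-off could be sketched as (hypothetical helper; the message,
 * state and interrupt names are from the actual flow)::
 *
 *	pf_send_vf_resume(pf, vfid);	// GUC_PF_TRIGGER_VF_RESUME
 *	// GuC: VF state becomes VF_RESFIX_BLOCKED (not VF_RUNNING)
 *	// GuC: raises GUC_INTR_SW_INT_0 in the VF to signal the migration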
 *
 * As soon as the Virtual GPU of the VM starts, the VF driver within it
 * receives the MIGRATED interrupt and schedules the post-migration
 * recovery worker. That worker queries GuC for the new provisioning
 * (using MMIO communication), and applies fixups to any non-virtualized
 * resources used by the VF.
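 *
 * Within this file, that worker is migration_worker_func(), which calls
 * vf_post_migration_recovery() and, for each migrated GT, runs
 * gt_vf_post_migration_fixups().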
 *
 * When the VF driver is ready to continue operation on the newly
 * connected hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE`, which causes
 * the VF to enter the long awaited `VF_RUNNING` state, after which GuC
 * resumes handling CTB messages and scheduling workloads from the VF::
 *
 *	PF                              GuC                             VF
 *	[ ]                              |                               |
 *	[ ] PF2GUC_VF_CONTROL(pause)     |                               |
 *	[ ]---------------------------> [ ]                              |
 *	[ ]                             [ ]  GuC sets new VF state to    |
 *	[ ]                             [ ]------- VF_READY_PAUSED       |
 *	[ ]                             [ ]      |                       |
 *	[ ]                             [ ] <-----                       |
 *	[ ]                     success [ ]                              |
 *	[ ] <---------------------------[ ]                              |
 *	[ ]                              |                               |
 *	[ ] PF loads resources from the  |                               |
 *	[ ]------- saved image supplied  |                               |
 *	[ ]      |                       |                               |
 *	[ ] <-----                       |                               |
 *	[ ]                              |                               |
 *	[ ] GUC_PF_OPCODE_VF_RESTORE     |                               |
 *	[ ]---------------------------> [ ]                              |
 *	[ ]                             [ ]  GuC loads contexts and CTB  |
 *	[ ]                             [ ]------- state from image      |
 *	[ ]                             [ ]      |                       |
 *	[ ]                             [ ] <-----                       |
 *	[ ]                             [ ]                              |
 *	[ ]                             [ ]  GuC sets new VF state to    |
 *	[ ]                             [ ]------- VF_RESFIX_PAUSED      |
 *	[ ]                             [ ]      |                       |
 *	[ ]                     success [ ] <-----                       |
 *	[ ] <---------------------------[ ]                              |
 *	[ ]                              |                               |
 *	[ ] GUC_PF_TRIGGER_VF_RESUME     |                               |
 *	[ ]---------------------------> [ ]                              |
 *	[ ]                             [ ]  GuC sets new VF state to    |
 *	[ ]                             [ ]------- VF_RESFIX_BLOCKED     |
 *	[ ]                             [ ]      |                       |
 *	[ ]                             [ ] <-----                       |
 *	[ ]                             [ ]                              |
 *	[ ]                             [ ] GUC_INTR_SW_INT_0            |
 *	[ ]                     success [ ]---------------------------> [ ]
 *	[ ] <---------------------------[ ]                             [ ]
 *	 |                               |      VF2GUC_QUERY_SINGLE_KLV [ ]
 *	 |                              [ ] <---------------------------[ ]
 *	 |                              [ ]                             [ ]
 *	 |                              [ ] new VF provisioning         [ ]
 *	 |                              [ ]---------------------------> [ ]
 *	 |                               |                              [ ]
 *	 |                               |       VF driver applies post [ ]
 *	 |                               |      migration fixups -------[ ]
 *	 |                               |                       |      [ ]
 *	 |                               |                       -----> [ ]
 *	 |                               |                              [ ]
 *	 |                               |    VF2GUC_NOTIFY_RESFIX_DONE [ ]
 *	 |                              [ ] <---------------------------[ ]
 *	 |                              [ ]                             [ ]
 *	 |                              [ ]  GuC sets new VF state to   [ ]
 *	 |                              [ ]------- VF_RUNNING           [ ]
 *	 |                              [ ]      |                      [ ]
 *	 |                              [ ] <-----                      [ ]
 *	 |                              [ ]                     success [ ]
 *	 |                              [ ]---------------------------> [ ]
 *	 |                               |                               |
 *	 |                               |                               |
 */

static bool vf_migration_supported(struct xe_device *xe)
{
	/*
	 * TODO: Add conditions to allow specific platforms, when they're
	 * supported at production quality.
	 */
	return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
}

static void migration_worker_func(struct work_struct *w);

/**
 * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
 * @xe: the &xe_device to initialize
 */
void xe_sriov_vf_init_early(struct xe_device *xe)
{
	INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);

	if (!vf_migration_supported(xe))
		xe_sriov_info(xe, "migration not supported by this module version\n");
}

static bool gt_vf_post_migration_needed(struct xe_gt *gt)
{
	return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags);
}

/*
 * Notify GuCs marked in flags that the resource fixups have been applied.
 * @xe: the &xe_device struct instance
 * @gt_flags: flags marking to which GTs the notification shall be sent
 */
static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags)
{
	struct xe_gt *gt;
	unsigned int id;
	int err = 0;

	for_each_gt(gt, xe, id) {
		if (!test_bit(id, &gt_flags))
			continue;
		/* skip asking GuC for RESFIX exit if a new recovery request arrived */
		if (gt_vf_post_migration_needed(gt))
			continue;
		err = xe_gt_sriov_vf_notify_resfix_done(gt);
		if (err)
			break;
		clear_bit(id, &gt_flags);
	}
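
	/*
	 * gt_flags was passed by value, so the bits cleared above are
	 * local. Bits still set here with no error mean notifications
	 * that were skipped because another recovery request is pending.
	 */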
	if (gt_flags && !err)
		drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n");
	return err;
}

static int vf_get_next_migrated_gt_id(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	for_each_gt(gt, xe, id) {
		if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags))
			return id;
	}
	return -1;
}

/**
 * gt_vf_post_migration_fixups - Perform post-migration fixups on a single GT.
 * @gt: the &xe_gt struct instance
 *
 * After migration, GuC needs to be re-queried for the VF configuration to
 * check whether it matches the previous provisioning. Most of the VF
 * provisioning shall be the same, except the GGTT range, since GGTT is not
 * virtualized per-VF. If the GGTT range has changed, we have to perform
 * fixups - shift all GGTT references used anywhere within the driver.
 * After the fixups in this function succeed, it is allowed to ask the GuC
 * bound to this GT to continue normal operation.
 *
 * Returns: 0 if the operation completed successfully, or a negative error
 * code otherwise.
 */
static int gt_vf_post_migration_fixups(struct xe_gt *gt)
{
	s64 shift;
	int err;

	err = xe_gt_sriov_vf_query_config(gt);
	if (err)
		return err;

	shift = xe_gt_sriov_vf_ggtt_shift(gt);
	if (shift) {
		xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift);
		/* FIXME: add the recovery steps */
		xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift);
	}

	return 0;
}

static void vf_post_migration_recovery(struct xe_device *xe)
{
	unsigned long fixed_gts = 0;
	int id, err;

	drm_dbg(&xe->drm, "migration recovery in progress\n");
	xe_pm_runtime_get(xe);

	if (!vf_migration_supported(xe)) {
		xe_sriov_err(xe, "migration not supported by this module version\n");
		err = -ENOTRECOVERABLE;
		goto fail;
	}

	while (id = vf_get_next_migrated_gt_id(xe), id >= 0) {
		struct xe_gt *gt = xe_device_get_gt(xe, id);

		err = gt_vf_post_migration_fixups(gt);
		if (err)
			goto fail;
		set_bit(id, &fixed_gts);
	}

	err = vf_post_migration_notify_resfix_done(xe, fixed_gts);
	if (err)
		goto fail;

	xe_pm_runtime_put(xe);
	drm_notice(&xe->drm, "migration recovery ended\n");
	return;

fail:
	xe_pm_runtime_put(xe);
	drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
	xe_device_declare_wedged(xe);
}

static void migration_worker_func(struct work_struct *w)
{
	struct xe_device *xe = container_of(w, struct xe_device,
					    sriov.vf.migration.worker);

	vf_post_migration_recovery(xe);
}

/*
 * Check if post-restore recovery is coming on any of the GTs.
 * @xe: the &xe_device struct instance
 *
 * Return: True if the migration recovery worker will soon be running. Any
 * worker currently executing does not affect the result.
 */
static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe)
{
	struct xe_gt *gt;
	unsigned int id;

	for_each_gt(gt, xe, id) {
		if (test_bit(id, &xe->sriov.vf.migration.gt_flags))
			return true;
	}
	return false;
}

/**
 * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
 * @xe: the &xe_device to start recovery on
 *
 * This function shall be called only by VF.
 */
void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
{
	bool started;

	xe_assert(xe, IS_SRIOV_VF(xe));

	if (!vf_ready_to_recovery_on_any_gts(xe))
		return;
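
	/*
	 * queue_work() returns false if the work item was already queued;
	 * that is what distinguishes the two messages below.
	 */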
	started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
	drm_info(&xe->drm, "VF migration recovery %s\n",
		 started ? "scheduled" : "already in progress");
}