File: xe_exec_mix_modes.c

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

/**
 * TEST: Test the parallel submission of jobs in LR and dma fence modes
 * Category: Core
 * Mega feature: General Core features
 * Sub-category: CMD submission
 * Functionality: fault mode
 * GPU requirements: GPU needs support for DRM_XE_VM_CREATE_FLAG_FAULT_MODE
 */

#include <fcntl.h>
#include <string.h>

#include "igt.h"
#include "lib/igt_syncobj.h"
#include "lib/intel_reg.h"
#include "xe_drm.h"

#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"
#include "xe/xe_spin.h"
#include "xe/xe_util.h"

#define FLAG_EXEC_MODE_LR	(0x1 << 0)
#define FLAG_JOB_TYPE_SIMPLE	(0x1 << 1)

#define NUM_INTERRUPTING_JOBS	1
#define USER_FENCE_VALUE	0xdeadbeefdeadbeefull
#define VM_DATA			0
#define SPIN_DATA		1
#define EXEC_DATA		2
#define DATA_COUNT		3

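/*
 * Per-slot layout of the shared BO: a preemptible spinner, a small
 * store-dword batch, user-fence words for the VM bind and execution
 * completion, the store target dword and the GPU address of the batch.
 * The BO holds DATA_COUNT of these slots (VM_DATA, SPIN_DATA, EXEC_DATA).
 */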
struct data {
	struct xe_spin spin;
	uint32_t batch[16];
	uint64_t vm_sync;
	uint32_t data;
	uint64_t exec_sync;
	uint64_t addr;
};

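/*
 * Emit an MI_STORE_DWORD_IMM batch that writes @value to data->data and
 * record the GPU address of the batch in data->addr.
 */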
static void store_dword_batch(struct data *data, uint64_t addr, int value)
{
	int b;
	uint64_t batch_offset = (char *)&(data->batch) - (char *)data;
	uint64_t batch_addr = addr + batch_offset;
	uint64_t sdi_offset = (char *)&(data->data) - (char *)data;
	uint64_t sdi_addr = addr + sdi_offset;

	b = 0;
	data->batch[b++] = MI_STORE_DWORD_IMM_GEN4;
	data->batch[b++] = sdi_addr;
	data->batch[b++] = sdi_addr >> 32;
	data->batch[b++] = value;
	data->batch[b++] = MI_BATCH_BUFFER_END;
	igt_assert(b <= ARRAY_SIZE(data->batch));

	data->addr = batch_addr;
}

enum engine_execution_mode {
	EXEC_MODE_LR,
	EXEC_MODE_DMA_FENCE,
};

enum job_type {
	SIMPLE_BATCH_STORE,
	SPINNER_INTERRUPTED,
};

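/*
 * Run a single job on @hwe in either LR (user fence) or dma fence (syncobj)
 * mode. When @allow_recursion is set, interrupting jobs are submitted on top
 * of it. @dma_fence_job_spin, when non-NULL, points to a spinner running in
 * dma fence mode that this job is expected to be blocked behind.
 */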
static void
run_job(int fd, struct drm_xe_engine_class_instance *hwe,
	enum engine_execution_mode engine_execution_mode,
	enum job_type job_type, bool allow_recursion,
	struct xe_spin *dma_fence_job_spin)
{
	struct drm_xe_sync sync[1] = {
		{ .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
	};
	struct drm_xe_exec exec = {
		.num_batch_buffer = 1,
		.num_syncs = 1,
		.syncs = to_user_pointer(&sync),
	};
	struct data *data;
	uint32_t vm;
	uint32_t exec_queue;
	size_t bo_size;
	int value = 0x123456;
	uint64_t addr = 0x100000;
	uint32_t bo = 0;
	unsigned int vm_flags = 0;
	struct xe_spin_opts spin_opts = { .preempt = true };
	struct timespec tv;
	enum engine_execution_mode interrupting_engine_execution_mode;
	int64_t timeout_short = 1;

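	/* LR jobs signal a user fence in a faultable LR VM, dma fence jobs a syncobj */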
	if (engine_execution_mode == EXEC_MODE_LR) {
		sync[0].type = DRM_XE_SYNC_TYPE_USER_FENCE;
		sync[0].timeline_value = USER_FENCE_VALUE;
		vm_flags = DRM_XE_VM_CREATE_FLAG_LR_MODE | DRM_XE_VM_CREATE_FLAG_FAULT_MODE;
	} else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
		sync[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
		sync[0].handle = syncobj_create(fd, 0);
	}

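	/* One BO backs all DATA_COUNT slots; bind it at a fixed address and wait for the bind */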
	vm = xe_vm_create(fd, vm_flags, 0);
	bo_size = sizeof(*data) * DATA_COUNT;
	bo_size = xe_bb_size(fd, bo_size);
	bo = xe_bo_create(fd, vm, bo_size,
			  vram_if_possible(fd, hwe->gt_id),
			  DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
	data = xe_bo_map(fd, bo, bo_size);
	if (engine_execution_mode == EXEC_MODE_LR)
		sync[0].addr = to_user_pointer(&data[VM_DATA].vm_sync);
	xe_vm_bind_async(fd, vm, 0, bo, 0, addr, bo_size, &sync[0], 1);

	store_dword_batch(data, addr, value);
	if (engine_execution_mode == EXEC_MODE_LR) {
		xe_wait_ufence(fd, &data[VM_DATA].vm_sync, USER_FENCE_VALUE, 0, NSEC_PER_SEC);
		sync[0].addr = addr + (char *)&data[EXEC_DATA].exec_sync - (char *)data;
	} else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
		igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
		syncobj_reset(fd, &sync[0].handle, 1);
		sync[0].flags &= DRM_XE_SYNC_FLAG_SIGNAL;
	}
	exec_queue = xe_exec_queue_create(fd, vm, hwe, 0);
	exec.exec_queue_id = exec_queue;

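	/* Point the exec at either a preemptible spinner or the store-dword batch */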
	if (job_type == SPINNER_INTERRUPTED) {
		spin_opts.addr = addr + (char *)&data[SPIN_DATA].spin - (char *)data;
		xe_spin_init(&data[SPIN_DATA].spin, &spin_opts);
		if (engine_execution_mode == EXEC_MODE_LR)
			sync[0].addr = addr + (char *)&data[SPIN_DATA].exec_sync - (char *)data;
		exec.address = spin_opts.addr;
	} else if (job_type == SIMPLE_BATCH_STORE) {
		exec.address = data->addr;
	}
	xe_exec(fd, &exec);

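	/*
	 * A spinner is interrupted by jobs in the opposite submission mode,
	 * a simple store job by jobs in its own mode. Make sure the spinner
	 * has actually started before interrupting it.
	 */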
	if (job_type == SPINNER_INTERRUPTED) {
		if (engine_execution_mode == EXEC_MODE_LR)
			interrupting_engine_execution_mode = EXEC_MODE_DMA_FENCE;
		else if (engine_execution_mode == EXEC_MODE_DMA_FENCE)
			interrupting_engine_execution_mode = EXEC_MODE_LR;
		xe_spin_wait_started(&data[SPIN_DATA].spin);
	} else if (job_type == SIMPLE_BATCH_STORE) {
		interrupting_engine_execution_mode = engine_execution_mode;
	}

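	/* Submit the interrupting simple store jobs; recursive calls must not recurse further */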
	if (allow_recursion) {
		igt_gettime(&tv);
		for (int i = 0; i < NUM_INTERRUPTING_JOBS; i++) {
			struct xe_spin *spin_arg = NULL;

			if (job_type == SPINNER_INTERRUPTED &&
			    engine_execution_mode == EXEC_MODE_DMA_FENCE &&
			    interrupting_engine_execution_mode == EXEC_MODE_LR)
				/**
				 * In this case, jobs in LR mode are submitted while a job in dma
				 * fence mode is running. The KMD is expected to wait for the dma
				 * fence job to complete before executing the LR mode jobs. Pass a
				 * pointer to the dma fence job's spinner to the interrupting LR
				 * job so that it can check that it is blocked, end the spinner,
				 * and then check that it was unblocked and completed; see
				 * "if (dma_fence_job_spin) ..." below.
				 */
				spin_arg = &data[SPIN_DATA].spin;

			run_job(fd, hwe, interrupting_engine_execution_mode, SIMPLE_BATCH_STORE,
				false, spin_arg);

			if (job_type == SPINNER_INTERRUPTED &&
			    engine_execution_mode == EXEC_MODE_LR &&
			    interrupting_engine_execution_mode == EXEC_MODE_DMA_FENCE) {
				/**
				 * In this case, jobs in dma fence mode are submitted while a job
				 * in LR mode is running. The KMD is expected to preempt the LR
				 * mode job in order to execute the dma fence mode jobs. At this
				 * point the dma fence job has completed; check that the LR mode
				 * job is still running, meaning it was successfully preempted.
				 */
				igt_assert_neq(0, __xe_wait_ufence(fd, &data[SPIN_DATA].exec_sync,
								   USER_FENCE_VALUE,
								   0, &timeout_short));
			}
		}
	}

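	/*
	 * If this is the LR job interrupting a dma fence spinner, check that it
	 * is still blocked behind the spinner before releasing it. An LR spinner
	 * ends itself here so that the wait below can complete.
	 */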
	if (dma_fence_job_spin) {
		igt_assert_neq(0, __xe_wait_ufence(fd, &data[EXEC_DATA].exec_sync,
						   USER_FENCE_VALUE, 0, &timeout_short));
		xe_spin_end(dma_fence_job_spin);
	} else if (job_type == SPINNER_INTERRUPTED &&
		   engine_execution_mode == EXEC_MODE_LR) {
		xe_spin_end(&data[SPIN_DATA].spin);
	}

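	/* Wait for completion in the job's own mode, then check the stored value for store jobs */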
	if (engine_execution_mode == EXEC_MODE_LR) {
		if (job_type == SPINNER_INTERRUPTED)
			xe_wait_ufence(fd, &data[SPIN_DATA].exec_sync, USER_FENCE_VALUE, 0, NSEC_PER_SEC);
		else if (job_type == SIMPLE_BATCH_STORE)
			xe_wait_ufence(fd, &data[EXEC_DATA].exec_sync, USER_FENCE_VALUE, 0, NSEC_PER_SEC);
	} else if (engine_execution_mode == EXEC_MODE_DMA_FENCE) {
		igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
		syncobj_destroy(fd, sync[0].handle);
	}

	if (job_type == SIMPLE_BATCH_STORE)
		igt_assert_eq(data->data, value);

	munmap(data, bo_size);
	gem_close(fd, bo);
	xe_exec_queue_destroy(fd, exec_queue);
	xe_vm_destroy(fd, vm);
}

/**
 * SUBTEST: exec-simple-batch-store-lr
 * Description: Execute a simple batch store job in long running mode
 *
 * SUBTEST: exec-simple-batch-store-dma-fence
 * Description: Execute a simple batch store job in dma fence mode
 *
 * SUBTEST: exec-spinner-interrupted-lr
 * Description: Spin in long running mode then get interrupted by a simple
 *              batch store job in dma fence mode
 *
 * SUBTEST: exec-spinner-interrupted-dma-fence
 * Description: Spin in dma fence mode then get interrupted by a simple
 *              batch store job in long running mode
 */
static void
test_exec(int fd, struct drm_xe_engine_class_instance *hwe,
	  unsigned int flags)
{
	enum engine_execution_mode engine_execution_mode;
	enum job_type job_type;

	if (flags & FLAG_EXEC_MODE_LR)
		engine_execution_mode = EXEC_MODE_LR;
	else
		engine_execution_mode = EXEC_MODE_DMA_FENCE;

	if (flags & FLAG_JOB_TYPE_SIMPLE)
		job_type = SIMPLE_BATCH_STORE;
	else
		job_type = SPINNER_INTERRUPTED;

	run_job(fd, hwe, engine_execution_mode, job_type, true, NULL);
}

igt_main
{
	struct drm_xe_engine_class_instance *hwe;
	const struct section {
		const char *name;
		unsigned int flags;
	} sections[] = {
		{ "simple-batch-store-lr", FLAG_JOB_TYPE_SIMPLE | FLAG_EXEC_MODE_LR },
		{ "simple-batch-store-dma-fence", FLAG_JOB_TYPE_SIMPLE },
		{ "spinner-interrupted-lr", FLAG_EXEC_MODE_LR },
		{ "spinner-interrupted-dma-fence", 0 },
		{ NULL },
	};
	int fd;

	igt_fixture {
		bool supports_faults;
		int ret = 0;

		fd = drm_open_driver(DRIVER_XE);
		ret = xe_supports_faults(fd);
		supports_faults = !ret;
		igt_require(supports_faults);
	}

	for (const struct section *s = sections; s->name; s++) {
		igt_subtest_f("exec-%s", s->name)
			xe_for_each_engine(fd, hwe)
				if (hwe->engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
					test_exec(fd, hwe, s->flags);
	}

	igt_fixture {
		drm_close_driver(fd);
	}
}