File: memcpy.c

package info (click to toggle)
libocxl 1.2.1-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 636 kB
  • sloc: ansic: 4,391; makefile: 115; sh: 109; perl: 40
file content (667 lines) | stat: -rw-r--r-- 17,371 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
/*
 * Copyright 2017 International Business Machines
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define _DEFAULT_SOURCE

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <stdbool.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/wait.h>
#include "libocxl.h"


/*
 * printf-style logging helpers: LOG_ERR writes to stderr, LOG_INF to stdout.
 * The first argument is the format string.
 */
#define LOG_ERR(...) fprintf(stderr, __VA_ARGS__)
#define LOG_INF(...) printf(__VA_ARGS__)

/* Name of the AFU to open */
#define AFU_NAME "IBM,MEMCPY3"
/* Number of bytes copied by the test */
#define MEMCPY_SIZE  2048 // Max of 2048

#define CACHELINESIZE	128
/*
 * Size of the work element queue in bytes: 4095 cache lines (524160 bytes,
 * just under 512kB).  The queue depth handed to the AFU is
 * QUEUE_SIZE / CACHELINESIZE, and MEMCPY_WED() keeps only 12 bits of it,
 * so 4095 is the largest depth that survives the mask.
 *
 * Queue sizes other than 512kB don't seem to work (still true?)
 *
 * The expansion is parenthesised so that the macro can be used safely as an
 * operand of any expression (e.g. x / QUEUE_SIZE).
 */
#define QUEUE_SIZE	(4095 * CACHELINESIZE)

/*
 * Build the 64 bit work element descriptor: everything above the low
 * 12 bits comes from the queue address (which must therefore be 4kB
 * aligned), the low 12 bits hold the queue depth.
 *
 * Both arguments are parenthesised before the cast so that expressions
 * such as "base + n" are converted as a whole.
 */
#define MEMCPY_WED(queue, depth)			\
	((((uint64_t)(queue)) & 0xfffffffffffff000ULL) |	\
		(((uint64_t)(depth)) & 0xfffULL))

/*
 * Build the command byte of a work element: bit 0 is the valid flag,
 * bit 1 is left clear for the wrap flag, bits 2-7 hold the command number.
 */
#define MEMCPY_WE_CMD(valid, cmd)		\
	(((valid) & 0x1) |			\
		(((cmd) & 0x3f) << 2))
/* Flag bits within the command byte */
#define MEMCPY_WE_CMD_VALID	(0x1 << 0)
#define MEMCPY_WE_CMD_WRAP	(0x1 << 1)
/* Command numbers accepted by MEMCPY_WE_CMD() */
#define MEMCPY_WE_CMD_COPY		0
#define MEMCPY_WE_CMD_IRQ		1
#define MEMCPY_WE_CMD_STOP		2
#define MEMCPY_WE_CMD_WAKE_HOST_THREAD	3
#define MEMCPY_WE_CMD_INCREMENT		4
#define MEMCPY_WE_CMD_ATOMIC		5
#define MEMCPY_WE_CMD_TRANSLATE_TOUCH	6

/*
 * Global MMIO register offsets, used with the OCXL_GLOBAL_MMIO mapping
 * (see global_setup())
 */
#define MEMCPY_AFU_GLOBAL_CFG	0
#define MEMCPY_AFU_GLOBAL_TRACE	0x20

/*
 * Per-process MMIO register offsets, used with the OCXL_PER_PASID_MMIO
 * mapping
 */
#define MEMCPY_AFU_PP_WED	0
#define MEMCPY_AFU_PP_STATUS	0x10
/* Presumably bit values within MEMCPY_AFU_PP_STATUS -- not tested by the visible code */
#define   MEMCPY_AFU_PP_STATUS_Terminated	0x8
#define   MEMCPY_AFU_PP_STATUS_Stopped		0x10

#define MEMCPY_AFU_PP_CTRL	0x18
/* Values written to MEMCPY_AFU_PP_CTRL; only Restart is used by the visible code */
#define   MEMCPY_AFU_PP_CTRL_Restart	(0x1 << 0)
#define   MEMCPY_AFU_PP_CTRL_Terminate	(0x1 << 1)
/* Receives the handle of the IRQ that the AFU should trigger for errors */
#define MEMCPY_AFU_PP_IRQ	0x28


/*
 * One entry of the work queue read by the AFU.  Entries are prepared on the
 * stack by afu_memcpy() and copied into the queue by memcpy3_add_we().
 * Multi-byte fields are filled in little-endian form by the callers
 * (htole16()/htole64()).
 *
 * As declared, the members add up to 32 bytes with no padding under natural
 * alignment.
 *
 * NOTE(review): __packed is not defined anywhere in the visible file; unless
 * an included header provides it as a macro, the trailing "__packed"
 * declares a variable of that name instead of packing the struct -- confirm.
 */
struct memcpy_work_element {
	volatile uint8_t cmd; /* valid (bit 0), wrap (bit 1), cmd (bits 2-7), see MEMCPY_WE_CMD */
	volatile uint8_t status; /* polled for a non-zero value; 1 is treated as success */
	union {
		uint16_t length; /* byte count, set for the copy command */
		uint16_t tid; /* thread id, set for the wake_host_thread command */
	};
	uint8_t cmd_extra; /* set to 0x01 for wake_host_thread, 0 otherwise in this file */
	uint8_t reserved[3];
	uint64_t atomic_op;
	uint64_t src;  /* also irq EA or atomic_op2 */
	uint64_t dst;
} __packed;

/* Host-side bookkeeping for the work element queue handed to the AFU */
struct memcpy_weq {
	struct memcpy_work_element *queue; /* start of the allocated queue */
	struct memcpy_work_element *next; /* slot filled by the next memcpy3_add_we() call */
	struct memcpy_work_element *last; /* final slot; advancing past it wraps back to queue */
	int wrap; /* wrap flag OR-ed into each command byte; toggled on every wrap-around */
	int count; /* zeroed by memcpy3_init_weq(); not otherwise used in the visible code */
};

/*
 * Number of whole work elements that fit into a queue of queue_size bytes.
 */
int memcpy3_queue_length(size_t queue_size)
{
	const size_t element_size = sizeof(struct memcpy_work_element);

	return queue_size / element_size;
}

/*
 * Allocate a zeroed, page-aligned work element queue and reset the
 * bookkeeping in weq to point at its first slot.
 *
 * On allocation failure weq->queue, weq->next and weq->last are all set to
 * NULL (instead of writing through a NULL pointer); callers must check
 * weq->queue before using the queue.
 *
 * @param weq the queue bookkeeping to initialise
 * @param queue_size the size of the queue to allocate, in bytes
 */
void memcpy3_init_weq(struct memcpy_weq *weq, size_t queue_size)
{
	weq->wrap = 0;
	weq->count = 0;

	weq->queue = aligned_alloc(getpagesize(), queue_size);
	if (!weq->queue) {
		weq->next = NULL;
		weq->last = NULL;
		return;
	}

	memset(weq->queue, 0, queue_size);
	weq->next = weq->queue;
	weq->last = weq->queue + memcpy3_queue_length(queue_size) - 1;
}

/*
 * Append a work element to the queue.
 *
 * Every field except the command byte is copied first; after a full memory
 * barrier the command byte is written with the queue's current wrap flag
 * substituted for the one in the template.  The write position then moves
 * to the following slot, returning to the start of the queue (and toggling
 * the wrap flag) once the last slot has been used.
 *
 * @param weq the work element queue to populate
 * @param we the work element to copy in
 * @return a pointer to the element inside the queue
 */
struct memcpy_work_element *memcpy3_add_we(struct memcpy_weq *weq, struct memcpy_work_element *we)
{
	struct memcpy_work_element *slot = weq->next;

	/* Payload first ... */
	slot->status = we->status;
	slot->length = we->length;
	slot->cmd_extra = we->cmd_extra;
	slot->atomic_op = we->atomic_op;
	slot->src = we->src;
	slot->dst = we->dst;

	/* ... then the command byte, once the payload stores are ordered before it */
	__sync_synchronize();
	slot->cmd = (we->cmd & ~MEMCPY_WE_CMD_WRAP) | weq->wrap;

	if (slot >= weq->last) {
		weq->wrap ^= MEMCPY_WE_CMD_WRAP;
		weq->next = weq->queue;
	} else {
		weq->next = slot + 1;
	}

	return slot;
}

/**
 * Prepare the AFU through its global MMIO area: report the global config
 * register and write the trace register.
 *
 * @param afu the AFU handle
 * @return false on success, true if mapping or any register access failed
 */
static bool global_setup(ocxl_afu_h afu)
{
	ocxl_mmio_h global_mmio;
	uint64_t afu_config;
	const uint64_t trace_value = 0x8008008000000000;

	// The whole global MMIO area is mapped
	if (ocxl_mmio_map(afu, OCXL_GLOBAL_MMIO, &global_mmio) != OCXL_OK) {
		return true;
	}

	if (ocxl_mmio_read64(global_mmio, MEMCPY_AFU_GLOBAL_CFG, OCXL_MMIO_LITTLE_ENDIAN, &afu_config) != OCXL_OK) {
		LOG_ERR("Reading global config register failed\n");
		return true;
	}
	LOG_INF("AFU config = 0x%lx\n", afu_config);

	if (ocxl_mmio_write64(global_mmio, MEMCPY_AFU_GLOBAL_TRACE, OCXL_MMIO_LITTLE_ENDIAN, trace_value) != OCXL_OK) {
		LOG_ERR("Writing trace register failed\n");
		return true;
	}
	LOG_INF("traces reset and rearmed\n");

	return false;
}

/**
 * Ask the AFU to restart by writing the Restart bit to its control register.
 *
 * The write is unconditional: the status register is not consulted first.
 *
 * @param pp_mmio the per-PASID MMIO area of the AFU to restart
 * @return false on success, true on failure
 */
static bool restart_afu_if_stopped(ocxl_mmio_h pp_mmio)
{
	bool failed = (OCXL_OK != ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_CTRL,
	                                            OCXL_MMIO_LITTLE_ENDIAN, MEMCPY_AFU_PP_CTRL_Restart));

	if (failed) {
		LOG_ERR("couldn't restart memcpy after interrupt\n");
	}

	return failed;
}

/**
 * Process pending AFU events, waiting up to the timeout for the first one
 *
 * Only the first event check blocks; after an event has been handled the
 * remaining events are drained without waiting.  Receiving the completion
 * IRQ restarts the AFU and returns 0 immediately, regardless of any issue
 * recorded earlier in the same call.
 *
 * @param timeout the maximum amount of time to wait (seconds)
 * @param afu the AFU that will be issuing the IRQ
 * @param pp_mmio the per-PASID MMIO area of the AFU (or 0 if completion IRQ is not used)
 * @param irq_ea the handle of the completion IRQ (or 0 if not used)
 * @param err_ea the handle of the error IRQ
 *
 * @return 0 if nothing went wrong, otherwise a bitwise OR of issues detected
 * 	0x01: An AFU error was detected
 * 	0x02: A translation fault was received
 * 	0x04: An error occurred while accessing the AFU
 * 	0x08: A timeout occurred (only reported when timeout is non-zero)
 */
static int wait_for_irq(int timeout, ocxl_afu_h afu, ocxl_mmio_h pp_mmio, uint64_t irq_ea, uint64_t err_ea)
{
	ocxl_event event;
	int issues = 0;
	int wait_ms = timeout * 1000; // seconds to milliseconds

	for (;;) {
		int pending = ocxl_afu_event_check(afu, wait_ms, &event, 1);

		if (pending < 0) {
			return 0x04;
		}

		if (pending == 0) {
			if (timeout) {
				LOG_ERR("Timeout waiting for interrupt\n");
				issues |= 0x08;
			}
			return issues;
		}

		// Subsequent passes only drain what is already queued
		wait_ms = 0;

		if (event.type == OCXL_EVENT_IRQ) {
			if (irq_ea && event.irq.handle == irq_ea) {
				// Completion interrupt: let the AFU carry on and report success
				LOG_INF("AFU completion interrupt received\n");
				restart_afu_if_stopped(pp_mmio);
				return 0;
			}
			if (event.irq.handle == err_ea) {
				LOG_ERR("AFU error interrupt received\n");
				issues |= 0x01;
			}
		} else if (event.type == OCXL_EVENT_TRANSLATION_FAULT) {
			LOG_ERR("Translation fault detected, addr=%p count=%lu\n",
			        event.translation_fault.addr, event.translation_fault.count);
			issues |= 0x02;
		}

		// Keep going only while exactly one event was delivered
		if (pending != 1) {
			return issues;
		}
	}
}

/**
 * Poll a work element until its status becomes non-zero
 *
 * Each pass first drains pending AFU events (without blocking) so that
 * errors are noticed, then looks at the status byte.  The deadline is only
 * tested from the second pass onwards, so the element is always examined
 * at least once.
 *
 * @param timeout the maximum amount of time to wait (seconds)
 * @param afu the AFU that will be issuing the IRQ
 * @param work_element the work element to poll for completion
 * @param err_ea the handle of the error IRQ
 *
 * @return 0 once the status is set, otherwise a bitwise OR of issues detected
 * 	0x01: An AFU error was detected
 * 	0x02: A translation fault was received
 * 	0x04: An error occurred while accessing the AFU
 * 	0x08: A timeout occurred
 */
static int wait_for_status(int timeout, ocxl_afu_h afu, struct memcpy_work_element *work_element, uint64_t err_ea)
{
	struct timeval deadline;
	struct timeval now;

	// "now" starts out holding the timeout span; it is replaced by the
	// real time of day at the end of the first pass
	now.tv_sec = timeout;
	now.tv_usec = 0;

	gettimeofday(&deadline, NULL);
	timeradd(&deadline, &now, &deadline);

	while (!timercmp(&now, &deadline, >)) {
		int issues = wait_for_irq(0, afu, 0, 0, err_ea);
		if (issues) {
			return issues;
		}

		if (work_element->status) {
			return 0;
		}

		gettimeofday(&now, NULL);
	}

	LOG_ERR("timeout polling for completion\n");
	return 0x08;
}

#ifdef _ARCH_PPC64
/**
 * Wait for wake_host_thread to be issued by the AFU
 *
 * Calls ocxl_wait() repeatedly, returning as soon as the work element
 * status is set.  Once the deadline has passed, pending AFU events are
 * drained once so that a more specific issue can be reported in place of
 * the plain timeout.
 *
 * @param timeout the maximum amount of time to wait (seconds)
 * @param afu the AFU that will be issuing the IRQ
 * @param pp_mmio the per-PASID MMIO area of the AFU
 * @param work_element the work element to poll for completion
 * @param irq_ea the handle of the completion IRQ
 * @param err_ea the handle of the error IRQ
 *
 * @return 0 on success, otherwise a bitwise OR of issues detected
 * 	0x01: An AFU error was detected
 * 	0x02: A translation fault was received
 * 	0x04: An error occurred while accessing the AFU
 * 	0x08: A timeout occurred
 */
int wait_for_wake_host_thread(int timeout, ocxl_afu_h afu, ocxl_mmio_h pp_mmio,
                              struct memcpy_work_element *work_element, uint64_t irq_ea, uint64_t err_ea)
{
	struct timeval deadline;
	struct timeval now;

	now.tv_sec = timeout;
	now.tv_usec = 0;

	gettimeofday(&deadline, NULL);
	timeradd(&deadline, &now, &deadline);

	do {
		ocxl_wait();

		if (work_element->status) {
			return 0;
		}

		gettimeofday(&now, NULL);
	} while (!timercmp(&now, &deadline, >));

	LOG_ERR("timeout waiting for wake_host_thread\n");

	// Prefer a specific issue from the event queue over the bare timeout
	int issues = wait_for_irq(0, afu, pp_mmio, irq_ea, err_ea);

	return issues ? issues : 8;
}
#endif

/**
 * Fill a buffer with a repeating 0x00..0xff byte pattern
 *
 * Byte i of the buffer receives the low 8 bits of i.
 *
 * @param buf the buffer to fill
 * @param size the size of the buffer
 */
static void fill_buffer(char *buf, size_t size)
{
	size_t pos = 0;

	while (pos < size) {
		buf[pos] = pos & 0xff;
		pos++;
	}
}

/**
 * Print the per-process status register of the AFU when it is non-zero
 *
 * The outcome of the register read is deliberately ignored; the value
 * starts out as 0, in which case nothing is printed.
 *
 * @param pp_mmio the per-PASID MMIO area of the AFU context
 */
static void display_afu_status(ocxl_mmio_h pp_mmio)
{
	uint64_t afu_status = 0;

	(void)ocxl_mmio_read64(pp_mmio, MEMCPY_AFU_PP_STATUS, OCXL_MMIO_LITTLE_ENDIAN, &afu_status);

	if (!afu_status) {
		return;
	}

	LOG_INF("AFU Status register is %lx\n", afu_status);
}

/**
 * Run a single memcpy operation
 *
 * Builds a work queue holding a copy element, an optional completion
 * element (IRQ or wake_host_thread) and a stop element, hands the queue to
 * the AFU and waits for every element to report a status of 1.
 *
 * @param afu the AFU to copy with
 * @param src the data source
 * @param dst where the data should be copied to
 * @param size the number of bytes to copy (stored in a 16 bit length field)
 * @param completion how to signal completion, 0 = poll, 1 = interrupt, 2 = notify/wait on Power9
 *                   (when built without _ARCH_PPC64, 2 is handled like 0)
 * @param timeout the timeout in seconds to wait for completion
 *
 * @return false on success, true on failure
 */
static bool afu_memcpy(ocxl_afu_h afu, const char *src, char *dst, size_t size, int completion, int timeout)
{
	uint64_t wed;
	struct memcpy_weq weq;

	// NOTE(review): the queue is never freed; the AFU is given its address
	// below, so releasing it here would need confirmation that the AFU no
	// longer reads it
	memcpy3_init_weq(&weq, QUEUE_SIZE);
	if (!weq.queue) {
		LOG_ERR("Could not allocate the work element queue\n");
		goto err;
	}

	// Point the work element descriptor (wed) at the work queue
	wed = MEMCPY_WED(weq.queue, QUEUE_SIZE / CACHELINESIZE);

	// Setup a work element in the queue
	// The valid bit is left clear for now; it is set last to start the AFU
	struct memcpy_work_element memcpy_we;
	memset(&memcpy_we, 0, sizeof(memcpy_we));
	memcpy_we.cmd = MEMCPY_WE_CMD(0, MEMCPY_WE_CMD_COPY);
	memcpy_we.length = htole16((uint16_t) size);
	memcpy_we.src = htole64((uintptr_t) src);
	memcpy_we.dst = htole64((uintptr_t) dst);

	// Report the size actually requested by the caller
	LOG_INF("WED=0x%lx  src=%p  dst=%p size=%zu\n", wed, (const void *)src, (void *)dst, size);

	if (OCXL_OK != ocxl_afu_attach(afu, OCXL_ATTACH_FLAGS_NONE)) {
		goto err;
	}

	// Map the per-PASID MMIO space
	ocxl_mmio_h pp_mmio;
	if (OCXL_OK != ocxl_mmio_map(afu, OCXL_PER_PASID_MMIO, &pp_mmio)) {
		goto err;
	}

	// Allocate an IRQ to report errors
	ocxl_irq_h err_irq;
	if (OCXL_OK != ocxl_irq_alloc(afu, NULL, &err_irq)) {
		goto err;
	}

	// Let the AFU know the handle to trigger for errors
	uint64_t err_irq_handle = ocxl_irq_get_handle(afu, err_irq);

	if (OCXL_OK != ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_IRQ, OCXL_MMIO_LITTLE_ENDIAN, err_irq_handle)) {
		goto err;
	}

	// Write the address of the work element descriptor to the AFU
	if (OCXL_OK != ocxl_mmio_write64(pp_mmio, MEMCPY_AFU_PP_WED, OCXL_MMIO_LITTLE_ENDIAN, wed)) {
		goto err;
	}

	// setup the work queue
	struct memcpy_work_element *memcpy_element = memcpy3_add_we(&weq, &memcpy_we);
	struct memcpy_work_element *irq_element = NULL;
	struct memcpy_work_element *wake_element = NULL;
	struct memcpy_work_element *stop_element = NULL;

	ocxl_irq_h afu_irq;
	uint64_t afu_irq_handle = 0;
	if (completion == 1) {
		// Set up the interrupt work element

		// Allocate an IRQ to use for AFU notifications
		if (OCXL_OK != ocxl_irq_alloc(afu, NULL, &afu_irq)) {
			goto err;
		}

		// Insert the IRQ handle into a work element
		afu_irq_handle = ocxl_irq_get_handle(afu, afu_irq);
		struct memcpy_work_element irq_we;
		memset(&irq_we, 0, sizeof(irq_we));
		irq_we.cmd = MEMCPY_WE_CMD(1, MEMCPY_WE_CMD_IRQ);
		irq_we.src = htole64(afu_irq_handle);

		LOG_INF("irq EA = %lx\n", afu_irq_handle);

		irq_element = memcpy3_add_we(&weq, &irq_we);
#ifdef _ARCH_PPC64
	}  else if (completion == 2) {
		// Set up the wake_host_thread work element

		// Allocate an IRQ to use for AFU notifications if wake_host_thread fails
		if (OCXL_OK != ocxl_irq_alloc(afu, NULL, &afu_irq)) {
			goto err;
		}
		afu_irq_handle = ocxl_irq_get_handle(afu, afu_irq);

		uint16_t tid;
		if (OCXL_OK != ocxl_afu_get_p9_thread_id(afu, &tid)) {
			goto err;
		}

		struct memcpy_work_element wake_we;
		memset(&wake_we, 0, sizeof(wake_we));
		wake_we.cmd = MEMCPY_WE_CMD(1, MEMCPY_WE_CMD_WAKE_HOST_THREAD);
		wake_we.src = htole64(afu_irq_handle);
		wake_we.tid = htole16(tid);
		wake_we.cmd_extra = 0x01;

		LOG_INF("TID for wake_host_thread/wait = 0x%x\n", tid);

		wake_element = memcpy3_add_we(&weq, &wake_we);
#endif
	}

	struct memcpy_work_element stop_we;
	memset(&stop_we, 0, sizeof(stop_we));
	stop_we.cmd = MEMCPY_WE_CMD(1, MEMCPY_WE_CMD_STOP);

	stop_element = memcpy3_add_we(&weq, &stop_we);

	// memory barrier to ensure the descriptor is written to memory before we ask the AFU to use it
	__sync_synchronize();

	// Initiate the memcpy
	memcpy_element->cmd |= MEMCPY_WE_CMD_VALID;

	/*
	 * wait for the AFU to be done
	 *
	 * if we're using an interrupt, we can go to sleep.
	 * With wake_host_thread we wait on the wake element.
	 * Otherwise, we poll the status of the copy element from memory
	 */
	int rc = (completion == 1) ? wait_for_irq(timeout, afu, pp_mmio, afu_irq_handle, err_irq_handle) :
#ifdef _ARCH_PPC64
	         (completion == 2) ? wait_for_wake_host_thread(timeout, afu, pp_mmio, wake_element, afu_irq_handle, err_irq_handle) :
#endif
	         wait_for_status(timeout, afu, memcpy_element, err_irq_handle);
	if (rc) {
		goto err_status;
	}

	if (memcpy_element->status != 1) {
		LOG_ERR("unexpected status 0x%x for copy\n", memcpy_element->status);
		goto err_status;
	}

	// The optional elements are checked only when they were actually queued,
	// so a mode that added no element never dereferences a NULL pointer
	if (irq_element && irq_element->status != 1) {
		LOG_ERR("unexpected status 0x%x for IRQ\n", irq_element->status);
		goto err_status;
	}

	if (wake_element && wake_element->status != 1) {
		LOG_ERR("unexpected status 0x%x for wake\n", wake_element->status);
		goto err_status;
	}

	// Errors seen while waiting for the stop element fail the copy too
	rc = wait_for_status(timeout, afu, stop_element, err_irq_handle);
	if (rc) {
		goto err_status;
	}

	if (stop_element->status != 1) {
		LOG_ERR("unexpected status 0x%x for stop\n", stop_element->status);
		goto err_status;
	}

	return false;

err_status:
	display_afu_status(pp_mmio);

err:
	return true;
}

/*
 * Print the command line help to stderr and terminate with exit status 1.
 *
 * @param name the program name to show in the usage line
 */
static void usage(char *name)
{
	static const char options_help[] =
	        "Options:\n"
	        "\t-i\t\tUse interrupts to indicate completion\n"
#ifdef _ARCH_PPC64
	        "\t-w\t\tUse wake_host_thread/wait to indicate completion\n"
#endif
	        "\t-t <timeout>\tSeconds to wait for the AFU to signal completion\n"
	        "\t-v\t\tShow interactions with the AFU\n";

	fprintf(stderr, "Usage: %s [ options ]\n", name);
	fputs(options_help, stderr);
	exit(1);
}

/* Settings gathered from the command line by main() */
struct memcpy_test_args {
	int completion; // 0 = Poll, 1 = IRQ, 2 = wake_host_thread/wait
	bool verbose; // true enables OCXL_TRACING messages in addition to OCXL_ERRORS
	int completion_timeout; // seconds; -1 means "not given" and is replaced by 10
};

/*
 * Test entry point: parse the options, open the AFU, copy MEMCPY_SIZE bytes
 * with it and compare the destination against the source.
 *
 * @return 0 when the copy succeeded and the buffers match, 1 otherwise
 */
int main(int argc, char *argv[])
{
	struct memcpy_test_args args;

	args.completion = 0;
	args.completion_timeout = -1;
	args.verbose = false;

	while (1) {
		// NOTE(review): "s:" is accepted by the option string but has no
		// case below, so "-s <arg>" is silently ignored
		int c = getopt(argc, argv, "+hs:it:v"
#ifdef _ARCH_PPC64
		               "w"
#endif
		              );
		if (c < 0)
			break;
		switch (c) {
		case '?':
		case 'h':
			usage(argv[0]);
			break;
		case 'i':
			args.completion = 1;
			break;
#ifdef _ARCH_PPC64
		case 'w':
			args.completion = 2;
			break;
#endif
		case 't': {
			// Reject anything that is not a complete decimal number that
			// fits in an int, rather than silently reading it as 0
			char *end;

			errno = 0;
			long seconds = strtol(optarg, &end, 10);
			if (end == optarg || *end != '\0' || errno == ERANGE || seconds != (int)seconds) {
				fprintf(stderr,
				        "Error: Invalid timeout '%s'\n", optarg);
				usage(argv[0]);
			}
			args.completion_timeout = (int)seconds;
			break;
		}
		case 'v':
			args.verbose = true;
			break;
		default:
			break;
		}
	}

	if (args.completion_timeout == -1) {
		args.completion_timeout = 10;
	}

	if (argv[optind]) {
		fprintf(stderr,
		        "Error: Unexpected argument '%s'\n", argv[optind]);
		usage(argv[0]);
	}

	// Enable messages for open calls
	if (args.verbose) {
		ocxl_enable_messages(OCXL_ERRORS | OCXL_TRACING);
	} else {
		ocxl_enable_messages(OCXL_ERRORS);
	}

	ocxl_afu_h afu;
	if (OCXL_OK != ocxl_afu_open(AFU_NAME, &afu)) {
		LOG_ERR("Could not open AFU '%s'\n", AFU_NAME);
		exit(1);
	}

	// Enable per-AFU messages
	if (args.verbose) {
		ocxl_afu_enable_messages(afu, OCXL_ERRORS | OCXL_TRACING);
	} else {
		ocxl_afu_enable_messages(afu, OCXL_ERRORS);
	}

	if (global_setup(afu)) {
		exit(1);
	}

	// Allocate memory areas for afu to copy to/from
	char *src = aligned_alloc(64, MEMCPY_SIZE);
	char *dst = aligned_alloc(64, MEMCPY_SIZE);

	if (!src || !dst) {
		LOG_ERR("Could not allocate the copy buffers\n");
		free(src);
		free(dst);
		ocxl_afu_close(afu);
		return 1;
	}

	fill_buffer(src, MEMCPY_SIZE);
	memset(dst, '\0', MEMCPY_SIZE);

	bool failed = afu_memcpy(afu, src, dst, MEMCPY_SIZE, args.completion, args.completion_timeout);

	if (failed) {
		ocxl_afu_close(afu);
		LOG_ERR("memcpy failed\n");
	} else {
		if (memcmp(dst, src, MEMCPY_SIZE)) {
			// A mismatch is a test failure and must be visible in the exit status
			LOG_ERR("Memory contents do not match\n");
			failed = true;
		} else {
			LOG_INF("Memory contents match\n");
		}

		ocxl_afu_close(afu);
	}

	// The AFU context is closed by now, so the buffers can be released
	free(src);
	free(dst);

	return failed ? 1 : 0;
}