File: test_cudafft.c

package info (click to toggle)
bart-cuda 0.8.00-2
  • links: PTS, VCS
  • area: contrib
  • in suites: bookworm, sid
  • size: 7,752 kB
  • sloc: ansic: 100,267; python: 717; makefile: 576; sh: 564; cpp: 104
file content (102 lines) | stat: -rw-r--r-- 2,350 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/* Copyright 2019. Uecker Lab, University Medical Center Göttingen.
 * All rights reserved. Use of this source code is governed by
 * a BSD-style license which can be found in the LICENSE file.
 *
 * Authors:
 * 2019 Christian Holme <christian.holme@med.uni-goettingen.de>
 */

#include <complex.h>
#include <assert.h>

#include "num/fft.h"
#include "num/rand.h"

#include "num/multind.h"
#include "num/flpmath.h"
#include "num/init.h"

#include "misc/debug.h"
#include "misc/misc.h"

#include "utest.h"




/**
 * Apply one FFT plan to two copies of the same input and compare the results.
 *
 * @param D          number of dimensions
 * @param dims       extent of each of the D dimensions
 * @param flags      bitmask handed to fft_create (the caller sets one bit per
 *                   dimension that should be transformed)
 * @param in         input data; copied into both work buffers, never modified
 * @param cpu_inout  first work buffer, transformed in place; also the buffer
 *                   the plan is created with
 * @param gpu_inout  second work buffer, transformed in place with the same plan
 * @param gpu_result receives a copy of gpu_inout, which is then compared
 *                   against cpu_inout
 *
 * The comparison is md_znrmse(cpu_inout, gpu_result) < UT_TOL, reported
 * through UT_ASSERT.
 * NOTE(review): there is no explicit return statement here; UT_ASSERT is
 * presumably what returns the bool result -- confirm against utest.h.
 */
static bool run_cuda_fft_test(const unsigned int D, const long* dims, const unsigned long flags,
		const complex float* in,
		complex float* cpu_inout,
		complex float* gpu_inout,
		complex float* gpu_result)
{
	// start both work buffers from identical data
	md_copy(D, dims, cpu_inout, in, CFL_SIZE);
	md_copy(D, dims, gpu_inout, in, CFL_SIZE);

	// a single plan, created with the first buffer, is executed on both
	const struct operator_s* plan = fft_create(D, dims, flags, cpu_inout, cpu_inout, false);

	fft_exec(plan, cpu_inout, cpu_inout);
	fft_exec(plan, gpu_inout, gpu_inout);
	fft_free(plan);

	// copy the second result into gpu_result before comparing
	md_copy(D, dims, gpu_result, gpu_inout, CFL_SIZE);

	UT_ASSERT(md_znrmse(D, dims, cpu_inout, gpu_result) < UT_TOL);
}




/**
 * Run run_cuda_fft_test on random data for several combinations of
 * transformed dimensions, using one buffer from md_alloc and one from
 * md_alloc_gpu as the two work buffers.
 *
 * When built without USE_CUDA the test does nothing and reports success.
 *
 * @return true if every flag combination passed (or USE_CUDA is undefined),
 *         false if at least one combination failed
 */
static bool test_cuda_fft(void)
{
#ifndef USE_CUDA
	return true;
#else
	// TODO: detect if GPU works

	num_rand_init(5);
	num_init_gpu();

	enum { test_cuda_fft_dims = 7 };

	// NOTE(review): the meaning of the trailing remark is unclear -- confirm with the author.
	const long dims[test_cuda_fft_dims] = { 4, 4, 4, 4, 4, 4, 1 }; // in last dim != 1 works...

	// One row per test case; a non-zero entry sets the flag bit of that dimension.
	const bool transform_dims[][test_cuda_fft_dims] = {
		{ 1, 1, 1, 0, 0, 0, 0 },
		{ 1, 1, 0, 0, 1, 0, 0 },
		{ 1, 0, 1, 0, 1, 0, 0 },
		{ 1, 1, 0, 1, 1, 1, 0 },
		{ 1, 1, 0, 1, 1, 0, 1 },
		{ 0, 0, 0, 0, 0, 0, 0 },
	};

	const unsigned int D = test_cuda_fft_dims;

	complex float* in = md_alloc(D, dims, CFL_SIZE);
	md_gaussian_rand(D, dims, in);

	complex float* cpu_inout = md_alloc(D, dims, CFL_SIZE);
	complex float* gpu_inout = md_alloc_gpu(D, dims, CFL_SIZE);
	complex float* gpu_result = md_alloc(D, dims, CFL_SIZE);

	bool ok = true;

	for (unsigned int i = 0; i < ARRAY_SIZE(transform_dims); ++i) {

		unsigned long flags = 0;

		for (unsigned int j = 0; j < D; ++j)
			if (transform_dims[i][j])
				flags = MD_SET(flags, j);

		// Record the outcome of each case so a mismatch fails the test.
		// No early return: remaining cases still run and the buffers
		// below are always released.
		if (!run_cuda_fft_test(D, dims, flags, in, cpu_inout, gpu_inout, gpu_result))
			ok = false;
	}

	md_free(gpu_result);
	md_free(gpu_inout);
	md_free(cpu_inout);
	md_free(in);

	return ok;
#endif
}

// Hand test_cuda_fft to the unit-test framework via the registration macro
// (presumably the GPU-specific variant, judging by its name -- see utest.h).
UT_GPU_REGISTER_TEST(test_cuda_fft);