File: test40.8.prog.cu

package info (click to toggle)
slurm-wlm 22.05.8-4%2Bdeb12u3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 48,492 kB
  • sloc: ansic: 475,246; exp: 69,020; sh: 8,862; javascript: 6,528; python: 6,444; makefile: 4,185; perl: 4,069; pascal: 131
file content (65 lines) | stat: -rw-r--r-- 1,568 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// Slurm regression test40.8.prog.cu
#include <iostream>
#include <math.h>
#include <sys/time.h>
// Kernel: element-wise in-place addition, y[i] = x[i] + y[i] for 0 <= i < n.
//
// Launch layout: any 1D grid of 1D blocks. Every thread starts at its global
// index and advances by the total number of threads in the grid, so each
// element is handled by exactly one thread no matter how many blocks are
// launched (a single-block launch behaves as a plain block-stride loop).
//
// Parameters:
//   n - number of elements to process; values <= 0 make the kernel a no-op
//   x - input array, at least n floats, readable from the device
//   y - input/output array, at least n floats, readable and writable from
//       the device
//
// Shared memory: none.
__global__
void add(int n, float *x, float *y)
{
	// Index arithmetic is done in size_t so that neither
	// blockIdx.x * blockDim.x nor the running index can overflow.
	const size_t count = (n > 0) ? (size_t) n : 0;
	const size_t stride = (size_t) gridDim.x * blockDim.x;
	const size_t first = (size_t) blockIdx.x * blockDim.x + threadIdx.x;

	for (size_t i = first; i < count; i += stride)
		y[i] = x[i] + y[i];
}

// Test driver: allocates two managed arrays of N floats, fills them with
// 1.0f and 2.0f, adds them on the GPU, verifies that every result is 3.0f,
// and reports the maximum error and the wall-clock run time in microseconds.
//
// Returns 0 on completion, 1 if an allocation, the kernel launch, or the
// kernel execution fails (a diagnostic is written to stderr in that case).
int main(void)
{
	const int N = 1024 * 1024 * 16;
	const size_t bytes = (size_t) N * sizeof(float);
	int i;
	float *x = NULL, *y = NULL;
	float maxError = 0.0f;
	struct timeval tv1, tv2;
	// 64-bit so that seconds * 1000000 is not truncated on long runs
	long long delta_t;
	cudaError_t err;

	// Get start time
	gettimeofday(&tv1, NULL);

	// Allocate Unified Memory - accessible from CPU or GPU.
	// The CUDA runtime reports failures through its return code, not
	// errno, so print the runtime's own error description.
	err = cudaMallocManaged(&x, bytes);
	if (err != cudaSuccess) {
		std::cerr << "Couldn't allocate memory for x: "
			  << cudaGetErrorString(err) << std::endl;
		return 1;
	}
	err = cudaMallocManaged(&y, bytes);
	if (err != cudaSuccess) {
		std::cerr << "Couldn't allocate memory for y: "
			  << cudaGetErrorString(err) << std::endl;
		cudaFree(x);	// don't leak the first allocation
		return 1;
	}

	// Initialize x and y arrays on the host
	for (i = 0; i < N; i++) {
		x[i] = 1.0f;
		y[i] = 2.0f;
	}

	// Launch one block of 256 threads; the kernel loops until all N
	// elements have been processed.
	add<<<1, 256>>>(N, x, y);

	// A bad launch configuration is reported by cudaGetLastError();
	// faults during execution surface at the synchronizing call, which
	// also makes the results safe to read on the host.
	err = cudaGetLastError();
	if (err == cudaSuccess)
		err = cudaDeviceSynchronize();
	if (err != cudaSuccess) {
		std::cerr << "Kernel execution failed: "
			  << cudaGetErrorString(err) << std::endl;
		cudaFree(x);
		cudaFree(y);
		return 1;
	}

	// Check for errors (all values should be 3.0f)
	for (i = 0; i < N; i++)
		maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
	std::cout << "Max error: " << maxError << std::endl;

	// Free memory
	cudaFree(x);
	cudaFree(y);

	// Get end time and report the elapsed wall-clock time
	gettimeofday(&tv2, NULL);
	delta_t  = (long long) (tv2.tv_sec - tv1.tv_sec) * 1000000LL;
	delta_t += (long long) (tv2.tv_usec - tv1.tv_usec);
	std::cout << "Run Time (usec): " << delta_t << std::endl;

	return 0;
}