File: test39.21.prog.cu

package info (click to toggle)

slurm-wlm 22.05.8-4%2Bdeb12u3

links: PTS, VCS
area: main
in suites: bookworm
size: 48,492 kB
sloc: ansic: 475,246; exp: 69,020; sh: 8,862; javascript: 6,528; python: 6,444; makefile: 4,185; perl: 4,069; pascal: 131

file content (47 lines) | stat: -rw-r--r-- 1,019 bytes

parent folder | download | duplicates (3)

// Slurm regression test39.21.prog.cu
#include <iostream>
#include <math.h>
// Kernel function to add the elements of two arrays
__global__
void add(int n, float *x, float *y)
{
	int index = threadIdx.x;
	int stride = blockDim.x;
	for (int i = index; i < n; i += stride)
		y[i] = x[i] + y[i];
}

int main(void)
{
	int N = 1024;
	int i;
	float *x, *y;
	float maxError = 0.0f;

	// Allocate Unified Memory – accessible from CPU or GPU
	cudaMallocManaged(&x, N * sizeof(float));
	cudaMallocManaged(&y, N * sizeof(float));

	// initialize x and y arrays on the host
	for (i = 0; i < N; i++) {
		x[i] = 1.0f;
		y[i] = 2.0f;
	}

	// Run kernel on 256 elements at a time on the GPU
	add<<<1, 256>>>(N, x, y);

	// Wait for GPU to finish before accessing on host
	cudaDeviceSynchronize();

	// Check for errors (all values should be 3.0f)
	for (i = 0; i < N; i++)
		maxError = fmax(maxError, fabs(y[i] - 3.0f));
	std::cout << "Max error: " << maxError << std::endl;

	// Free memory
	cudaFree(x);
	cudaFree(y);

	return 0;
}