1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
|
/* ************************************************************************
* Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop-
* ies of the Software, and to permit persons to whom the Software is furnished
* to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM-
* PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE-
* CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*
* ************************************************************************ */
#include <algorithm> // for std::min
#include <hip/hip_runtime_api.h> // for hip functions
#include <hipsolver/hipsolver.h> // for all the hipsolver C interfaces and type declarations
#include <stdio.h> // for size_t, printf
#include <vector>
// Example: Compute the LU Factorization of a matrix on the GPU
void get_example_matrix(std::vector<double>& hA, int& M, int& N, int& lda)
{
// a *very* small example input; not a very efficient use of the API
const double A[3][3] = {{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}};
M = 3;
N = 3;
lda = 3;
// note: matrices must be stored in column major format,
// i.e. entry (i,j) should be accessed by hA[i + j*lda]
hA.resize(size_t(lda) * N);
for(size_t i = 0; i < M; ++i)
{
for(size_t j = 0; j < N; ++j)
{
// copy A (2D array) into hA (1D array, column-major)
hA[i + j * lda] = A[i][j];
}
}
}
// We use hipsolverDgetrf to factor a real M-by-N matrix, A.
int main()
{
int M; // rows
int N; // cols
int lda; // leading dimension
std::vector<double> hA; // input matrix on CPU
get_example_matrix(hA, M, N, lda);
// let's print the input matrix, just to see it
printf("A = [\n");
for(size_t i = 0; i < M; ++i)
{
printf(" ");
for(size_t j = 0; j < N; ++j)
{
printf("% .3f ", hA[i + j * lda]);
}
printf(";\n");
}
printf("]\n");
// initialization
hipsolverHandle_t handle;
hipsolverCreate(&handle);
// calculate the sizes of our arrays
size_t size_piv = size_t(std::min(M, N)); // count of pivot indices
size_t size_A = size_t(lda) * N; // count of elements in matrix A
// allocate memory on GPU
int* dInfo;
int* dIpiv;
double* dA;
hipMalloc(&dInfo, sizeof(int));
hipMalloc(&dIpiv, sizeof(int) * size_piv);
hipMalloc(&dA, sizeof(double) * size_A);
// copy data to GPU
hipMemcpy(dA, hA.data(), sizeof(double) * size_A, hipMemcpyHostToDevice);
// create the workspace
double* dWork;
int size_work; // size of workspace to pass to getrf
hipsolverDgetrf_bufferSize(handle, M, N, dA, lda, &size_work);
hipMalloc(&dWork, size_work);
// compute the LU factorization on the GPU
hipsolverStatus_t status
= hipsolverDgetrf(handle, M, N, dA, lda, dWork, size_work, dIpiv, dInfo);
if(status != HIPSOLVER_STATUS_SUCCESS)
return status;
// copy the results back to CPU
std::vector<int> hInfo(1); // provides information about algorithm completion
std::vector<int> hIpiv(size_piv); // array for pivot indices on CPU
hipMemcpy(hInfo.data(), dInfo, sizeof(int), hipMemcpyDeviceToHost);
hipMemcpy(hIpiv.data(), dIpiv, sizeof(int) * size_piv, hipMemcpyDeviceToHost);
hipMemcpy(hA.data(), dA, sizeof(double) * size_A, hipMemcpyDeviceToHost);
// the results are now in hA and hIpiv
// we can print some of the results if we want to see them
printf("U = [\n");
for(size_t i = 0; i < M; ++i)
{
printf(" ");
for(size_t j = 0; j < N; ++j)
{
printf("% .3f ", (i <= j) ? hA[i + j * lda] : 0);
}
printf(";\n");
}
printf("]\n");
// clean up
hipFree(dWork);
hipFree(dInfo);
hipFree(dIpiv);
hipFree(dA);
hipsolverDestroy(handle);
}
|