File: csr_raw.cu

package info (click to toggle)
python-escript 5.6-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 144,304 kB
  • sloc: python: 592,074; cpp: 136,909; ansic: 18,675; javascript: 9,411; xml: 3,384; sh: 738; makefile: 207
file content (132 lines) | stat: -rw-r--r-- 4,694 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#include <cusp/csr_matrix.h>
#include <cusp/print.h>
#include <cusp/multiply.h>

// This example shows how to wrap "raw" host and device memory
// with a csr_matrix_view.  This situation arises when interfacing
// Cusp with data that is managed externally.  Once raw data has
// been appropriately wrapped the csr_matrix_view can be used
// in Cusp algorithms like cusp::copy() and cusp::multiply()
// just like a csr_matrix container.
//
//  Example Matrix:
//   [10  0 20]
//   [ 0  0  0]
//   [ 0  0 30]
//   [40 50 60]

int main(void)
{
  // CSR format in raw host memory
  int   host_Ap[5] = {0,2,2,3,6};            // CSR row pointer
  int   host_Aj[6] = {0,2,2,0,1,2};          // CSR column indices
  float host_Ax[6] = {10,20,30,40,50,60};    // CSR values

  // x and y arrays in host memory
  float host_x[3] = {1,1,1};
  float host_y[4] = {0,0,0,0};

  // allocate device memory for CSR format
  int   * device_Ap;  cudaMalloc(&device_Ap, 5 * sizeof(int));
  int   * device_Aj;  cudaMalloc(&device_Aj, 6 * sizeof(int));
  float * device_Ax;  cudaMalloc(&device_Ax, 6 * sizeof(float));
  
  // allocate device memory for x and y arrays
  float * device_x;   cudaMalloc(&device_x, 3 * sizeof(float));
  float * device_y;   cudaMalloc(&device_y, 4 * sizeof(float));

  // copy raw data from host to device
  cudaMemcpy(device_Ap, host_Ap, 5 * sizeof(int),   cudaMemcpyHostToDevice);
  cudaMemcpy(device_Aj, host_Aj, 6 * sizeof(int),   cudaMemcpyHostToDevice);
  cudaMemcpy(device_Ax, host_Ax, 6 * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(device_x,  host_x,  3 * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(device_y,  host_y,  4 * sizeof(float), cudaMemcpyHostToDevice);

  // wrap the host memory with a csr_matrix_view
  {
    // use array1d_view to wrap the individual arrays
    typedef typename cusp::array1d_view<int   *> HostIndexArrayView;
    typedef typename cusp::array1d_view<float *> HostValueArrayView;

    HostIndexArrayView row_offsets   (host_Ap, host_Ap + 5);
    HostIndexArrayView column_indices(host_Aj, host_Aj + 6);
    HostValueArrayView values        (host_Ax, host_Ax + 6);
    
    HostValueArrayView x (host_x, host_x + 3);
    HostValueArrayView y (host_y, host_y + 4);

    // combine the three array1d_views into a csr_matrix_view
    typedef cusp::csr_matrix_view<HostIndexArrayView,
                                  HostIndexArrayView,
                                  HostValueArrayView> HostView;

    HostView A(4, 3, 6, row_offsets, column_indices, values);

    // print view
    std::cout << "\nhost csr_matrix_view" << std::endl;
    cusp::print(A);

    // compute y = A* x 
    cusp::multiply(A, x, y);
    
    // print x
    std::cout << "\nx array" << std::endl;
    cusp::print(x);

    // print y
    std::cout << "\n y array, y = A * x" << std::endl;
    cusp::print(y);
  }

  // wrap the device memory with a csr_matrix_view
  {
    // *NOTE* raw pointers must be wrapped with thrust::device_ptr!
    thrust::device_ptr<int>   wrapped_device_Ap(device_Ap);
    thrust::device_ptr<int>   wrapped_device_Aj(device_Aj);
    thrust::device_ptr<float> wrapped_device_Ax(device_Ax);
    thrust::device_ptr<float> wrapped_device_x(device_x);
    thrust::device_ptr<float> wrapped_device_y(device_y);

    // use array1d_view to wrap the individual arrays
    typedef typename cusp::array1d_view< thrust::device_ptr<int>   > DeviceIndexArrayView;
    typedef typename cusp::array1d_view< thrust::device_ptr<float> > DeviceValueArrayView;

    DeviceIndexArrayView row_offsets   (wrapped_device_Ap, wrapped_device_Ap + 5);
    DeviceIndexArrayView column_indices(wrapped_device_Aj, wrapped_device_Aj + 6);
    DeviceValueArrayView values        (wrapped_device_Ax, wrapped_device_Ax + 6);
    DeviceValueArrayView x (wrapped_device_x, wrapped_device_x + 3);
    DeviceValueArrayView y (wrapped_device_y, wrapped_device_y + 4);

    // combine the three array1d_views into a csr_matrix_view
    typedef cusp::csr_matrix_view<DeviceIndexArrayView,
                                  DeviceIndexArrayView,
                                  DeviceValueArrayView> DeviceView;

    DeviceView A(4, 3, 6, row_offsets, column_indices, values);

    // print view
    std::cout << "\ndevice csr_matrix_view" << std::endl;
    cusp::print(A);

    // compute y = A* x 
    cusp::multiply(A, x, y);
    
    // print x
    std::cout << "\nx array" << std::endl;
    cusp::print(x);

    // print y
    std::cout << "\n y array, y = A * x" << std::endl;
    cusp::print(y);
  }
  
  // free device arrays
  cudaFree(device_Ap);
  cudaFree(device_Aj);
  cudaFree(device_Ax);
  cudaFree(device_x);
  cudaFree(device_y);

  return 0;
}