File: ucl_matrix.h

package info (click to toggle)
lammps 20220106.git7586adbb6a%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 348,064 kB
  • sloc: cpp: 831,421; python: 24,896; xml: 14,949; f90: 10,845; ansic: 7,967; sh: 4,226; perl: 4,064; fortran: 2,424; makefile: 1,501; objc: 238; lisp: 163; csh: 16; awk: 14; tcl: 6
file content (239 lines) | stat: -rw-r--r-- 11,007 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
/***************************************************************************
                                 ucl_matrix.h
                             -------------------
                               W. Michael Brown

  Matrix S-Object (paired host and device matrix containers)

 __________________________________________________________________________
    This file is part of the Geryon Unified Coprocessor Library (UCL)
 __________________________________________________________________________

    begin                : Thu May 10 2012
    copyright            : (C) 2012 by W. Michael Brown
    email                : brownw@ornl.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   This software is distributed under the Simplified BSD License.
   ----------------------------------------------------------------------- */

// Only allow this file to be included by CUDA and OpenCL specific headers
#ifdef _UCL_MAT_ALLOW

/// Matrix S-Object
/** Bundles a host matrix (\c host), a device matrix (\c device) and a private
  * host-side buffer of the device element type (\c _buffer). Allocation,
  * device resizing and host<->device copies are delegated to
  * _ucl_s_obj_help, selected at compile time by whether hosttype and devtype
  * are the same type. **/
template <class hosttype, class devtype>
class UCL_Matrix {
 public:
  // Traits for copying data
  // MEM_TYPE is 0 for device, 1 for host, and 2 for image
  enum traits {
    DATA_TYPE = _UCL_DATA_ID<hosttype>::id,
    MEM_TYPE = 1,
    PADDED = 0,
    ROW_MAJOR = 1,
    VECTOR = 0
  };
  typedef hosttype data_type;

  /// Host Allocation
  UCL_H_Mat<hosttype> host;

  /// Device Allocation
  UCL_D_Mat<devtype> device;

  UCL_Matrix() { }
  ~UCL_Matrix() { }

  /// Construct with specified number of rows and columns
  /** The status returned by the underlying allocation is discarded here;
    * call alloc() directly when the result must be checked.
    * \sa alloc() **/
  UCL_Matrix(const size_t rows, const size_t cols, UCL_Device &acc,
             const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
             const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }

  /// Set up host matrix with specified # of rows/cols and reserve memory
  /** The kind1 parameter controls memory access from the host
    * - UCL_READ_WRITE - Specify that you will read and write from host
    * - UCL_WRITE_ONLY - Specify that you will only write from host
    * - UCL_READ_ONLY  - Specify that you will only read from host
    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
    * The kind2 parameter controls memory optimizations from the device:
    * - UCL_READ_WRITE - Specify that you will read and write in kernels
    * - UCL_WRITE_ONLY - Specify that you will only write in kernels
    * - UCL_READ_ONLY  - Specify that you will only read in kernels
    * \note When passing a command queue instead of a device, the device
    *       allocation is always performed. Even if the device shares memory
    *       with the host.
    * \param rows Number of rows to allocate
    * \param cols Number of columns to allocate
    * \param cq Default command queue for operations copied from another mat
    * \return UCL_SUCCESS if the memory allocation is successful **/
  template <class mat_type>
  inline int alloc(const size_t rows, const size_t cols, mat_type &cq,
                   const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
                   const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,cq,kind1,kind2); }

  /// Set up host matrix with specified # of rows/cols and reserve memory
  /** The kind1 parameter controls memory access from the host
    * - UCL_READ_WRITE - Specify that you will read and write from host
    * - UCL_WRITE_ONLY - Specify that you will only write from host
    * - UCL_READ_ONLY  - Specify that you will only read from host
    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
    * The kind2 parameter controls memory optimizations from the device:
    * - UCL_READ_WRITE - Specify that you will read and write in kernels
    * - UCL_WRITE_ONLY - Specify that you will only write in kernels
    * - UCL_READ_ONLY  - Specify that you will only read in kernels
    * \param rows Number of rows to allocate
    * \param cols Number of columns to allocate
    * \param acc Device used to get the default command queue for operations
    * \return UCL_SUCCESS if the memory allocation is successful **/
  inline int alloc(const size_t rows, const size_t cols, UCL_Device &acc,
                   const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
                   const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
    { return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }

  /// Free memory and set size to 0
  /** Clears the host and device containers only; _buffer is not touched
    * here. **/
  inline void clear()
    { host.clear(); device.clear(); }

  /// Resize the allocation to contain rows x cols elements
  /** The host container must not be a view (checked with assert). The host
    * is resized first; the device (and buffer) resize is attempted only if
    * the host resize succeeds.
    * \return UCL_SUCCESS on success, otherwise the first error code **/
  inline int resize(const int rows, const int cols) {
    assert(host.kind()!=UCL_VIEW);
    int err=host.resize(rows,cols);
    if (err!=UCL_SUCCESS)
      return err;
    return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
      dev_resize(device,host,_buffer,rows,cols);
  }

  /// Resize (only if bigger) the allocation to contain rows x cols elements
  /** A resize is triggered when either requested dimension exceeds the
    * current one; the new allocation then uses exactly new_rows x new_cols.
    * \return UCL_SUCCESS if no resize was needed or the resize succeeded **/
  inline int resize_ib(const int new_rows, const int new_cols)
    { if (new_rows>rows() || new_cols>cols()) return resize(new_rows,new_cols);
      else return UCL_SUCCESS; }

  /// Set each element to zero (asynchronously on device)
  inline void zero() { zero(cq()); }
  /// Set first n elements to zero (asynchronously on device)
  inline void zero(const int n) { zero(n,cq()); }
  /// Set each element to zero (asynchronously on device)
  /** \param cq Command queue passed to the device zero operation **/
  inline void zero(command_queue &cq) {
    host.zero();
    // A device view has no storage of its own to clear; presumably it aliases
    // host or _buffer memory set up by _ucl_s_obj_help::alloc -- confirm.
    if (device.kind()!=UCL_VIEW) device.zero(cq);
    else if (_buffer.numel()>0) _buffer.zero();
  }
  /// Set first n elements to zero (asynchronously on device)
  /** \param n Number of leading elements to clear
    * \param cq Command queue passed to the device zero operation **/
  inline void zero(const int n, command_queue &cq) {
    host.zero(n);
    if (device.kind()!=UCL_VIEW) device.zero(n,cq);
    // Clear only the first n buffer elements so this branch honors the same
    // "first n" contract as the host and device branches.
    else if (_buffer.numel()>0) _buffer.zero(n);
  }

  /// Get the number of elements
  inline size_t numel() const { return host.numel(); }
  /// Get the number of rows
  inline size_t rows() const { return host.rows(); }
  /// Get the number of columns
  inline size_t cols() const { return host.cols(); }
  /// Get the host memory usage (bytes) of the s-object (including any buffers)
  inline size_t host_mem_usage()
    { return host.row_bytes()*host.rows()+_buffer.row_bytes()*_buffer.rows(); }
  /// Get the device memory usage (bytes) of the s-object
  inline size_t device_mem_usage()
    { return device.row_bytes()*device.rows(); }

  /// Get host element at index i
  inline hosttype & operator[](const int i) { return host[i]; }
  /// Get host element at index i
  inline const hosttype & operator[](const int i) const { return host[i]; }
  /// 2D access to the host element at (row, col)
  inline hosttype & operator()(const int row, const int col)
    { return host(row,col); }
  /// 2D access to the host element at (row, col)
  inline const hosttype & operator()(const int row, const int col) const
    { return host(row,col); }

  /// Returns pointer to memory pointer for allocation on host
  inline hosttype ** host_ptr() { return host.host_ptr(); }

  /// Return the default command queue/stream associated with this data
  /** The queue is taken from the host container. **/
  inline command_queue & cq() { return host.cq(); }
  /// Change the default command queue associated with this data
  /** Applied to both the host and device containers. **/
  inline void cq(command_queue &cq_in) { host.cq(cq_in); device.cq(cq_in); }
  /// Block until command_queue associated with matrix is complete
  inline void sync() { host.sync(); }

  /// Get the size of a row on the host (including any padding) in elements
  inline size_t row_size() const { return host.row_size(); }
  /// Get the size of a row on the host (including any padding) in bytes
  inline size_t row_bytes() const { return host.row_bytes(); }
  /// Get the size on the host in bytes of 1 element
  inline int element_size() const { return sizeof(hosttype); }


  /// Update the allocation on the host asynchronously
  inline void update_host()
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,_buffer,true); }
  /// Update the allocation on the host (true for asynchronous copy)
  inline void update_host(const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,_buffer,async); }
  /// Update the allocation on the host (using command queue)
  inline void update_host(command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,_buffer,cq); }
  /// Update the first n elements on the host (true for asynchronous copy)
  inline void update_host(const int n, const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,n,_buffer,async); }
  /// Update the first n elements on the host (using command queue)
  inline void update_host(const int n, command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,n,_buffer,cq); }
  /// Update slice on the host (true for asynchronous copy)
  inline void update_host(const int rows, const int cols, const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,rows,cols,_buffer,async); }
  /// Update slice on the host (using command queue)
  inline void update_host(const int rows, const int cols, command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(host,device,rows,cols,_buffer,cq); }


  /// Update the allocation on the device asynchronously
  inline void update_device()
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,_buffer,true); }
  /// Update the allocation on the device (true for asynchronous copy)
  inline void update_device(const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,_buffer,async); }
  /// Update the allocation on the device (using command queue)
  inline void update_device(command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,_buffer,cq); }
  /// Update the first n elements on the device (true for asynchronous copy)
  inline void update_device(const int n, const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,n,_buffer,async); }
  /// Update the first n elements on the device (using command queue)
  inline void update_device(const int n, command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,n,_buffer,cq); }
  /// Update slice on the device (true for asynchronous copy)
  inline void update_device(const int rows, const int cols, const bool async)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,rows,cols,_buffer,async); }
  /// Update slice on the device (using command queue)
  inline void update_device(const int rows, const int cols, command_queue &cq)
    { _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
        copy(device,host,rows,cols,_buffer,cq); }


 private:
  // Host-side buffer holding elements of the device type; handed to
  // _ucl_s_obj_help for allocation, resizing and copies.
  UCL_H_Mat<devtype> _buffer;
};

#endif