/***************************************************************************
ucl_matrix.h
-------------------
W. Michael Brown
Matrix Container on Host
__________________________________________________________________________
This file is part of the Geryon Unified Coprocessor Library (UCL)
__________________________________________________________________________
begin : Thu May 10 2012
copyright : (C) 2012 by W. Michael Brown
email : brownw@ornl.gov
***************************************************************************/
/* -----------------------------------------------------------------------
This software is distributed under the Simplified BSD License.
----------------------------------------------------------------------- */
// Only allow this file to be included by CUDA and OpenCL specific headers
#ifdef _UCL_MAT_ALLOW
/// Matrix S-Object
template <class hosttype, class devtype>
class UCL_Matrix {
public:
// Traits for copying data
// MEM_TYPE is 0 for device, 1 for host, and 2 for image
enum traits {
DATA_TYPE = _UCL_DATA_ID<hosttype>::id,
MEM_TYPE = 1,
PADDED = 0,
ROW_MAJOR = 1,
VECTOR = 0
};
typedef hosttype data_type;
/// Host Allocation
UCL_H_Mat<hosttype> host;
/// Device Allocation
UCL_D_Mat<devtype> device;
UCL_Matrix() { }
~UCL_Matrix() { }
/// Construct with a specified number of rows and columns
/** \sa alloc() **/
UCL_Matrix(const size_t rows, const size_t cols, UCL_Device &acc,
const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }
/// Set up host matrix with specified # of rows/cols and reserve memory
/** The kind1 parameter controls memory access from the host
* - UCL_READ_WRITE - Specify that you will read and write from host
* - UCL_WRITE_ONLY - Specify that you will only write from host
* - UCL_READ_ONLY - Specify that you will only read from host
* - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
* The kind2 parameter controls memory optimizations from the device:
* - UCL_READ_WRITE - Specify that you will read and write in kernels
* - UCL_WRITE_ONLY - Specify that you will only write in kernels
* - UCL_READ_ONLY - Specify that you will only read in kernels
* \note When passing a command queue instead of a device, the device
* allocation is always performed, even if the device shares memory
* with the host.
* \param cq Default command queue for operations copied from another mat
* \return UCL_SUCCESS if the memory allocation is successful **/
template <class mat_type>
inline int alloc(const size_t rows, const size_t cols, mat_type &cq,
const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
{ return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
alloc(host,device,_buffer,rows,cols,cq,kind1,kind2); }
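// Illustrative usage sketch (not part of this header): `other` is assumed to
// be an already-allocated UCL container whose default command queue should be
// reused for the new matrix:
//
//   UCL_Matrix<float,float> mat;
//   int err = mat.alloc(8, 16, other, UCL_READ_WRITE, UCL_READ_ONLY);
//   if (err != UCL_SUCCESS) { /* handle the allocation failure */ }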
/// Set up host matrix with specified # of rows/cols and reserve memory
/** The kind1 parameter controls memory access from the host
* - UCL_READ_WRITE - Specify that you will read and write from host
* - UCL_WRITE_ONLY - Specify that you will only write from host
* - UCL_READ_ONLY - Specify that you will only read from host
* - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
* The kind2 parameter controls memory optimizations from the device:
* - UCL_READ_WRITE - Specify that you will read and write in kernels
* - UCL_WRITE_ONLY - Specify that you will only write in kernels
* - UCL_READ_ONLY - Specify that you will only read in kernels
* \param acc Device used to get the default command queue for operations
* \return UCL_SUCCESS if the memory allocation is successful **/
inline int alloc(const size_t rows, const size_t cols, UCL_Device &acc,
const enum UCL_MEMOPT kind1=UCL_READ_WRITE,
const enum UCL_MEMOPT kind2=UCL_READ_WRITE)
{ return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
alloc(host,device,_buffer,rows,cols,acc,kind1,kind2); }
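// Illustrative usage sketch (not part of this header): `gpu` is assumed to be
// a configured UCL_Device. The constructor and alloc() are equivalent ways to
// create the paired host/device allocations:
//
//   UCL_Matrix<double,double> a(32, 64, gpu);   // defaults: UCL_READ_WRITE
//   UCL_Matrix<double,double> b;
//   if (b.alloc(32, 64, gpu, UCL_WRITE_ONLY, UCL_READ_ONLY) != UCL_SUCCESS)
//     { /* handle the allocation failure */ }   // host writes, kernels read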
/// Free memory and set size to 0
inline void clear()
{ host.clear(); device.clear(); }
/// Resize the allocation to contain rows x cols elements
inline int resize(const int rows, const int cols) {
assert(host.kind()!=UCL_VIEW);
int err=host.resize(rows,cols);
if (err!=UCL_SUCCESS)
return err;
return _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
dev_resize(device,host,_buffer,rows,cols);
}
/// Resize (only if bigger) the allocation to contain rows x cols elements
inline int resize_ib(const int new_rows, const int new_cols)
{ if (new_rows>rows() || new_cols>cols()) return resize(new_rows,new_cols);
else return UCL_SUCCESS; }
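// Illustrative usage sketch (not part of this header), continuing with the
// hypothetical matrix `b` from the sketch above:
//
//   b.resize(64, 128);     // reallocate host and device storage to 64 x 128
//   b.resize_ib(32, 32);   // no-op: current allocation is already big enough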
/// Set each element to zero (asynchronously on device)
inline void zero() { zero(cq()); }
/// Set first n elements to zero (asynchronously on device)
inline void zero(const int n) { zero(n,cq()); }
/// Set each element to zero (asynchronously on device)
inline void zero(command_queue &cq) {
host.zero();
if (device.kind()!=UCL_VIEW) device.zero(cq);
else if (_buffer.numel()>0) _buffer.zero();
}
/// Set first n elements to zero (asynchronously on device)
inline void zero(const int n, command_queue &cq) {
host.zero(n);
if (device.kind()!=UCL_VIEW) device.zero(n,cq);
else if (_buffer.numel()>0) _buffer.zero();
}
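// Illustrative usage sketch (not part of this header): zeroing is performed on
// the host immediately and on the device asynchronously, so a sync() may be
// needed before kernels rely on the device data (hypothetical matrix `b`):
//
//   b.zero();      // zero the full host and device allocations
//   b.zero(10);    // zero only the first 10 elements
//   b.sync();      // wait for the asynchronous device-side zeroing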
/// Get the number of elements
inline size_t numel() const { return host.numel(); }
/// Get the number of rows
inline size_t rows() const { return host.rows(); }
/// Get the number of columns
inline size_t cols() const { return host.cols(); }
/// Get the host memory usage (bytes) of the s-object (including any buffers)
inline size_t host_mem_usage()
{ return host.row_bytes()*host.rows()+_buffer.row_bytes()*_buffer.rows(); }
/// Get the device memory usage (bytes) of the s-object
inline size_t device_mem_usage()
{ return device.row_bytes()*device.rows(); }
/// Get element at index i
inline hosttype & operator[](const int i) { return host[i]; }
/// Get element at index i
inline const hosttype & operator[](const int i) const { return host[i]; }
/// 2D access to the element at the given row and column
inline hosttype & operator()(const int row, const int col)
{ return host(row,col); }
/// 2D access to the element at the given row and column
inline const hosttype & operator()(const int row, const int col) const
{ return host(row,col); }
/// Return the address of the underlying host memory pointer
inline hosttype ** host_ptr() { return host.host_ptr(); }
/// Return the default command queue/stream associated with this data
inline command_queue & cq() { return host.cq(); }
/// Change the default command queue associated with this data
inline void cq(command_queue &cq_in) { host.cq(cq_in); device.cq(cq_in); }
/// Block until command_queue associated with matrix is complete
inline void sync() { host.sync(); }
/// Get the size of a row on the host (including any padding) in elements
inline size_t row_size() const { return host.row_size(); }
/// Get the size of a row on the host (including any padding) in bytes
inline size_t row_bytes() const { return host.row_bytes(); }
/// Get the size in bytes of a single element on the host
inline int element_size() const { return sizeof(hosttype); }
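// Illustrative usage sketch (not part of this header): element access always
// refers to the host copy; an explicit update is needed before the device
// sees any changes (hypothetical matrix `b`):
//
//   b(2, 3) = 1.5;                        // 2D host-side access
//   b[0]    = 0.0;                        // 1D host-side access to element 0
//   size_t bytes_per_row = b.row_bytes(); // host row length (with padding)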
/// Update the allocation on the host asynchronously
inline void update_host()
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(host,device,_buffer,true); }
/// Update the allocation on the host (true for asynchronous copy)
inline void update_host(const bool async)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(host,device,_buffer,async); }
/// Update the allocation on the host (using command queue)
inline void update_host(command_queue &cq)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(host,device,_buffer,cq); }
/// Update the first n elements on the host (true for asynchronous copy)
inline void update_host(const int n, const bool async)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(host,device,n,_buffer,async); }
/// Update the first n elements on the host (using command queue)
inline void update_host(const int n, command_queue &cq)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(host,device,n,_buffer,cq); }
/// Update slice on the host (true for asynchronous copy)
inline void update_host(const int rows, const int cols, const bool async)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(host,device,rows,cols,_buffer,async); }
/// Update slice on the host (using command queue)
inline void update_host(const int rows, const int cols, command_queue &cq)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(host,device,rows,cols,_buffer,cq); }
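// Illustrative usage sketch (not part of this header): pulling results back
// to the host after kernels have written to `b.device` (hypothetical `b`):
//
//   b.update_host();          // asynchronous copy of the full allocation
//   b.sync();                 // block until the copy has completed
//   b.update_host(16, false); // blocking copy of only the first 16 elements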
/// Update the allocation on the device asynchronously
inline void update_device()
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(device,host,_buffer,true); }
/// Update the allocation on the device (true for asynchronous copy)
inline void update_device(const bool async)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(device,host,_buffer,async); }
/// Update the allocation on the device (using command queue)
inline void update_device(command_queue &cq)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(device,host,_buffer,cq); }
/// Update the first n elements on the device (true for asynchronous copy)
inline void update_device(const int n, const bool async)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(device,host,n,_buffer,async); }
/// Update the first n elements on the device (using command queue)
inline void update_device(const int n, command_queue &cq)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(device,host,n,_buffer,cq); }
/// Update slice on the device (true for asynchronous copy)
inline void update_device(const int rows, const int cols, const bool async)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(device,host,rows,cols,_buffer,async); }
/// Update slice on the device (using command queue)
inline void update_device(const int rows, const int cols, command_queue &cq)
{ _ucl_s_obj_help< ucl_same_type<hosttype,devtype>::ans >::
copy(device,host,rows,cols,_buffer,cq); }
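// Illustrative round-trip sketch (not part of this header): fill the host
// copy, push it to the device for the kernels, then pull the results back
// (hypothetical matrix `b`):
//
//   for (size_t i = 0; i < b.numel(); i++) b[i] = 0.0;
//   b.update_device();     // asynchronous host -> device copy
//   // ... enqueue kernels that use b.device on b.cq() ...
//   b.update_host(false);  // blocking device -> host copy of the results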
private:
UCL_H_Mat<devtype> _buffer;
};
#endif