File: ucl_h_vec.h

package info (click to toggle)
lammps 20220106.git7586adbb6a%2Bds1-2
links: PTS, VCS
area: main
in suites: bookworm
size: 348,064 kB
sloc: cpp: 831,421; python: 24,896; xml: 14,949; f90: 10,845; ansic: 7,967; sh: 4,226; perl: 4,064; fortran: 2,424; makefile: 1,501; objc: 238; lisp: 163; csh: 16; awk: 14; tcl: 6
file content (415 lines) | stat: -rw-r--r-- 16,623 bytes
/***************************************************************************
                                 ucl_h_vec.h
                             -------------------
                               W. Michael Brown

  Vector Container on Host

 __________________________________________________________________________
    This file is part of the Geryon Unified Coprocessor Library (UCL)
 __________________________________________________________________________

    begin                : Thu Jun 25 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : brownw@ornl.gov
 ***************************************************************************/

/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the Simplified BSD License.
   ----------------------------------------------------------------------- */

// Only allow this file to be included by CUDA and OpenCL specific headers
#ifdef _UCL_MAT_ALLOW

/// Row Vector on Host with options for pinning (page locked)
template <class numtyp>
class UCL_H_Vec : public UCL_BaseMat {
 public:
   // Traits for copying data
   // MEM_TYPE is 0 for device, 1 for host, and 2 for image
   enum traits {
     DATA_TYPE = _UCL_DATA_ID<numtyp>::id,
     MEM_TYPE = 1,
     PADDED = 0,
     ROW_MAJOR = 1,
     VECTOR = 1
   };
   typedef numtyp data_type;

 UCL_H_Vec() : _row_bytes(0), _cols(0){
    #ifdef _OCL_MAT
    _carray=(cl_mem)(0);
    #endif
  }
  ~UCL_H_Vec() { _host_free(*this); }

  /// Construct with n columns
  /** \sa alloc() **/
  UCL_H_Vec(const size_t n, UCL_Device &device,
            const enum UCL_MEMOPT kind=UCL_READ_WRITE)
    { _cols=0; _kind=UCL_VIEW; alloc(n,device,kind); }

  /// Set up host vector with 'cols' columns and reserve memory
  /** The kind parameter controls memory pinning as follows:
    * - UCL_READ_WRITE - Specify that you will read and write from host
    * - UCL_WRITE_ONLY - Specify that you will only write from host
    * - UCL_READ_ONLY  - Specify that you will only read from host
    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
    * \param cq Default command queue for operations copied from another mat
    * \return UCL_SUCCESS if the memory allocation is successful **/
  template <class mat_type>
  inline int alloc(const size_t cols, mat_type &cq,
                   const enum UCL_MEMOPT kind=UCL_READ_WRITE,
                   const enum UCL_MEMOPT kind2=UCL_NOT_SPECIFIED) {
    clear();

    _row_bytes=cols*sizeof(numtyp);
    int err=_host_alloc(*this,cq,_row_bytes,kind,kind2);

    if (err!=UCL_SUCCESS) {
      #ifndef UCL_NO_EXIT
      std::cerr << "UCL Error: Could not allocate " << _row_bytes
                << " bytes on host.\n";
      _row_bytes=0;
      UCL_GERYON_EXIT;
      #endif
      _row_bytes=0;
      return err;
    }

    _cols=cols;
    _kind=kind;
    _end=_array+cols;
    return err;
  }

  /// Set up host vector with 'cols' columns and reserve memory
  /** The kind parameter controls memory pinning as follows:
    * - UCL_READ_WRITE - Specify that you will read and write from host
    * - UCL_WRITE_ONLY - Specify that you will only write from host
    * - UCL_READ_ONLY  - Specify that you will only read from host
    * - UCL_NOT_PINNED - Memory is not pinned/page-locked on host
    * \param device Used to get the default command queue for operations
    * \return UCL_SUCCESS if the memory allocation is successful **/
  inline int alloc(const size_t cols, UCL_Device &device,
                   const enum UCL_MEMOPT kind=UCL_READ_WRITE,
                   const enum UCL_MEMOPT kind2=UCL_NOT_SPECIFIED) {
    clear();

    _row_bytes=cols*sizeof(numtyp);
    int err=_host_alloc(*this,device,_row_bytes,kind,kind2);

    if (err!=UCL_SUCCESS) {
      #ifndef UCL_NO_EXIT
      std::cerr << "UCL Error: Could not allocate " << _row_bytes
                << " bytes on host.\n";
      _row_bytes=0;
      UCL_GERYON_EXIT;
      #endif
      _row_bytes=0;
      return err;
    }

    _cols=cols;
    _kind=kind;
    _end=_array+cols;
    return err;
  }

  /// Do not allocate memory, instead use an existing allocation from Geryon
  /** This function must be passed a Geryon vector or matrix container.
    * No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device container on the host is not supported **/
  template <class ucl_type>
  inline void view(ucl_type &input, const size_t rows, const size_t cols) {
    #ifdef UCL_DEBUG
    assert(rows==1);
    #endif
    clear();
    _kind=UCL_VIEW;
    _cols=cols;
    _row_bytes=_cols*sizeof(numtyp);
    this->_cq=input.cq();
    _array=(numtyp *)input.begin();
    _end=_array+_cols;
    #ifdef _OCL_MAT
    _carray=input.cbegin();
    CL_SAFE_CALL(clRetainMemObject(input.cbegin()));
    CL_SAFE_CALL(clRetainCommandQueue(input.cq()));
    #endif
  }

  /// Do not allocate memory, instead use an existing allocation from Geryon
  /** This function must be passed a Geryon vector or matrix container.
    * No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device container on the host is not supported
    * \param stride Number of _elements_ between the start of each row **/
  template <class ucl_type>
  inline void view(ucl_type &input, const size_t rows, const size_t cols,
                   const size_t stride) { view(input,rows,cols); }

  /// Do not allocate memory, instead use an existing allocation from Geryon
  /** This function must be passed a Geryon vector or matrix container.
    * No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - If a matrix is used a input, all elements (including padding)
    *   will be used for view
    * - Viewing a device container on the host is not supported **/
  template <class ucl_type>
  inline void view(ucl_type &input, const size_t cols)
    { view(input,1,cols); }

  /// Do not allocate memory, instead use an existing allocation from Geryon
  /** This function must be passed a Geryon vector or matrix container.
    * No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container
    * - If a matrix is used a input, all elements (including padding)
    *   will be used for view
    * - Viewing a device container on the host is not supported **/
  template <class ucl_type>
  inline void view(ucl_type &input)
    { view(input,input.rows()*input.row_size()); }

  /// Do not allocate memory, instead use an existing allocation
  /** - No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device pointer on the host is not supported **/
  template <class ptr_type>
  inline void view(ptr_type *input, const size_t rows, const size_t cols,
                   UCL_Device &dev) {
    #ifdef UCL_DEBUG
    assert(rows==1);
    #endif
    clear();
    _kind=UCL_VIEW;
    _cols=cols;
    _row_bytes=_cols*sizeof(numtyp);
    this->_cq=dev.cq();
    _array=input;
    _end=_array+_cols;

    #ifdef _OCL_MAT
    _host_view(*this,dev,_row_bytes);
    #endif
  }

  /// Do not allocate memory, instead use an existing allocation
  /** - No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device pointer on the host is not supported
    * \param stride Number of _elements_ between the start of each row **/
  template <class ptr_type>
  inline void view(ptr_type *input, const size_t rows, const size_t cols,
                   const size_t stride, UCL_Device &dev)
    { view(input,rows,cols,stride); }

  /// Do not allocate memory, instead use an existing allocation
  /** - No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device pointer on the host is not supported **/
  template <class ptr_type>
  inline void view(ptr_type *input, const size_t cols, UCL_Device &dev)
    { view(input,1,cols,dev); }

  /// Do not allocate memory, instead use an existing allocation from Geryon
  /** This function must be passed a Geryon vector or matrix container.
    * No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device container on the host is not supported **/
  template <class ucl_type>
  inline void view_offset(const size_t offset,ucl_type &input,const size_t rows,
                          const size_t cols) {
    #ifdef UCL_DEBUG
    assert(rows==1);
    #endif
    clear();
    _kind=UCL_VIEW;
    _cols=cols;
    _row_bytes=_cols*sizeof(numtyp);
    this->_cq=input.cq();
    _array=(numtyp *)input.begin()+offset;
    _end=_array+_cols;
    #ifdef _OCL_MAT
    _host_view(*this,input,offset*sizeof(numtyp),_row_bytes);
    #endif
  }

  /// Do not allocate memory, instead use an existing allocation from Geryon
  /** This function must be passed a Geryon vector or matrix container.
    * No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device container on the host is not supported
    * \param stride Number of _elements_ between the start of each row **/
  template <class ucl_type>
  inline void view_offset(const size_t offset,ucl_type &input,const size_t rows,
                          const size_t cols, const size_t stride)
    { view_offset(offset,input,rows,cols); }

  /// Do not allocate memory, instead use an existing allocation from Geryon
  /** This function must be passed a Geryon vector or matrix container.
    * No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - If a matrix is used a input, all elements (including padding)
    *   will be used for view
    * - Viewing a device container on the host is not supported **/
  template <class ucl_type>
  inline void view_offset(const size_t offset,ucl_type &input,const size_t cols)
    { view_offset(offset,input,1,cols); }

  /// Do not allocate memory, instead use an existing allocation from Geryon
  /** This function must be passed a Geryon vector or matrix container.
    * No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - If a matrix is used a input, all elements (including padding)
    *   will be used for view
    * - Viewing a device container on the host is not supported **/
  template <class ucl_type>
  inline void view_offset(const size_t offset, ucl_type &input)
    { view_offset(offset,input,input.rows()*input.row_size()-offset); }

  /// Do not allocate memory, instead use an existing allocation
  /** - No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device pointer on the host is not supported **/
  template <class ptr_type>
  inline void view_offset(const size_t offset,ptr_type *input,const size_t rows,
                          const size_t cols, UCL_Device &dev)
    { view(input+offset,rows,cols,dev); }

  /// Do not allocate memory, instead use an existing allocation
  /** - No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device pointer on the host is not supported
    * \param stride Number of _elements_ between the start of each row **/
  template <class ptr_type>
  inline void view_offset(const size_t offset,ptr_type *input,const size_t rows,
                          const size_t cols,const size_t stride,UCL_Device &dev)
    { view(input+offset,rows,cols,stride,dev); }

  /// Do not allocate memory, instead use an existing allocation
  /** - No memory is freed when the object is destructed.
    * - The view does not prevent the memory from being freed by the
    *   allocating container when using CUDA APIs
    * - Viewing a device pointer on the host is not supported **/
  template <class ptr_type>
  inline void view_offset(const size_t offset, ptr_type *input,
                          const size_t cols, UCL_Device &dev)
    { view(input+offset,1,cols,dev); }

  /// Free memory and set size to 0
  inline void clear()
    { _host_free(*this); _kind=UCL_VIEW; _cols=0; }

  /// Resize the allocation to contain cols elements
  /** \note Cannot be used on views **/
  inline int resize(const int cols) {
    assert(_kind!=UCL_VIEW);
    _row_bytes=cols*sizeof(numtyp);
    int err=_host_resize(*this,_row_bytes);

    if (err!=UCL_SUCCESS) {
      #ifndef UCL_NO_EXIT
      std::cerr << "UCL Error: Could not allocate " << _row_bytes
                << " bytes on host.\n";
      _row_bytes=0;
      UCL_GERYON_EXIT;
      #endif
      _row_bytes=0;
      return err;
    }

    _cols=cols;
    _end=_array+cols;
    return err;
  }

  /// Resize (only if bigger) the allocation to contain cols elements
  /** \note Cannot be used on views **/
  inline int resize_ib(const int cols)
    { if (cols>_cols) return resize(cols); else return UCL_SUCCESS; }

  /// Set each element to zero
  inline void zero() { _host_zero(_array,row_bytes()); }

  /// Set first n elements to zero
  inline void zero(const int n) { _host_zero(_array,n*sizeof(numtyp)); }

  /// Get host pointer to first element
  inline numtyp * begin() { return _array; }
  /// Get host pointer to first element
  inline const numtyp * begin() const { return _array; }
  /// Get host pointer to one past last element
  inline numtyp * end() { return _end; }
  /// Get host pointer to one past last element
  inline const numtyp * end() const { return _end; }

  /// Get the number of elements
  inline size_t numel() const { return _cols; }
  /// Get the number of rows
  inline size_t rows() const { return 1; }
  /// Get the number of columns
  inline size_t cols() const { return _cols; }
  ///Get the size of a row (including any padding) in elements
  inline size_t row_size() const { return _cols; }
  /// Get the size of a row (including any padding) in bytes
  inline size_t row_bytes() const { return _row_bytes; }
  /// Get the size in bytes of 1 element
  inline int element_size() const { return sizeof(numtyp); }

  /// Get element at index i
  inline numtyp & operator[](const int i) { return _array[i]; }
  /// Get element at index i
  inline const numtyp & operator[](const int i) const { return _array[i]; }
  /// 2D access (row should always be 0)
  inline numtyp & operator()(const int row, const int col)
    { return _array[col]; }
  /// 2D access (row should always be 0)
  inline const numtyp & operator()(const int row, const int col) const
    { return _array[col]; }

  /// Returns pointer to memory pointer for allocation on host
  inline numtyp ** host_ptr() { return &_array; }

  /// Return the offset (in elements) from begin() pointer where data starts
  /** \note Always 0 for host matrices and CUDA APIs **/
  inline size_t offset() const { return 0; }
  /// Return the offset (in bytes) from begin() pointer where data starts
  /** \note Always 0 for host matrices and CUDA APIs **/
  inline size_t byteoff() const { return 0; }

  #ifdef _OCL_MAT
  /// For OpenCL, returns a reference to the cl_mem object
  inline device_ptr & cbegin() { return _carray; }
  /// For OpenCL, returns a reference to the cl_mem object
  inline const device_ptr & cbegin() const { return _carray; }
  #endif

 private:
  numtyp *_array, *_end;
  size_t _row_bytes, _cols;

  #ifdef _OCL_MAT
  device_ptr _carray;
  #endif
};

#endif