File: lal_pppm.h

/***************************************************************************
                                   pppm.h
                             -------------------
                            W. Michael Brown (ORNL)

  Class for PPPM acceleration

 __________________________________________________________________________
    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
 __________________________________________________________________________

    begin                :
    email                : brownw@ornl.gov
 ***************************************************************************/

#ifndef LAL_PPPM_H
#define LAL_PPPM_H

#include "mpi.h"
#include "lal_device.h"

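// Select the Geryon texture wrapper matching the accelerator backend the
// library was built for (OpenCL, CUDA runtime, HIP, or the CUDA driver
// API by default).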
#if defined(USE_OPENCL)
#include "geryon/ocl_texture.h"
#elif defined(USE_CUDART)
#include "geryon/nvc_texture.h"
#elif defined(USE_HIP)
#include "geryon/hip_texture.h"
#else
#include "geryon/nvd_texture.h"
#endif

namespace LAMMPS_AL {

template <class numtyp, class acctyp> class Device;

template <class numtyp, class acctyp, class grdtyp, class grdtyp4>
class PPPM {
 public:
  PPPM();
  virtual ~PPPM();

  /// Clear any previous data and set up for a new LAMMPS run
  /** On return, success is set to:
    * -  0 if successful
    * - -1 if the fix gpu was not found
    * - -2 if the GPU could not be found
    * - -3 if there was an out-of-memory error
    * - -4 if the GPU library was not compiled for the GPU
    * - -5 if double precision is not supported on the card **/
  grdtyp * init(const int nlocal, const int nall, FILE *screen, const int order,
                const int nxlo_out, const int nylo_out, const int nzlo_out,
                const int nxhi_out, const int nyhi_out, const int nzhi_out,
                grdtyp **rho_coeff, grdtyp **vd_brick,
                const double slab_volfactor, const int nx_pppm,
                const int ny_pppm, const int nz_pppm, const bool split,
                int &success);
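
  /* A minimal call sketch (not from the LAMMPS sources; the instance name
   * lal_pppm, the vd_brick_h variable, and all argument values are
   * placeholders; in practice the PPPMGPU host class supplies these from
   * its own kspace setup):
   *
   *   int success;
   *   grdtyp *vd_brick_h;          // receives a host-visible brick pointer
   *   grdtyp *brick =
   *     lal_pppm.init(nlocal, nall, screen, order,
   *                   nxlo_out, nylo_out, nzlo_out,
   *                   nxhi_out, nyhi_out, nzhi_out,
   *                   rho_coeff, &vd_brick_h, slab_volfactor,
   *                   nx_pppm, ny_pppm, nz_pppm, false, success);
   *   if (success != 0) { ... }    // interpret per the codes above
   */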

  /// Check if there is enough storage for atom arrays and realloc if not
  /** \param success set to false if there is insufficient memory **/
  inline void resize_atom(const int inum, const int nall, bool &success) {
    if (atom->resize(nall, success)) {
      pos_tex.bind_float(atom->x,4);
      q_tex.bind_float(atom->q,1);
    }
    ans->resize(inum,success);
  }

  /// Check if there is enough storage for local atoms and realloc if not
  inline void resize_local(const int, bool &) {
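    // Intentionally a no-op: this style keeps no extra per-local-atom storage.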
  }

  /// Clear all host and device data
  /** \note This is called at the beginning of the init() routine **/
  void clear(const double cpu_time);

  /// Returns memory usage on device per atom
  int bytes_per_atom() const;

  /// Total host memory used by library for pair style
  double host_memory_usage() const;

  /// Accumulate timers
  inline void acc_timers() {
    if (device->time_device()) {
      ans->acc_timers();
      time_in.add_to_total();
      time_out.add_to_total();
      time_map.add_to_total();
      time_rho.add_to_total();
      time_interp.add_to_total();
    }
  }

  /// Zero timers
  inline void zero_timers() {
    atom->zero_timers();
    ans->zero_timers();
    time_in.zero();
    time_out.zero();
    time_map.zero();
    time_rho.zero();
    time_interp.zero();
  }

  /// Precomputations for charge assignment that can be done asynchronously
  inline void precompute(const int ago, const int nlocal, const int nall,
                         double **host_x, int *host_type, bool &success,
                         double *charge, double *boxlo, double *prd) {
    double delxinv=_nx_pppm/prd[0];
    double delyinv=_ny_pppm/prd[1];
    double delzinv=_nz_pppm/(prd[2]*_slab_volfactor);
    _precompute(ago,nlocal,nall,host_x,host_type,success,charge,boxlo,delxinv,
                delyinv,delzinv);
  }
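
  /* Worked example of the spacing arithmetic above (illustrative numbers
   * only): with _nx_pppm = 32 grid points spanning a box edge of
   * prd[0] = 16.0, delxinv = 32/16.0 = 2.0 grid cells per distance unit.
   * Only the z spacing includes _slab_volfactor, since the slab
   * correction stretches the volume in z alone. */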

  /// Returns non-zero if any atoms are out of bounds
  int spread(const int ago, const int nlocal, const int nall, double **host_x,
             int *host_type, bool &success, double *charge, double *boxlo,
             const double delxinv, const double delyinv, const double delzinv);
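
  /* Hedged usage sketch (names are placeholders): a non-zero return from
   * spread() signals out-of-range atoms, which the host code typically
   * treats as an error condition.
   *
   *   int flag = lal_pppm.spread(ago, nlocal, nall, host_x, host_type,
   *                              success, charge, boxlo,
   *                              delxinv, delyinv, delzinv);
   *   if (flag != 0) { ... }  // e.g. stop with an out-of-range atoms error
   */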

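  /// Interpolate field values from the grid back onto atoms (the force
  /// interpolation step); qqrd2e_scale is the Coulomb conversion factor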
  void interp(const grdtyp qqrd2e_scale);

  // -------------------------- DEVICE DATA -------------------------

  /// Device Properties and Atom and Neighbor storage
  Device<numtyp,acctyp> *device;

  /// Geryon device
  UCL_Device *ucl_device;

  /// Device Timers
  UCL_Timer time_in, time_out, time_map, time_rho, time_interp;

  /// LAMMPS pointer for screen output
  FILE *screen;

  // --------------------------- ATOM DATA --------------------------

  /// Atom Data
  Atom<numtyp,acctyp> *atom;

  // --------------------------- GRID DATA --------------------------

  UCL_Vector<grdtyp,grdtyp> brick;
  UCL_Vector<grdtyp,grdtyp> vd_brick;

  // Number of atoms assigned to each grid point
  UCL_D_Vec<int> d_brick_counts;
  // Atoms assigned to each grid point
  UCL_D_Vec<grdtyp4> d_brick_atoms;

  // Error flag for out-of-bounds atoms
  UCL_Vector<int,int> error_flag;

  // Number of grid points in brick (including ghost)
  int _npts_x, _npts_y, _npts_z, _npts_yx;

  // Number of local grid points in brick
  int _nlocal_x, _nlocal_y, _nlocal_z, _nlocal_yx, _atom_stride;

  // -------------------------- SPLINE DATA -------------------------
  UCL_D_Vec<grdtyp> d_rho_coeff;
  int _order, _nlower, _nupper, _order_m_1, _order2;
  int _nxlo_out, _nylo_out, _nzlo_out, _nxhi_out, _nyhi_out, _nzhi_out;

  // ------------------------ FORCE/ENERGY DATA -----------------------

  Answer<numtyp,acctyp> *ans;

  // ------------------------- DEVICE KERNELS -------------------------
  UCL_Program *pppm_program;
  UCL_Kernel k_particle_map, k_make_rho, k_interp;
  inline int block_size() { return _block_size; }

  // --------------------------- TEXTURES -----------------------------
  UCL_Texture pos_tex;
  UCL_Texture q_tex;

 protected:
  bool _allocated, _compiled, _precompute_done, _kspace_split;
  int _block_size, _block_pencils, _pencil_size, _max_brick_atoms, _max_atoms;
  double _max_bytes, _max_an_bytes;
  double _cpu_idle_time;

  grdtyp _brick_x, _brick_y, _brick_z, _delxinv, _delyinv, _delzinv;

  double _slab_volfactor;
  int _nx_pppm, _ny_pppm, _nz_pppm;

  void compile_kernels(UCL_Device &dev);
  void _precompute(const int ago, const int nlocal, const int nall,
                   double **host_x, int *host_type, bool &success,
                   double *charge, double *boxlo, const double delxinv,
                   const double delyinv, const double delzinv);
};

}  // namespace LAMMPS_AL

#endif  // LAL_PPPM_H