File: lal_eam.h

/***************************************************************************
                                    eam.h
                             -------------------
                   Trung Dac Nguyen, W. Michael Brown (ORNL)

  Class for acceleration of the eam pair style.

 __________________________________________________________________________
    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
 __________________________________________________________________________

    begin                :
    email                : brownw@ornl.gov nguyentd@ornl.gov
 ***************************************************************************/

#ifndef LAL_EAM_H
#define LAL_EAM_H

#include "lal_precision.h"
#include "lal_base_atomic.h"

namespace LAMMPS_AL {

template <class numtyp, class acctyp>
class EAM : public BaseAtomic<numtyp, acctyp> {
 public:
  EAM();
  ~EAM();

  /// Clear any previous data and set up for a new LAMMPS run
  /** \param max_nbors initial number of rows in the neighbor matrix
    * \param cell_size cutoff + skin
    * \param gpu_split fraction of particles handled by device
    *
    * Returns:
    * -  0 if successful
    * - -1 if fix gpu not found
    * - -3 if there is an out of memory error
    * - -4 if the GPU library was not compiled for the current device
    * - -5 if double precision is not supported on the card **/
  int init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
           int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline,
           double ***host_z2r_spline, double ***host_frho_spline, double** host_cutsq,
           double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r,
           int nfrho, int nr, const int nlocal, const int nall,
           const int max_nbors, const int maxspecial, const double cell_size,
           const double gpu_split, FILE *_screen);
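
  // A minimal host-side sketch of how init() might be driven (hypothetical
  // variable names; in LAMMPS the spline tables and scalars come from the
  // host PairEAM class after the setfl/funcfl potential files are read):
  //
  //   EAM<float,float> eam;
  //   int err = eam.init(ntypes, cutforcesq, type2rhor, type2z2r, type2frho,
  //                      rhor_spline, z2r_spline, frho_spline, cutsq,
  //                      rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
  //                      nlocal, nall, max_nbors, maxspecial,
  //                      cutoff + skin, gpu_split, screen);
  //   if (err != 0) { /* map the negative code to an error message */ }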

  // Asynchronously copy the fp values (derivative of the embedding energy)
  // for ghost atoms to the device, once the host has filled in fp for the
  // ghost atoms
  inline void add_fp_data() {
    // ghost atoms occupy indices [_nlocal, nall) in the per-atom arrays
    int nghost=this->atom->nall()-_nlocal;
    if (nghost>0) {
      // views alias the ghost region of the paired host/device buffers
      // without any new allocation
      UCL_H_Vec<numtyp> host_view;
      UCL_D_Vec<numtyp> dev_view;
      host_view.view_offset(_nlocal,_fp.host);
      dev_view.view_offset(_nlocal,_fp.device);
      // enqueue the host-to-device transfer asynchronously (last argument)
      ucl_copy(dev_view,host_view,nghost,true);
    }
  }

  /// Clear all host and device data
  /** \note This is called at the beginning of the init() routine **/
  void clear();

  /// Returns memory usage on device per atom
  int bytes_per_atom(const int max_nbors) const;

  /// Total host memory used by library for pair style
  double host_memory_usage() const;

  /// Pair loop with host neighboring
  void compute(const int f_ago, const int inum_full, const int, const int nall,
               double **host_x, int *host_type, int *ilist, int *numj,
               int **firstneigh, const bool eflag, const bool vflag,
               const bool eatom, const bool vatom, int &host_start,
               const double cpu_time, bool &success,
               void **fp_ptr);

  /// Pair loop with device neighboring
  int** compute(const int ago, const int inum_full, const int nall,
                double **host_x, int *host_type, double *sublo,
                double *subhi, tagint *tag, int **nspecial,
                tagint **special, const bool eflag, const bool vflag,
                const bool eatom, const bool vatom, int &host_start,
                int **ilist, int **numj, const double cpu_time, bool &success,
                int &inum, void **fp_ptr);

  /// Second stage of the pair computation: forces and energies from the
  /// fp values updated by add_fp_data()
  void compute2(int *ilist, const bool eflag, const bool vflag,
                const bool eatom, const bool vatom);
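
  // Sketch of the per-timestep call sequence implied by the interface above
  // (hypothetical driver code for the host-neighboring path; "nlocal" is an
  // assumed meaning for the unnamed third parameter of compute()):
  //
  //   void *fp = nullptr;   // receives the host-side fp buffer
  //   eam.compute(ago, inum_full, nlocal, nall, x, type, ilist, numj,
  //               firstneigh, eflag, vflag, eatom, vatom, host_start,
  //               cpu_time, success, &fp);       // stage 1: densities -> fp
  //   // ... host communicates fp to ghost atoms (forward communication) ...
  //   eam.compute2(ilist, eflag, vflag, eatom, vatom);  // stage 2: forces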

  // ------------------------- DEVICE KERNELS -------------------------
  UCL_Kernel k_energy, k_energy_fast, k_energy_fast_noev, *k_energy_sel;

  // --------------------------- TEXTURES -----------------------------
  UCL_Texture fp_tex;
  UCL_Texture rhor_spline1_tex, rhor_spline2_tex;
  UCL_Texture frho_spline1_tex, frho_spline2_tex;
  UCL_Texture z2r_spline1_tex, z2r_spline2_tex;

  // --------------------------- DEVICE DATA --------------------------

  /// Device Timers
  UCL_Timer time_pair2, time_fp1, time_fp2;

  // --------------------------- TYPE DATA --------------------------

  UCL_D_Vec<int2> type2rhor_z2r;
  UCL_D_Vec<int> type2frho;

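  // Cubic-spline coefficient tables copied from PairEAM (seven coefficients
  // per knot), each split across a pair of packed numtyp4 vectors so either
  // half can be fetched with a single texture read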
  UCL_D_Vec<numtyp4> z2r_spline1, z2r_spline2;
  UCL_D_Vec<numtyp4> frho_spline1, frho_spline2;
  UCL_D_Vec<numtyp4> rhor_spline1, rhor_spline2;

  UCL_D_Vec<numtyp> cutsq;

  numtyp _cutforcesq, _rdr, _rdrho, _rhomax;

  int _nfrho, _nrhor, _nrho, _nz2r, _nr;

  /// If atom type constants fit in shared memory, use fast kernels
  bool shared_types;

  /// Number of atom types
  int _ntypes;

  int _max_fp_size;

  /// True if the energy kernels are compiled
  bool _compiled_energy;

  /// Per-atom fp array (derivative of the embedding energy), kept as a
  /// paired host/device buffer
  UCL_Vector<numtyp,numtyp> _fp;

 protected:
  bool _allocated;
  int _nlocal;
  int loop(const int eflag, const int vflag);
  void loop2(const bool eflag, const bool vflag);
};
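
// The library instantiates this template for the configured precision in the
// matching implementation file; a sketch, using the PRECISION/ACC_PRECISION
// macros defined via lal_precision.h:
//
//   template class EAM<PRECISION, ACC_PRECISION>;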

} // namespace LAMMPS_AL

#endif