File: lal_answer.h

package info (click to toggle)
lammps 20220106.git7586adbb6a%2Bds1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 348,064 kB
  • sloc: cpp: 831,421; python: 24,896; xml: 14,949; f90: 10,845; ansic: 7,967; sh: 4,226; perl: 4,064; fortran: 2,424; makefile: 1,501; objc: 238; lisp: 163; csh: 16; awk: 14; tcl: 6
file content (179 lines) | stat: -rw-r--r-- 5,633 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
/***************************************************************************
                                  answer.h
                             -------------------
                            W. Michael Brown (ORNL)

  Class for data management of forces, torques, energies, and virials

 __________________________________________________________________________
    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
 __________________________________________________________________________

    begin                :
    email                : brownw@ornl.gov
 ***************************************************************************/

#ifndef LAL_ANSWER_H
#define LAL_ANSWER_H

#include <cmath>
#include "mpi.h"

#if defined(USE_OPENCL)
#include "geryon/ocl_timer.h"
#include "geryon/ocl_mat.h"
using namespace ucl_opencl;
#elif defined(USE_CUDART)
#include "geryon/nvc_timer.h"
#include "geryon/nvc_mat.h"
using namespace ucl_cudart;
#elif defined(USE_HIP)
#include "geryon/hip_timer.h"
#include "geryon/hip_mat.h"
using namespace ucl_hip;
#else
#include "geryon/nvd_timer.h"
#include "geryon/nvd_mat.h"
using namespace ucl_cudadr;
#endif

#include "lal_precision.h"

namespace LAMMPS_AL {

/// Data management of forces, torques, energies, and virials
/** Holds the host/device buffers for per-atom forces (and possibly torques)
  * and for energy/virial accumulators, together with the timer and the
  * bookkeeping used when copying those results back into LAMMPS memory.
  *
  * \tparam numtyp Numeric type parameter (not referenced in this header)
  * \tparam acctyp Element type of the force and energy/virial buffers **/
template <class numtyp, class acctyp>
class Answer {
 public:
  Answer();
  /// Releases all storage by calling clear()
  ~Answer() { clear(); }

  /// Current number of local atoms stored
  inline int inum() const { return _inum; }
  /// Set number of local atoms for future copy operations
  inline void inum(const int n) { _inum=n; }
  /// Return the maximum number of atoms that can be stored currently
  inline int max_inum() const { return _max_local; }
  /// Return the number of fields used for energy and virial
  /** \param mode 1 selects the energy+virial field count; any other value
    *             selects the energy-only field count **/
  inline int ev_fields(const int mode) const {
    return (mode == 1) ? _ev_fields : _e_fields;
  }

  /// Memory usage per atom in this class
  int bytes_per_atom() const;

  /// Clear any previous data and set up for a new LAMMPS run
  /** \param rot True if atom storage needs quaternions **/
  bool init(const int inum, const bool charge, const bool rot, UCL_Device &dev);

  /// Check if we have enough device storage and realloc if not
  /** \param inum    New number of local atoms; always stored as the current count
    * \param success In/out flag: AND-ed with the result of each resize. If it
    *                is already false on entry the resize calls are skipped
    *                (short-circuit evaluation) and it stays false. **/
  inline void resize(const int inum, bool &success) {
    _inum=inum;
    if (inum>_max_local) {
      // Grow to 110% of the request (truncated to int) to leave headroom and
      // avoid reallocating on every small increase in atom count.
      // NOTE(review): _max_local is committed before the resizes run, so it
      // keeps the new value even when a resize fails or is skipped; callers
      // presumably abort when success comes back false -- confirm.
      _max_local=static_cast<int>(static_cast<double>(inum)*1.10);
      success=success && (force.resize(_max_local*_ans_fields)==UCL_SUCCESS);
      success=success && (engv.resize(_max_local*_ev_fields)==UCL_SUCCESS);
      _gpu_bytes=engv.device.row_bytes()+force.device.row_bytes();
    }
  }

  /// If already initialized by another LAMMPS style, add fields as necessary
  /** \param rot True if atom storage needs quaternions **/
  bool add_fields(const bool charge, const bool rot);

  /// Free all memory on host and device
  void clear();

  /// Return the total amount of host memory used by class in bytes
  double host_memory_usage() const;

  /// Add copy times to timers
  inline void acc_timers() {
    time_answer.add_to_total();
  }

  /// Zero the copy timer
  inline void zero_timers() {
    time_answer.zero();
  }

  /// Return the total time for host/device data transfer
  // Left non-const: constness of UCL_Timer::total_seconds() is not visible here.
  inline double transfer_time() {
    return time_answer.total_seconds();
  }

  /// Return the total time for data cast/pack
  inline double cast_time() const { return _time_cast; }

  /// Return number of bytes used on device
  inline double gpu_bytes() const { return _gpu_bytes; }

  // -------------------------COPY FROM GPU -------------------------------

  /// Copy answers from device into read buffer asynchronously
  void copy_answers(const bool eflag, const bool vflag, const bool ef_atom,
                    const bool vf_atom, const int red_blocks);

  /// Copy answers from device into read buffer asynchronously
  void copy_answers(const bool eflag, const bool vflag, const bool ef_atom,
                    const bool vf_atom, int *ilist, const int red_blocks);

  /// Copy energy and virial data into LAMMPS memory
  double energy_virial(double *eatom, double **vatom, double *virial);

  /// Copy energy and virial data into LAMMPS memory
  double energy_virial(double *eatom, double **vatom, double *virial,
                       double &ecoul);

  /// Add forces and torques from the GPU into a LAMMPS pointer
  void get_answers(double **f, double **tor);

  /// Wait for the answer copy, then unpack energies, virials, forces, torques
  /** Stops the copy timer with a sync, charging the wall-clock time spent in
    * that call to the CPU idle time. The subsequent energy_virial() and
    * get_answers(f,tor) calls are charged to the cast time.
    *
    * \param error_flag_in Overwritten with error_flag[0] only when that value
    *                      is nonzero; otherwise left untouched
    * \return The value returned by energy_virial(eatom,vatom,virial,ecoul) **/
  inline double get_answers(double **f, double **tor, double *eatom, double **vatom,
                            double *virial, double &ecoul, int &error_flag_in) {
    double ta=MPI_Wtime();
    time_answer.sync_stop();
    _time_cpu_idle+=MPI_Wtime()-ta;
    double ts=MPI_Wtime();
    if (error_flag[0]) error_flag_in=error_flag[0];
    double evdw=energy_virial(eatom,vatom,virial,ecoul);
    get_answers(f,tor);
    _time_cast+=MPI_Wtime()-ts;
    return evdw;
  }

  /// Return the time the CPU was idle waiting for GPU
  inline double cpu_idle_time() const { return _time_cpu_idle; }

  /// Change the command queue used for copies and timers
  void cq(const int cq_index);

  // ------------------------------ DATA ----------------------------------

  /// Force and possibly torque
  UCL_Vector<acctyp,acctyp> force;
  /// Energy and virial per-atom storage
  UCL_Vector<acctyp,acctyp> engv;
  /// Error flag
  UCL_Vector<int,int> error_flag;

  /// Device timers
  UCL_Timer time_answer;

  /// Geryon device
  UCL_Device *dev;

 private:
  bool alloc(const int inum);

  bool _allocated, _eflag, _vflag, _ef_atom, _vf_atom, _rot, _charge, _other;
  /// Capacity (_max_local), current atom count (_inum), and per-atom field
  /// counts used to size the force (_ans_fields) and engv (_ev_fields) buffers
  int _max_local, _inum, _e_fields, _ev_fields, _ans_fields, _ev_stride;
  int *_ilist;
  /// Accumulated wall-clock seconds (MPI_Wtime) for unpacking and for sync waits
  double _time_cast, _time_cpu_idle;

  /// Sum of device row_bytes() for engv and force, refreshed in resize()
  double _gpu_bytes;

  bool _newton;
};

}

#endif