File: fix_gpu.cpp

package info (click to toggle)
lammps 0~20120615.gite442279-1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 128,448 kB
  • sloc: cpp: 321,874; fortran: 15,187; ansic: 11,007; python: 7,889; perl: 2,915; sh: 2,088; makefile: 924; f90: 374; objc: 238; lisp: 169; csh: 16; tcl: 6
file content (191 lines) | stat: -rw-r--r-- 5,887 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#include "string.h"
#include "stdlib.h"
#include "fix_gpu.h"
#include "atom.h"
#include "force.h"
#include "pair.h"
#include "respa.h"
#include "input.h"
#include "error.h"
#include "timer.h"
#include "modify.h"
#include "domain.h"
#include "universe.h"
#include "gpu_extra.h"
#include "neighbor.h"

using namespace LAMMPS_NS;
using namespace FixConst;

enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH};

extern int lmp_init_device(MPI_Comm world, MPI_Comm replica,
                           const int first_gpu, const int last_gpu,
                           const int gpu_mode, const double particle_split,
                           const int nthreads, const int t_per_atom);
extern void lmp_clear_device();
extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
                             double **vatom, double *virial, double &ecoul);

/* ---------------------------------------------------------------------- */

FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
  Fix(lmp, narg, arg)
{
  if (lmp->cuda)
    error->all(FLERR,"Cannot use fix GPU with USER-CUDA mode enabled");

  if (narg < 7) error->all(FLERR,"Illegal fix GPU command");
  if (strcmp(arg[1],"all") != 0) error->all(FLERR,"Illegal fix GPU command");

  int first_gpu, last_gpu;

  if (strcmp(arg[3],"force") == 0)
    _gpu_mode = GPU_FORCE;
  else if (strcmp(arg[3],"force/neigh") == 0) {
    _gpu_mode = GPU_NEIGH;
    if (domain->triclinic)
      error->all(FLERR,"Cannot use force/neigh with triclinic box");
  } else if (strcmp(arg[3],"force/hybrid_neigh") == 0) {
    _gpu_mode = GPU_HYB_NEIGH;
    if (domain->triclinic)
      error->all(FLERR,
                 "Cannot use force/hybrid_neigh with triclinic box");
  } else
    error->all(FLERR,"Illegal fix GPU command");

  first_gpu = atoi(arg[4]);
  last_gpu = atoi(arg[5]);

  _particle_split = force->numeric(arg[6]);
  if (_particle_split==0 || _particle_split>1)
    error->all(FLERR,"Illegal fix GPU command");

  int nthreads = 1;
  int threads_per_atom = -1;
  if (narg == 9) {
    if (strcmp(arg[7],"threads_per_atom") == 0)
      threads_per_atom = atoi(arg[8]);
    else if (strcmp(arg[7],"nthreads") == 0)
      nthreads = atoi(arg[8]);
    else
      error->all(FLERR,"Illegal fix GPU command");
  } else if (narg != 7)
    error->all(FLERR,"Illegal fix GPU command");

  if (nthreads < 1)
    error->all(FLERR,"Illegal fix GPU command");

  #ifndef _OPENMP
  if (nthreads > 1)
    error->all(FLERR,"No OpenMP support compiled in");
  #endif

  int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu,
                                 _gpu_mode, _particle_split, nthreads,
                                 threads_per_atom);
  GPU_EXTRA::check_flag(gpu_flag,error,world);
}

/* ---------------------------------------------------------------------- */

FixGPU::~FixGPU()
{
  lmp_clear_device();
}

/* ---------------------------------------------------------------------- */

int FixGPU::setmask()
{
  int mask = 0;
  mask |= POST_FORCE;
  mask |= MIN_POST_FORCE;
  return mask;
}

/* ---------------------------------------------------------------------- */

void FixGPU::init()
{
  // hybrid cannot be used with force/neigh option

  if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
    if (force->pair_match("hybrid",1) != NULL ||
        force->pair_match("hybrid/overlay",1) != NULL)
      error->all(FLERR,"Cannot use pair hybrid with GPU neighbor builds");
  if (_particle_split < 0)
    if (force->pair_match("hybrid",1) != NULL ||
        force->pair_match("hybrid/overlay",1) != NULL)
      error->all(FLERR,"Fix GPU split must be positive for hybrid pair styles");
}

/* ---------------------------------------------------------------------- */

void FixGPU::setup(int vflag)
{
  if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
    if (neighbor->exclude_setting()!=0)
      error->all(FLERR,
                 "Cannot use neigh_modify exclude with GPU neighbor builds");
  post_force(vflag);
}

/* ---------------------------------------------------------------------- */

void FixGPU::min_setup(int vflag)
{
  post_force(vflag);
}

/* ---------------------------------------------------------------------- */

void FixGPU::post_force(int vflag)
{
  timer->stamp();
  double lvirial[6];
  for (int i = 0; i < 6; i++) lvirial[i] = 0.0;
  double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom,
                                 force->pair->vatom, lvirial,
                                 force->pair->eng_coul);

  force->pair->eng_vdwl += my_eng;
  force->pair->virial[0] += lvirial[0];
  force->pair->virial[1] += lvirial[1];
  force->pair->virial[2] += lvirial[2];
  force->pair->virial[3] += lvirial[3];
  force->pair->virial[4] += lvirial[4];
  force->pair->virial[5] += lvirial[5];

  if (force->pair->vflag_fdotr) force->pair->virial_fdotr_compute();
  timer->stamp(TIME_PAIR);
}

/* ---------------------------------------------------------------------- */

void FixGPU::min_post_force(int vflag)
{
  post_force(vflag);
}

/* ---------------------------------------------------------------------- */

double FixGPU::memory_usage()
{
  double bytes = 0.0;
  // Memory usage currently returned by pair routine
  return bytes;
}