File: Nvidia.makefile

package info (click to toggle)
lammps 20250204%2Bdfsg.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 474,368 kB
  • sloc: cpp: 1,060,070; python: 27,785; ansic: 8,956; f90: 7,254; sh: 6,044; perl: 4,171; fortran: 2,442; xml: 1,714; makefile: 1,352; objc: 238; lisp: 188; yacc: 58; csh: 16; awk: 14; tcl: 6; javascript: 2
file content (139 lines) | stat: -rw-r--r-- 4,595 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Common headers for kernels
PRE1_H = lal_preprocessor.h lal_aux_fun1.h

# Headers for Geryon
UCL_H  = $(wildcard ./geryon/ucl*.h)
NVD_H  = $(wildcard ./geryon/nvd*.h) $(UCL_H) lal_preprocessor.h \
         lal_pre_cuda_hip.h

# Headers for Host files
HOST_H = lal_answer.h lal_atom.h lal_balance.h lal_base_atomic.h lal_base_amoeba.h \
         lal_base_charge.h lal_base_dipole.h lal_base_dpd.h \
         lal_base_ellipsoid.h lal_base_three.h lal_device.h lal_neighbor.h \
         lal_neighbor_shared.h lal_pre_ocl_config.h $(NVD_H)

# Source files
SRCS := $(wildcard ./lal_*.cpp)
OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o))
CUS  := $(wildcard lal_*.cu)
CUHS := $(filter-out pppm_cubin.h, $(CUS:lal_%.cu=%_cubin.h)) pppm_f_cubin.h pppm_d_cubin.h
CUHS := $(addprefix $(OBJ_DIR)/, $(CUHS))

ifdef CUDPP_OPT
CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
        $(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \
        $(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
endif

# targets

GPU_LIB = $(LIB_DIR)/libgpu.a

EXECS = $(BIN_DIR)/nvc_get_devices

all: $(OBJ_DIR) $(CUHS) $(GPU_LIB) $(EXECS)

$(OBJ_DIR):
	mkdir -p $@

# Compilers and linkers

CUDA  = $(NVCC) $(CUDA_INCLUDE) $(CUDA_OPTS) $(CUDA_ARCH) \
             $(CUDA_PRECISION)
CUDR  = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
         $(CUDPP_OPT)
CUDA_LINK = $(CUDA_LIB) -lcudart

BIN2C = $(CUDA_HOME)/bin/bin2c

# device code compilation

$(OBJ_DIR)/pppm_f.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h \
                         lal_pre_cuda_hip.h
	$(CUDA) --fatbin -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ lal_pppm.cu

$(OBJ_DIR)/pppm_f_cubin.h: $(OBJ_DIR)/pppm_f.cubin
	$(BIN2C) -c -n pppm_f $(OBJ_DIR)/pppm_f.cubin > $(OBJ_DIR)/pppm_f_cubin.h

$(OBJ_DIR)/pppm_d.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h \
                         lal_pre_cuda_hip.h
	$(CUDA) --fatbin -DNV_KERNEL -Dgrdtyp=double -Dgrdtyp4=double4 -o $@ lal_pppm.cu

$(OBJ_DIR)/pppm_d_cubin.h: $(OBJ_DIR)/pppm_d.cubin
	$(BIN2C) -c -n pppm_d $(OBJ_DIR)/pppm_d.cubin > $(OBJ_DIR)/pppm_d_cubin.h

$(OBJ_DIR)/%_cubin.h: lal_%.cu $(PRE1_H)
	$(CUDA) --fatbin -DNV_KERNEL -o $(OBJ_DIR)/$*.cubin $(OBJ_DIR)/lal_$*.cu
	$(BIN2C) -c -n $* $(OBJ_DIR)/$*.cubin > $@

# host code compilation

$(OBJ_DIR)/lal_answer.o: lal_answer.cpp $(HOST_H)
	$(CUDR) -o $@ -c lal_answer.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_dpd_tstat_ext.o: lal_dpd_tstat_ext.cpp lal_dpd.h $(HOST_H)
	$(CUDR) -o $@ -c lal_dpd_tstat_ext.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_eam_alloy_ext.o: lal_eam_alloy_ext.cpp lal_eam.h $(HOST_H)
	$(CUDR) -o $@ -c lal_eam_alloy_ext.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_eam_fs_ext.o: lal_eam_fs_ext.cpp lal_eam.h $(HOST_H)
	$(CUDR) -o $@ -c lal_eam_fs_ext.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp $(HOST_H)
	$(CUDR) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp $(HOST_H)
	$(CUDR) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_pppm.o: lal_pppm.cpp pppm_f_cubin.h pppm_d_cubin.h $(HOST_H)
	$(CUDR) -o $@ -c lal_pppm.cpp -I$(OBJ_DIR)

$(OBJ_DIR)/lal_%_ext.o: lal_%_ext.cpp lal_%.h $(HOST_H)
	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)

$(OBJ_DIR)/lal_base_%.o: lal_base_%.cpp $(HOST_H)
	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)

$(OBJ_DIR)/lal_%.o: lal_%.cpp %_cubin.h $(HOST_H)
	$(CUDR) -o $@ -c $< -I$(OBJ_DIR)

#ifdef CUDPP_OPT
$(OBJ_DIR)/cudpp.o: cudpp_mini/cudpp.cpp
	$(CUDR) -o $@ -c cudpp_mini/cudpp.cpp -Icudpp_mini

$(OBJ_DIR)/cudpp_plan.o: cudpp_mini/cudpp_plan.cpp
	$(CUDR) -o $@ -c cudpp_mini/cudpp_plan.cpp -Icudpp_mini

$(OBJ_DIR)/cudpp_maximal_launch.o: cudpp_mini/cudpp_maximal_launch.cpp
	$(CUDR) -o $@ -c cudpp_mini/cudpp_maximal_launch.cpp -Icudpp_mini

$(OBJ_DIR)/cudpp_plan_manager.o: cudpp_mini/cudpp_plan_manager.cpp
	$(CUDR) -o $@ -c cudpp_mini/cudpp_plan_manager.cpp -Icudpp_mini

$(OBJ_DIR)/radixsort_app.cu_o: cudpp_mini/radixsort_app.cu
	$(CUDA) -o $@ -c cudpp_mini/radixsort_app.cu -Icudpp_mini

$(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu
	$(CUDA) -o $@ -c cudpp_mini/scan_app.cu -Icudpp_mini
#endif

# build libgpu.a

$(GPU_LIB): $(OBJS) $(CUDPP)
	$(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP)
	@cp $(EXTRAMAKE) Makefile.lammps

# test app for querying device info

$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda

clean:
	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.cubin *.linkinfo

veryclean: clean
	-rm -rf *~ *.linkinfo

cleanlib:
	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUHS) *.linkinfo