File: machine-matrix.sh

package info (click to toggle)
hypre 3.1.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 80,152 kB
  • sloc: ansic: 409,546; cpp: 63,220; sh: 11,814; fortran: 9,433; makefile: 3,346; perl: 2,994; awk: 339; python: 126
file content (150 lines) | stat: -rwxr-xr-x 7,741 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/bin/bash
# Copyright (c) 1998 Lawrence Livermore National Security, LLC and other
# HYPRE Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: (Apache-2.0 OR MIT)

# Name of this test: the script's file name without its directory and .sh suffix.
# "$0" is quoted so that a script path containing whitespace is handled correctly.
testname=$(basename "$0" .sh)

# Echo usage information and exit when the first argument is -h or -help
case "$1" in
   -h|-help)
      cat <<EOF

   **** Only run this script on LC's matrix cluster ****

   $0 [-h|-help] {src_dir}

   where: -h|-help   prints this usage information and exits
          {src_dir}  is the hypre source directory

   This script runs a number of tests suitable for the matrix cluster.

   Example usage: $0 ../src

EOF
      exit
      ;;
esac

# Setup
#   cmake_version : version of the cmake module loaded by the CMake stanzas
#   test_dir      : directory the script was started from
#   output_dir    : <test_dir>/<testname>.dir, recreated empty on every run
#   src_dir       : absolute path of the hypre source directory given as $1
#   root_dir      : absolute path of the parent directory of src_dir
cmake_version=3.30
test_dir=$(pwd)
output_dir="${test_dir}/${testname}.dir"

# Refuse to continue without a usable source directory.  A bare `cd` would
# silently resolve to $HOME and a failed `cd` to the current directory, so the
# tests would run against the wrong tree.  Checked before the previous results
# are removed so that a mistyped invocation does not destroy them.
if [ -z "$1" ] || [ ! -d "$1" ]; then
   echo "$0: '$1' is not a directory; expected the hypre source directory (see -h)" >&2
   exit 1
fi

rm -fr "$output_dir"
mkdir -p "$output_dir"
src_dir=$(cd "$1" && pwd) || exit 1
root_dir=$(cd "$src_dir/.." && pwd) || exit 1
shift

# Basic build and run tests: option strings shared by the stanzas below
aco='LIBS=-ldl'     # prefix of every Autotools configure option string
cco='-DMPIEXEC_EXECUTABLE="srun" -DMPIEXEC_NUMPROC_FLAG="n"'   # prefix of every CMake option string
cmo='--parallel'    # passed after -mo: in the CMake stanzas
mo='-j test'        # passed after -mo: in the Autotools stanzas
eo=''               # not referenced by any stanza in this script
rtol='0.0'          # value given to -rtol in the run options
atol='3e-15'        # value given to -atol in the run options
save='matrix'       # value given to -save in the run options

# 1C) CMake, GCC 13.3.1 + CUDA 12.9.1: unified memory, memory tracker OFF, Debug build
#     Run options select: error, ij, ams, struct, sstruct
module -q load cmake/${cmake_version} cuda/12.9.1 gcc/13.3.1
co="${cco}"
co="${co} -DHYPRE_ENABLE_CUDA=ON -DHYPRE_ENABLE_UMPIRE=OFF -DHYPRE_ENABLE_UNIFIED_MEMORY=ON"
co="${co} -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CUDA_ARCHITECTURES=90"
co="${co} -DHYPRE_ENABLE_MEMORY_TRACKER=OFF -DHYPRE_ENABLE_PERSISTENT_COMM=ON"
co="${co} -DHYPRE_ENABLE_PRINT_ERRORS=ON"
ro="-error -ij -ams -struct -sstruct"
ro="${ro} -rt -save ${save} -rtol ${rtol} -atol ${atol}"
# co, cmo and ro are expanded unquoted so that every option becomes its own argument
./test.sh cmake.sh $root_dir -co: $co -mo: $cmo -ro: $ro
./renametest.sh cmake $output_dir/cmake-cuda-um-dbg

# 2C) CMake, GCC 13.3.1 + CUDA 12.9.1: unified memory, mixed integers, Debug build
#     Run options select: error, ij-mixed, ams, struct, sstruct-mixed
if module reset; then
   module -q load cmake/${cmake_version} cuda/12.9.1 gcc/13.3.1
fi
co="${cco}"
co="${co} -DHYPRE_ENABLE_UMPIRE=OFF -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DHYPRE_ENABLE_CUDA=ON"
co="${co} -DHYPRE_ENABLE_MIXEDINT=ON -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CUDA_ARCHITECTURES=90"
co="${co} -DHYPRE_ENABLE_PRINT_ERRORS=ON"
ro="-error -ij-mixed -ams -struct -sstruct-mixed"
ro="${ro} -rt -save ${save} -rtol ${rtol} -atol ${atol}"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh cmake.sh $root_dir -co: $co -mo: $cmo -ro: $ro
./renametest.sh cmake $output_dir/cmake-cuda-um-mixedint

# 3C) CMake, GCC 13.3.1 + CUDA 12.9.1: unified memory, OpenMP, hopscotch, shared library
#     No CMAKE_BUILD_TYPE is passed (the stanza is labelled "release mode").
#     Run options select: gpumemcheck and cudasan
if module reset; then
   module -q load cmake/${cmake_version} cuda/12.9.1 gcc/13.3.1
fi
co="${cco}"
co="${co} -DHYPRE_ENABLE_UMPIRE=OFF -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DHYPRE_ENABLE_CUDA=ON"
co="${co} -DHYPRE_ENABLE_OPENMP=ON -DHYPRE_ENABLE_HOPSCOTCH=ON -DBUILD_SHARED_LIBS=ON"
co="${co} -DCMAKE_CUDA_ARCHITECTURES=90 -DHYPRE_ENABLE_PRINT_ERRORS=ON"
ro="-gpumemcheck -rt -cudasan"
ro="${ro} -save ${save} -rtol ${rtol} -atol ${atol}"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh cmake.sh $root_dir -co: $co -mo: $cmo -ro: $ro
./renametest.sh cmake $output_dir/cmake-cuda-um-shared

# 4C) CMake, GCC 13.3.1 + CUDA 12.9.1: single precision, unified memory, cuSOLVER,
#     OpenMP, hopscotch, shared library.  Run options select: single
#     NOTE(review): the stanza was labelled "debug mode" but no
#     -DCMAKE_BUILD_TYPE=Debug is passed -- confirm which one is intended.
if module reset; then
   module -q load cmake/${cmake_version} cuda/12.9.1 gcc/13.3.1
fi
co="${cco}"
co="${co} -DHYPRE_ENABLE_SINGLE=ON -DHYPRE_ENABLE_UMPIRE=OFF -DHYPRE_ENABLE_UNIFIED_MEMORY=ON -DHYPRE_ENABLE_CUDA=ON"
co="${co} -DHYPRE_ENABLE_CUSOLVER=ON -DHYPRE_ENABLE_OPENMP=ON -DHYPRE_ENABLE_HOPSCOTCH=ON"
co="${co} -DBUILD_SHARED_LIBS=ON -DCMAKE_CUDA_ARCHITECTURES=90 -DHYPRE_ENABLE_PRINT_ERRORS=ON"
ro="-single -rt"
ro="${ro} -save ${save} -rtol ${rtol} -atol ${atol}"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh cmake.sh $root_dir -co: $co -mo: $cmo -ro: $ro
./renametest.sh cmake $output_dir/cmake-cuda-um-single

# 5C) CMake, GCC 13.3.1 + CUDA 12.9.1: MPI disabled, unified memory
#     No -ro: options are passed for this configuration [no run]
if module reset; then
   module -q load cmake/${cmake_version} cuda/12.9.1 gcc/13.3.1
fi
co="${cco}"
co="${co} -DHYPRE_ENABLE_MPI=OFF -DHYPRE_ENABLE_UMPIRE=OFF -DHYPRE_ENABLE_UNIFIED_MEMORY=ON"
co="${co} -DHYPRE_ENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=90 -DHYPRE_ENABLE_PRINT_ERRORS=ON"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh cmake.sh $root_dir -co: $co -mo: $cmo
./renametest.sh cmake $output_dir/cmake-cuda-um-without-MPI

# 6C) CMake, GCC 13.3.1 + CUDA 12.9.1 with Umpire, located through -Dumpire_DIR
#     Run options select: bench (with -mpibind)
if module reset; then
   module -q load cmake/${cmake_version} cuda/12.9.1 gcc/13.3.1
fi
UMPIRE_DIR=/usr/workspace/hypre/ext-libs/Umpire/install-umpire_2025.09.0-cuda_12.9_sm90-gcc_13.3
co="${cco}"
co="${co} -DHYPRE_ENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=90"
co="${co} -DHYPRE_ENABLE_PRINT_ERRORS=ON"
co="${co} -Dumpire_DIR=${UMPIRE_DIR}/lib/cmake"
ro="-bench -rt -mpibind"
ro="${ro} -save ${save}"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh cmake.sh $root_dir -co: $co -mo: $cmo -ro: $ro
./renametest.sh cmake $output_dir/cmake-cuda-bench

# 7C) CMake, GCC 13.3.1 + CUDA 12.9.1: tests use host execution, memory tracker OFF,
#     Debug build.  Run options select: ij-noilu, ams, struct, sstruct
if module reset; then
   module -q load cmake/${cmake_version} cuda/12.9.1 gcc/13.3.1
fi
co="${cco}"
co="${co} -DHYPRE_ENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=90 -DHYPRE_ENABLE_UMPIRE=OFF"
co="${co} -DHYPRE_ENABLE_TEST_USING_HOST=ON -DHYPRE_ENABLE_MEMORY_TRACKER=OFF"
co="${co} -DHYPRE_ENABLE_PRINT_ERRORS=ON -DCMAKE_BUILD_TYPE=Debug"
ro="-ij-noilu -ams"
ro="${ro} -struct -sstruct"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh cmake.sh $root_dir -co: $co -mo: $cmo -ro: $ro
./renametest.sh cmake $output_dir/cmake-cuda-cpu

##################################
## Autotools (build only) tests ##
##################################

# 1A) Autotools, GCC 13.3.1 + CUDA 12.9.1: unified memory, memory tracker,
#     persistent communication, debug build
#     NOTE(review): this is the only Autotools stanza that loads its modules
#     without a preceding `module reset` -- confirm that is intended.
module -q load cuda/12.9.1 gcc/13.3.1
co="${aco}"
co="${co} --with-cuda --without-umpire --enable-unified-memory"
co="${co} --enable-debug --with-gpu-arch=90"
co="${co} --with-memory-tracker --enable-persistent --with-print-errors"
# co and mo are expanded unquoted so that every option becomes its own argument
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-um-dbg

# 2A) Autotools, GCC 13.3.1 + CUDA 12.9.1: unified memory, mixed integers, debug build
if module reset; then
   module -q load cuda/12.9.1 gcc/13.3.1
fi
co="${aco}"
co="${co} --with-cuda --without-umpire --enable-unified-memory"
co="${co} --enable-debug --with-gpu-arch=90"
co="${co} --enable-mixedint --with-print-errors"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-um-mixedint

# 3A) Autotools, GCC 13.3.1 + CUDA 12.9.1: unified memory, OpenMP, hopscotch,
#     shared library; --enable-debug is not passed (labelled "release mode")
if module reset; then
   module -q load cuda/12.9.1 gcc/13.3.1
fi
co="${aco}"
co="${co} --with-cuda --without-umpire --enable-unified-memory"
co="${co} --with-openmp --enable-hopscotch --enable-shared"
co="${co} --with-gpu-arch=90 --with-print-errors"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-um-shared

# 4A) Autotools, GCC 13.3.1 + CUDA 12.9.1: single precision, unified memory,
#     cuSOLVER, OpenMP, hopscotch, shared library
#     NOTE(review): the stanza was labelled "debug mode" but --enable-debug is
#     not passed -- confirm which one is intended.
if module reset; then
   module -q load cuda/12.9.1 gcc/13.3.1
fi
co="${aco}"
co="${co} --enable-single --without-umpire --enable-unified-memory --with-cuda"
co="${co} --enable-cusolver --with-openmp --enable-hopscotch"
co="${co} --enable-shared --with-gpu-arch=90 --with-print-errors"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-um-single

# 5A) Autotools, GCC 13.3.1 + CUDA 12.9.1: MPI disabled, unified memory
if module reset; then
   module -q load cuda/12.9.1 gcc/13.3.1
fi
co="${aco}"
co="${co} --without-MPI --without-umpire --enable-unified-memory"
co="${co} --with-cuda --with-gpu-arch=90 --with-print-errors"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-um-without-MPI

# 6A) GCC 13.3.1 + CUDA 12.9.1 with Umpire
module reset && module -q load cuda/12.9.1 gcc/13.3.1
UMPIRE_DIR=/usr/workspace/hypre/ext-libs/Umpire/install-umpire_2025.09.0-cuda_12.9_sm90-gcc_13.3
# The library list "umpire camp" contains a space, so its quotes have to reach
# ${co} in escaped form.  Inside a double-quoted string that is written \\\"
# (which yields \").  The former spelling \\" produced a lone backslash and then
# CLOSED the string: co was never assigned, the shell tried to execute a command
# named "camp\ --with-print-errors", and this stanza reused the previous co.
# NOTE(review): confirm that test.sh/basic.sh expect the quotes as \" here.
co="${aco} --with-cuda --with-gpu-arch=90 --with-umpire --with-umpire-include=${UMPIRE_DIR}/include --with-umpire-lib-dirs=${UMPIRE_DIR}/lib --with-umpire-libs=\\\"umpire camp\\\" --with-print-errors"
# co and mo are expanded unquoted so that every option becomes its own argument
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-bench

# 7A) Autotools, GCC 13.3.1 + CUDA 12.9.1: tests use host execution,
#     memory tracker, debug build
if module reset; then
   module -q load cuda/12.9.1 gcc/13.3.1
fi
co="${aco}"
co="${co} --with-cuda --with-gpu-arch=90 --without-umpire"
co="${co} --with-test-using-host --with-memory-tracker"
co="${co} --enable-debug --with-print-errors"
# Unquoted on purpose: each option string must split into separate arguments
./test.sh basic.sh $src_dir -co: $co -mo: $mo
./renametest.sh basic $output_dir/basic-cuda-cpu

# Print to stderr, one per line, the path of every non-empty *.err file found
# under the directory given as $1.
# find writes the paths itself, so names containing whitespace or glob
# characters are reported intact instead of being split by the shell.
report_nonempty_errfiles () {
   find "$1" ! -size 0 -name "*.err" -print >&2
}

# Echo to stderr all nonempty error files in $output_dir
report_nonempty_errfiles "$output_dir"