File: test.bats

package info (click to toggle)
charliecloud 0.43-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 3,084 kB
  • sloc: python: 6,021; sh: 4,284; ansic: 3,863; makefile: 598
file content (159 lines) | stat: -rw-r--r-- 5,503 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# Image tag for this test file; presumably read by the common.bash helpers
# loaded below — TODO confirm against common.bash. (Unquoted RHS is safe in
# an assignment: no word splitting occurs.)
CH_TEST_TAG=$ch_test_tag
load "${CHTEST_DIR}/common.bash"

setup () {
    # Per-test setup (bats runs this before every @test): declare the test
    # scope, skip unless the image has been built, and skip unless PMIx is
    # available. All three helpers come from common.bash.
    scope full
    prerequisites_ok "$ch_tag"
    pmix_or_skip
    # One iteration on most of these tests because we just care about
    # correctness, not performance. (If we let the benchmark choose, there is
    # an overwhelming number of errors when MPI calls start failing, e.g. if
    # CMA isn’t working, and this makes the test take really long.)
    #
    # Large -npmin because we only want to test all cores.
    # Globals set here are consumed by the @test bodies below.
    imb_mpi1=/usr/local/src/mpi-benchmarks/src_c/IMB-MPI1
    imb_args="-iter 1 -npmin 1000000000"

    # On the HSN performance test, we do want to run multiple iterations to
    # reduce variability. The benchmark will automatically scale down the
    # number of iterations based on the expected run time, disabling that so
    # we get more consistent results. Npmin is omitted as we are only running
    # with two processes, one per node.
    imb_perf_args="-iter 100 -iter_policy off"
}

# Succeed iff the benchmark output in $1 contains no "errno =" message,
# i.e. no MPI call reported a failure.
check_errors () {
    [[ "$1" != *'errno ='* ]]
}

# Succeed iff the benchmark output in $1 reports that every rank reached
# MPI_Finalize, i.e. the run completed cleanly.
check_finalized () {
    case "$1" in
        *'All processes entering MPI_Finalize'*) return 0 ;;
        *) return 1 ;;
    esac
}

# Verify the MPI rank count the benchmark reports.
#
# Arguments: $1 - expected number of ranks
#            $2 - benchmark output to scan (optional; defaults to the bats
#                 global $output, which is what callers actually relied on
#                 before — previously $2 was accepted but silently ignored)
#
# Extracts the number from the IMB header line "… #processes = N" and
# compares it numerically against $1.
check_process_ct () {
    local ranks_expected=$1
    local out=${2-$output}      # fall back to bats’ $output if $2 is unset
    local ranks_found
    echo "ranks expected: ${ranks_expected}"
    ranks_found=$(  echo "$out" \
                  | grep -F '#processes =' \
                  | sed -r 's/^.+#processes = ([0-9]+)\s+$/\1/')
    echo "ranks found: ${ranks_found}"
    [[ $ranks_found -eq "$ranks_expected" ]]
}

# one from "Single Transfer Benchmarks"
@test "${ch_tag}/pingpong (guest launch)" {
    openmpi_or_skip
    # mpirun is started *inside* the container ("guest launch").
    # $ch_unslurm, $ch_mpirun_np, and $imb_args are deliberately unquoted so
    # they word-split into separate arguments (hence the SC2086 disable).
    # shellcheck disable=SC2086
    run ch-run $ch_unslurm "$ch_img" -- \
               "$ch_mpi_exe" $ch_mpirun_np "$imb_mpi1" $imb_args PingPong
    echo "$output"
    [[ $status -eq 0 ]]
    check_errors "$output"
    check_process_ct 2 "$output"  # PingPong is strictly a 2-rank benchmark
    check_finalized "$output"
}

# one from "Parallel Transfer Benchmarks"
@test "${ch_tag}/sendrecv (guest launch)" {
    openmpi_or_skip
    # Guest launch: mpirun runs inside the container. Unquoted vars are
    # intentional word-splitting (see SC2086 disable).
    # shellcheck disable=SC2086
    run ch-run $ch_unslurm "$ch_img" -- \
               "$ch_mpi_exe" $ch_mpirun_np "$imb_mpi1" $imb_args Sendrecv
    echo "$output"
    [[ $status -eq 0 ]]
    check_errors "$output"
    # Guest launch is single-node, so expect one rank per core on this node.
    check_process_ct "$ch_cores_node" "$output"
    check_finalized "$output"
}

# one from "Collective Benchmarks"
@test "${ch_tag}/allreduce (guest launch)" {
    openmpi_or_skip
    # Guest launch: mpirun runs inside the container. Unquoted vars are
    # intentional word-splitting (see SC2086 disable).
    # shellcheck disable=SC2086
    run ch-run $ch_unslurm "$ch_img" -- \
               "$ch_mpi_exe" $ch_mpirun_np "$imb_mpi1" $imb_args Allreduce
    echo "$output"
    [[ $status -eq 0 ]]
    check_errors "$output"
    # Guest launch is single-node, so expect one rank per core on this node.
    check_process_ct "$ch_cores_node" "$output"
    check_finalized "$output"
}

# Verify that the Cray libfabric provider was injected into the image:
# fi_info run inside the container must list it as both a provider and a
# fabric. The exit status is checked first, consistent with every other
# test in this file, so a failed fi_info run is reported as a status
# failure rather than as a misleading pattern mismatch.
@test "${ch_tag}/inject cray mpi ($cray_prov)" {
    cray_ofi_or_skip "$ch_img"
    run ch-run "$ch_img" -- fi_info
    echo "$output"
    [[ $status -eq 0 ]]
    [[ $output == *"provider: $cray_prov"* ]]
    [[ $output == *"fabric: $cray_prov"* ]]
}

# This test compares OpenMPI’s point to point bandwidth with all high speed
# plugins enabled against the performance just using TCP. Pass if HSN
# performance is at least double TCP.
@test "${ch_tag}/using the high-speed network (host launch)" {
    multiprocess_ok
    [[ $ch_multinode ]] || skip "multinode only"
    if [[ $ch_cray ]]; then
        [[ $cray_prov == 'gni' ]] && skip "gni doesn't support tcp"
    fi
    openmpi_or_skip
    # Verify we have known HSN devices present. (Note that -d tests for
    # directory, not device.)
    if [[ ! -d /dev/infiniband ]] && [[ ! -e /dev/cxi0 ]]; then
        pedantic_fail "no high speed network detected"
    fi
    # Baseline run with all transport plugins enabled. "tail -n +35"
    # presumably skips the IMB header and small-message rows, and column 6
    # looks like the bandwidth column of Sendrecv output, so the pipeline
    # keeps the maximum bandwidth — TODO confirm against IMB output format.
    # shellcheck disable=SC2086
    hsn_enabled_bw=$($ch_mpirun_2_2node ch-run \
                       "$ch_img" -- "$imb_mpi1" $imb_perf_args Sendrecv \
                     | tail -n +35 | sort -nrk6 | head -1 | awk '{print $6}')
    # Configure network transport plugins to TCP only.
    # shellcheck disable=SC2086
    hsn_disabled_bw=$(OMPI_MCA_pml=ob1 OMPI_MCA_btl=tcp,self \
                      $ch_mpirun_2_2node ch-run "$ch_img" -- \
                      "$imb_mpi1" $imb_perf_args Sendrecv | tail -n +35 \
                      | sort -nrk6 | head -1 | awk '{print $6}')
    echo "Max bandwidth with high speed network: $hsn_enabled_bw MB/s"
    echo "Max bandwidth without high speed network: $hsn_disabled_bw MB/s"
    # ${var%\.*} strips the fractional part so the bandwidths can be
    # compared as integers: pass iff TCP < half of HSN.
    [[ ${hsn_disabled_bw%\.*} -lt $((${hsn_enabled_bw%\.*} / 2)) ]]
}

@test "${ch_tag}/pingpong (host launch)" {
    multiprocess_ok
    # Host launch: mpirun runs on the host and starts one ch-run per rank;
    # --join shares one container namespace set. Unquoted $ch_mpirun_core
    # and $imb_args word-split intentionally (SC2086 disable).
    # shellcheck disable=SC2086
    run $ch_mpirun_core ch-run --join "$ch_img" -- \
                               "$imb_mpi1" $imb_args PingPong
    echo "$output"
    [[ $status -eq 0 ]]
    check_errors "$output"
    check_process_ct 2 "$output"  # PingPong is strictly a 2-rank benchmark
    check_finalized "$output"
}

@test "${ch_tag}/sendrecv (host launch)" {
    multiprocess_ok
    # Host launch with --join (shared container namespaces); unquoted vars
    # word-split intentionally (SC2086 disable).
    # shellcheck disable=SC2086
    run $ch_mpirun_core ch-run --join "$ch_img" -- \
                               "$imb_mpi1" $imb_args Sendrecv
    echo "$output"
    [[ $status -eq 0 ]]
    check_errors "$output"
    # Host launch can span nodes, so expect one rank per core overall.
    check_process_ct "$ch_cores_total" "$output"
    check_finalized "$output"
}

@test "${ch_tag}/allreduce (host launch)" {
    multiprocess_ok
    # Host launch with --join (shared container namespaces); unquoted vars
    # word-split intentionally (SC2086 disable).
    # shellcheck disable=SC2086
    run $ch_mpirun_core ch-run --join "$ch_img" -- \
                               "$imb_mpi1" $imb_args Allreduce
    echo "$output"
    [[ $status -eq 0 ]]
    check_errors "$output"
    # Host launch can span nodes, so expect one rank per core overall.
    check_process_ct "$ch_cores_total" "$output"
    check_finalized "$output"
}

# Final test: re-unpack the image on every node so later test files start
# from a pristine image. The argument presumably selects Cray-specific
# handling in the helper — TODO confirm against common.bash.
@test "${ch_tag}/revert image" {
    unpack_img_all_nodes "$ch_cray"
}