File: build-hpc.yml

package info (click to toggle)
ectrans 1.7.0-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,968 kB
  • sloc: f90: 51,064; ansic: 5,942; cpp: 1,112; python: 488; sh: 127; makefile: 47
file content (192 lines) | stat: -rw-r--r-- 6,432 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
name: build-hpc

# Events that start this workflow
on:

  # Pushes to the main and develop branches; pushes of tags are ignored
  push:
    branches:
      - main
      - develop
    tags-ignore:
      - '**'

  # Any pull request
  pull_request: ~

  # Manual dispatch on demand
  workflow_dispatch: ~

  # A label being added to a pull request; the job-level `if` decides
  # which label actually lets the job run
  pull_request_target:
    types:
      - labeled

# Environment variables shared by every job in this workflow
env:
  ECTRANS_TOOLS: ${{ github.workspace }}/.github/tools
  CTEST_PARALLEL_LEVEL: '1'
  # Increase to force new cache to be created
  CACHE_SUFFIX: v1

jobs:
  ci-hpc:
    name: ci-hpc
    # Run when either
    #   - the event is not a `labeled` action and the pull request head is not a fork, or
    #   - the label that was applied is `approved-for-ci`
    if: ${{ (!github.event.pull_request.head.repo.fork && github.event.action != 'labeled') || github.event.label.name == 'approved-for-ci' }}

    strategy:
      # Let the remaining matrix jobs finish even if one of them fails
      fail-fast: false

      matrix:
        name:
          - ac-gpu nvhpc
          - lumi-g cce

        # Per-configuration settings, attached to the matrix entry with the same name
        include:
          # NVHPC toolchain on the ac-batch site
          - name: ac-gpu nvhpc
            site: ac-batch
            troika_user_secret: HPC_CI_SSH_USER
            sbatch_options: |
              #SBATCH --time=00:30:00
              #SBATCH --nodes=1
              #SBATCH --ntasks=2
              #SBATCH --cpus-per-task=32
              #SBATCH --gpus-per-task=1
              #SBATCH --mem=200G
              #SBATCH --qos=dg
            modules:
              - cmake
              - ninja
              - prgenv/nvidia
              - nvidia/24.5
              - hpcx-openmpi/2.19.0-cuda
              - fftw
            cmake_options: -DENABLE_ACC=ON -G Ninja

          # Cray toolchain on the lumi site; the {0} placeholders below are
          # filled with the secret named by account_secret via format()
          - name: lumi-g cce
            site: lumi
            troika_user_secret: LUMI_CI_SSH_USER
            account_secret: LUMI_CI_PROJECT
            sbatch_options: |
              #SBATCH --time=01:10:00
              #SBATCH --nodes=1
              #SBATCH --ntasks-per-node=8
              #SBATCH --gpus-per-task=1
              #SBATCH --partition=standard-g
              #SBATCH --account={0}
            modules:
              - CrayEnv
              - PrgEnv-cray
              - cce/17.0.1
              - craype-accel-amd-gfx90a
              - rocm/6.0.3
              - cray-fftw
              - buildtools
            # Both directories are unique per workflow run and per attempt
            output_dir: /scratch/{0}/github-actions/ectrans/${{ github.run_id }}/${{ github.run_attempt }}
            workdir: /scratch/{0}/github-actions/ectrans/${{ github.run_id }}/${{ github.run_attempt }}
            cmake_options: >
              -DOpenMP_C_LIB_NAMES=craymp -DOpenMP_CXX_LIB_NAMES=craymp
              -DOpenMP_Fortran_LIB_NAMES=craymp -DOpenMP_craymp_LIBRARY=craymp
              -DENABLE_OMP=ON
            # Each entry is exported in the job script before configuring
            env_vars:
              - ROCFFT_RTC_CACHE_PATH=$PWD/../../rocfft_kernel_cache.db
              - MPICH_GPU_SUPPORT_ENABLED=1
              - MPICH_SMP_SINGLE_COPY_MODE=NONE
              - CMAKE_BUILD_PARALLEL_LEVEL=1


    # Runner labels required for this job
    runs-on:
      - self-hosted
      - linux
      - hpc
    env:
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: ecmwf-actions/reusable-workflows/ci-hpc-generic@v2
        with:
          site: ${{ matrix.site }}
          # The secret is looked up by the name stored in the matrix entry
          troika_user: ${{ secrets[matrix.troika_user_secret] }}
          # format() substitutes the account secret for the {0} placeholder
          # used in the matrix values
          sbatch_options: ${{ format(matrix.sbatch_options, secrets[matrix.account_secret]) }}
          output_dir: ${{ format(matrix.output_dir, secrets[matrix.account_secret]) || '' }}
          workdir: ${{ format(matrix.workdir, secrets[matrix.account_secret]) || '' }}
          # Values referenced by the Jinja expressions in `template`
          template_data: |
            site: ${{ matrix.site }}
            cmake_options:
              - -DENABLE_MPI=ON
              - -DENABLE_GPU=ON
              - -DENABLE_ETRANS=ON
              - -DENABLE_GPU_GRAPHS_GEMM=OFF
              - -DENABLE_GPU_GRAPHS_FFT=OFF
              - ${{ matrix.cmake_options || '' }}
            ctest_options: ${{ matrix.ctest_options || '' }}
            dependencies:
              ecmwf/ecbuild:
                version: develop
              ecmwf-ifs/fiat:
                version: develop
                cmake_options:
                  - -DENABLE_MPI=ON
                  - ${{ matrix.cmake_options || '' }}
          template: |
            REPO=${{ github.event.pull_request.head.repo.full_name || github.repository }}
            SHA=${{ github.event.pull_request.head.sha || github.sha }}

            # Top-level working directory, recorded before any pushd so that
            # cleanup can always return to it
            BASEDIR=$PWD

            # Remove the dependency checkouts, the project checkout and the build tree
            cleanup() {
              # A failure may be raised from inside a pushd'ed directory, while
              # the paths below are relative to the top-level directory
              cd "$BASEDIR"

              {% for name in dependencies.keys() %}
                rm -rf {{name}}
              {% endfor %}

              # -f: an early failure may leave some of these paths uncreated
              rm -rf "$REPO"
              rm -rf build
            }

            # Invoked when a command fails: clean up, then report the failure
            error_trap() {
              cleanup

              echo "Finished: FAILURE"
              exit 1
            }

            trap error_trap ERR

            # Empty entries are skipped, so a configuration without modules or
            # env_vars does not render a bare "module load" or "export"
            {% for module in "${{ join(matrix.modules, ',') }}".split(',') if module %}
              module load {{module}}
            {% endfor %}

            {% for var in "${{ join(matrix.env_vars, ',') }}".split(',') if var %}
              export {{var}}
            {% endfor %}

            export CMAKE_TEST_LAUNCHER="srun;-n;1"
            export DR_HOOK_ASSERT_MPI_INITIALIZED=0

            # Fetch, build and install every dependency under its own directory
            {% for name, options in dependencies.items() %}
                mkdir -p {{name}}
                pushd {{name}}
                git init
                git remote add origin ${{ github.server_url }}/{{name}}
                git fetch origin {{options['version']}}
                git reset --hard FETCH_HEAD
                cmake -S . -B build \
                  {% for dep in dependencies %}
                    {% set org, proj = dep.split('/') %}
                    -D{{proj}}_ROOT=$BASEDIR/{{dep}}/installation \
                  {% endfor %}
                  {{ options['cmake_options']|join(' ') }}
                cmake --build build
                cmake --install build --prefix installation
                popd
            {% endfor %}

            # Fetch the commit under test
            mkdir -p $REPO
            pushd $REPO
            git init
            git remote add origin ${{ github.server_url }}/$REPO
            git fetch origin $SHA
            git reset --hard FETCH_HEAD
            popd

            # Configure against the installed dependencies, then build
            cmake -S $REPO -B build \
              {% for name in dependencies %}
                {% set org, proj = name.split('/') %}
                -D{{proj}}_ROOT=$BASEDIR/{{name}}/installation \
              {% endfor %}
              {{ cmake_options|join(' ') }}
            cmake --build build

            # Fall back to an empty string when no ctest options are supplied
            ctest --test-dir build --output-on-failure {{ ctest_options or '' }}

            cleanup