// Offload runtime test: launches OpenMP target regions with various
// thread_limit, num_threads and ompx_attribute settings and checks the values
// printed around "MaxFlatWorkGroupSize" in the run output.
// clang-format off
// RUN: %libomptarget-compile-generic
// RUN: env LIBOMPTARGET_INFO=16 \
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefix=DEFAULT
// REQUIRES: amdgpu
// Returns 1. The optnone attribute asks the compiler not to optimize this
// function; callers multiply by its result to get values that are not literal
// constants in the source.
__attribute__((optnone)) int optnone() {
  int result = 1;
  return result;
}
// Test driver. Launches a sequence of OpenMP target regions; each region is
// preceded by a check directive (prefix DEFAULT, see the run lines at the top
// of the file) giving the text expected in the run output for that launch.
// NOTE(review): the directives are presumably matched in file order by
// FileCheck, so regions and their check lines must stay paired and in sequence.
int main() {
// Trip count routed through optnone() (which returns 1), so it is a runtime
// value rather than a literal: 1 * 4098 * 32.
int N = optnone() * 4098 * 32;
// Combined construct, no thread_limit/num_threads clause. The leading number
// may be 128 or 256, and the number after MaxFlatWorkGroupSize must repeat it.
// NOTE(review): the leading number is presumably the launch thread count.
// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
#pragma omp target teams distribute parallel for simd
for (int i = 0; i < N; ++i) {
optnone();
}
// Same region and same expectation, repeated a second time.
// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
#pragma omp target teams distribute parallel for simd
for (int i = 0; i < N; ++i) {
optnone();
}
// Same region and same expectation, repeated a third time.
// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
#pragma omp target teams distribute parallel for simd
for (int i = 0; i < N; ++i) {
optnone();
}
// Split form (separate target and teams directives, no simd), still without
// any clause: same expectation as the combined form above.
// DEFAULT: [[NT:(128|256)]] (MaxFlatWorkGroupSize: [[NT]]
#pragma omp target
#pragma omp teams distribute parallel for
for (int i = 0; i < N; ++i) {
optnone();
}
// thread_limit on the target directive with a runtime expression (evaluates to
// 42): expects a leading 42 while MaxFlatWorkGroupSize is reported as 1024.
// DEFAULT: 42 (MaxFlatWorkGroupSize: 1024
#pragma omp target thread_limit(optnone() * 42)
#pragma omp teams distribute parallel for
for (int i = 0; i < N; ++i) {
optnone();
}
// Same runtime thread_limit plus an explicit amdgpu_flat_work_group_size(42, 42)
// attribute: both reported numbers are expected to be 42.
// DEFAULT: 42 (MaxFlatWorkGroupSize: 42
#pragma omp target thread_limit(optnone() * 42) ompx_attribute(__attribute__((amdgpu_flat_work_group_size(42, 42))))
#pragma omp teams distribute parallel for
for (int i = 0; i < N; ++i) {
optnone();
}
// Only the amdgpu_flat_work_group_size(42, 42) attribute, no thread_limit:
// both reported numbers are still expected to be 42.
// DEFAULT: 42 (MaxFlatWorkGroupSize: 42
#pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(42, 42))))
#pragma omp teams distribute parallel for
for (int i = 0; i < N; ++i) {
optnone();
}
// Runtime num_threads expression on the nested construct: only the
// MaxFlatWorkGroupSize value (1024) is checked, not the leading number.
// DEFAULT: MaxFlatWorkGroupSize: 1024
#pragma omp target
#pragma omp teams distribute parallel for num_threads(optnone() * 42)
for (int i = 0; i < N; ++i) {
optnone();
}
// Runtime thread_limit expression on the combined construct: only
// MaxFlatWorkGroupSize (1024) is checked.
// DEFAULT: MaxFlatWorkGroupSize: 1024
#pragma omp target teams distribute parallel for thread_limit(optnone() * 42)
for (int i = 0; i < N; ++i) {
optnone();
}
// Runtime num_threads expression on the combined construct: only
// MaxFlatWorkGroupSize (1024) is checked.
// DEFAULT: MaxFlatWorkGroupSize: 1024
#pragma omp target teams distribute parallel for num_threads(optnone() * 42)
for (int i = 0; i < N; ++i) {
optnone();
}
// Literal num_threads(9) on the nested construct: both reported numbers are
// expected to be 9.
// DEFAULT: 9 (MaxFlatWorkGroupSize: 9
#pragma omp target
#pragma omp teams distribute parallel for num_threads(9)
for (int i = 0; i < N; ++i) {
optnone();
}
// Literal thread_limit(4) on the target directive: both numbers expected to be 4.
// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
#pragma omp target thread_limit(4)
#pragma omp teams distribute parallel for
for (int i = 0; i < N; ++i) {
optnone();
}
// Literal thread_limit(4) on the nested teams construct: both numbers expected
// to be 4.
// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
#pragma omp target
#pragma omp teams distribute parallel for thread_limit(4)
for (int i = 0; i < N; ++i) {
optnone();
}
// Literal num_threads(9) on the combined construct: both numbers expected to be 9.
// DEFAULT: 9 (MaxFlatWorkGroupSize: 9
#pragma omp target teams distribute parallel for num_threads(9)
for (int i = 0; i < N; ++i) {
optnone();
}
// Literal thread_limit(4) on the combined construct with simd: both numbers
// expected to be 4.
// DEFAULT: 4 (MaxFlatWorkGroupSize: 4
#pragma omp target teams distribute parallel for simd thread_limit(4)
for (int i = 0; i < N; ++i) {
optnone();
}
}
|