File: omp_parallel_num_threads_list.c

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.6-1~exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,304 kB
  • sloc: cpp: 7,438,677; ansic: 1,393,822; asm: 1,012,926; python: 241,650; f90: 86,635; objc: 75,479; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672
file content (212 lines) | stat: -rw-r--r-- 7,608 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 %libomp-run
#include <stdio.h>
#include "omp_testsuite.h"

// The list form of the num_threads clause is emulated in this test by calling
// runtime entry points directly. Once the compiler supports the list form of
// the num_threads clause, remove these declarations and use the clause itself.
#if defined(__cplusplus)
extern "C" {
#endif

// Produces the value this test forwards as the `gtid` argument of
// __kmpc_push_num_threads_list; the test always passes NULL for `loc`.
// NOTE(review): presumably the calling thread's global id inside the OpenMP
// runtime -- confirm against the runtime sources.
int __kmpc_global_thread_num(void *loc);
// Used by the test as a stand-in for num_threads(list[0], ..., list[length-1])
// on the parallel region that immediately follows the call: `list` holds
// `length` requested team sizes, one per nesting level, and the test expects
// the last element to keep applying to deeper levels. The test always passes
// NULL for `loc` and the result of __kmpc_global_thread_num for `gtid`.
void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length,
                                  int *list);

#if defined(__cplusplus)
}
#endif

// Checks the team size seen at each nesting level of parallel regions, using
// both the OMP_NUM_THREADS list from the RUN line (2 threads at each of five
// levels) and num_threads lists pushed through the runtime interface.
// In every region exactly one thread (omp single) adds 1 to the reduction
// counter when the team size differs from the expected value.
// Returns 1 when every check passed and 0 otherwise.
int test_omp_parallel_num_threads_list() {
  int mismatches = 0;

  // Baseline: only the environment list is in effect, so the top three
  // levels must each run with 2 threads.
#pragma omp parallel reduction(+ : mismatches) // level 1
  {
#pragma omp single
    mismatches += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : mismatches) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
#pragma omp single
        mismatches += (omp_get_num_threads() != 2);
      } // level 3 done
    } // level 2 done
  } // level 1 done

  // A plain one-element num_threads clause on the outermost region changes
  // that region only; the nested levels keep the environment setting.
#pragma omp parallel reduction(+ : mismatches) num_threads(4) // level 1
  {
#pragma omp single
    mismatches += (omp_get_num_threads() != 4);
#pragma omp parallel reduction(+ : mismatches) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 2); // not affected
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
#pragma omp single
        mismatches += (omp_get_num_threads() != 2); // not affected
      } // level 3 done
    } // level 2 done
  } // level 1 done

  // The same one-element clause, this time on the second level.
#pragma omp parallel reduction(+ : mismatches) // level 1
  {
#pragma omp single
    mismatches += (omp_get_num_threads() != 2); // not affected
#pragma omp parallel reduction(+ : mismatches) num_threads(4) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 4);
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
#pragma omp single
        mismatches += (omp_get_num_threads() != 2); // not affected
      } // level 3 done
    } // level 2 done
  } // level 1 done

  // A two-element list, pushed through the compiler interface because the
  // clause cannot express it yet. It is expected to replace the previous
  // settings entirely for the region that follows.
  int outer_3_3[2] = {3, 3};
  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                               outer_3_3);
#pragma omp parallel reduction(+ : mismatches) // num_threads(3,3) // level 1
  {
#pragma omp single
    mismatches += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : mismatches) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
        // Past the end of the list, the last element should keep applying.
#pragma omp single
        mismatches += (omp_get_num_threads() != 3);
      } // level 3 done
    } // level 2 done
  } // level 1 done

  // The same list again, but pushed from inside the first level.
#pragma omp parallel reduction(+ : mismatches) // level 1
  {
#pragma omp single
    mismatches += (omp_get_num_threads() != 2); // not affected
    int inner_3_3[2] = {3, 3};
    __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                                 inner_3_3);
#pragma omp parallel reduction(+ : mismatches) // num_threads(3,3) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
        // Past the end of the list, the last element keeps applying.
#pragma omp single
        mismatches += (omp_get_num_threads() != 3);
      } // level 3 done
    } // level 2 done
    // A sibling region that follows must NOT inherit the pushed list.
#pragma omp parallel reduction(+ : mismatches) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 2); // not affected
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
#pragma omp single
        mismatches += (omp_get_num_threads() != 2); // not affected
      } // level 3 done
    } // level 2 done
  } // level 1 done

  // Lists pushed at more than one nesting level.
  int outer_3_2[2] = {3, 2};
  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                               outer_3_2);
#pragma omp parallel reduction(+ : mismatches) // num_threads(3,2) // level 1
  {
#pragma omp single
    mismatches += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : mismatches) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
#pragma omp single
        mismatches += (omp_get_num_threads() != 2);
        int nested_3_1[2] = {3, 1};
        __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                                     nested_3_1);
#pragma omp parallel reduction(+ : mismatches) // num_threads(3,1) // level 4
        {
#pragma omp single
          mismatches += (omp_get_num_threads() != 3);
#pragma omp parallel reduction(+ : mismatches) // level 5
          {
#pragma omp single
            mismatches += (omp_get_num_threads() != 1);
#pragma omp parallel reduction(+ : mismatches) // level 6
            {
#pragma omp single
              mismatches += (omp_get_num_threads() != 1);
            } // level 6 done
          } // level 5 done
        } // level 4 done
#pragma omp parallel reduction(+ : mismatches) // level 4
        {
#pragma omp single
          mismatches += (omp_get_num_threads() != 2);
        } // level 4 done
      } // level 3 done
    } // level 2 done
#pragma omp parallel reduction(+ : mismatches) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
#pragma omp single
        mismatches += (omp_get_num_threads() != 2);
      } // level 3 done
    } // level 2 done
  } // level 1 done

  // With no list pending, the baseline behaviour must be back.
#pragma omp parallel reduction(+ : mismatches) // level 1
  {
#pragma omp single
    mismatches += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : mismatches) // level 2
    {
#pragma omp single
      mismatches += (omp_get_num_threads() != 2);
#pragma omp parallel reduction(+ : mismatches) // level 3
      {
#pragma omp single
        mismatches += (omp_get_num_threads() != 2);
      } // level 3 done
    } // level 2 done
  } // level 1 done

  return mismatches == 0;
}

// Runs the test REPETITIONS times. The exit status is the number of runs
// that reported failure, so 0 means every repetition passed.
int main() {
  int failed_runs = 0;
  int rep;

  for (rep = 0; rep < REPETITIONS; ++rep) {
    // The test returns non-zero on success, so its negation is 1 per failure.
    failed_runs += !test_omp_parallel_num_threads_list();
  }
  return failed_runs;
}