File: meson.build

package info (click to toggle)
numpy 1%3A2.2.4%2Bds-1.2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 83,420 kB
  • sloc: python: 248,499; asm: 232,365; ansic: 216,874; cpp: 135,657; f90: 1,540; sh: 938; fortran: 558; makefile: 414; sed: 139; xml: 109; java: 92; perl: 79; cs: 54; javascript: 53; objc: 29; lex: 13; yacc: 9
file content (318 lines) | stat: -rw-r--r-- 10,127 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# The CPU Dispatcher implementation.
#
# This script handles the CPU dispatcher and requires the Meson module
# 'features'.
#
# The CPU dispatcher script is responsible for three main tasks:
#
# 1. Defining the enabled baseline and dispatched features by parsing build
#    options or compiler arguments, including detection of native flags.
#
# 2. Specifying the baseline arguments and definitions across all sources.
#
# 3. Generating the main configuration file, which contains information about
#    the enabled features, along with a collection of C macros necessary for
#    runtime dispatching. For more details, see the template file
#    `main_config.h.in`.
#
# This script exposes the following variables:
#
# - `CPU_BASELINE`: A set of CPU feature objects obtained from
#                   `mod_features.new()`, representing the minimum CPU features
#                   specified by the build option `-Dcpu-baseline`.
#
# - `CPU_BASELINE_NAMES`: A set of enabled CPU feature names, representing the
#                         minimum CPU features specified by the build option
#                         `-Dcpu-baseline`.
#
# - `CPU_DISPATCH_NAMES`: A set of enabled CPU feature names, representing the
#                         additional CPU features that can be dispatched at
#                         runtime, specified by the build option
#                         `-Dcpu-dispatch`.
#
# - `CPU_FEATURES`: A dictionary containing all supported CPU feature objects.
#
# Additionally, this script exposes a set of variables that represent each
# supported feature to be used within the Meson function
# `mod_features.multi_targets()`.

# Prefix shared by every generated C macro and feature definition.
CPU_CONF_PREFIX = 'NPY_'
# File name of the generated main configuration header.
CPU_CONF_CONFIG = 'npy_cpu_dispatch_config.h'

if get_option('disable-optimization')
  # Optimizations are switched off: announce it to all C/C++ sources through a
  # define and request neither baseline nor dispatched features.
  add_project_arguments('-D' + CPU_CONF_PREFIX + 'DISABLE_OPTIMIZATION', language: ['c', 'cpp'])
  CPU_CONF_BASELINE = 'none'
  CPU_CONF_DISPATCH = 'none'
else
  # Scan the user-supplied C arguments for a flag that selects a target
  # architecture globally. When one is found, the token "detect" is appended
  # to the requested baseline below.
  baseline_detect = false
  c_args = get_option('c_args')
  foreach arg : c_args
    # `contains()` is case-sensitive, so the Intel flag is listed both as
    # '-xhost' and as '-xHost' (the spelling this script itself uses when it
    # builds the "native" flags for the Intel compiler).
    foreach carch : ['-march', '-mcpu', '-xhost', '-xHost', '/QxHost']
      if arg.contains(carch)
        message('Appending option "detect" to "cpu-baseline" due to detecting global architecture c_arg "' + arg + '"')
        baseline_detect = true
        break
      endif
    endforeach
    # One matching argument is enough; stop scanning so the message above is
    # printed only once.
    if baseline_detect
      break
    endif
  endforeach
  # The minimal set of CPU features the build requires.
  CPU_CONF_BASELINE = get_option('cpu-baseline')
  if baseline_detect
    CPU_CONF_BASELINE += '+detect'
  endif
  # The additional CPU features to be dispatched at runtime.
  CPU_CONF_DISPATCH = get_option('cpu-dispatch')
endif

# Initialize the CPU features. Each architecture sub-directory exports its
# feature objects (e.g. 'SSE', 'AVX' for x86) plus a dictionary such as
# "X86_FEATURES" that maps every feature name to its object.
subdir('x86')
subdir('ppc64')
subdir('s390x')
subdir('arm')
subdir('riscv64')

# Merge the per-architecture dictionaries into a single name -> object lookup.
CPU_FEATURES = {}
foreach arch_features : [
  ARM_FEATURES,
  X86_FEATURES,
  PPC64_FEATURES,
  S390X_FEATURES,
  RV64_FEATURES,
]
  CPU_FEATURES += arch_features
endforeach

# Prepare the per-family feature sets used while parsing the requested
# baseline (CPU_CONF_BASELINE) and dispatch (CPU_CONF_DISPATCH) features.
cpu_family = host_machine.cpu_family()

# Features selected by the build option value 'min', keyed by CPU family.
# Families missing from the table get an empty list.
min_features_by_family = {
  'x86': [SSE2],
  'x86_64': [SSE3],
  'ppc64': [],
  's390x': [],
  'arm': [],
  'aarch64': [ASIMD],
  'riscv64': [],
  'wasm32': [],
}
min_features = min_features_by_family.get(cpu_family, [])
# Little-endian ppc64 is the one case that also depends on the endianness.
if cpu_family == 'ppc64' and host_machine.endian() == 'little'
  min_features = [VSX2]
endif

# Features selected by the build option values 'max', 'native' and 'detect',
# keyed by CPU family. Families missing from the table get an empty dictionary.
max_features_by_family = {
  'x86': X86_FEATURES,
  'x86_64': X86_FEATURES,
  'ppc64': PPC64_FEATURES,
  's390x': S390X_FEATURES,
  'arm': ARM_FEATURES,
  'aarch64': ARM_FEATURES,
  'riscv64': RV64_FEATURES,
  'wasm32': {},
}
max_features_dict = max_features_by_family.get(cpu_family, {})

# Flatten the dictionary into a plain list of feature objects.
max_features = []
foreach fet_name, fet_obj : max_features_dict
  max_features += [fet_obj]
endforeach

# Without any known feature for this family nothing can be tested, so both
# requests are overridden with 'none'.
if max_features.length() == 0
  message('Disabling CPU feature detection due to unsupported architecture: "' + cpu_family + '"')
  CPU_CONF_BASELINE = 'none'
  CPU_CONF_DISPATCH = 'none'
endif

# Parse the two option strings into lists of feature objects.
#
# `parse_options` maps each option name to its requested string, and
# `parse_result` holds the parsed feature list per option (an empty list unless
# the parser below produces a non-empty result).
parse_options = {
  'cpu-baseline': CPU_CONF_BASELINE,
  'cpu-dispatch': CPU_CONF_DISPATCH
}
parse_result = {
  'cpu-baseline': [],
  'cpu-dispatch': []
}
mod_features = import('features')
foreach opt_name, conf : parse_options
  # Tokenize without regular expressions: commas become spaces, '+' and '-'
  # are padded with spaces so they split into tokens of their own, and
  # everything is upper-cased before splitting on whitespace.
  tokens = conf.replace(',', ' ').replace('+', ' + ').replace('-', ' - ').strip().to_upper().split()
  # Feature objects accumulated so far for this option.
  result = []
  # Names of known features that are not part of `max_features`.
  ignored = []
  # A '+' token switches to appending (the default), a '-' token to removing;
  # the mode stays in effect until the next '+' or '-'.
  append = true
  foreach tok : tokens
    if tok == '+'
      append = true
      continue
    elif tok == '-'
      append = false
      continue
    elif tok == 'NONE'
      # 'NONE' contributes nothing.
      continue
    elif tok == 'NATIVE'
      # The native feature list is computed only once; the `is_variable` guard
      # reuses it for later tokens and for the other option.
      if not is_variable('cpu_native_features')
        compiler_id = meson.get_compiler('c').get_id()
        # Compiler flag that targets the build machine's CPU; every compiler
        # id not listed here falls back to '-march=native'.
        native_flags = {
          'intel': '-xHost',
          'intel-cl': '/QxHost',
          # FIXME: Add support for fcc(-mcpu=a64fx) compiler
        }.get(compiler_id, '-march=native')
        # The test result is indexed as [success flag, dictionary]; the
        # dictionary's 'features' entry lists feature names.
        test_native = mod_features.test(
          max_features, anyfet: true,
          force_args: [native_flags] + '-DDETECT_FEATURES'
        )
        if not test_native[0]
          error('Option "native" doesn\'t support compiler', compiler_id)
        endif
        cpu_native_features = []
        foreach fet_name : test_native[1].get('features')
          cpu_native_features += CPU_FEATURES[fet_name]
        endforeach
      endif
      accumulate = cpu_native_features
    elif tok == 'DETECT'
      # Same caching scheme as 'NATIVE', but the test is forced to use the
      # user's `c_args` instead of a native flag.
      # NOTE(review): unlike 'NATIVE', the success flag `test_detect[0]` is
      # not checked here - confirm this is intentional.
      if not is_variable('cpu_detect_features')
        test_detect = mod_features.test(
          max_features, anyfet: true,
          force_args: ['-DDETECT_FEATURES'] + get_option('c_args')
        )
        cpu_detect_features = []
        foreach fet_name : test_detect[1].get('features')
          cpu_detect_features += CPU_FEATURES[fet_name]
        endforeach
      endif
      accumulate = cpu_detect_features
    elif tok == 'MIN'
      accumulate = min_features
    elif tok == 'MAX'
      accumulate = max_features
    elif tok in CPU_FEATURES
      # An explicitly named feature. It is recorded as ignored and skipped
      # when it is not among the features of the host CPU family.
      tokobj = CPU_FEATURES[tok]
      if tokobj not in max_features
        ignored += tok
        continue
      endif
      accumulate = [tokobj]
    else
      error('Invalid token "'+tok+'" within option --'+opt_name)
    endif
    if append
      # Add every accumulated feature that is not already in the result.
      foreach fet : accumulate
        if fet not in result
          result += fet
        endif
      endforeach
    else
      # Keep only the features of the result that are not being removed.
      filterd = []
      foreach fet : result
        if fet not in accumulate
          filterd += fet
        endif
      endforeach
      result = filterd
    endif # append
  endforeach # tok : tokens
  if ignored.length() > 0
    message(
      'During parsing ' + opt_name +
      ': The following CPU features were ignored due to platform ' +
      'incompatibility or lack of support:\n"' + ' '.join(ignored) + '"'
    )
  endif
  # Store the result only when something was selected; otherwise the empty
  # list from the initial `parse_result` stays in place.
  # NOTE(review): `implicit_c` presumably reduces the list with respect to
  # implied features - confirm against the 'features' module.
  if result.length() > 0
    parse_result += {opt_name: mod_features.implicit_c(result)}
  endif
endforeach # opt_name, conf : parse_options

# Test the requested baseline features and apply their compiler flags and
# #definitions to all C/C++ sources.
#
# The test is run with `anyfet: true`, which enables the maximum set of
# features the platform supports out of the requested ones. For example, if
# the user asked for `-Dcpu-baseline=avx512_skx` and the compiler doesn't
# support it but still supports some of the implied features, the maximum
# supported implied features (e.g., AVX2) are enabled instead.
baseline = {}
if parse_result['cpu-baseline'].length() > 0
  baseline_test = mod_features.test(parse_result['cpu-baseline'], anyfet: true)
  baseline = baseline_test[1]
  # Start from the compiler flags and add one HAVE_<feature> define per
  # entry reported under 'defines'.
  baseline_args = baseline['args']
  foreach baseline_fet : baseline['defines']
    baseline_args += ['-D@0@HAVE_@1@'.format(CPU_CONF_PREFIX, baseline_fet)]
  endforeach
  add_project_arguments(baseline_args, language: ['c', 'cpp'])
endif

# Names of the enabled baseline features, including the implied ones, and
# the matching feature objects.
CPU_BASELINE_NAMES = baseline.get('features', [])
CPU_BASELINE = []
foreach baseline_name : CPU_BASELINE_NAMES
  CPU_BASELINE += [CPU_FEATURES[baseline_name]]
endforeach
# Walk over every initialized feature and disable those that belong neither
# to the requested dispatch features nor to the enabled baseline, so that
# import('features').multi_targets() does not enable them.
foreach fet_name, fet_obj : CPU_FEATURES
  if fet_obj not in parse_result['cpu-dispatch'] and fet_name not in CPU_BASELINE_NAMES
    fet_obj.update(disable: 'Not part of the requested features')
  endif
endforeach

# Collect the names of the dispatch features that are not already covered by
# the baseline and that pass the feature test.
CPU_DISPATCH_NAMES = []
foreach fet_obj : parse_result['cpu-dispatch']
  dispatch_name = fet_obj.get('name')
  # Baseline features are always available, so they are never dispatched;
  # the test is only run for the remaining ones.
  if dispatch_name not in CPU_BASELINE_NAMES
    fet_test = mod_features.test(fet_obj)
    if fet_test[0]
      CPU_DISPATCH_NAMES += [dispatch_name]
    endif
  endif
endforeach
# Generate the main configuration header; see the template 'main_config.h.in'
# for the meaning of each substitution.

# One macro-call line per feature; every line ends with a backslash.
macro_tpl = '@0@_CPU_EXPAND(EXEC_CB(@1@, __VA_ARGS__)) \\'
baseline_calls = []
foreach fet : CPU_BASELINE_NAMES
  baseline_calls += [macro_tpl.format(CPU_CONF_PREFIX, fet)]
endforeach
dispatch_calls = []
foreach fet : CPU_DISPATCH_NAMES
  dispatch_calls += [macro_tpl.format(CPU_CONF_PREFIX, fet)]
endforeach

main_config = {
  'P': CPU_CONF_PREFIX,
  'WITH_CPU_BASELINE': ' '.join(CPU_BASELINE_NAMES),
  'WITH_CPU_BASELINE_N': CPU_BASELINE_NAMES.length(),
  'WITH_CPU_DISPATCH': ' '.join(CPU_DISPATCH_NAMES),
  'WITH_CPU_DISPATCH_N': CPU_DISPATCH_NAMES.length(),
  'WITH_CPU_BASELINE_CALL': '\n'.join(baseline_calls),
  'WITH_CPU_DISPATCH_CALL': '\n'.join(dispatch_calls),
}

configure_file(
  input: 'main_config.h.in',
  output: CPU_CONF_CONFIG,
  configuration: configuration_data(main_config),
)
# Make the generated header reachable from every C/C++ source.
add_project_arguments('-I' + meson.current_build_dir(), language: ['c', 'cpp'])

# Print a summary that contrasts the requested option strings with the
# features that ended up enabled.
enabled_baseline = ' '.join(CPU_BASELINE_NAMES)
enabled_dispatch = ' '.join(CPU_DISPATCH_NAMES)
summary_tpl = '''
CPU Optimization Options
  baseline:
    Requested : @0@
    Enabled   : @1@
  dispatch:
    Requested : @2@
    Enabled   : @3@
'''
message(
  summary_tpl.format(
    CPU_CONF_BASELINE, enabled_baseline,
    CPU_CONF_DISPATCH, enabled_dispatch
  )
)