1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
|
# The CPU Dispatcher implementation.
#
# This script handles the CPU dispatcher and requires the Meson module
# 'features'.
#
# The CPU dispatcher script is responsible for three main tasks:
#
# 1. Defining the enabled baseline and dispatched features by parsing build
# options or compiler arguments, including detection of native flags.
#
# 2. Specifying the baseline arguments and definitions across all sources.
#
# 3. Generating the main configuration file, which contains information about
# the enabled features, along with a collection of C macros necessary for
# runtime dispatching. For more details, see the template file
# `main_config.h.in`.
#
# This script exposes the following variables:
#
# - `CPU_BASELINE`: A set of CPU feature objects obtained from
# `mod_features.new()`, representing the minimum CPU features
# specified by the build option `-Dcpu-baseline`.
#
# - `CPU_BASELINE_NAMES`: A set of enabled CPU feature names, representing the
# minimum CPU features specified by the build option
# `-Dcpu-baseline`.
#
# - `CPU_DISPATCH_NAMES`: A set of enabled CPU feature names, representing the
# additional CPU features that can be dispatched at
# runtime, specified by the build option
# `-Dcpu-dispatch`.
#
# - `CPU_FEATURES`: A dictionary containing all supported CPU feature objects.
#
# Additionally, this script exposes a set of variables that represent each
# supported feature to be used within the Meson function
# `mod_features.multi_targets()`.
# Prefix used by all macros and features definitions
CPU_CONF_PREFIX = 'NPY_'
# Name of the generated main configuration header
CPU_CONF_CONFIG = 'npy_cpu_dispatch_config.h'

# Resolve the requested baseline/dispatch option strings.
#
# Sets:
#   CPU_CONF_BASELINE - raw string of the requested baseline features
#   CPU_CONF_DISPATCH - raw string of the requested dispatched features
if get_option('disable-optimization')
  # Optimizations are switched off entirely: define the marker macro for all
  # C/C++ sources and request no features at all.
  add_project_arguments('-D' + CPU_CONF_PREFIX + 'DISABLE_OPTIMIZATION', language: ['c', 'cpp'])
  CPU_CONF_BASELINE = 'none'
  CPU_CONF_DISPATCH = 'none'
else
  # Look for a global architecture flag inside the user supplied c_args. When
  # one is found, the token "detect" is appended to the baseline so that the
  # features implied by that flag become part of the baseline.
  baseline_detect = false
  c_args = get_option('c_args')
  foreach arg : c_args
    # Match case-insensitively: Intel documents the flag as '-xHost', which a
    # case-sensitive comparison against '-xhost' would silently miss.
    arg_lower = arg.to_lower()
    foreach carch : ['-march', '-mcpu', '-xhost', '/QxHost']
      if arg_lower.contains(carch.to_lower())
        message('Appending option "detect" to "cpu-baseline" due to detecting global architecture c_arg "' + arg + '"')
        baseline_detect = true
        break
      endif
    endforeach
    # One matching argument is enough; stop scanning the remaining c_args.
    if baseline_detect
      break
    endif
  endforeach
  # The minimal set of required CPU features.
  CPU_CONF_BASELINE = get_option('cpu-baseline')
  if baseline_detect
    CPU_CONF_BASELINE += '+detect'
  endif
  # The set of additional CPU features to be dispatched at runtime.
  CPU_CONF_DISPATCH = get_option('cpu-dispatch')
endif
# Initialize the CPU features of every supported architecture.
#
# For example, the 'x86' subdirectory exports the X86 feature objects ('SSE',
# 'AVX', etc.) plus a dictionary "X86_FEATURES" which maps each feature name
# to its object. The other subdirectories are used the same way below.
subdir('x86')
subdir('ppc64')
subdir('s390x')
subdir('arm')
subdir('riscv64')

# Merge the per-architecture dictionaries into a single lookup table.
CPU_FEATURES = {}
foreach arch_features : [
  ARM_FEATURES,
  X86_FEATURES,
  PPC64_FEATURES,
  S390X_FEATURES,
  RV64_FEATURES,
]
  CPU_FEATURES += arch_features
endforeach
# Prepare the per-family feature sets needed to parse the requested baseline
# (CPU_CONF_BASELINE) and dispatch (CPU_CONF_DISPATCH) features.
cpu_family = host_machine.cpu_family()

# Feature set selected by the build option token 'min'.
min_features_by_family = {
  'x86': [SSE2],
  'x86_64': [SSE3],
  'ppc64': [],
  's390x': [],
  'arm': [],
  'aarch64': [ASIMD],
  'riscv64': [],
  'wasm32': [],
}
min_features = min_features_by_family.get(cpu_family, [])
# Little-endian ppc64 overrides the table entry above.
if cpu_family == 'ppc64' and host_machine.endian() == 'little'
  min_features = [VSX2]
endif

# Feature set selected by the build option tokens 'max/native/detect'.
max_features_by_family = {
  'x86': X86_FEATURES,
  'x86_64': X86_FEATURES,
  'ppc64': PPC64_FEATURES,
  's390x': S390X_FEATURES,
  'arm': ARM_FEATURES,
  'aarch64': ARM_FEATURES,
  'riscv64': RV64_FEATURES,
  'wasm32': {},
}
max_features_dict = max_features_by_family.get(cpu_family, {})

# Flatten the name -> object dictionary into a plain list of feature objects.
max_features = []
foreach family_fet_name, family_fet_obj : max_features_dict
  max_features += [family_fet_obj]
endforeach

# Without any known feature for this family there is nothing to parse or
# test, so both requests are forced to 'none'.
if max_features.length() == 0
  message('Disabling CPU feature detection due to unsupported architecture: "' + cpu_family + '"')
  CPU_CONF_BASELINE = 'none'
  CPU_CONF_DISPATCH = 'none'
endif
# Raw option strings to be parsed, keyed by option name.
parse_options = {
  'cpu-baseline': CPU_CONF_BASELINE,
  'cpu-dispatch': CPU_CONF_DISPATCH
}
# Parsed feature objects per option; an entry stays empty when the option
# selects no feature.
parse_result = {
  'cpu-baseline': [],
  'cpu-dispatch': []
}
mod_features = import('features')

foreach opt_name, conf : parse_options
  # There is no regex support, so surround the operators with spaces and turn
  # commas into spaces before splitting on whitespace.
  spaced_conf = conf.replace(',', ' ').replace('+', ' + ').replace('-', ' - ')
  opt_tokens = spaced_conf.strip().to_upper().split()

  selected = []
  skipped = []
  # Tokens are added to the selection until a '-' operator says otherwise.
  adding = true
  foreach token : opt_tokens
    # Operators only switch the mode applied to the tokens that follow.
    if token == '+' or token == '-'
      adding = (token == '+')
      continue
    endif
    # 'NONE' contributes nothing to the selection.
    if token == 'NONE'
      continue
    endif

    # Translate the token into the list of feature objects it stands for.
    if token == 'NATIVE'
      # The native test runs at most once; later occurrences reuse the result.
      if not is_variable('cpu_native_features')
        compiler_id = meson.get_compiler('c').get_id()
        native_flags = {
          'intel': '-xHost',
          'intel-cl': '/QxHost',
          # FIXME: Add support for fcc(-mcpu=a64fx) compiler
        }.get(compiler_id, '-march=native')
        test_native = mod_features.test(
          max_features, anyfet: true,
          force_args: [native_flags, '-DDETECT_FEATURES']
        )
        if not test_native[0]
          error('Option "native" doesn\'t support compiler', compiler_id)
        endif
        cpu_native_features = []
        foreach native_name : test_native[1].get('features')
          cpu_native_features += [CPU_FEATURES[native_name]]
        endforeach
      endif
      operand = cpu_native_features
    elif token == 'DETECT'
      # Same caching scheme as 'NATIVE', but driven by the user's c_args.
      if not is_variable('cpu_detect_features')
        test_detect = mod_features.test(
          max_features, anyfet: true,
          force_args: ['-DDETECT_FEATURES'] + get_option('c_args')
        )
        cpu_detect_features = []
        foreach detected_name : test_detect[1].get('features')
          cpu_detect_features += [CPU_FEATURES[detected_name]]
        endforeach
      endif
      operand = cpu_detect_features
    elif token == 'MIN'
      operand = min_features
    elif token == 'MAX'
      operand = max_features
    elif token in CPU_FEATURES
      named_fet = CPU_FEATURES[token]
      # A known feature that does not belong to the host CPU family is
      # reported later instead of being treated as an error.
      if named_fet not in max_features
        skipped += [token]
        continue
      endif
      operand = [named_fet]
    else
      error('Invalid token "'+token+'" within option --'+opt_name)
    endif

    if not adding
      # Removal: keep only the selected features that are not in the operand.
      remaining = []
      foreach fet : selected
        if fet not in operand
          remaining += [fet]
        endif
      endforeach
      selected = remaining
    else
      # Addition: append the operand's features, skipping duplicates.
      foreach fet : operand
        if fet not in selected
          selected += [fet]
        endif
      endforeach
    endif # adding
  endforeach # token : opt_tokens

  if skipped.length() > 0
    message(
      'During parsing ' + opt_name +
      ': The following CPU features were ignored due to platform ' +
      'incompatibility or lack of support:\n"' + ' '.join(skipped) + '"'
    )
  endif
  # Only a non-empty selection replaces the default empty entry; it is passed
  # through mod_features.implicit_c() first.
  if selected.length() > 0
    parse_result += {opt_name: mod_features.implicit_c(selected)}
  endif
endforeach # opt_name, conf : parse_options
# Test the baseline features and apply their flags and #definitions across
# all C/C++ sources.
#
# Note that this test enables the maximum features supported by the platform
# with respect to the requested ones.
#
# For example, if the user requested `avx512_skx` as baseline and the compiler
# doesn't support it but still supports some of its implied features, then the
# maximum supported implied features (e.g., AVX2) are enabled instead. This is
# what passing `anyfet: true` to the test function achieves.
if parse_result['cpu-baseline'].length() == 0
  # Nothing requested: no flags, no definitions.
  baseline = {}
else
  baseline = mod_features.test(parse_result['cpu-baseline'], anyfet: true)[1]
  # Start from the compiler arguments reported by the test, then add one
  # `<prefix>HAVE_<name>` definition per reported define.
  baseline_args = baseline['args']
  foreach baseline_def : baseline['defines']
    baseline_args += ['-D' + CPU_CONF_PREFIX + 'HAVE_' + baseline_def]
  endforeach
  add_project_arguments(baseline_args, language: ['c', 'cpp'])
endif

# The names of the baseline features, as reported by the test above.
CPU_BASELINE_NAMES = baseline.get('features', [])
# The matching feature objects, in the same order as the names.
CPU_BASELINE = []
foreach baseline_name : CPU_BASELINE_NAMES
  CPU_BASELINE += [CPU_FEATURES[baseline_name]]
endforeach
# Walk through every initialized feature and disable those that belong neither
# to the requested dispatch features nor to the enabled baseline, so that
# `mod_features.multi_targets()` does not enable them.
foreach fet_name, fet_obj : CPU_FEATURES
  if fet_obj not in parse_result['cpu-dispatch'] and fet_name not in CPU_BASELINE_NAMES
    fet_obj.update(disable: 'Not part of the requested features')
  endif
endforeach

# Collect the names of the dispatched features that pass the feature test.
CPU_DISPATCH_NAMES = []
foreach dispatch_obj : parse_result['cpu-dispatch']
  dispatch_name = dispatch_obj.get('name')
  # Baseline features are skipped without being tested.
  if dispatch_name not in CPU_BASELINE_NAMES
    if mod_features.test(dispatch_obj)[0]
      CPU_DISPATCH_NAMES += [dispatch_name]
    endif
  endif
endforeach
# Generate the main configuration header; see 'main_config.h.in' for more
# clarification.

# One macro line per feature; every line ends with a backslash.
macro_tpl = '@0@_CPU_EXPAND(EXEC_CB(@1@, __VA_ARGS__)) \\'

baseline_calls = []
foreach baseline_call_fet : CPU_BASELINE_NAMES
  baseline_calls += [macro_tpl.format(CPU_CONF_PREFIX, baseline_call_fet)]
endforeach

dispatch_calls = []
foreach dispatch_call_fet : CPU_DISPATCH_NAMES
  dispatch_calls += [macro_tpl.format(CPU_CONF_PREFIX, dispatch_call_fet)]
endforeach

# Values handed to the template through configuration_data().
main_config = {
  'P': CPU_CONF_PREFIX,
  'WITH_CPU_BASELINE': ' '.join(CPU_BASELINE_NAMES),
  'WITH_CPU_BASELINE_N': CPU_BASELINE_NAMES.length(),
  'WITH_CPU_DISPATCH': ' '.join(CPU_DISPATCH_NAMES),
  'WITH_CPU_DISPATCH_N': CPU_DISPATCH_NAMES.length(),
  'WITH_CPU_BASELINE_CALL': '\n'.join(baseline_calls),
  'WITH_CPU_DISPATCH_CALL': '\n'.join(dispatch_calls),
}

configure_file(
  input : 'main_config.h.in',
  output : CPU_CONF_CONFIG,
  configuration : configuration_data(main_config)
)
# Add the current build directory to the include path of all C/C++ sources.
add_project_arguments('-I' + meson.current_build_dir(), language: ['c', 'cpp'])

# Print a summary of the requested and enabled features.
summary_tpl = '''
CPU Optimization Options
baseline:
Requested : @0@
Enabled : @1@
dispatch:
Requested : @2@
Enabled : @3@
'''
enabled_baseline = ' '.join(CPU_BASELINE_NAMES)
enabled_dispatch = ' '.join(CPU_DISPATCH_NAMES)
message(
  summary_tpl.format(
    CPU_CONF_BASELINE, enabled_baseline,
    CPU_CONF_DISPATCH, enabled_dispatch
  )
)
|