/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2024 Intel Corporation
*/
#ifndef XE_OA_H
#define XE_OA_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include "igt_list.h"
#include <xe_drm.h>
#define _DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
#define INTEL_XE_DEVICE_MAX_SLICES (8)
#define INTEL_XE_DEVICE_MAX_SUBSLICES (64) /* Maximum on XE_PVC */
#define INTEL_XE_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
/**
 * enum intel_xe_oa_format_name - OA report formats, grouped by the
 * hardware generation that introduced them.
 *
 * Numbering starts at 1 (0 presumably acts as an invalid/unset value —
 * confirm against users of this enum).  Values are stable identifiers;
 * do not reorder or renumber.
 */
enum intel_xe_oa_format_name {
XE_OA_FORMAT_C4_B8 = 1,
/* Gen8+ */
XE_OA_FORMAT_A12,
XE_OA_FORMAT_A12_B8_C8,
XE_OA_FORMAT_A32u40_A4u32_B8_C8,
/* DG2 */
XE_OAR_FORMAT_A32u40_A4u32_B8_C8,
XE_OA_FORMAT_A24u40_A14u32_B8_C8,
/* DG2/MTL OAC */
XE_OAC_FORMAT_A24u64_B8_C8,
XE_OAC_FORMAT_A22u32_R2u32_B8_C8,
/* MTL OAM */
XE_OAM_FORMAT_MPEC8u64_B8_C8,
XE_OAM_FORMAT_MPEC8u32_B8_C8,
/* Xe2+ */
XE_OA_FORMAT_PEC64u64,
XE_OA_FORMAT_PEC64u64_B8_C8,
XE_OA_FORMAT_PEC64u32,
XE_OA_FORMAT_PEC32u64_G1,
XE_OA_FORMAT_PEC32u32_G1,
XE_OA_FORMAT_PEC32u64_G2,
XE_OA_FORMAT_PEC32u32_G2,
XE_OA_FORMAT_PEC36u64_G1_32_G2_4,
XE_OA_FORMAT_PEC36u64_G1_4_G2_32,
/* Number of formats; not a format itself. */
XE_OA_FORMAT_MAX,
};
/**
 * struct intel_xe_perf_devinfo - Static description of the device being
 * profiled: identity, timestamp parameters, frequencies and EU topology.
 * The topology bitmasks at the end are queried with the
 * intel_xe_perf_devinfo_*_available() helpers below.
 */
struct intel_xe_perf_devinfo {
/* Short device code name (e.g. driver-style name). */
char devname[20];
/* Human-readable marketing/pretty name. */
char prettyname[100];
/*
 * Always false for gputop, we don't have the additional
 * snapshots of register values, only the OA reports.
 */
bool query_mode;
bool has_dynamic_configs;
/* The following fields are prepared for equations from the XML files.
 * Their values are built up from the topology fields.
 */
uint32_t devid;
uint32_t graphics_ver;
uint32_t revision;
/**
 * Bit shifting required to put OA report timestamps into
 * timestamp_frequency (some HW generations can shift
 * timestamp values to the right by a number of bits).
 */
int32_t oa_timestamp_shift;
/**
 * On some platforms only part of the timestamp bits are valid
 * (on previous platforms we would get full 32bits, newer
 * platforms can have fewer). It's important to know when
 * correlating the full 36bits timestamps to the OA report
 * timestamps.
 */
uint64_t oa_timestamp_mask;
/* Frequency of the timestamps in Hz */
uint64_t timestamp_frequency;
/* GT frequency range in Hz (min/max). */
uint64_t gt_min_freq;
uint64_t gt_max_freq;
/* Total number of EUs */
uint64_t n_eus;
/* Total number of EUs in a slice */
uint64_t n_eu_slices;
/* Total number of subslices/dualsubslices */
uint64_t n_eu_sub_slices;
/* Number of subslices/dualsubslices in the first half of the
 * slices.
 */
uint64_t n_eu_sub_slices_half_slices;
/* Mask of available subslices/dualsubslices */
uint64_t subslice_mask;
/* Mask of available slices */
uint64_t slice_mask;
/* Number of threads in one EU */
uint64_t eu_threads_count;
/**
 * Maximum number of slices present on this device (can be more than
 * num_slices if some slices are fused).
 */
uint16_t max_slices;
/**
 * Maximum number of subslices per slice present on this device (can be more
 * than the maximum value in the num_subslices[] array if some subslices are
 * fused).
 */
uint16_t max_subslices_per_slice;
/**
 * Stride to access subslice_masks[].
 */
uint16_t subslice_slice_stride;
/**
 * Maximum number of EUs per subslice (can be more than
 * num_eu_per_subslice if some EUs are fused off).
 */
uint16_t max_eu_per_subslice;
/**
 * Strides to access eu_masks[].
 */
uint16_t eu_slice_stride;
uint16_t eu_subslice_stride;
/**
 * A bit mask of the slices available.
 */
uint8_t slice_masks[_DIV_ROUND_UP(INTEL_XE_DEVICE_MAX_SLICES, 8)];
/**
 * An array of bit mask of the subslices available, use subslice_slice_stride
 * to access this array.
 */
uint8_t subslice_masks[INTEL_XE_DEVICE_MAX_SLICES *
_DIV_ROUND_UP(INTEL_XE_DEVICE_MAX_SUBSLICES, 8)];
/**
 * An array of bit mask of EUs available, use eu_slice_stride &
 * eu_subslice_stride to access this array.
 */
uint8_t eu_masks[INTEL_XE_DEVICE_MAX_SLICES *
INTEL_XE_DEVICE_MAX_SUBSLICES *
_DIV_ROUND_UP(INTEL_XE_DEVICE_MAX_EUS_PER_SUBSLICE, 8)];
};
/* Storage type a logical counter's value is kept in (selects which union
 * member of struct intel_xe_perf_logical_counter's callbacks applies).
 */
typedef enum {
INTEL_XE_PERF_LOGICAL_COUNTER_STORAGE_UINT64,
INTEL_XE_PERF_LOGICAL_COUNTER_STORAGE_UINT32,
INTEL_XE_PERF_LOGICAL_COUNTER_STORAGE_DOUBLE,
INTEL_XE_PERF_LOGICAL_COUNTER_STORAGE_FLOAT,
/* Boolean carried in a 32-bit integer. */
INTEL_XE_PERF_LOGICAL_COUNTER_STORAGE_BOOL32,
} intel_xe_perf_logical_counter_storage_t;
/* Semantic class of a logical counter (raw value, duration, event count,
 * throughput, timestamp, ...).
 */
typedef enum {
INTEL_XE_PERF_LOGICAL_COUNTER_TYPE_RAW,
INTEL_XE_PERF_LOGICAL_COUNTER_TYPE_DURATION_RAW,
INTEL_XE_PERF_LOGICAL_COUNTER_TYPE_DURATION_NORM,
INTEL_XE_PERF_LOGICAL_COUNTER_TYPE_EVENT,
INTEL_XE_PERF_LOGICAL_COUNTER_TYPE_THROUGHPUT,
INTEL_XE_PERF_LOGICAL_COUNTER_TYPE_TIMESTAMP,
} intel_xe_perf_logical_counter_type_t;
/* Unit a logical counter's value is expressed in. */
typedef enum {
/* size */
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_BYTES,
/* frequency */
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_HZ,
/* time */
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_NS,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_US,
/* quantities */
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_PIXELS,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_TEXELS,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_THREADS,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_PERCENT,
/* events */
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_MESSAGES,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_NUMBER,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_CYCLES,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_EVENTS,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_UTILIZATION,
/* L3-cache-line-granularity units */
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_EU_SENDS_TO_L3_CACHE_LINES,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_EU_REQUESTS_TO_L3_CACHE_LINES,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_EU_BYTES_PER_L3_CACHE_LINE,
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_GBPS,
/* Number of units; not a unit itself. */
INTEL_XE_PERF_LOGICAL_COUNTER_UNIT_MAX
} intel_xe_perf_logical_counter_unit_t;
/* Hold deltas of raw performance counters. */
/* Hold deltas of raw performance counters.
 *
 * deltas[] is indexed using the *_offset fields of
 * struct intel_xe_perf_metric_set (gpu_time_offset, a_offset, ...).
 */
struct intel_xe_perf_accumulator {
#define INTEL_XE_PERF_MAX_RAW_OA_COUNTERS 128
uint64_t deltas[INTEL_XE_PERF_MAX_RAW_OA_COUNTERS];
};
struct intel_xe_perf;
struct intel_xe_perf_metric_set;
/**
 * struct intel_xe_perf_logical_counter - A human-meaningful counter derived
 * from the raw OA counter deltas of one metric set.
 */
struct intel_xe_perf_logical_counter {
/* Metric set this counter belongs to. */
const struct intel_xe_perf_metric_set *metric_set;
const char *name;
const char *symbol_name;
const char *desc;
const char *group;
/* Returns whether this counter is available on @perf's device. */
bool (*availability)(const struct intel_xe_perf *perf);
intel_xe_perf_logical_counter_storage_t storage;
intel_xe_perf_logical_counter_type_t type;
intel_xe_perf_logical_counter_unit_t unit;
/* Maximum-value callback; which member is valid follows @storage. */
union {
uint64_t (*max_uint64)(const struct intel_xe_perf *perf,
const struct intel_xe_perf_metric_set *metric_set,
uint64_t *deltas);
double (*max_float)(const struct intel_xe_perf *perf,
const struct intel_xe_perf_metric_set *metric_set,
uint64_t *deltas);
};
/* Value-read callback computing the counter from accumulated @deltas;
 * which member is valid follows @storage.
 */
union {
uint64_t (*read_uint64)(const struct intel_xe_perf *perf,
const struct intel_xe_perf_metric_set *metric_set,
uint64_t *deltas);
double (*read_float)(const struct intel_xe_perf *perf,
const struct intel_xe_perf_metric_set *metric_set,
uint64_t *deltas);
};
struct igt_list_head link; /* list from intel_xe_perf_logical_counter_group.counters */
};
/* A single register programming entry: write @val to register @reg. */
struct intel_xe_perf_register_prog {
uint32_t reg;
uint32_t val;
};
/**
 * struct intel_xe_perf_metric_set - A named hardware configuration (mux,
 * boolean and flex register programming) together with the logical
 * counters it produces.
 */
struct intel_xe_perf_metric_set {
const char *name;
const char *symbol_name;
/* GUID identifying the HW config to the kernel. */
const char *hw_config_guid;
/* Array of @n_counters logical counters produced by this set. */
struct intel_xe_perf_logical_counter *counters;
int n_counters;
/* Kernel-assigned metric set / config identifier. */
uint64_t perf_oa_metrics_set;
int perf_oa_format;
/* Size in bytes of one raw OA report for this format. */
int perf_raw_size;
/* For indexing into accumulator->deltas[] ... */
int gpu_time_offset;
int gpu_clock_offset;
int a_offset;
int b_offset;
int c_offset;
int perfcnt_offset;
int pec_offset;
/* Register programming lists applied when the set is activated. */
const struct intel_xe_perf_register_prog *b_counter_regs;
uint32_t n_b_counter_regs;
const struct intel_xe_perf_register_prog *mux_regs;
uint32_t n_mux_regs;
const struct intel_xe_perf_register_prog *flex_regs;
uint32_t n_flex_regs;
/* Link in intel_xe_perf.metric_sets. */
struct igt_list_head link;
};
/* A tree structure with group having subgroups and counters. */
/* A tree structure with group having subgroups and counters. */
struct intel_xe_perf_logical_counter_group {
char *name;
/* List of intel_xe_perf_logical_counter.link entries. */
struct igt_list_head counters;
/* List of child groups (their .link entries). */
struct igt_list_head groups;
struct igt_list_head link; /* link for intel_xe_perf_logical_counter_group.groups */
};
/**
 * struct intel_xe_perf - Top-level handle: the device description plus the
 * tree of counter groups and the list of available metric sets.
 */
struct intel_xe_perf {
const char *name;
/* Root of the logical counter group tree. */
struct intel_xe_perf_logical_counter_group *root_group;
/* List of intel_xe_perf_metric_set.link entries. */
struct igt_list_head metric_sets;
struct intel_xe_perf_devinfo devinfo;
};
/* This is identical to 'struct drm_i915_query_topology_info' at present */
/* This is identical to 'struct drm_i915_query_topology_info' at present.
 *
 * data[] packs the slice, subslice and EU availability bitmasks; the
 * *_offset/*_stride fields locate each mask within data[].
 */
struct intel_xe_topology_info {
uint16_t flags;
uint16_t max_slices;
uint16_t max_subslices;
uint16_t max_eus_per_subslice;
uint16_t subslice_offset;
uint16_t subslice_stride;
uint16_t eu_offset;
uint16_t eu_stride;
/* Flexible array: bitmask data as described above. */
uint8_t data[];
};
/* Framing header preceding each record read from the OA stream
 * (@size presumably includes this header — confirm against the kernel
 * observation uAPI).
 */
struct intel_xe_perf_record_header {
uint32_t type;
uint16_t pad;
uint16_t size;
};
/* Test whether @slice is present in @devinfo's slice availability mask. */
static inline bool
intel_xe_perf_devinfo_slice_available(const struct intel_xe_perf_devinfo *devinfo,
				      int slice)
{
	/* Each byte of slice_masks[] covers 8 slices, LSB first. */
	uint8_t byte = devinfo->slice_masks[slice / 8];
	uint8_t bit = 1U << (slice % 8);

	return (byte & bit) != 0;
}
/* Test whether @subslice of @slice is present in @devinfo's subslice masks. */
static inline bool
intel_xe_perf_devinfo_subslice_available(const struct intel_xe_perf_devinfo *devinfo,
					 int slice, int subslice)
{
	/* subslice_masks[] holds subslice_slice_stride bytes per slice. */
	unsigned int idx = slice * devinfo->subslice_slice_stride + subslice / 8;
	uint8_t bit = 1U << (subslice % 8);

	return (devinfo->subslice_masks[idx] & bit) != 0;
}
/* Test whether @eu of @subslice/@slice is present in @devinfo's EU masks. */
static inline bool
intel_xe_perf_devinfo_eu_available(const struct intel_xe_perf_devinfo *devinfo,
				   int slice, int subslice, int eu)
{
	/* Byte offset of this subslice's EU mask within eu_masks[]. */
	unsigned int base = slice * devinfo->eu_slice_stride +
			    subslice * devinfo->eu_subslice_stride;
	uint8_t bit = 1U << (eu % 8);

	return (devinfo->eu_masks[base + eu / 8] & bit) != 0;
}
/* Query the GT topology for @device_id via @drm_fd; *topology_size receives
 * the size of the returned buffer.
 * NOTE(review): return allocation/ownership inferred from the signature —
 * presumably heap-allocated, caller frees; confirm in the implementation.
 */
struct intel_xe_topology_info *
xe_fill_topology_info(int drm_fd, uint32_t device_id, uint32_t *topology_size);
/* Build an intel_xe_perf for the device behind @drm_fd, GT index @gt. */
struct intel_xe_perf *intel_xe_perf_for_fd(int drm_fd, int gt);
/* Build an intel_xe_perf from explicit device parameters instead of an fd
 * (useful for offline processing).
 */
struct intel_xe_perf *intel_xe_perf_for_devinfo(uint32_t device_id,
uint32_t revision,
uint64_t timestamp_frequency,
uint64_t gt_min_freq,
uint64_t gt_max_freq,
const struct intel_xe_topology_info *topology);
/* Release an intel_xe_perf obtained from one of the constructors above. */
void intel_xe_perf_free(struct intel_xe_perf *perf);
/* Register @counter under the counter group named @group within @perf. */
void intel_xe_perf_add_logical_counter(struct intel_xe_perf *perf,
struct intel_xe_perf_logical_counter *counter,
const char *group);
/* Append @metric_set to @perf's metric_sets list. */
void intel_xe_perf_add_metric_set(struct intel_xe_perf *perf,
struct intel_xe_perf_metric_set *metric_set);
/* Load/register the HW configs of @perf's metric sets with the kernel via
 * @drm_fd.
 */
void intel_xe_perf_load_perf_configs(struct intel_xe_perf *perf, int drm_fd);
/* Properties passed when opening an OA stream.  properties_ptr is a user
 * pointer to the property data covering @num_properties entries —
 * presumably u64 key/value pairs; confirm against intel_xe_oa_prop_to_ext().
 */
struct intel_xe_oa_open_prop {
uint32_t num_properties;
uint32_t reserved;
uint64_t properties_ptr;
};
/* Accumulate into @acc the raw counter deltas between OA reports @record0
 * and @record1, interpreted per @metric_set's format/offsets.
 */
void intel_xe_perf_accumulate_reports(struct intel_xe_perf_accumulator *acc,
const struct intel_xe_perf *perf,
const struct intel_xe_perf_metric_set *metric_set,
const struct intel_xe_perf_record_header *record0,
const struct intel_xe_perf_record_header *record1);
/* Extract the (shift/mask-corrected) timestamp from an OA record. */
uint64_t intel_xe_perf_read_record_timestamp(const struct intel_xe_perf *perf,
const struct intel_xe_perf_metric_set *metric_set,
const struct intel_xe_perf_record_header *record);
/* Extract the raw, uncorrected timestamp from an OA record. */
uint64_t intel_xe_perf_read_record_timestamp_raw(const struct intel_xe_perf *perf,
const struct intel_xe_perf_metric_set *metric_set,
const struct intel_xe_perf_record_header *record);
/* Return a human-readable string describing why @record was generated. */
const char *intel_xe_perf_read_report_reason(const struct intel_xe_perf *perf,
const struct intel_xe_perf_record_header *record);
/* Convert an intel_xe_oa_open_prop property list into the kernel's
 * drm_xe_ext_set_property extension chain @extn.
 */
void intel_xe_oa_prop_to_ext(struct intel_xe_oa_open_prop *properties,
struct drm_xe_ext_set_property *extn);
/* Issue an xe observation ioctl @op with @arg; returns the ioctl result. */
int intel_xe_perf_ioctl(int fd, enum drm_xe_observation_op op, void *arg);
/* As intel_xe_perf_ioctl() but asserts the call fails with errno @err. */
void intel_xe_perf_ioctl_err(int fd, enum drm_xe_observation_op op, void *arg, int err);
#ifdef __cplusplus
};
#endif
#endif /* XE_OA_H */