// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#ifndef __INTEL_ALLOCATOR_H__
#define __INTEL_ALLOCATOR_H__
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <stdatomic.h>
#include "i915/gem_submission.h"
#include "intel_reg.h"
/**
* SECTION:intel_allocator
* @short_description: igt implementation of allocator
* @title: Intel allocator
* @include: intel_allocator.h
*
* # Introduction
*
 * With the era of discrete cards we were required to adapt IGT to handle
 * addresses in userspace only (softpin, without support for relocations).
 * Writing an allocator for a single purpose would be relatively easy,
 * but supporting different tests with different requirements becomes a
 * quite complicated task where a couple of scenarios may not be covered yet.
*
* # Assumptions
*
 * - The allocator has to work in a multiprocess / multithread environment.
 * - The allocator backend (algorithm) should be pluggable. Currently we
 *   support SIMPLE (borrowed from the Mesa allocator), RELOC (pseudo
 *   allocator which returns incremented addresses without checking for
 *   overlaps) and RANDOM (pseudo allocator which randomizes addresses
 *   without checking for overlaps).
 * - It has to integrate with intel-bb (our simpler libdrm replacement used
 *   in a couple of tests).
*
* # Implementation
*
* ## Single process (allows multiple threads)
*
 * For a single process we don't need to create a dedicated entity (a kind
 * of arbiter) to resolve allocations. Simple locking over the allocator
 * data structure is enough. A basic usage example would be:
*
 * |[<!-- language="c" -->
 *	struct object {
 *		uint32_t handle;
 *		uint64_t offset;
 *		uint64_t size;
 *	};
 *
 *	struct object obj1, obj2;
 *	uint64_t ahnd, startp, endp, size = 4096, align = 1 << 13;
 *	int fd = -1;
 *
 *	fd = drm_open_driver(DRIVER_INTEL);
 *	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 *
 *	obj1.handle = gem_create(fd, 4096);
 *	obj2.handle = gem_create(fd, 4096);
 *
 *	// Reserve a hole for an object at a given address.
 *	// In this example the first possible address.
 *	intel_allocator_get_address_range(ahnd, &startp, &endp);
 *	obj1.offset = startp;
 *	igt_assert(intel_allocator_reserve(ahnd, obj1.handle, size, startp));
 *
 *	// Get the most suitable offset for the object. Preferred way.
 *	obj2.offset = intel_allocator_alloc(ahnd, obj2.handle, size, align);
 *
 *	...
 *
 *	// Reserved addresses can only be freed by unreserve.
 *	intel_allocator_unreserve(ahnd, obj1.handle, size, obj1.offset);
 *	intel_allocator_free(ahnd, obj2.handle);
 *
 *	gem_close(fd, obj1.handle);
 *	gem_close(fd, obj2.handle);
 * ]|
*
 * Description:
 * - ahnd is the allocator handle (the vm space handled by it)
 * - we call get_address_range() to get the start/end range provided by the
 *   allocator (we haven't specified a range in open, so the allocator code
 *   will assume some safe address range - we don't want to exercise
 *   potential HW bugs on the last page)
 * - the alloc() / free() pair just gets an address for a gem object as
 *   proposed by the allocator
 * - the reserve() / unreserve() pair gives us full control of acquiring and
 *   returning the range we're interested in (see the sketch of the related
 *   query helpers below)
*
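 * For completeness, below is a minimal, purely illustrative sketch of the
 * query helpers declared further down in this header
 * (intel_allocator_is_allocated() and
 * intel_allocator_reserve_if_not_allocated()); it reuses the obj1 / obj2
 * objects from the example above:
 *
 * |[<!-- language="c" -->
 *	bool was_allocated;
 *
 *	// Check whether obj2 still occupies its offset in this vm.
 *	if (intel_allocator_is_allocated(ahnd, obj2.handle, size, obj2.offset))
 *		igt_debug("obj2 is still allocated at %llx\n",
 *			  (long long) obj2.offset);
 *
 *	// Reserve the range only when nothing is allocated there already;
 *	// was_allocated tells us which case we hit.
 *	intel_allocator_reserve_if_not_allocated(ahnd, obj1.handle, size,
 *						 obj1.offset, &was_allocated);
 * ]|
 *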
* ## Multiple processes
*
 * When a process forks and its child uses the same fd, the vm address space
 * is also the same. Some coordination - in this case interprocess
 * communication - is required to assign proper addresses to gem objects and
 * avoid collisions. An additional thread is spawned for such a case to cover
 * the children's needs. It uses a communication channel to receive requests,
 * perform the action (alloc, free, ...) and send a response to the
 * requesting process. Currently a SYSV IPC message queue was chosen for
 * this, but it can be replaced by another mechanism. Allocation techniques
 * are the same as for a single process; we just need to wrap such code with:
 *
 * |[<!-- language="c" -->
 *	intel_allocator_multiprocess_start();
 *
 *	... allocation code (open, close, alloc, free, ...)
 *
 *	intel_allocator_multiprocess_stop();
 * ]|
*
 * Calling start() spawns an additional allocator thread ready to handle
 * incoming allocation requests (open / close are also requests in that
 * case).
 *
 * Calling stop() requests the allocator thread to stop, unblocking all
 * pending children (if any).
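 *
 * A minimal sketch of the multiprocess flow (assuming fd is an already
 * opened DRM fd and using the igt_fork() / igt_waitchildren() helpers from
 * igt_core.h); each child opens and closes its own allocator handle for the
 * shared vm:
 *
 * |[<!-- language="c" -->
 *	intel_allocator_multiprocess_start();
 *
 *	igt_fork(child, 8) {
 *		uint32_t handle = gem_create(fd, 4096);
 *		uint64_t ahnd, offset;
 *
 *		// Requests travel over the message queue to the allocator thread.
 *		ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 *		offset = intel_allocator_alloc(ahnd, handle, 4096, 4096);
 *
 *		... use the offset ...
 *
 *		intel_allocator_free(ahnd, handle);
 *		intel_allocator_close(ahnd);
 *		gem_close(fd, handle);
 *	}
 *	igt_waitchildren();
 *
 *	intel_allocator_multiprocess_stop();
 * ]|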
*/
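
/*
 * Allocation strategy: controls whether the allocator hands out offsets
 * starting from the bottom (LOW_TO_HIGH) or the top (HIGH_TO_LOW) of the
 * available range. ALLOC_STRATEGY_NONE expresses no preference and lets the
 * allocator use its default.
 */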
enum allocator_strategy {
	ALLOC_STRATEGY_NONE,
	ALLOC_STRATEGY_LOW_TO_HIGH,
	ALLOC_STRATEGY_HIGH_TO_LOW
};
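
/*
 * Backend interface. Each allocator implementation (SIMPLE, RELOC, ...)
 * fills in these entry points; test code normally goes through the
 * intel_allocator_*() wrappers declared below rather than calling them
 * directly.
 */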
struct intel_allocator {
	int fd;
	uint8_t type;
	enum allocator_strategy strategy;
	uint64_t default_alignment;
	_Atomic(int32_t) refcount;
	pthread_mutex_t mutex;

	/* allocator's private structure */
	void *priv;

	void (*get_address_range)(struct intel_allocator *ial,
				  uint64_t *startp, uint64_t *endp);
	uint64_t (*alloc)(struct intel_allocator *ial, uint32_t handle,
			  uint64_t size, uint64_t alignment, uint8_t pat_index,
			  enum allocator_strategy strategy);
	bool (*is_allocated)(struct intel_allocator *ial, uint32_t handle,
			     uint64_t size, uint64_t offset);
	bool (*reserve)(struct intel_allocator *ial,
			uint32_t handle, uint64_t start, uint64_t end);
	bool (*unreserve)(struct intel_allocator *ial,
			  uint32_t handle, uint64_t start, uint64_t end);
	bool (*is_reserved)(struct intel_allocator *ial,
			    uint64_t start, uint64_t end);
	bool (*free)(struct intel_allocator *ial, uint32_t handle);
	void (*destroy)(struct intel_allocator *ial);
	bool (*is_empty)(struct intel_allocator *ial);
	void (*print)(struct intel_allocator *ial, bool full);
};
void intel_allocator_init(void);
void __intel_allocator_multiprocess_prepare(void);
void __intel_allocator_multiprocess_start(void);
void intel_allocator_multiprocess_start(void);
void intel_allocator_multiprocess_stop(void);

uint64_t intel_allocator_open(int fd, uint32_t ctx, uint8_t allocator_type);
uint64_t intel_allocator_open_full(int fd, uint32_t ctx,
				   uint64_t start, uint64_t end,
				   uint8_t allocator_type,
				   enum allocator_strategy strategy,
				   uint64_t default_alignment);
uint64_t intel_allocator_open_vm(int fd, uint32_t vm, uint8_t allocator_type);
uint64_t intel_allocator_open_vm_full(int fd, uint32_t vm,
				      uint64_t start, uint64_t end,
				      uint8_t allocator_type,
				      enum allocator_strategy strategy,
				      uint64_t default_alignment);
bool intel_allocator_close(uint64_t allocator_handle);
void intel_allocator_get_address_range(uint64_t allocator_handle,
				       uint64_t *startp, uint64_t *endp);
uint64_t __intel_allocator_alloc(uint64_t allocator_handle, uint32_t handle,
				 uint64_t size, uint64_t alignment, uint8_t pat_index,
				 enum allocator_strategy strategy);
uint64_t intel_allocator_alloc(uint64_t allocator_handle, uint32_t handle,
			       uint64_t size, uint64_t alignment);
uint64_t intel_allocator_alloc_with_strategy(uint64_t allocator_handle,
					     uint32_t handle,
					     uint64_t size, uint64_t alignment,
					     enum allocator_strategy strategy);
bool intel_allocator_free(uint64_t allocator_handle, uint32_t handle);
bool intel_allocator_is_allocated(uint64_t allocator_handle, uint32_t handle,
				  uint64_t size, uint64_t offset);
bool intel_allocator_reserve(uint64_t allocator_handle, uint32_t handle,
			     uint64_t size, uint64_t offset);
bool intel_allocator_unreserve(uint64_t allocator_handle, uint32_t handle,
			       uint64_t size, uint64_t offset);
bool intel_allocator_is_reserved(uint64_t allocator_handle,
				 uint64_t size, uint64_t offset);
bool intel_allocator_reserve_if_not_allocated(uint64_t allocator_handle,
					      uint32_t handle,
					      uint64_t size, uint64_t offset,
					      bool *is_allocatedp);
void intel_allocator_print(uint64_t allocator_handle);
void intel_allocator_bind(uint64_t allocator_handle,
			  uint32_t sync_in, uint32_t sync_out);
#define ALLOC_INVALID_ADDRESS (-1ull)
#define INTEL_ALLOCATOR_NONE 0
#define INTEL_ALLOCATOR_RELOC 1
#define INTEL_ALLOCATOR_SIMPLE 2
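
/*
 * GEN8+ GPU virtual addresses are 48 bits wide and have to be passed to the
 * kernel in canonical form, i.e. with bits 63:48 being a copy of bit 47.
 * CANONICAL() sign-extends an offset into that form, DECANONICAL() masks it
 * back down to the 48-bit address.
 */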
#define GEN8_GTT_ADDRESS_WIDTH 48
static inline uint64_t CANONICAL(uint64_t offset)
{
	return sign_extend64(offset, GEN8_GTT_ADDRESS_WIDTH - 1);
}
#define DECANONICAL(offset) (offset & ((1ull << GEN8_GTT_ADDRESS_WIDTH) - 1))
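
/*
 * Convenience wrappers: the get_*_ahnd() helpers return 0 when the kernel
 * still supports relocations (callers then fall back to relocation mode),
 * otherwise they open an allocator of the requested flavour for the given
 * context. put_ahnd() closes the handle again, treating 0 as "nothing to
 * close".
 */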
static inline uint64_t get_simple_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open(fd, ctx, INTEL_ALLOCATOR_SIMPLE);
}

static inline uint64_t get_simple_l2h_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
							 INTEL_ALLOCATOR_SIMPLE,
							 ALLOC_STRATEGY_LOW_TO_HIGH,
							 0);
}

static inline uint64_t get_simple_h2l_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
							 INTEL_ALLOCATOR_SIMPLE,
							 ALLOC_STRATEGY_HIGH_TO_LOW,
							 0);
}

static inline uint64_t get_reloc_ahnd(int fd, uint32_t ctx)
{
	bool do_relocs = gem_has_relocations(fd);

	return do_relocs ? 0 : intel_allocator_open(fd, ctx, INTEL_ALLOCATOR_RELOC);
}

static inline bool put_ahnd(uint64_t ahnd)
{
	return !ahnd || intel_allocator_close(ahnd);
}
uint64_t get_offset_pat_index(uint64_t ahnd, uint32_t handle, uint64_t size,
			      uint64_t alignment, uint8_t pat_index);
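
/*
 * get_offset() / put_offset() mirror allocator alloc / free but quietly do
 * nothing (returning 0) when no allocator handle is in use, i.e. in
 * relocation mode.
 */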
static inline uint64_t get_offset(uint64_t ahnd, uint32_t handle,
				  uint64_t size, uint64_t alignment)
{
	if (!ahnd)
		return 0;

	return intel_allocator_alloc(ahnd, handle, size, alignment);
}

static inline bool put_offset(uint64_t ahnd, uint32_t handle)
{
	if (!ahnd)
		return 0;

	return intel_allocator_free(ahnd, handle);
}
#endif