1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
|
/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include "top.hpp"
#include "device/device.hpp"
#include "device/devhcmessages.hpp"
#include <cstddef>
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "device/devurilocator.hpp"
#endif
#endif
namespace amd {
/** \file
* \brief Support for invoking host services from the device.
*
* A hostcall is a fixed-size request generated by a kernel running
* on the device, for some predefined service provided by the
* host. The life-cycle of a hostcall is as follows:
*
* 1. A workitem in some kernel dispatch submits a request as a
* "packet" in a "hostcall buffer". The workitem blocks until it
* receives a response from the host.
*
* 2. A host thread called the "hostcall listener" notices the packet
* and invokes the desired service on the host.
*
* 3. When the service completes, the listener copies the response
* into the request packet. This unblocks the workitem, and the
* hostcall is said to be completed.
*
* The hostcall listeners and buffers are managed by the VDI
* runtime. The typical flow is as follows:
*
* - Create and launch one or more hostcall listeners.
*
* - Create and initialize a distinct hostcall buffer for each
* command queue in hardware (e.g., an hsa_queue_t on ROCm).
*
* - Register this buffer with the appropriate listener.
*
* - When a buffer is no longer used, deregister and then free
* it. This usually happens when the corresponding hardware queue
* is freed.
*
* - Destroy the listener(s) when they are no longer required. This must be
* done before exiting the application, so that the listener
* threads can join() correctly.
*
* A single listener is sufficient to correctly handle all hostcall
* buffers created in the application. The client may also launch
* multiple listeners, as long as the same hostcall buffer is not
* registered with multiple listeners.
*/
/** \brief Determine the size of the hostcall buffer to be allocated.
*
* \param num_packets Number of packets to be supported.
* \return Required size, including any internal padding required for
* the packets and their headers.
* NOTE(review): the unit is presumably bytes; the definition is not
* visible in this header, so confirm against the implementation.
*/
size_t getHostcallBufferSize(uint32_t num_packets);
/** \brief Query the alignment that a hostcall buffer must satisfy.
 *
 * \return Required alignment of a hostcall buffer.
 * NOTE(review): presumably expressed in bytes; the definition is not
 * visible in this header, so confirm against the implementation.
 */
uint32_t getHostcallBufferAlignment();
/** \brief Enable hostcalls on a caller-provided buffer.
*
* NOTE(review): the definition is not visible in this header; the
* descriptions below are inferred from the signature and must be confirmed
* against the implementation.
*
* \param dev Device passed along with the buffer.
* \param buffer Memory to be used as the hostcall buffer; presumably sized
* with getHostcallBufferSize() and aligned according to
* getHostcallBufferAlignment() -- confirm.
* \param numPackets Number of packets; presumably the same count that was
* used to size the buffer -- confirm.
* \return Presumably \c true on success and \c false otherwise -- confirm.
*/
bool enableHostcalls(const amd::Device& dev, void* buffer, uint32_t numPackets);
/** \brief Disable hostcalls on a buffer.
*
* \param buffer Presumably a buffer that was previously passed to
* enableHostcalls(); the definition is not visible in this header, so
* confirm against the implementation.
*/
void disableHostcalls(void* buffer);
/** \brief Values of a hostcall signal.
 *
 * NOTE(review): the code that reads and writes these values is not visible
 * in this header; presumably they are the states of the doorbell signal of
 * a HostcallBuffer -- confirm against the implementation.
 */
enum SignalValue {
  SIGNAL_DONE = 0,
  SIGNAL_INIT = 1,
};
/** \brief Data carried by one hostcall packet.
 *
 * Laid out as 64 slots, one per workitem of the wave, where every slot
 * consists of eight 64-bit words. Slot \c i holds valid data only when
 * bit \c i of PacketHeader::activemask_ is set.
 */
struct Payload {
  uint64_t slots[64][8];
};
/** \brief Header describing a single hostcall packet.
 *
 * The type is required to be standard-layout (checked right below the
 * definition) so that the packet can be used from other languages.
 */
struct PacketHeader {
  /** Link to the next packet of an intrusive stack, stored as a tagged pointer. */
  uint64_t next_;

  /** One bit per payload slot; a set bit marks a slot holding valid data. */
  uint64_t activemask_;

  /** Identifier of the service that the wave is requesting. */
  uint32_t service_;

  /** Control bits.
   *  \li Bit 0: \c READY flag, indicating a packet that awaits a host response.
   */
  std::atomic<uint32_t> control_;
};

static_assert(std::is_standard_layout<PacketHeader>::value,
              "the hostcall packet must be useable from other languages");
/** \brief Offsets of the fields packed into the packet control field
 *  (PacketHeader::control_).
 */
enum ControlOffset {
  /** Offset of the \c READY flag. */
  CONTROL_OFFSET_READY_FLAG = 0,
  /** Offset of the reserved field that follows the \c READY flag. */
  CONTROL_OFFSET_RESERVED0 = 1,
};
/** \brief Widths of the fields packed into the packet control field
 *  (PacketHeader::control_).
 */
enum ControlWidth {
  /** Width of the \c READY flag. */
  CONTROL_WIDTH_READY_FLAG = 1,
  /** Width of the reserved field. */
  CONTROL_WIDTH_RESERVED0 = 31,
};
/** \brief Shared buffer used for submitting hostcall requests.
 *
 * All kernels executing on one device queue place their hostcall packets in
 * the same buffer, and a hostcall buffer is associated with at most one
 * device queue.
 *
 * Packets are addressed through 64-bit tagged pointers, which mitigates the
 * ABA problem in lock-free stacks. Applying the index mask to such a pointer
 * extracts its lower bits, which form the index into the packet array; the
 * remaining higher bits hold a tag that is incremented on every pop from a
 * stack.
 *
 * The type is required to be standard-layout (checked right below the
 * definition) so that the buffer can be used from other languages. The order
 * of the data members is therefore part of the contract.
 */
class HostcallBuffer {
  /** Array of packet headers. */
  PacketHeader* headers_;

  /** Array of packet payloads. */
  Payload* payloads_;

  /** Signal that kernels use to indicate new work. */
  void* doorbell_;

  /** Stack of free packets, linked through tagged pointers. */
  uint64_t free_stack_;

  /** Stack of ready packets, linked through tagged pointers. */
  std::atomic<uint64_t> ready_stack_;

  /** Mask that selects the packet index inside a tagged pointer. */
  uint64_t index_mask_;

  /** Device handle, kept because some services need a device. */
  const amd::Device* device_;

  /** Header lookup for a tagged pointer.
   *  NOTE(review): definition not visible here; presumably returns the
   *  header of the packet that \p ptr refers to -- confirm.
   */
  PacketHeader* getHeader(uint64_t ptr) const;

  /** Payload lookup for a tagged pointer.
   *  NOTE(review): definition not visible here; presumably returns the
   *  payload of the packet that \p ptr refers to -- confirm.
   */
  Payload* getPayload(uint64_t ptr) const;

 public:
  /** Process packets of this buffer using \p messages.
   *  NOTE(review): definition not visible here; presumably services the
   *  packets found on the ready stack -- confirm.
   */
  void processPackets(MessageHandler& messages);

  /** Initialize the buffer for \p num_packets packets.
   *  NOTE(review): definition not visible here -- confirm what is set up.
   */
  void initialize(uint32_t num_packets);

  /** Store the signal that kernels use to indicate new work. */
  void setDoorbell(void* doorbell) { doorbell_ = doorbell; }

  /** Store the device that is made available to services. */
  void setDevice(const amd::Device* dptr) { device_ = dptr; }

// The two conditions are nested rather than joined with &&, so that
// __has_feature is only evaluated when __clang__ is defined.
#if defined(__clang__)
#if __has_feature(address_sanitizer)
 private:
  /** URI locator; only present in builds with the address sanitizer. */
  device::UriLocator* uri_locator;

 public:
  /** Store the URI locator; only available in address sanitizer builds. */
  void setUriLocator(device::UriLocator* uri_l) { uri_locator = uri_l; }
#endif
#endif
};

static_assert(std::is_standard_layout<HostcallBuffer>::value,
              "the hostcall buffer must be useable from other languages");
}// namespace amd
|