1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
|
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H
#define HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H
#include <assert.h>
#include <atomic>
#include <mutex>
#include <unordered_map>
#include <vector>
#include "inc/hsa.h"
#include "resource.h"
namespace rocr {
namespace image {
/// Pairs an HSA queue handle with a 64-bit index that is accessed atomically.
///
/// NOTE(review): what `cached_index_` caches (presumably a queue read/write
/// index) is decided by the code that uses this struct — confirm there.
struct BlitQueue {
  /// Queue handle. This struct declares no ownership semantics for it.
  hsa_queue_t* queue_;

  /// Atomically accessed index. std::atomic already provides the atomicity and
  /// memory-ordering guarantees, so no `volatile` qualifier is needed;
  /// `volatile` is not a synchronization tool and only restricted how the
  /// member could be bound to references.
  std::atomic<uint64_t> cached_index_;
};
/// One entry of a blit code catalog (the std::vector<BlitCodeInfo> passed to
/// the BlitKernel methods). Plain aggregate; member order is significant for
/// brace initialization.
struct BlitCodeInfo {
  /// 64-bit handle value for the kernel code.
  uint64_t code_handle_;

  /// Group segment size (presumably in bytes — confirm with the producer).
  uint32_t group_segment_size_;

  /// Private segment size (presumably in bytes — confirm with the producer).
  uint32_t private_segment_size_;
};
/// Declares the image blit operations (buffer <-> image copies, image-to-image
/// copy, image fill) together with the per-ISA code object / executable
/// bookkeeping they rely on.
///
/// Only declarations are visible in this header; the behaviour of every
/// member is defined by the implementation file.
class BlitKernel {
 public:
  /// Identifies one blit kernel. Values are explicit and contiguous from 0;
  /// KERNEL_OP_COUNT is the number of kernels and sizes the name tables
  /// declared in the private section.
  enum KernelOp {
    KERNEL_OP_COPY_IMAGE_TO_BUFFER = 0,
    KERNEL_OP_COPY_BUFFER_TO_IMAGE = 1,
    KERNEL_OP_COPY_IMAGE_DEFAULT = 2,
    KERNEL_OP_COPY_IMAGE_LINEAR_TO_STANDARD = 3,
    KERNEL_OP_COPY_IMAGE_STANDARD_TO_LINEAR = 4,
    KERNEL_OP_COPY_IMAGE_1DB = 5,
    KERNEL_OP_COPY_IMAGE_1DB_TO_REG = 6,
    KERNEL_OP_COPY_IMAGE_REG_TO_1DB = 7,
    KERNEL_OP_CLEAR_IMAGE = 8,
    KERNEL_OP_CLEAR_IMAGE_1DB = 9,
    KERNEL_OP_COUNT = 10
  };

  explicit BlitKernel();
  ~BlitKernel();

  /// Set-up / tear-down entry points; both report an hsa_status_t.
  hsa_status_t Initialize();
  hsa_status_t Cleanup();

  /// Builds the blit code for @p agent.
  /// @param blit_code_catalog non-const reference, so presumably populated by
  ///        this call — confirm in the implementation.
  hsa_status_t BuildBlitCode(hsa_agent_t agent,
                             std::vector<BlitCodeInfo>& blit_code_catalog);

  /// Buffer -> image copy of @p image_region, dispatched on @p blit_queue.
  /// @p src_row_pitch / @p src_slice_pitch describe the source buffer layout
  /// (units presumably bytes — confirm).
  hsa_status_t CopyBufferToImage(
      BlitQueue& blit_queue,
      const std::vector<BlitCodeInfo>& blit_code_catalog,
      const void* src_memory,
      size_t src_row_pitch,
      size_t src_slice_pitch,
      const Image& dst_image,
      const hsa_ext_image_region_t& image_region);

  /// Image -> buffer copy of @p image_region, dispatched on @p blit_queue.
  /// @p dst_row_pitch / @p dst_slice_pitch describe the destination buffer
  /// layout (units presumably bytes — confirm).
  hsa_status_t CopyImageToBuffer(
      BlitQueue& blit_queue,
      const std::vector<BlitCodeInfo>& blit_code_catalog,
      const Image& src_image,
      void* dst_memory,
      size_t dst_row_pitch,
      size_t dst_slice_pitch,
      const hsa_ext_image_region_t& image_region);

  /// Image -> image copy; @p copy_type selects which KernelOp is used.
  /// NOTE(review): @p size is taken by value while both origins are taken by
  /// const reference; the signature is kept as declared because the
  /// definition lives outside this header.
  hsa_status_t CopyImage(
      BlitQueue& blit_queue,
      const std::vector<BlitCodeInfo>& blit_code_catalog,
      const Image& dst_image,
      const Image& src_image,
      const hsa_dim3_t& dst_origin,
      const hsa_dim3_t& src_origin,
      const hsa_dim3_t size,
      KernelOp copy_type);

  /// Fills @p region of @p image using @p pattern (the layout expected behind
  /// @p pattern is not visible from this header).
  hsa_status_t FillImage(
      BlitQueue& blit_queue,
      const std::vector<BlitCodeInfo>& blit_code_catalog,
      const Image& image,
      const void* pattern,
      const hsa_ext_image_region_t& region);

 private:
  /// Fills @p blit_code_catalog from @p executable for @p agent (presumably
  /// one entry per KernelOp — confirm in the implementation).
  hsa_status_t PopulateKernelCode(
      hsa_agent_t agent,
      hsa_executable_t executable,
      std::vector<BlitCodeInfo>& blit_code_catalog);

  /// Converts the byte pitches of a buffer into pixel pitches written through
  /// @p out_pitch_pixel (the number of elements written is not visible here).
  inline void CalcBufferRowSlicePitchesInPixel(
      hsa_ext_image_geometry_t geometry,
      uint32_t element_size,
      const hsa_dim3_t& copy_size,
      size_t in_row_pitch_byte,
      size_t in_slice_pitch_byte,
      unsigned long* out_pitch_pixel);

  /// Per-image property queries used by the blit operations.
  inline uint32_t GetDimSize(const Image& image);
  inline uint32_t GetNumChannel(const Image& image);
  inline uint32_t GetImageAccessType(const Image& image);

  /// Computes dispatch dimensions into @p packet for a single image ...
  void CalcWorkingSize(const Image& image,
                       const hsa_dim3_t& range,
                       hsa_kernel_dispatch_packet_t& packet);

  /// ... or for a source/destination image pair.
  void CalcWorkingSize(const Image& src_image,
                       const Image& dst_image,
                       const hsa_dim3_t& range,
                       hsa_kernel_dispatch_packet_t& packet);

  /// Produces a converted view of @p original_image through @p new_image
  /// (ownership of the result is not visible here — confirm before freeing).
  hsa_status_t ConvertImage(const Image& original_image,
                            const Image** new_image);

  /// Submits @p packet on @p queue.
  hsa_status_t LaunchKernel(BlitQueue& queue,
                            hsa_kernel_dispatch_packet_t& packet);

  /// Get the patched code object.
  hsa_status_t GetPatchedBlitObject(const char* agent_name,
                                    uint8_t** code_object_handle);

  // Kernel names, one per KernelOp.
  static const char* kernel_name_[KERNEL_OP_COUNT];
  static const char* ocl_kernel_name_[KERNEL_OP_COUNT];

  // Mapping of ISA and kernel object.
  std::unordered_map<uint64_t, hsa_code_object_t> code_object_map_;

  // Mapping of ISA and kernel executable.
  std::unordered_map<uint64_t, hsa_executable_t> code_executable_map_;

  // Mutex member; presumably guards the maps above — confirm in the
  // implementation.
  std::mutex lock_;

  DISALLOW_COPY_AND_ASSIGN(BlitKernel);
};
} // namespace image
} // namespace rocr
#endif // HSA_RUNTIME_EXT_IMAGE_BLIT_KERNEL_H
|