#pragma once
#include <c10/core/Allocator.h>
#include <c10/core/ScalarType.h>
#include <c10/core/SymInt.h>
#include <c10/util/intrusive_ptr.h>
namespace c10 {
// A storage represents the underlying backing data buffer for a
// tensor. This concept was inherited from the original Torch7
// codebase; we'd kind of like to get rid of the concept
// (see https://github.com/pytorch/pytorch/issues/14797) but
// it's hard work and no one has gotten around to doing it.
//
// NB: storage is supposed to uniquely own a data pointer; i.e.,
// two non-null data pointers alias if and only if they are from
// the same storage. Technically you can violate this invariant
// (e.g., you can create a non-owning StorageImpl with at::from_blob)
// but a lot of things won't work correctly, including:
//
// - An ordinary deleter on such a storage is wrong, because normal deleters
//   assume unique ownership, but if you have two storages at the same data,
//   that implies there is some sort of shared ownership. So your deleter
//   would actually have to do some sort of refcounting internally.
// - Deepcopy on the Python side relies on storage equality, not data pointer
//   equality; so if there are two separate storages pointing to the same
//   data, the data will actually get duplicated in that case (one data ptr
//   before, two data ptrs after).
// - Version counts won't work correctly, because we do all VC tracking at
//   the level of storages (unless you explicitly disconnect the VC with
//   detach); a mutation through one aliasing storage is totally untracked
//   by the other, because the version counters never see the shared data
//   pointer.
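//
// A hedged sketch of the hazard (illustrative only; at::from_blob lives in
// ATen, above this header):
//
//   float buf[4] = {0.f, 1.f, 2.f, 3.f};
//   auto a = at::from_blob(buf, {4});
//   auto b = at::from_blob(buf, {4}); // second non-owning storage, same data
//   a.mul_(2); // b observes the change, but b's version counter does not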
struct C10_API StorageImpl : public c10::intrusive_ptr_target {
 public:
  struct use_byte_size_t {};

  StorageImpl(
      use_byte_size_t /*use_byte_size*/,
      SymInt size_bytes,
      at::DataPtr data_ptr,
      at::Allocator* allocator,
      bool resizable)
      : data_ptr_(std::move(data_ptr)),
        size_bytes_(std::move(size_bytes)),
        size_bytes_is_symbolic_(size_bytes_.is_symbolic()),
        resizable_(resizable),
        received_cuda_(false),
        allocator_(allocator) {
    if (resizable) {
      TORCH_INTERNAL_ASSERT(
          allocator_, "For resizable storage, allocator must be provided");
    }
  }

  StorageImpl(
      use_byte_size_t /*use_byte_size*/,
      SymInt size_bytes,
      at::Allocator* allocator,
      bool resizable)
      : StorageImpl(
            use_byte_size_t(),
            size_bytes,
            size_bytes.is_symbolic()
                ? allocator->allocate(0)
                : allocator->allocate(size_bytes.as_int_unchecked()),
            allocator,
            resizable) {}
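
  // Illustrative sketch (not part of this class): constructing a 16-byte,
  // resizable CPU storage through the allocating constructor above. Assumes
  // c10::GetCPUAllocator() from c10/core/CPUAllocator.h is available.
  //
  //   auto storage = c10::make_intrusive<c10::StorageImpl>(
  //       c10::StorageImpl::use_byte_size_t(),
  //       16,
  //       c10::GetCPUAllocator(),
  //       /*resizable=*/true);
  //   TORCH_INTERNAL_ASSERT(storage->nbytes() == 16);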
  StorageImpl& operator=(StorageImpl&& other) = default;
  StorageImpl& operator=(const StorageImpl&) = delete;
  StorageImpl() = delete;
  StorageImpl(StorageImpl&& other) = default;
  StorageImpl(const StorageImpl&) = delete;
  ~StorageImpl() override = default;

  void reset() {
    data_ptr_.clear();
    size_bytes_ = 0;
    size_bytes_is_symbolic_ = false;
  }

  template <typename T>
  inline T* data() const {
    return unsafe_data<T>();
  }

  template <typename T>
  inline T* unsafe_data() const {
    return static_cast<T*>(this->data_ptr_.get());
  }
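
  // E.g. (a sketch): reinterpreting the raw buffer as floats. No dtype or
  // bounds checking happens here; the caller asserts the type, hence
  // "unsafe".
  //
  //   float* p = storage.data<float>();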
  // Destructor doesn't call release_resources because it's
  // unnecessary; don't forget to change that if needed!
  void release_resources() override {
    data_ptr_.clear();
  }

  size_t nbytes() const {
    TORCH_CHECK(!size_bytes_is_symbolic_);
    return size_bytes_.as_int_unchecked();
  }

  SymInt sym_nbytes() const {
    return size_bytes_;
  }

  // TODO: remove later
  void set_nbytes(size_t size_bytes) {
    size_bytes_ = size_bytes;
    size_bytes_is_symbolic_ = false;
  }

  void set_nbytes(c10::SymInt size_bytes) {
    size_bytes_ = size_bytes;
  }

  bool resizable() const {
    return resizable_;
  }

  at::DataPtr& data_ptr() {
    return data_ptr_;
  }

  const at::DataPtr& data_ptr() const {
    return data_ptr_;
  }

  // Returns the previous data_ptr
  at::DataPtr set_data_ptr(at::DataPtr&& data_ptr) {
    at::DataPtr old_data_ptr(std::move(data_ptr_));
    data_ptr_ = std::move(data_ptr);
    return old_data_ptr;
  }
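
  // A hedged sketch of the swap semantics: install a fresh allocation while
  // keeping the previous buffer alive long enough to copy out of it. `alloc`
  // stands for any at::Allocator* and is illustrative.
  //
  //   at::DataPtr fresh = alloc->allocate(storage.nbytes());
  //   at::DataPtr old = storage.set_data_ptr(std::move(fresh));
  //   std::memcpy(storage.data(), old.get(), storage.nbytes());
  //   // `old` frees the previous buffer when it goes out of scope.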
  void set_data_ptr_noswap(at::DataPtr&& data_ptr) {
    data_ptr_ = std::move(data_ptr);
  }

  // TODO: Return const ptr eventually if possible
  void* data() {
    return data_ptr_.get();
  }

  void* data() const {
    return data_ptr_.get();
  }

  at::DeviceType device_type() const {
    return data_ptr_.device().type();
  }

  at::Allocator* allocator() {
    return allocator_;
  }

  const at::Allocator* allocator() const {
    return allocator_;
  }
  // You generally shouldn't use this method, but it is occasionally
  // useful if you want to override how a tensor will be reallocated
  // after it has already been allocated (and its initial allocator
  // was set).
  void set_allocator(at::Allocator* allocator) {
    allocator_ = allocator;
  }
  Device device() const {
    return data_ptr_.device();
  }

  void set_resizable(bool resizable) {
    if (resizable) {
      // We need an allocator to be resizable
      AT_ASSERT(allocator_);
    }
    resizable_ = resizable;
  }

  /**
   * Can only be called when use_count is 1
   */
  void UniqueStorageShareExternalPointer(
      void* src,
      size_t size_bytes,
      DeleterFnPtr d = nullptr) {
    UniqueStorageShareExternalPointer(
        at::DataPtr(src, src, d, data_ptr_.device()), size_bytes);
  }

  /**
   * Can only be called when use_count is 1
   */
  void UniqueStorageShareExternalPointer(
      at::DataPtr&& data_ptr,
      size_t size_bytes) {
    data_ptr_ = std::move(data_ptr);
    size_bytes_ = size_bytes;
    size_bytes_is_symbolic_ = false;
    allocator_ = nullptr;
    resizable_ = false;
  }
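
  // Illustrative use (a sketch; the buffer is hypothetical): point a
  // uniquely-owned storage at an externally managed buffer. With no deleter,
  // the storage will not free the memory; it also becomes non-resizable and
  // drops its allocator, as the body above shows.
  //
  //   static float external_buf[64];
  //   storage.UniqueStorageShareExternalPointer(
  //       external_buf, sizeof(external_buf)); // nbytes() is now 256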
  // This method can be used only after storage construction and cannot be
  // used to modify storage status
  void set_received_cuda(bool received_cuda) {
    received_cuda_ = received_cuda;
  }

  bool received_cuda() {
    return received_cuda_;
  }
 private:
  DataPtr data_ptr_;
  SymInt size_bytes_;
  bool size_bytes_is_symbolic_;
  bool resizable_;
  // Identifies that the Storage was received from another process and
  // doesn't have a process-local CUDA memory allocation
  bool received_cuda_;
  Allocator* allocator_;
};
} // namespace c10