File: Memory.h

package info (click to toggle)
jazz2-native 3.5.0-2
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid
  • size: 16,912 kB
  • sloc: cpp: 172,557; xml: 113; python: 36; makefile: 5; sh: 2
file content (310 lines) | stat: -rw-r--r-- 13,332 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
#pragma once

#include "../Common.h"
#include "../Containers/GrowableArray.h"

#include <cstring>

#if defined(DEATH_TARGET_APPLE)
#	include <stdlib.h>
#elif defined(DEATH_TARGET_UNIX) || defined(DEATH_TARGET_WINDOWS)
#	include <malloc.h>
#endif
#if defined(DEATH_TARGET_MSVC) && !defined(DEATH_TARGET_CLANG_CL)
#	include <intrin.h>	// For _byteswap_ushort()/_byteswap_ulong()/_byteswap_uint64()
#endif

namespace Death { namespace Memory {
//###==##====#=====--==~--~=~- --- -- -  -  -   -

	/**
		@brief Allocate aligned memory and value-initialize it
		@tparam T           Type of the returned array
		@tparam alignment   Allocation alignment, in bytes
		@param  size        Count of @p T items to allocate. If @cpp 0 @ce, no allocation is done.

		Compared to the classic C @ref std::malloc() or C++ @cpp new @ce that commonly
		aligns only to @cpp 2*sizeof(void*) @ce, this function returns "overaligned"
		allocations, which is mainly useful for efficient SIMD operations.

		The alignment is implicitly @cpp alignof(T) @ce, but can be overridden with the
		@p alignment template parameter. When specified explicitly, it is expected to
		be a power-of-two value, at most @cpp 256 @ce bytes and the total byte size
		being a multiple of the alignment.

		The returned pointer is always aligned to at least the desired value, but the
		alignment can be also higher. For example, allocating a 2 MB buffer may result
		in it being aligned to the whole memory page, or small alignment values could
		get rounded up to the default @cpp 2*sizeof(void*) @ce alignment.

		@section Memory-AllocateAligned-initialization Array initialization

		Like with @ref Containers::Array, the returned array is by default
		* *value-initialized*, which means that trivial types are zero-initialized and
		the default constructor is called on other types. Different behavior can be
		achieved with the following tags, compared to @ref Containers::Array the
		initialization is performed separately from the allocation itself with either a
		loop or a call to @ref std::memset().

		-   @ref AllocateAligned(Containers::ValueInitT, std::size_t) is equivalent to the default
			case, zero-initializing trivial types and calling the default constructor
			elsewhere. Useful when you want to make the choice appear explicit.
		-   @ref AllocateAligned(Containers::NoInitT, std::size_t) does not initialize anything.
			Useful for trivial types when you'll be overwriting the contents anyway,
			for non-trivial types this is the dangerous option and you need to call the
			constructor on all elements manually using placement new,
			@ref std::uninitialized_copy() or similar.
	*/
	template<class T, std::size_t alignment = alignof(T)> inline Containers::Array<T> AllocateAligned(std::size_t size);

	/**
		@brief Allocate aligned memory and value-initialize it

		Same as @ref AllocateAligned(std::size_t), just more explicit. Implemented via
		@ref AllocateAligned(Containers::NoInitT, std::size_t) with either a
		@ref std::memset() or a loop calling the constructors on the returned
		allocation.
	*/
	template<class T, std::size_t alignment = alignof(T)> Containers::Array<T> AllocateAligned(Containers::ValueInitT, std::size_t size);

	/**
		@brief Allocate aligned memory and leave it uninitialized

		Compared to @ref AllocateAligned(std::size_t), the memory is left in an
		uninitialized state. For non-trivial types, destruction is always done using a
		custom deleter that explicitly calls the destructor on *all elements* --- which
		means that for non-trivial types you're expected to construct all elements
		using placement new (or for example @ref std::uninitialized_copy()) in order to
		avoid calling destructors on uninitialized memory.
	*/
	template<class T, std::size_t alignment = alignof(T)> Containers::Array<T> AllocateAligned(Containers::NoInitT, std::size_t size);

	/**
		@brief Converts a value from/to Big-Endian

		On Little-Endian systems calls @ref SwapBytes(), on Big-Endian systems returns the value unchanged.
		Only trivial types of size 2, 4, or 8 bytes are supported.
	*/
	template<typename T> inline T AsBE(T value);

	/**
		@brief Converts a value from/to Little-Endian

		On Big-Endian systems calls @ref SwapBytes(), on Little-Endian systems returns the value unchanged.
		Only trivial types of size 2, 4, or 8 bytes are supported.
	*/
	template<typename T> inline T AsLE(T value);

	/**
		@brief Reads a value of type @p T from a possibly unaligned pointer

		Copies `sizeof(T)` bytes starting at @p p into a local object using
		@ref std::memcpy() and returns that object by value. Only available for
		trivially copyable types.
	*/
	template<typename T, typename std::enable_if<std::is_trivially_copyable<T>::value, int>::type = 0>
	inline constexpr T LoadUnaligned(const void* p) noexcept {
		// Strip a top-level const so the local can be written to by memcpy()
		using Value = typename std::remove_const<T>::type;
		Value result;
		std::memcpy(&result, p, sizeof(result));
		return result;
	}

	/**
		@brief Writes a value of type @p T to a possibly unaligned pointer

		Copies the `sizeof(T)` bytes of @p v to the memory starting at @p p using
		@ref std::memcpy(). Only available for trivially copyable types.
	*/
	template<typename T, typename std::enable_if<std::is_trivially_copyable<T>::value, int>::type = 0>
	inline constexpr void StoreUnaligned(void* p, T v) noexcept {
		const void* const source = &v;
		std::memcpy(p, source, sizeof(v));
	}

	/**
		@brief Endian-swap bytes of given value

		Only trivial types of size 2, 4, or 8 bytes are supported.
	*/
	template<class T> inline T SwapBytes(T value);

	namespace Implementation
	{
#if defined(DEATH_TARGET_WINDOWS)
		// Windows deleter for trivially destructible element types: there are no destructors
		// to run, so the memory goes straight to _aligned_free(). The element count is unused.
		template<class T, typename std::enable_if<std::is_trivially_destructible<T>::value, int>::type = 0>
		void AlignedDeleter(T* const data, std::size_t) {
			_aligned_free(data);
		}
		// Windows deleter for non-trivially destructible element types: explicitly destroys
		// all `size` elements in ascending order and then releases the memory with _aligned_free().
		template<class T, typename std::enable_if<!std::is_trivially_destructible<T>::value, int>::type = 0>
		void AlignedDeleter(T* const data, std::size_t size) {
			T* const end = data + size;
			for (T* item = data; item != end; ++item) {
				item->~T();
			}
			_aligned_free(data);
		}
#else
		// Deleter for trivially destructible element types: there are no destructors to run,
		// so the memory goes straight to std::free(). The element count is unused.
		template<class T, typename std::enable_if<std::is_trivially_destructible<T>::value, int>::type = 0>
		void AlignedDeleter(T* const data, std::size_t) {
			void* const block = data;
			std::free(block);
		}
		// Deleter for non-trivially destructible element types: explicitly destroys all `size`
		// elements in ascending order and then releases the memory with std::free().
		template<class T, typename std::enable_if<!std::is_trivially_destructible<T>::value, int>::type = 0>
		void AlignedDeleter(T* const data, std::size_t size) {
			T* const end = data + size;
			for (T* item = data; item != end; ++item) {
				item->~T();
			}
			std::free(data);
		}
#	if !defined(DEATH_TARGET_UNIX)
		// Deleter for trivially destructible element types whose pointer was shifted forward
		// for alignment. The shift is read from the byte right before `data` and undone
		// before calling std::free(). The element count is unused.
		template<class T, typename std::enable_if<std::is_trivially_destructible<T>::value, int>::type = 0>
		void AlignedOffsetDeleter(T* const data, std::size_t) {
			// Using an unsigned byte in order to be able to represent a 255 byte offset as well
			std::uint8_t* const bytes = reinterpret_cast<std::uint8_t*>(data);
			const std::size_t offset = bytes[-1];
			std::free(bytes - offset);
		}
		// Deleter for non-trivially destructible element types whose pointer was shifted forward
		// for alignment. Destroys all `size` elements in ascending order, then reads the shift
		// from the byte right before `data` and undoes it before calling std::free().
		template<class T, typename std::enable_if<!std::is_trivially_destructible<T>::value, int>::type = 0>
		void AlignedOffsetDeleter(T* const data, std::size_t size) {
			T* const end = data + size;
			for (T* item = data; item != end; ++item) {
				item->~T();
			}

			// Using an unsigned byte in order to be able to represent a 255 byte offset as well
			std::uint8_t* const bytes = reinterpret_cast<std::uint8_t*>(data);
			const std::size_t offset = bytes[-1];
			std::free(bytes - offset);
		}
#	endif
#endif

		// Reverses the byte order of a 16-bit value. Uses a compiler or platform intrinsic
		// where one is selected by the target macros, otherwise falls back to plain shifts.
		DEATH_ALWAYS_INLINE std::uint16_t SwapBytes(std::uint16_t x) {
#if defined(DEATH_TARGET_GCC) || defined(DEATH_TARGET_CLANG)
			return __builtin_bswap16(x);
#elif defined(DEATH_TARGET_MSVC)
			return _byteswap_ushort(x);
#elif defined(DEATH_TARGET_APPLE)
			// The argument has to be the parameter `x`, there is no `value` in this scope
			return _OSSwapInt16(x);
#else
			// Integer promotion widens the operands, the cast truncates back to 16 bits
			return static_cast<std::uint16_t>((x >> 8) | (x << 8));
#endif
		}

		// Reverses the byte order of a 32-bit value. Uses a compiler or platform intrinsic
		// where one is selected by the target macros, otherwise falls back to masks and shifts.
		DEATH_ALWAYS_INLINE std::uint32_t SwapBytes(std::uint32_t x) {
#if defined(DEATH_TARGET_GCC) || defined(DEATH_TARGET_CLANG)
			return __builtin_bswap32(x);
#elif defined(DEATH_TARGET_MSVC)
			return _byteswap_ulong(x);
#elif defined(DEATH_TARGET_APPLE)
			// The argument has to be the parameter `x`, there is no `value` in this scope
			return _OSSwapInt32(x);
#else
			return (x << 24) |
				  ((x & 0x0000FF00u) << 8)  |
				  ((x & 0x00FF0000u) >> 8)  |
				   (x >> 24);
#endif
		}

		// Reverses the byte order of a 64-bit value. Uses a compiler or platform intrinsic
		// where one is selected by the target macros, otherwise falls back to masks and shifts.
		DEATH_ALWAYS_INLINE std::uint64_t SwapBytes(std::uint64_t x) {
#if defined(DEATH_TARGET_GCC) || defined(DEATH_TARGET_CLANG)
			return __builtin_bswap64(x);
#elif defined(DEATH_TARGET_MSVC)
			return _byteswap_uint64(x);
#elif defined(DEATH_TARGET_APPLE)
			// The argument has to be the parameter `x`, there is no `value` in this scope
			return _OSSwapInt64(x);
#else
			return (x << 56) |
				  ((x & 0x000000000000FF00ull) << 40) |
				  ((x & 0x0000000000FF0000ull) << 24) |
				  ((x & 0x00000000FF000000ull) << 8)  |
				  ((x & 0x000000FF00000000ull) >> 8)  |
				  ((x & 0x0000FF0000000000ull) >> 24) |
				  ((x & 0x00FF000000000000ull) >> 40) |
				   (x >> 56);
#endif
		}
	}

	// Allocates `size` items of `T` aligned to at least `alignment` bytes and leaves them uninitialized.
	// Returns an empty array if `size` is zero, if the total byte size isn't a multiple of the alignment
	// (after DEATH_ASSERT reports it) or if the underlying allocation fails. The returned array carries
	// a deleter matching the allocation function used on the given platform.
	template<class T, std::size_t alignment> Containers::Array<T> AllocateAligned(Containers::NoInitT, const std::size_t size) {
		// On non-Unix non-Windows platforms we're storing the alignment offset in a byte right before the returned pointer.
		// Because it's a byte, we can represent a value of at most 255 there (256 would make no sense as a 256-byte-aligned
		// allocation can be only off by 255 bytes at most). Again it's good to have the same requirements on all platforms
		// so checking this always.
		static_assert(alignment && !(alignment & (alignment - 1)) && alignment <= 256,
			"Alignment expected to be a power of two not larger than 256");

		// Required only by aligned_alloc() I think, but it's good to have the same requirements on all platforms for better portability
		DEATH_ASSERT(size * sizeof(T) % alignment == 0, ("Total byte size {} not a multiple of a {}-byte alignment", size * sizeof(T), alignment), {});

#if defined(DEATH_TARGET_UNIX)
		// Unix platforms
		// For some reason, allocating zero bytes still returns a non-null pointer which seems weird and confusing. Handle that explicitly instead.
		if (!size) return {};

		// I would use aligned_alloc() but then there's APPLE who comes and says NO. And on top of everything
		// they DARE to have posix_memalign() in a different header.
		// What's perhaps a bit surprising is that posix_memalign() requires the alignment to be >= sizeof(void*).
		// It seems like a strange requirement -- it could just overalign for lower alignment values instead of failing.
		// Which is what we do here. The Windows _aligned_malloc() API doesn't have this requirement.
		void* data = {};
		const int result = posix_memalign(&data, alignment < sizeof(void*) ? sizeof(void*) : alignment, size * sizeof(T));
		DEATH_DEBUG_ASSERT(result == 0);
		// Never hand out an array that claims `size` items but has no valid storage behind it
		if (result != 0) return {};
		return Containers::Array<T>{static_cast<T*>(data), size, Implementation::AlignedDeleter<T>};
#elif defined(DEATH_TARGET_WINDOWS)
		// Windows platforms
		// Zero size is not allowed: https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/aligned-malloc
		if (!size) return {};

		T* const data = static_cast<T*>(_aligned_malloc(size * sizeof(T), alignment));
		DEATH_DEBUG_ASSERT(data != nullptr);
		// Never hand out an array that claims `size` items but has no valid storage behind it
		if (data == nullptr) return {};
		return Containers::Array<T>{data, size, Implementation::AlignedDeleter<T>};
#else
		// Other -- for allocations larger than the min alignment allocate with (align - 1) more and align manually,
		// then provide a custom deleter that undoes this.
		// Because we always allocate `alignment - 1` more than the size, it means even zero-size allocations would be allocations.
		if (!size) return {};

		// Using an unsigned byte in order to be able to represent a 255 byte offset as well
		std::uint8_t* pointer;
		std::ptrdiff_t offset;
		if (alignment <= Containers::Implementation::DefaultAllocationAlignment) {
			pointer = static_cast<std::uint8_t*>(std::malloc(size * sizeof(T)));
			offset = 0;
		} else {
			pointer = static_cast<std::uint8_t*>(std::malloc(size * sizeof(T) + alignment - 1));
			if (reinterpret_cast<std::ptrdiff_t>(pointer) % alignment == 0) {
				offset = 0;
			} else {
				offset = alignment - reinterpret_cast<std::ptrdiff_t>(pointer) % alignment;
			}
		}
		DEATH_DEBUG_ASSERT(pointer != nullptr);
		// Never hand out an array that claims `size` items but has no valid storage behind it
		if (pointer == nullptr) return {};
		DEATH_DEBUG_ASSERT((reinterpret_cast<std::ptrdiff_t>(pointer) + offset) % alignment == 0);

		// If the offset is zero, use the classic std::free() directly. If not, save the offset in the byte right
		// before what the output pointer will point to and use a different deleter that will undo this offset
		// before calling std::free().
		void(*deleter)(T*, std::size_t);
		if (offset == 0) {
			deleter = Implementation::AlignedDeleter<T>;
		} else {
			// The offset is at most `alignment - 1`, i.e. 255, so it always fits into the byte
			(pointer + offset)[-1] = static_cast<std::uint8_t>(offset);
			deleter = Implementation::AlignedOffsetDeleter<T>;
		}
		return Containers::Array<T>{reinterpret_cast<T*>(pointer + offset), size, deleter};
#endif
	}

	// Value-initializing overload: takes the uninitialized aligned allocation from the NoInit
	// overload and then value-constructs every item in it via arrayConstruct().
	template<class T, std::size_t alignment>
	Containers::Array<T> AllocateAligned(Containers::ValueInitT, const std::size_t size) {
		Containers::Array<T> result = AllocateAligned<T, alignment>(Containers::NoInit, size);
		T* const first = result.begin();
		T* const last = result.end();
		Containers::Implementation::arrayConstruct(Containers::ValueInit, first, last);
		return result;
	}

	// Default overload: forwards to the explicit ValueInit overload with the same type and alignment.
	template<class T, std::size_t alignment>
	inline Containers::Array<T> AllocateAligned(std::size_t size) {
		return AllocateAligned<T, alignment>(Containers::ValueInit, size);
	}

	// Converts between native and Big-Endian byte order: a no-op when DEATH_TARGET_BIG_ENDIAN
	// is defined, a byte swap otherwise.
	template<typename T>
	inline T AsBE(T value) {
#if defined(DEATH_TARGET_BIG_ENDIAN)
		return value;
#else
		return SwapBytes(value);
#endif
	}

	// Converts between native and Little-Endian byte order: a byte swap when
	// DEATH_TARGET_BIG_ENDIAN is defined, a no-op otherwise.
	template<typename T>
	inline T AsLE(T value) {
#if !defined(DEATH_TARGET_BIG_ENDIAN)
		return value;
#else
		return SwapBytes(value);
#endif
	}

	template<class T> inline T SwapBytes(T value) {
		static_assert(std::is_trivially_copyable<T>::value, "SwapBytes() requires the source type to be trivially copyable");

		typedef typename Death::Implementation::TypeFor<sizeof(T)>::Type U;

		// std::memcpy() is used instead of std::bit_cast() for compatibility with C++11
		U tmp;
		std::memcpy(&tmp, &value, sizeof(U));
		tmp = Implementation::SwapBytes(tmp);
		std::memcpy(&value, &tmp, sizeof(U));
		return value;
	}

}}