1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
#pragma once
#include "Platform.h"
#include <stddef.h> // for size_t
#ifdef __cplusplus
extern "C" {
#else
#ifdef VISUAL_STUDIO
// in c-mode, the inline keyword does not seem to be supported. Use 'static' instead, that is
// good enough there.
#define inline static
#endif
#endif
/**
* This file contains functions for managing the instruction- and data caches for communication
* between multiple cores. In most cases, it is enough to utilize the atomic operations available in
* the InlineAtomics header, but sometimes (especially when updating code), explicit cache
* maintenance is necessary. These functions expand as necessary on each of our supported platforms.
*
* Note: This file is included from C, so we need to ensure it compiles from C.
*/
// Invalidates the data cache to ensure writes have reached memory and are visible to other cores.
inline void invalidateDCache(void *start, void *end);
// Make sure any pending data operations are done executing on the current thread.
inline void dataBarrier();
// Invalidate the instruction cache for an individual word. This function assumes that this thread
// has updated the instruction at "start", and that it needs to be flushed from the data cache
// first. After execution, the changes are immediately visible to other CPUs, but all CPUs need to
// call "clearLocalICache" first.
inline void invalidateSingleICache(void *start);
// Invalidate the instruction cache for a range of memory. The function assumes that this thread
// has updated the code in the range and that it needs to be flushed from the data cache
// first. After execution, the changes are immediately visible to other CPUs, but all CPUs need to
// call "clearLocalICache" first.
inline void invalidateICache(void *start, void *end);
// Make sure that any changes to the ICache are respected on the current CPU. While the other ICache
// functions ensure that changes are visible, instructions may still be prefetched on this CPU. This
// function flushes the prefetch, and any other local caches that need to be updated.
inline void clearLocalICache();
#if defined(GCC)
#if defined(X86) || defined(X64)
// Nothing special needed on X86/X64 except for preventing reordering from the compiler.
inline void invalidateDCache(void *start, void *end) {
	// No cache maintenance instruction is issued on X86/X64, so the range itself is never
	// inspected. The casts only silence unused-parameter warnings.
	(void)end;
	(void)start;
	// An empty asm statement with a "memory" clobber emits no instructions, but keeps the
	// compiler from moving memory accesses across this point.
	__asm__ __volatile__ ("" : : : "memory");
}
inline void dataBarrier() {
	// Compiler-level barrier only: the empty asm statement emits no instructions, and its
	// "memory" clobber stops the compiler from reordering memory accesses across this call.
	__asm__ __volatile__ ("" : : : "memory");
}
inline void invalidateSingleICache(void *start) {
	// The address is not needed on X86/X64; only a compiler barrier is emitted below.
	(void)start;
	// Empty asm + "memory" clobber: no instructions, but the compiler may not move memory
	// accesses (such as the store that patched the instruction) across this point.
	__asm__ __volatile__ ("" : : : "memory");
}
inline void invalidateICache(void *start, void *end) {
	// The range is not needed on X86/X64; only a compiler barrier is emitted below.
	(void)end;
	(void)start;
	// Empty asm + "memory" clobber: no instructions, but the compiler may not move memory
	// accesses (such as the stores that wrote the code) across this point.
	__asm__ __volatile__ ("" : : : "memory");
}
inline void clearLocalICache() {
	// Compiler-level barrier only: the empty asm statement emits no instructions, and its
	// "memory" clobber stops the compiler from reordering memory accesses across this call.
	__asm__ __volatile__ ("" : : : "memory");
}
#elif defined(ARM64)
// We need to do things here...
// The code here is from the "Arm Architecture Reference Manual", section K11.5 (Barrier Litmus Tests)
inline void invalidateDCache(void *start, void *end) {
	// Hand the range over to the compiler's cache-clearing builtin. As the original author
	// noted, something cheaper would probably suffice here.
	char *first = (char *)start;
	char *last = (char *)end;
	__builtin___clear_cache(first, last);
}
// ARM64: issues a "dsb ish" instruction (data synchronization barrier, inner shareable domain).
// The "memory" clobber additionally keeps the compiler from reordering memory accesses across
// the barrier.
inline void dataBarrier() {
__asm__ volatile ("dsb ish\n\t" : : : "memory");
}
// ARM64: issues an "isb" instruction (instruction synchronization barrier) on the current CPU.
// The "memory" clobber additionally keeps the compiler from reordering memory accesses across
// the barrier.
inline void clearLocalICache() {
__asm__ volatile ("isb\n\t" : : : "memory");
}
// ARM64: publishes a modified instruction located at "start". All four instructions are emitted
// as one asm statement so that they execute in exactly this order, and "start" is used as the
// address operand of both cache operations. The "memory" clobber keeps the compiler from moving
// memory accesses (in particular the store that modified the instruction) across the sequence.
inline void invalidateSingleICache(void *start) {
__asm__ volatile (
"dc cvau, %0\n\t" // Clean the data cache line for this address to the point of unification.
"dsb ish\n\t" // Make sure previous operation is visible to all CPUs.
"ic ivau, %0\n\t" // Invalidate the instruction cache line for this address to the point of unification.
"dsb ish\n\t" // Make sure previous operation is visible to all CPUs.
: : "r"(start)
: "memory" );
}
// ARM64: makes code that this thread has written to the range from "start" (inclusive) to "end"
// (exclusive) visible to instruction fetch. The affected data cache lines are cleaned first, then
// the corresponding instruction cache lines are invalidated. An empty range (start >= end) issues
// no cache operations, only the barriers.
inline void invalidateICache(void *start, void *end) {
	size_t b = (size_t)start;
	size_t e = (size_t)end;

	// Get cache line sizes. CTR_EL0 is read with MRS, which always writes a full 64-bit X
	// register, so the destination variable must be 64 bits wide.
	unsigned long long cache_info = 0;
	__asm__ volatile ("mrs %0, ctr_el0\n\t" : "=r" (cache_info));

	// Bits [3:0] and [19:16] hold log2 of the smallest instruction/data cache line size, counted
	// in 4-byte words. Converting to bytes always yields a power of two.
	size_t icache = (size_t)4 << (cache_info & 0xF);
	size_t dcache = (size_t)4 << ((cache_info >> 16) & 0xF);

	// Compiler-only barrier (emits no instruction): the cache operations below carry no "memory"
	// clobber themselves, so without this the compiler would be free to sink the caller's stores
	// to the code range past the point where the data cache is cleaned.
	__asm__ volatile ("" : : : "memory");

	// First, clean the data cache. 'b' is rounded down to a multiple of the line size; otherwise
	// stepping by the line size from an unaligned address could skip the last line that overlaps
	// the range. The line size is a power of two, so masking is enough.
	for (size_t at = b & ~(dcache - 1); at < e; at += dcache) {
		__asm__ volatile ("dc cvau, %0\n\t" : : "r" (at));
	}

	// The individual "dc cvau" operations may complete in any order; what matters is that all of
	// them are done before we continue, which this single "dsb ish" guarantees.
	__asm__ volatile ("dsb ish\n\t" : : : "memory");

	// Then, invalidate the instruction cache, using the same rounding as above.
	for (size_t at = b & ~(icache - 1); at < e; at += icache) {
		__asm__ volatile ("ic ivau, %0\n\t" : : "r" (at));
	}

	// Again, wait for the invalidation to be propagated properly.
	__asm__ volatile ("dsb ish\n\t" : : : "memory");
}
#endif
#elif defined(VISUAL_STUDIO)
// Note: On MSVC we currently only support X86, where we don't need explicit cache control.
#if !defined(X86) && !defined(X64)
#error "You likely need to implement cache invalidation for this architecture."
#endif
// MSVC stubs (X86/X64 only, enforced by the #error above): every cache maintenance function is
// an empty function here. Parameters are explicitly marked as unused.
// NOTE(review): unlike the GCC X86/X64 versions in this file, these contain no compiler barrier.
// Confirm that this is intended.
inline void invalidateDCache(void *start, void *end) {
	(void)start;
	(void)end;
}
inline void dataBarrier() {
}
inline void invalidateSingleICache(void *start) {
	(void)start;
}
inline void invalidateICache(void *start, void *end) {
	(void)start;
	(void)end;
}
inline void clearLocalICache() {
}
#endif
#ifdef __cplusplus
}
#else
#ifdef VISUAL_STUDIO
#undef inline
#endif
#endif
|