File: Cache.h

package info (click to toggle)
storm-lang 0.7.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 52,004 kB
  • sloc: ansic: 261,462; cpp: 140,405; sh: 14,891; perl: 9,846; python: 2,525; lisp: 2,504; asm: 860; makefile: 678; pascal: 70; java: 52; xml: 37; awk: 12
file content (165 lines) | stat: -rw-r--r-- 5,494 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#pragma once

#include "Platform.h"
#include <stddef.h> // for size_t

#ifdef __cplusplus
extern "C" {
#else

#ifdef VISUAL_STUDIO
	// in c-mode, the inline keyword does not seem to be supported. Use 'static' instead, that is
	// good enough there.
#define inline static
#endif
#endif

/**
 * This file contains functions for managing the instruction- and data caches for communication
 * between multiple cores. In most cases, it is enough to utilize the atomic operations available in
 * the InlineAtomics header, but sometimes (especially when updating code), explicit cache
 * maintenance is necessary. These functions expand as necessary on each of our supported platforms.
 *
 * Note: This file is included from C, so we need to ensure it compiles from C.
 */

// Invalidates the data cache to ensure that writes have reached memory and are visible to other
// cores. 'start' and 'end' delimit the affected memory. In this file, the GCC X86/X64 version is
// only a compiler barrier, the GCC ARM64 version forwards the range to __builtin___clear_cache,
// and the MSVC version is empty.
inline void invalidateDCache(void *start, void *end);

// Make sure any pending data operations are done executing on the current thread. In this file,
// the GCC X86/X64 version is only a compiler barrier, the GCC ARM64 version issues "dsb ish", and
// the MSVC version is empty.
inline void dataBarrier();

// Invalidate the instruction cache for an individual word. This function assumes that this thread
// has updated the instruction at "start", and that it needs to be flushed from the data cache
// first. After execution, the changes are immediately visible to other CPUs, but every CPU needs
// to call "clearLocalICache" before it executes the updated instruction.
inline void invalidateSingleICache(void *start);

// Invalidate the instruction cache for a range of memory. The function assumes that this thread
// has updated the code in the range and that it needs to be flushed from the data cache
// first. After execution, the changes are immediately visible to other CPUs, but every CPU needs
// to call "clearLocalICache" before it executes the updated code.
inline void invalidateICache(void *start, void *end);

// Make sure that any changes to the ICache are respected on the current CPU. While the other ICache
// functions ensure that changes are visible, instructions may still be prefetched on this CPU. This
// function flushes the prefetch, and any other local caches that need to be updated. In this file,
// the GCC ARM64 version issues "isb"; the GCC X86/X64 version is only a compiler barrier.
inline void clearLocalICache();


#if defined(GCC)

#if defined(X86) || defined(X64)

// Nothing special needed on X86/X64 except for preventing reordering from the compiler.
// X86/X64 variant: both arguments are unused. The only effect is a compiler-level barrier (an
// empty asm statement with a "memory" clobber); no instruction is emitted.
inline void invalidateDCache(void *start, void *end) {
	__asm__ __volatile__ ("" ::: "memory");
	(void)end;
	(void)start;
}

// X86/X64 variant: an empty asm statement with a "memory" clobber, which keeps the compiler from
// moving memory accesses across this point. No instruction is emitted.
inline void dataBarrier() {
	__asm__ __volatile__ ("" ::: "memory");
}

// X86/X64 variant: 'start' is unused. Only a compiler-level memory barrier is produced.
inline void invalidateSingleICache(void *start) {
	__asm__ __volatile__ ("" ::: "memory");
	(void)start;
}

// X86/X64 variant: the range is unused. Only a compiler-level memory barrier is produced.
inline void invalidateICache(void *start, void *end) {
	__asm__ __volatile__ ("" ::: "memory");
	(void)end;
	(void)start;
}

// X86/X64 variant: nothing but a compiler-level memory barrier (empty asm, "memory" clobber).
inline void clearLocalICache() {
	__asm__ __volatile__ ("" ::: "memory");
}

#elif defined(ARM64)

// We need to do things here...
// The code here is from the "Arm Architecture Reference Manual", section K11.5 (Barrier Litmus Tests)

// ARM64 variant: hands the range to the compiler's __builtin___clear_cache. As the original
// author notes, a cheaper operation would probably be sufficient here.
inline void invalidateDCache(void *start, void *end) {
	__builtin___clear_cache(
		start,
		end);
}

// ARM64 variant: issues "dsb ish". The "memory" clobber additionally stops the compiler from
// moving memory accesses across the barrier.
inline void dataBarrier() {
	__asm__ __volatile__ ("dsb ish\n\t" ::: "memory");
}

// ARM64 variant: issues "isb" on the current CPU. The "memory" clobber additionally stops the
// compiler from moving memory accesses across it.
inline void clearLocalICache() {
	__asm__ __volatile__ ("isb\n\t" ::: "memory");
}

// ARM64 variant, for the single address 'start'. The sequence is:
//   1. dc cvau  - clean the data cache to the point of unification,
//   2. dsb ish  - make sure the previous operation is visible to all CPUs,
//   3. ic ivau  - clean the instruction cache to the point of unification,
//   4. dsb ish  - make sure the previous operation is visible to all CPUs.
inline void invalidateSingleICache(void *start) {
	__asm__ __volatile__ (
		"dc cvau, %0\n\t"
		"dsb ish\n\t"
		"ic ivau, %0\n\t"
		"dsb ish\n\t"
		: /* no outputs */
		: "r" (start)
		: "memory");
}

// ARM64 variant. For every cache line that overlaps the range from 'start' up to (not including)
// 'end', clean the data cache to the point of unification and then invalidate the instruction
// cache, with a "dsb ish" after each of the two passes. Line sizes are read from CTR_EL0.
inline void invalidateICache(void *start, void *end) {
	size_t b = (size_t)start;
	size_t e = (size_t)end;

	// Compiler barrier. The "dc cvau" statements below deliberately carry no "memory" clobber, and
	// this function is inline, so without this the compiler would be free to sink the caller's
	// stores to the code past the cleaning loop. This emits no instruction.
	__asm__ volatile ("" : : : "memory");

	// Get cache sizes. CTR_EL0 is a 64-bit system register, so read it into a 64-bit variable to
	// match the register width that "mrs" requires.
	unsigned long long cache_info = 0;
	__asm__ volatile ("mrs %0, ctr_el0\n\t" : "=r" (cache_info));

	// Bits [3:0] and [19:16] hold log2 of the smallest I- and D-cache line size, counted in 4-byte
	// words. Convert to bytes.
	size_t icache = (size_t)4 << (cache_info & 0xF);
	size_t dcache = (size_t)4 << ((cache_info >> 16) & 0xF);

	// First, clear the data cache. Note: We need to round 'start' down to a multiple of the cache
	// size. Otherwise, we might miss the last cacheline. We know that both icache and dcache are
	// power of two, so it is fairly easy to round them cheaply with some bit twiddling.
	for (size_t at = b & ~(dcache - 1); at < e; at += dcache) {
		__asm__ volatile ("dc cvau, %0\n\t" : : "r" (at));
	}

	// Make sure it is visible. We wait with the memory barrier until here. We don't care in which
	// order the "dc cvau" instructions execute, the important thing is that they are all executed
	// before the "dsb ish" instruction here.
	__asm__ volatile ("dsb ish\n\t" : : : "memory");

	// Then, we clear the instruction cache.
	for (size_t at = b & ~(icache - 1); at < e; at += icache) {
		__asm__ volatile ("ic ivau, %0\n\t" : : "r" (at));
	}

	// Again, wait for the cleaning to be propagated properly.
	__asm__ volatile ("dsb ish\n\t" : : : "memory");
}

#endif


#elif defined(VISUAL_STUDIO)

// Note: On MSVC we currently only support X86, where we don't need explicit cache control.
#if !defined(X86) && !defined(X64)
#error "You likely need to implement cache invalidation for this architecture."
#endif

// MSVC (X86/X64) stub: performs no cache maintenance; both arguments are ignored.
inline void invalidateDCache(void *start, void *end) {
	(void)start;
	(void)end;
}
// MSVC (X86/X64) stub: performs no cache maintenance; 'start' is ignored.
inline void invalidateSingleICache(void *start) {
	(void)start;
}
// MSVC (X86/X64) stub: performs no cache maintenance; both arguments are ignored.
inline void invalidateICache(void *start, void *end) {
	(void)start;
	(void)end;
}
// MSVC (X86/X64) stub: intentionally empty.
inline void clearLocalICache() {
}
// MSVC (X86/X64) stub: intentionally empty.
inline void dataBarrier() {
}


#endif

#ifdef __cplusplus
}
#else

#ifdef VISUAL_STUDIO
#undef inline
#endif

#endif