1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
|
// Copyright 2023 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: prefetch.h
// -----------------------------------------------------------------------------
//
// This header file defines prefetch functions to prefetch memory contents
// into the first level cache (L1) for the current CPU. The prefetch logic
// offered in this header is limited to prefetching first level cachelines
// only, and is aimed at relatively 'simple' prefetching logic.
//
#ifndef ABSL_BASE_PREFETCH_H_
#define ABSL_BASE_PREFETCH_H_
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#if defined(ABSL_INTERNAL_HAVE_SSE)
#include <xmmintrin.h>
#endif
#if defined(_MSC_VER)
#include <intrin.h>
#if defined(ABSL_INTERNAL_HAVE_SSE)
#pragma intrinsic(_mm_prefetch)
#endif
#endif
namespace absl {
ABSL_NAMESPACE_BEGIN
// Moves data into the L1 cache before it is read, or "prefetches" it.
//
// The value of `addr` is the address of the memory to prefetch. If
// the target and compiler support it, data prefetch instructions are
// generated. If the prefetch is done some time before the memory is
// read, it may be in the cache by the time the read occurs.
//
// This method prefetches data with the highest degree of temporal locality;
// data is prefetched where possible into all levels of the cache.
//
// Incorrect or gratuitous use of this function can degrade performance.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
// // Computes incremental checksum for `data`.
// int ComputeChecksum(int sum, absl::string_view data);
//
// // Computes cumulative checksum for all values in `data`
// int ComputeChecksum(absl::Span<const std::string> data) {
// int sum = 0;
// auto it = data.begin();
// auto pit = data.begin();
// auto end = data.end();
// for (int dist = 8; dist > 0 && pit != data.end(); --dist, ++pit) {
// absl::PrefetchToLocalCache(pit->data());
// }
// for (; pit != end; ++pit, ++it) {
// sum = ComputeChecksum(sum, *it);
// absl::PrefetchToLocalCache(pit->data());
// }
// for (; it != end; ++it) {
// sum = ComputeChecksum(sum, *it);
// }
// return sum;
// }
//
void PrefetchToLocalCache(const void* addr);
// Moves data into the L1 cache before it is read, or "prefetches" it.
//
// This function is identical to `PrefetchToLocalCache()` except that it has
// non-temporal locality: the fetched data should not be left in any of the
// cache tiers. This is useful for cases where the data is used only once /
// short term, for example, invoking a destructor on an object.
//
// Incorrect or gratuitous use of this function can degrade performance.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
// template <typename Iterator>
// void DestroyPointers(Iterator begin, Iterator end) {
// size_t distance = std::min(8U, bars.size());
//
// int dist = 8;
// auto prefetch_it = begin;
// while (prefetch_it != end && --dist;) {
// absl::PrefetchToLocalCacheNta(*prefetch_it++);
// }
// while (prefetch_it != end) {
// delete *begin++;
// absl::PrefetchToLocalCacheNta(*prefetch_it++);
// }
// while (begin != end) {
// delete *begin++;
// }
// }
//
void PrefetchToLocalCacheNta(const void* addr);
// Moves data into the L1 cache with the intent to modify it.
//
// This function is similar to `PrefetchToLocalCache()` except that it
// prefetches cachelines with an 'intent to modify' This typically includes
// invalidating cache entries for this address in all other cache tiers, and an
// exclusive access intent.
//
// Incorrect or gratuitous use of this function can degrade performance. As this
// function can invalidate cached cachelines on other caches and computer cores,
// incorrect usage of this function can have an even greater negative impact
// than incorrect regular prefetches.
// Use this function only when representative benchmarks show an improvement.
//
// Example:
//
// void* Arena::Allocate(size_t size) {
// void* ptr = AllocateBlock(size);
// absl::PrefetchToLocalCacheForWrite(ptr);
// return ptr;
// }
//
void PrefetchToLocalCacheForWrite(const void* addr);
#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
#define ABSL_HAVE_PREFETCH 1
// See __builtin_prefetch:
// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
//
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
const void* addr) {
__builtin_prefetch(addr, 0, 3);
}
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
const void* addr) {
__builtin_prefetch(addr, 0, 0);
}
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
const void* addr) {
// [x86] gcc/clang don't generate PREFETCHW for __builtin_prefetch(.., 1)
// unless -march=broadwell or newer; this is not generally the default, so we
// manually emit prefetchw. PREFETCHW is recognized as a no-op on older Intel
// processors and has been present on AMD processors since the K6-2.
#if defined(__x86_64__) && !defined(__PRFCHW__)
asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
#else
__builtin_prefetch(addr, 1, 3);
#endif
}
#elif defined(ABSL_INTERNAL_HAVE_SSE)
#define ABSL_HAVE_PREFETCH 1
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
const void* addr) {
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
}
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
const void* addr) {
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
}
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
const void* addr) {
#if defined(_MM_HINT_ET0)
_mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_ET0);
#elif !defined(_MSC_VER) && defined(__x86_64__)
// _MM_HINT_ET0 is not universally supported. As we commented further
// up, PREFETCHW is recognized as a no-op on older Intel processors
// and has been present on AMD processors since the K6-2. We have this
// disabled for MSVC compilers as this miscompiles on older MSVC compilers.
asm("prefetchw %0" : : "m"(*reinterpret_cast<const char*>(addr)));
#endif
}
#else
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCache(
const void* addr) {}
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheNta(
const void* addr) {}
ABSL_ATTRIBUTE_ALWAYS_INLINE inline void PrefetchToLocalCacheForWrite(
const void* addr) {}
#endif
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_BASE_PREFETCH_H_
|