1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
|
// SPDX-License-Identifier: GPL-2.0
/*
* fill_buf benchmark
*
* Copyright (C) 2018 Intel Corporation
*
* Authors:
* Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
* Fenghua Yu <fenghua.yu@intel.com>
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <string.h>
#include "resctrl.h"
#define CL_SIZE (64)
#define PAGE_SIZE (4 * 1024)
#define MB (1024 * 1024)
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
asm volatile("sfence\n\t"
: : : "memory");
#endif
}
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
asm volatile("clflush (%0)\n\t"
: : "r"(p) : "memory");
#endif
}
void mem_flush(unsigned char *buf, size_t buf_size)
{
unsigned char *cp = buf;
size_t i = 0;
buf_size = buf_size / CL_SIZE; /* mem size in cache lines */
for (i = 0; i < buf_size; i++)
cl_flush(&cp[i * CL_SIZE]);
sb();
}
/*
* Buffer index step advance to workaround HW prefetching interfering with
* the measurements.
*
* Must be a prime to step through all indexes of the buffer.
*
* Some primes work better than others on some architectures (from MBA/MBM
* result stability point of view).
*/
#define FILL_IDX_MULT 23
static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
unsigned int size = buf_size / (CL_SIZE / 2);
unsigned int i, idx = 0;
unsigned char sum = 0;
/*
* Read the buffer in an order that is unexpected by HW prefetching
* optimizations to prevent them interfering with the caching pattern.
*
* The read order is (in terms of halves of cachelines):
* i * FILL_IDX_MULT % size
* The formula is open-coded below to avoiding modulo inside the loop
* as it improves MBA/MBM result stability on some architectures.
*/
for (i = 0; i < size; i++) {
sum += buf[idx * (CL_SIZE / 2)];
idx += FILL_IDX_MULT;
while (idx >= size)
idx -= size;
}
return sum;
}
void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
int ret = 0;
while (1) {
ret = fill_one_span_read(buf, buf_size);
if (once)
break;
}
/* Consume read result so that reading memory is not optimized out. */
*value_sink = ret;
}
unsigned char *alloc_buffer(size_t buf_size, bool memflush)
{
void *buf = NULL;
uint64_t *p64;
ssize_t s64;
int ret;
ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
if (ret < 0)
return NULL;
/* Initialize the buffer */
p64 = buf;
s64 = buf_size / sizeof(uint64_t);
while (s64 > 0) {
*p64 = (uint64_t)rand();
p64 += (CL_SIZE / sizeof(uint64_t));
s64 -= (CL_SIZE / sizeof(uint64_t));
}
/* Flush the memory before using to avoid "cache hot pages" effect */
if (memflush)
mem_flush(buf, buf_size);
return buf;
}
ssize_t get_fill_buf_size(int cpu_no, const char *cache_type)
{
unsigned long cache_total_size = 0;
int ret;
ret = get_cache_size(cpu_no, cache_type, &cache_total_size);
if (ret)
return ret;
return cache_total_size * 2 > MINIMUM_SPAN ?
cache_total_size * 2 : MINIMUM_SPAN;
}
|