// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2020-2022 Martin Whitaker.
#include <stdbool.h>
#include <stddef.h>
#include "cpulocal.h"
#include "smp.h"
#include "assert.h"
#include "barrier.h"
//------------------------------------------------------------------------------
// Public Functions
//------------------------------------------------------------------------------
void barrier_init(barrier_t *barrier, int num_threads)
{
    barrier->flag_num = allocate_local_flag();
    assert(barrier->flag_num >= 0);
    barrier_reset(barrier, num_threads);
}
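
// Example usage (a sketch, not part of this file; 'test_pass' and
// 'active_cpus' are hypothetical names): every participating CPU calls the
// same wait function, and none proceeds until all of them have arrived.
//
//     static barrier_t sync_barrier;
//
//     // On the boot CPU, before starting the others:
//     barrier_init(&sync_barrier, active_cpus);
//
//     // On every participating CPU:
//     test_pass();                        // do this pass's work
//     barrier_spin_wait(&sync_barrier);   // rendezvous with the other CPUs
//     ...                                 // all CPUs continue together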
void barrier_reset(barrier_t *barrier, int num_threads)
{
    barrier->num_threads = num_threads;
    barrier->count       = num_threads;
    local_flag_t *waiting_flags = local_flags(barrier->flag_num);
    for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
        waiting_flags[cpu_num].flag = false;
    }
}
void barrier_spin_wait(barrier_t *barrier)
{
    if (barrier == NULL || barrier->num_threads < 2) {
        return;
    }
    local_flag_t *waiting_flags = local_flags(barrier->flag_num);
    int my_cpu = smp_my_cpu_num();
    waiting_flags[my_cpu].flag = true;
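    // The flag must be set before decrementing the count: if it were set
    // afterwards, the last thread to arrive could reset the barrier and
    // clear all the flags before ours was set, leaving us spinning forever.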
    if (__sync_sub_and_fetch(&barrier->count, 1) != 0) {
        volatile bool *i_am_blocked = &waiting_flags[my_cpu].flag;
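        // Spin until the last thread through the barrier clears our flag.
        // PAUSE (x86) or a short run of NOPs (LoongArch) relaxes the spin
        // loop, reducing power draw and contention on the memory bus.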
        while (*i_am_blocked) {
#if defined(__x86_64__) || defined(__i386__)
            __builtin_ia32_pause();
#elif defined(__loongarch_lp64)
            __asm__ __volatile__ (
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
                "nop \n\t"
            );
#endif
        }
        return;
    }
    // Last one here, so reset the barrier and wake the others. No need to
    // check if a CPU core is actually waiting - just clear all the flags.
    barrier->count = barrier->num_threads;
    __sync_synchronize();
    for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
        waiting_flags[cpu_num].flag = false;
    }
}
void barrier_halt_wait(barrier_t *barrier)
{
    if (barrier == NULL || barrier->num_threads < 2) {
        return;
    }
    local_flag_t *waiting_flags = local_flags(barrier->flag_num);
    int my_cpu = smp_my_cpu_num();
    waiting_flags[my_cpu].flag = true;
    //
    // There is a small window of opportunity for the wakeup signal to arrive
    // between us decrementing the barrier count and halting. So code the
    // following in assembler, both to ensure the window of opportunity is as
    // small as possible, and also to allow us to detect and skip over the
    // halt in the interrupt handler.
    //
    // if (__sync_sub_and_fetch(&barrier->count, 1) != 0) {
    //     __asm__ __volatile__ ("hlt");
    //     return;
    // }
    //
#if defined(__i386__) || defined(__x86_64__)
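    // LOCK DECL decrements the count atomically and sets ZF when it reaches
    // zero. The last thread to arrive branches to label 0 and falls through
    // to reset the barrier; every other thread halts until the wakeup NMI
    // arrives, then jumps straight to 'end'.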
    __asm__ goto ("\t"
        "lock decl %0 \n\t"
        "je   0f      \n\t"
        "hlt          \n\t"
        "jmp  %l[end] \n"
        "0:           \n"
        : /* no outputs */
        : "m" (barrier->count)
        : /* no clobbers */
        : end
    );
#elif defined(__loongarch_lp64)
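    // AMADD_DB.W atomically adds -1 to the count and returns the *old* value
    // in $t1, so the last thread to arrive sees 1 and branches to label 0 to
    // reset the barrier. Every other thread enters the idle loop; the wakeup
    // NMI handler is expected to advance the PC past the loop (see the
    // comment above), after which the final branch transfers to 'end'.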
    __asm__ goto ("\t"
        "li.w       $t0, -1      \n\t"
        "li.w       $t2, 1       \n\t"
        "amadd_db.w $t1, $t0, %0 \n\t"
        "bge        $t2, $t1, 0f \n\t"
        "1:                      \n\t"
        "idle       0x0          \n\t"
        "b          1b           \n\t"
        "b          %l[end]      \n\t"
        "0:                      \n"
        : /* no outputs */
        : "r" (&barrier->count)
        : "$t0", "$t1", "$t2"
        : end
    );
#endif
    // Last one here, so reset the barrier and wake the others.
    barrier->count = barrier->num_threads;
    __sync_synchronize();
    waiting_flags[my_cpu].flag = false;
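    // Send a wakeup NMI to every CPU that is still flagged as waiting. Our
    // own flag was cleared above, so we never send an NMI to ourselves.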
    for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
        if (waiting_flags[cpu_num].flag) {
            waiting_flags[cpu_num].flag = false;
            smp_send_nmi(cpu_num);
        }
    }
end:
    return;
}