File: barrier.c

package info (click to toggle)
memtest86+ 8.00-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,768 kB
  • sloc: ansic: 23,121; asm: 2,488; makefile: 625; sh: 408
file content (135 lines) | stat: -rw-r--r-- 3,976 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2020-2022 Martin Whitaker.

#include <stdbool.h>
#include <stddef.h>

#include "cpulocal.h"
#include "smp.h"

#include "assert.h"

#include "barrier.h"

//------------------------------------------------------------------------------
// Public Functions
//------------------------------------------------------------------------------

// Prepares a barrier for first use.
//
// Obtains a flag number from allocate_local_flag() and records it in the
// barrier, asserts that the number is not negative, and then delegates to
// barrier_reset() to store the thread count and clear the waiting flags.
void barrier_init(barrier_t *barrier, int num_threads)
{
    // Store first, check second: the barrier holds the returned value even
    // when the assertion below fires.
    barrier->flag_num = allocate_local_flag();
    assert(0 <= barrier->flag_num);

    barrier_reset(barrier, num_threads);
}

// Re-arms a barrier for num_threads participants.
//
// Both the configured thread count and the running arrival counter are set
// to num_threads, and the waiting flag of every CPU index below
// num_available_cpus is cleared.
void barrier_reset(barrier_t *barrier, int num_threads)
{
    barrier->num_threads = num_threads;
    barrier->count       = num_threads;

    local_flag_t *flags = local_flags(barrier->flag_num);

    int cpu = 0;
    while (cpu < num_available_cpus) {
        flags[cpu++].flag = false;
    }
}

// Waits at the barrier by busy-polling the caller's own waiting flag.
//
// Returns immediately when barrier is NULL or was configured for fewer than
// two threads. Otherwise the caller raises its waiting flag and atomically
// decrements the arrival counter. The caller that brings the counter to zero
// restores the counter and clears every CPU's flag; all other callers spin
// until their flag has been cleared.
void barrier_spin_wait(barrier_t *barrier)
{
    if (barrier == NULL) {
        return;
    }
    if (barrier->num_threads < 2) {
        return;
    }

    local_flag_t *flags = local_flags(barrier->flag_num);
    int self = smp_my_cpu_num();

    // The flag is raised before the counter is decremented.
    flags[self].flag = true;

    if (__sync_sub_and_fetch(&barrier->count, 1) == 0) {
        // Final arrival: re-arm the counter, then release everybody. The
        // flags are cleared unconditionally rather than testing which CPU
        // cores are actually waiting.
        barrier->count = barrier->num_threads;
        __sync_synchronize();

        int cpu = 0;
        while (cpu < num_available_cpus) {
            flags[cpu++].flag = false;
        }
        return;
    }

    // Not the final arrival: poll our own flag through a volatile pointer so
    // that every iteration performs a fresh read.
    volatile bool *still_waiting = &flags[self].flag;
    while (*still_waiting) {
#if defined(__x86_64) || defined(__i386__)
        __builtin_ia32_pause();
#elif defined (__loongarch_lp64)
        __asm__ __volatile__ (
          "nop \n\t"
          "nop \n\t"
          "nop \n\t"
          "nop \n\t"
          "nop \n\t"
          "nop \n\t"
          "nop \n\t"
          "nop \n\t"
        );
#endif
    }
}

// Waits at the barrier by halting the CPU core instead of spinning.
//
// Returns immediately when barrier is NULL or was configured for fewer than
// two threads. Otherwise the caller raises its waiting flag and atomically
// decrements the arrival counter inside an inline assembler sequence. A
// caller that is not the last to arrive executes a halt-type instruction
// (x86 "hlt", LoongArch "idle") and leaves through the "end" label. The
// caller that brings the counter to zero falls out of the assembler block,
// restores the counter, clears its own flag, and for every CPU whose waiting
// flag is still set clears that flag and sends the CPU an NMI.
void barrier_halt_wait(barrier_t *barrier)
{
    if (barrier == NULL || barrier->num_threads < 2) {
        return;
    }
    local_flag_t *waiting_flags = local_flags(barrier->flag_num);
    int my_cpu = smp_my_cpu_num();
    // The flag is raised before the counter is decremented, so the last
    // arrival's scan below can see it.
    waiting_flags[my_cpu].flag = true;
    //
    // There is a small window of opportunity for the wakeup signal to arrive
    // between us decrementing the barrier count and halting. So code the
    // following in assembler, both to ensure the window of opportunity is as
    // small as possible, and also to allow us to detect and skip over the
    // halt in the interrupt handler.
    //
    // if (__sync_sub_and_fetch(&barrier->count, 1) != 0) {
    //     __asm__ __volatile__ ("hlt");
    //     return;
    // }
    //
    // NOTE(review): in both variants below the assembler modifies
    // barrier->count although it is only passed as an input operand, and no
    // "memory" clobber is declared. Confirm that the compiler cannot move the
    // waiting-flag store above across the asm statement.
    //
    // x86: "lock decl" atomically decrements the counter. "je 0f" is taken
    // when the result is zero (we are the last arrival) and falls out of the
    // asm block. Otherwise the core halts and, once resumed, jumps to "end".
#if defined(__i386__) || defined(__x86_64__)
    __asm__ goto ("\t"
        "lock decl %0 \n\t"
        "je 0f        \n\t"
        "hlt          \n\t"
        "jmp %l[end]  \n"
        "0:           \n"
        : /* no outputs */
        : "m" (barrier->count)
        : /* no clobbers */
        : end
    );
    // LoongArch: $t0 = -1 is atomically added to the counter by amadd_db.w,
    // which leaves the previous counter value in $t1. "bge $t2, $t1, 0f" is
    // taken when 1 >= previous value (we are the last arrival). Otherwise the
    // core loops on "idle".
    // NOTE(review): "bl %l[end]" follows an unconditional "b 1b", so it is
    // not reached by straight-line execution; presumably the interrupt
    // handler redirects execution past the idle loop - confirm. Also note
    // that "bl" writes the return-address register, which is not listed as a
    // clobber.
    // NOTE(review): the clobber "t1" is spelled without the "$" prefix used
    // for "$t0" and "$t2" - confirm the compiler accepts both spellings.
#elif defined(__loongarch_lp64)
    __asm__ goto ("\t"
        "li.w $t0, -1\n\t" \
        "li.w $t2, 1\n\t" \
        "amadd_db.w $t1, $t0, %0\n\t" \
        "bge $t2, $t1, 0f\n\t" \
        "1:          \n\t" \
        "idle 0x0\n\t" \
        "b    1b\n\t" \
        "bl %l[end]\n\t" \
        "0:\n\t" \
        : /* no outputs */
        : "r" (&(barrier->count))
        : "$t0", "t1", "$t2"
        : end
    );
#endif
    // Last one here, so reset the barrier and wake the others.
    barrier->count = barrier->num_threads;
    __sync_synchronize();
    // Clear our own flag first so the loop below does not send an NMI to
    // ourselves.
    waiting_flags[my_cpu].flag = false;
    for (int cpu_num = 0; cpu_num < num_available_cpus; cpu_num++) {
        // Only CPUs whose flag is still set are signalled.
        if (waiting_flags[cpu_num].flag) {
            waiting_flags[cpu_num].flag = false;
            smp_send_nmi(cpu_num);
        }
    }
end:
    return;
}