From: Konstantin Taranov <kotaranov@microsoft.com>
Date: Tue, 11 Mar 2025 20:49:21 +0100
Subject: providers/mana: improve synchronization on the shadow queue
Use release/acquire semantics when accessing the shadow queue indices.
This ensures proper synchronization between the sender and poller threads.
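For illustration only (not part of this patch), a minimal sketch of the
release/acquire pairing the patch relies on: the producer fills an entry
and then publishes it with a release store of the producer index, and the
poller's acquire load of that index guarantees the entry contents are
visible before they are read. The names (toy_queue, toy_push, toy_pop)
are made up for the example and do not appear in the provider code.

  #include <stdatomic.h>
  #include <stdint.h>
  #include <inttypes.h>
  #include <stdio.h>

  struct toy_queue {
  	uint64_t data[8];
  	_Atomic(uint64_t) prod_idx;
  	uint64_t cons_idx;	/* only touched by the poller thread */
  };

  /* Sender: write the entry first, then publish it with a release store. */
  static void toy_push(struct toy_queue *q, uint64_t value)
  {
  	uint64_t prod = atomic_load_explicit(&q->prod_idx, memory_order_relaxed);

  	q->data[prod & 7] = value;
  	atomic_store_explicit(&q->prod_idx, prod + 1, memory_order_release);
  }

  /* Poller: an acquire load of prod_idx makes the entry contents visible. */
  static int toy_pop(struct toy_queue *q, uint64_t *value)
  {
  	uint64_t prod = atomic_load_explicit(&q->prod_idx, memory_order_acquire);

  	if (q->cons_idx == prod)
  		return 0;
  	*value = q->data[q->cons_idx++ & 7];
  	return 1;
  }

  int main(void)
  {
  	struct toy_queue q = {0};
  	uint64_t v;

  	toy_push(&q, 42);
  	if (toy_pop(&q, &v))
  		printf("%" PRIu64 "\n", v);
  	return 0;
  }

The same pattern is applied below: relaxed loads are enough on the side
that owns an index, while the opposite side uses an acquire load paired
with the owner's release store.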
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Origin: upstream, https://github.com/linux-rdma/rdma-core/pull/1584
---
providers/mana/shadow_queue.h | 58 ++++++++++++++++++++++++++-----------------
1 file changed, 35 insertions(+), 23 deletions(-)
diff --git a/providers/mana/shadow_queue.h b/providers/mana/shadow_queue.h
index 1073f7c..9343fec 100644
--- a/providers/mana/shadow_queue.h
+++ b/providers/mana/shadow_queue.h
@@ -12,6 +12,9 @@
#include <infiniband/verbs.h>
#include <sys/mman.h>
#include <util/util.h>
+#include <stdatomic.h>
+
+typedef _Atomic(uint64_t) _atomic_t;
#define MANA_NO_SIGNAL_WC (0xff)
@@ -83,8 +86,18 @@ static inline void destroy_shadow_queue(struct shadow_queue *queue)
}
}
+static inline _atomic_t *producer(struct shadow_queue *queue)
+{
+ return (_atomic_t *)&queue->prod_idx;
+}
+
+static inline _atomic_t *consumer(struct shadow_queue *queue)
+{
+ return (_atomic_t *)&queue->cons_idx;
+}
+
static inline struct shadow_wqe_header *
-shadow_queue_get_element(const struct shadow_queue *queue, uint64_t unmasked_index)
+shadow_queue_get_element(struct shadow_queue *queue, uint64_t unmasked_index)
{
uint32_t index = unmasked_index & (queue->length - 1);
@@ -93,53 +106,51 @@ shadow_queue_get_element(const struct shadow_queue *queue, uint64_t unmasked_ind
static inline bool shadow_queue_full(struct shadow_queue *queue)
{
- return (queue->prod_idx - queue->cons_idx) >= queue->length;
+ uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_relaxed);
+ uint64_t cons_idx = atomic_load_explicit(consumer(queue), memory_order_acquire);
+
+ return (prod_idx - cons_idx) >= queue->length;
}
static inline struct shadow_wqe_header *
shadow_queue_producer_entry(struct shadow_queue *queue)
{
- return shadow_queue_get_element(queue, queue->prod_idx);
+ uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_relaxed);
+
+ return shadow_queue_get_element(queue, prod_idx);
}
static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
{
- queue->prod_idx++;
-}
+ uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_relaxed);
-static inline void shadow_queue_retreat_producer(struct shadow_queue *queue)
-{
- queue->prod_idx--;
+ atomic_store_explicit(producer(queue), prod_idx + 1, memory_order_release);
}
static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
{
- queue->cons_idx++;
-}
+ uint64_t cons_idx = atomic_load_explicit(consumer(queue), memory_order_relaxed);
-static inline bool shadow_queue_empty(struct shadow_queue *queue)
-{
- return queue->prod_idx == queue->cons_idx;
-}
-
-static inline uint32_t shadow_queue_get_pending_wqe_count(struct shadow_queue *queue)
-{
- return (uint32_t)(queue->prod_idx - queue->next_to_complete_idx);
+ atomic_store_explicit(consumer(queue), cons_idx + 1, memory_order_release);
}
static inline struct shadow_wqe_header *
-shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+shadow_queue_get_next_to_consume(struct shadow_queue *queue)
{
- if (queue->cons_idx == queue->next_to_complete_idx)
+ uint64_t cons_idx = atomic_load_explicit(consumer(queue), memory_order_relaxed);
+
+ if (cons_idx == queue->next_to_complete_idx)
return NULL;
- return shadow_queue_get_element(queue, queue->cons_idx);
+ return shadow_queue_get_element(queue, cons_idx);
}
static inline struct shadow_wqe_header *
shadow_queue_get_next_to_complete(struct shadow_queue *queue)
{
- if (queue->next_to_complete_idx == queue->prod_idx)
+ uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_acquire);
+
+ if (queue->next_to_complete_idx == prod_idx)
return NULL;
return shadow_queue_get_element(queue, queue->next_to_complete_idx);
@@ -153,10 +164,11 @@ static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *qu
static inline struct shadow_wqe_header *
shadow_queue_get_next_to_signal(struct shadow_queue *queue)
{
+ uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_acquire);
struct shadow_wqe_header *wqe = NULL;
queue->next_to_signal_idx = max(queue->next_to_signal_idx, queue->next_to_complete_idx);
- while (queue->next_to_signal_idx < queue->prod_idx) {
+ while (queue->next_to_signal_idx < prod_idx) {
wqe = shadow_queue_get_element(queue, queue->next_to_signal_idx);
queue->next_to_signal_idx++;
if (wqe->flags != MANA_NO_SIGNAL_WC)