File: providers-mana-improve-synchronization-on-the-shadow-queu.patch

Package: rdma-core 56.1-1
From: Konstantin Taranov <kotaranov@microsoft.com>
Date: Tue, 11 Mar 2025 20:49:21 +0100
Subject: providers/mana: improve synchronization on the shadow queue

Use release/acquire semantics for the shadow queue.
It ensures synchronization between sender and poller threads.

Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Origin: upstream, https://github.com/linux-rdma/rdma-core/pull/1584
---
 providers/mana/shadow_queue.h | 58 ++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 23 deletions(-)
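
[Editor's note, not part of the upstream patch: the standalone sketch below illustrates the release/acquire pairing this change applies to the shadow queue. It uses a hypothetical single-producer/single-consumer ring with made-up names (slots, sender, poller, RING_LEN); only the memory-ordering pattern mirrors the patch, in which the producer publishes prod_idx with a release store after filling the entry and the poller's acquire load of prod_idx guarantees the entry's contents are visible. Build with: cc -pthread example.c]

/* Illustrative sketch only -- a hypothetical SPSC ring, not rdma-core code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define RING_LEN 8 /* power of two, analogous to queue->length */

static uint64_t slots[RING_LEN];
static _Atomic(uint64_t) prod_idx; /* written only by the sender thread */
static _Atomic(uint64_t) cons_idx; /* written only by the poller thread */

static void *sender(void *arg)
{
	(void)arg;
	for (uint64_t i = 0; i < 64; i++) {
		/* wait for space: acquire pairs with the poller's release of cons_idx */
		while (atomic_load_explicit(&prod_idx, memory_order_relaxed) -
		       atomic_load_explicit(&cons_idx, memory_order_acquire) >= RING_LEN)
			;
		slots[i & (RING_LEN - 1)] = i; /* fill the entry first ... */
		/* ... then publish it: the release store makes the write above
		 * visible to any thread whose acquire load observes i + 1 */
		atomic_store_explicit(&prod_idx, i + 1, memory_order_release);
	}
	return NULL;
}

static void *poller(void *arg)
{
	(void)arg;
	for (uint64_t i = 0; i < 64; i++) {
		/* acquire pairs with the sender's release store of prod_idx */
		while (atomic_load_explicit(&prod_idx, memory_order_acquire) ==
		       atomic_load_explicit(&cons_idx, memory_order_relaxed))
			;
		uint64_t v = slots[i & (RING_LEN - 1)];
		/* release the slot back to the sender */
		atomic_store_explicit(&cons_idx, i + 1, memory_order_release);
		if (v != i)
			fprintf(stderr, "unexpected value at %llu\n",
				(unsigned long long)i);
	}
	return NULL;
}

int main(void)
{
	pthread_t s, p;

	pthread_create(&s, NULL, sender, NULL);
	pthread_create(&p, NULL, poller, NULL);
	pthread_join(s, NULL);
	pthread_join(p, NULL);
	return 0;
}
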

diff --git a/providers/mana/shadow_queue.h b/providers/mana/shadow_queue.h
index 1073f7c..9343fec 100644
--- a/providers/mana/shadow_queue.h
+++ b/providers/mana/shadow_queue.h
@@ -12,6 +12,9 @@
 #include <infiniband/verbs.h>
 #include <sys/mman.h>
 #include <util/util.h>
+#include <stdatomic.h>
+
+typedef _Atomic(uint64_t) _atomic_t;
 
 #define MANA_NO_SIGNAL_WC (0xff)
 
@@ -83,8 +86,18 @@ static inline void destroy_shadow_queue(struct shadow_queue *queue)
 	}
 }
 
+static inline _atomic_t *producer(struct shadow_queue *queue)
+{
+	return (_atomic_t *)&queue->prod_idx;
+}
+
+static inline _atomic_t *consumer(struct shadow_queue *queue)
+{
+	return (_atomic_t *)&queue->cons_idx;
+}
+
 static inline struct shadow_wqe_header *
-shadow_queue_get_element(const struct shadow_queue *queue, uint64_t unmasked_index)
+shadow_queue_get_element(struct shadow_queue *queue, uint64_t unmasked_index)
 {
 	uint32_t index = unmasked_index & (queue->length - 1);
 
@@ -93,53 +106,51 @@ shadow_queue_get_element(const struct shadow_queue *queue, uint64_t unmasked_ind
 
 static inline bool shadow_queue_full(struct shadow_queue *queue)
 {
-	return (queue->prod_idx - queue->cons_idx) >= queue->length;
+	uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_relaxed);
+	uint64_t cons_idx = atomic_load_explicit(consumer(queue), memory_order_acquire);
+
+	return (prod_idx - cons_idx) >= queue->length;
 }
 
 static inline struct shadow_wqe_header *
 shadow_queue_producer_entry(struct shadow_queue *queue)
 {
-	return shadow_queue_get_element(queue, queue->prod_idx);
+	uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_relaxed);
+
+	return shadow_queue_get_element(queue, prod_idx);
 }
 
 static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
 {
-	queue->prod_idx++;
-}
+	uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_relaxed);
 
-static inline void shadow_queue_retreat_producer(struct shadow_queue *queue)
-{
-	queue->prod_idx--;
+	atomic_store_explicit(producer(queue), prod_idx + 1, memory_order_release);
 }
 
 static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
 {
-	queue->cons_idx++;
-}
+	uint64_t cons_idx = atomic_load_explicit(consumer(queue), memory_order_relaxed);
 
-static inline bool shadow_queue_empty(struct shadow_queue *queue)
-{
-	return queue->prod_idx == queue->cons_idx;
-}
-
-static inline uint32_t shadow_queue_get_pending_wqe_count(struct shadow_queue *queue)
-{
-	return (uint32_t)(queue->prod_idx - queue->next_to_complete_idx);
+	atomic_store_explicit(consumer(queue), cons_idx + 1, memory_order_release);
 }
 
 static inline struct shadow_wqe_header *
-shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+shadow_queue_get_next_to_consume(struct shadow_queue *queue)
 {
-	if (queue->cons_idx == queue->next_to_complete_idx)
+	uint64_t cons_idx = atomic_load_explicit(consumer(queue), memory_order_relaxed);
+
+	if (cons_idx == queue->next_to_complete_idx)
 		return NULL;
 
-	return shadow_queue_get_element(queue, queue->cons_idx);
+	return shadow_queue_get_element(queue, cons_idx);
 }
 
 static inline struct shadow_wqe_header *
 shadow_queue_get_next_to_complete(struct shadow_queue *queue)
 {
-	if (queue->next_to_complete_idx == queue->prod_idx)
+	uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_acquire);
+
+	if (queue->next_to_complete_idx == prod_idx)
 		return NULL;
 
 	return shadow_queue_get_element(queue, queue->next_to_complete_idx);
@@ -153,10 +164,11 @@ static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *qu
 static inline struct shadow_wqe_header *
 shadow_queue_get_next_to_signal(struct shadow_queue *queue)
 {
+	uint64_t prod_idx = atomic_load_explicit(producer(queue), memory_order_acquire);
 	struct shadow_wqe_header *wqe = NULL;
 
 	queue->next_to_signal_idx = max(queue->next_to_signal_idx, queue->next_to_complete_idx);
-	while (queue->next_to_signal_idx < queue->prod_idx) {
+	while (queue->next_to_signal_idx < prod_idx) {
 		wqe = shadow_queue_get_element(queue, queue->next_to_signal_idx);
 		queue->next_to_signal_idx++;
 		if (wqe->flags != MANA_NO_SIGNAL_WC)