From: Thomas Schwinge <thomas@schwinge.name>
Subject: [PATCH] bits_atomic.h_multiple_threads

TODO.  bits/atomic.h for GNU Hurd.
    
Source: Debian, eglibc-2.10/debian/patches/hurd-i386/local-atomic-no-multiple_threads.diff, r3536.
Author: Samuel Thibault <samuel.thibault@ens-lyon.org>

We always at least start the sigthread anyway.  For now, let's avoid forking
the file (which would mean having to maintain it).

Need to override sysdeps/i386/i486/bits/atomic.h to remove Linuxisms.

---
 sysdeps/i386/atomic-machine.h | 107 +++++++++++++++---------------------------
 1 file changed, 37 insertions(+), 70 deletions(-)

Index: glibc-2.23/sysdeps/x86/atomic-machine.h
===================================================================
--- glibc-2.23.orig/sysdeps/x86/atomic-machine.h
+++ glibc-2.23/sysdeps/x86/atomic-machine.h
@@ -66,35 +66,26 @@ typedef uintmax_t uatomic_max_t;
 
 #define __arch_c_compare_and_exchange_val_8_acq(mem, newval, oldval) \
   ({ __typeof (*mem) ret;						      \
-     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
-		       "je 0f\n\t"					      \
-		       "lock\n"						      \
-		       "0:\tcmpxchgb %b2, %1"				      \
+     __asm __volatile ("lock\n"					      \
+		       "\tcmpxchgb %b2, %1"				      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
-			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
+		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval));   \
      ret; })
 
 #define __arch_c_compare_and_exchange_val_16_acq(mem, newval, oldval) \
   ({ __typeof (*mem) ret;						      \
-     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
-		       "je 0f\n\t"					      \
-		       "lock\n"						      \
-		       "0:\tcmpxchgw %w2, %1"				      \
+     __asm __volatile ("lock\n"					      \
+		       "\tcmpxchgw %w2, %1"				      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
-			 "i" (offsetof (tcbhead_t, multiple_threads)));	      \
+		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval));   \
      ret; })
 
 #define __arch_c_compare_and_exchange_val_32_acq(mem, newval, oldval) \
   ({ __typeof (*mem) ret;						      \
-     __asm __volatile ("cmpl $0, %%" SEG_REG ":%P5\n\t"			      \
-		       "je 0f\n\t"					      \
-		       "lock\n"						      \
-		       "0:\tcmpxchgl %2, %1"				      \
+     __asm __volatile ("lock\n"					      \
+		       "\tcmpxchgl %2, %1"				      \
 		       : "=a" (ret), "=m" (*mem)			      \
-		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval),    \
-			 "i" (offsetof (tcbhead_t, multiple_threads)));       \
+		       : BR_CONSTRAINT (newval), "m" (*mem), "0" (oldval));   \
      ret; })
 
 #ifdef __x86_64__
@@ -210,24 +195,20 @@ typedef uintmax_t uatomic_max_t;
      if (sizeof (*mem) == 1)						      \
        __asm __volatile (lock "xaddb %b0, %1"				      \
 			 : "=q" (__result), "=m" (*mem)			      \
-			 : "0" (__addval), "m" (*mem),			      \
-			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
+			 : "0" (__addval), "m" (*mem));			      \
      else if (sizeof (*mem) == 2)					      \
        __asm __volatile (lock "xaddw %w0, %1"				      \
 			 : "=r" (__result), "=m" (*mem)			      \
-			 : "0" (__addval), "m" (*mem),			      \
-			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
+			 : "0" (__addval), "m" (*mem));			      \
      else if (sizeof (*mem) == 4)					      \
        __asm __volatile (lock "xaddl %0, %1"				      \
 			 : "=r" (__result), "=m" (*mem)			      \
-			 : "0" (__addval), "m" (*mem),			      \
-			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
+			 : "0" (__addval), "m" (*mem));			      \
      else if (__HAVE_64B_ATOMICS)					      \
        __asm __volatile (lock "xaddq %q0, %1"				      \
 			 : "=r" (__result), "=m" (*mem)			      \
 			 : "0" ((int64_t) cast_to_integer (__addval)),     \
-			   "m" (*mem),					      \
-			   "i" (offsetof (tcbhead_t, multiple_threads)));     \
+			   "m" (*mem));					      \
      else								      \
        __result = do_exchange_and_add_val_64_acq (pfx, (mem), __addval);      \
      __result; })
@@ -238,7 +220,7 @@ typedef uintmax_t uatomic_max_t;
   __sync_fetch_and_add (mem, value)
 
 #define __arch_exchange_and_add_cprefix \
-  "cmpl $0, %%" SEG_REG ":%P4\n\tje 0f\n\tlock\n0:\t"
+  "lock\n\t"
 
 #define catomic_exchange_and_add(mem, value) \
   __arch_exchange_and_add_body (__arch_exchange_and_add_cprefix, __arch_c,    \
@@ -254,24 +236,20 @@ typedef uintmax_t uatomic_max_t;
     else if (sizeof (*mem) == 1)					      \
       __asm __volatile (lock "addb %b1, %0"				      \
 			: "=m" (*mem)					      \
-			: IBR_CONSTRAINT (value), "m" (*mem),		      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: IBR_CONSTRAINT (value), "m" (*mem));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (lock "addw %w1, %0"				      \
 			: "=m" (*mem)					      \
-			: "ir" (value), "m" (*mem),			      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "ir" (value), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (lock "addl %1, %0"				      \
 			: "=m" (*mem)					      \
-			: "ir" (value), "m" (*mem),			      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "ir" (value), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (lock "addq %q1, %0"				      \
 			: "=m" (*mem)					      \
 			: "ir" ((int64_t) cast_to_integer (value)),	      \
-			  "m" (*mem),					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			  "m" (*mem));					      \
     else								      \
       do_add_val_64_acq (apfx, (mem), (value));				      \
   } while (0)
@@ -283,7 +262,7 @@ typedef uintmax_t uatomic_max_t;
   __arch_add_body (LOCK_PREFIX, atomic, __arch, mem, value)
 
 #define __arch_add_cprefix \
-  "cmpl $0, %%" SEG_REG ":%P3\n\tje 0f\n\tlock\n0:\t"
+  "lock\n\t"
 
 #define catomic_add(mem, value) \
   __arch_add_body (__arch_add_cprefix, atomic, __arch_c, mem, value)
@@ -332,23 +311,19 @@ typedef uintmax_t uatomic_max_t;
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (lock "incb %b0"					      \
 			: "=m" (*mem)					      \
-			: "m" (*mem),					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "m" (*mem));					      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (lock "incw %w0"					      \
 			: "=m" (*mem)					      \
-			: "m" (*mem),					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "m" (*mem));					      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (lock "incl %0"					      \
 			: "=m" (*mem)					      \
-			: "m" (*mem),					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "m" (*mem));					      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (lock "incq %q0"					      \
 			: "=m" (*mem)					      \
-			: "m" (*mem),					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "m" (*mem));					      \
     else								      \
       do_add_val_64_acq (pfx, mem, 1);					      \
   } while (0)
@@ -359,7 +335,7 @@ typedef uintmax_t uatomic_max_t;
 #define atomic_increment(mem) __arch_increment_body (LOCK_PREFIX, __arch, mem)
 
 #define __arch_increment_cprefix \
-  "cmpl $0, %%" SEG_REG ":%P2\n\tje 0f\n\tlock\n0:\t"
+  "lock\n\t"
 
 #define catomic_increment(mem) \
   __arch_increment_body (__arch_increment_cprefix, __arch_c, mem)
@@ -389,23 +365,19 @@ typedef uintmax_t uatomic_max_t;
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (lock "decb %b0"					      \
 			: "=m" (*mem)					      \
-			: "m" (*mem),					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "m" (*mem));					      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (lock "decw %w0"					      \
 			: "=m" (*mem)					      \
-			: "m" (*mem),					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "m" (*mem));					      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (lock "decl %0"					      \
 			: "=m" (*mem)					      \
-			: "m" (*mem),					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "m" (*mem));					      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (lock "decq %q0"					      \
 			: "=m" (*mem)					      \
-			: "m" (*mem),					      \
+			: "m" (*mem));					      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
     else								      \
       do_add_val_64_acq (pfx, mem, -1);					      \
   } while (0)
@@ -416,7 +389,7 @@ typedef uintmax_t uatomic_max_t;
 #define atomic_decrement(mem) __arch_decrement_body (LOCK_PREFIX, __arch, mem)
 
 #define __arch_decrement_cprefix \
-  "cmpl $0, %%" SEG_REG ":%P2\n\tje 0f\n\tlock\n0:\t"
+  "lock\n\t"
 
 #define catomic_decrement(mem) \
   __arch_decrement_body (__arch_decrement_cprefix, __arch_c, mem)
@@ -487,29 +460,25 @@ typedef uintmax_t uatomic_max_t;
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (lock "andb %b1, %0"				      \
 			: "=m" (*mem)					      \
-			: IBR_CONSTRAINT (mask), "m" (*mem),		      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: IBR_CONSTRAINT (mask), "m" (*mem));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (lock "andw %w1, %0"				      \
 			: "=m" (*mem)					      \
-			: "ir" (mask), "m" (*mem),			      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "ir" (mask), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (lock "andl %1, %0"				      \
 			: "=m" (*mem)					      \
-			: "ir" (mask), "m" (*mem),			      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "ir" (mask), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (lock "andq %q1, %0"				      \
 			: "=m" (*mem)					      \
-			: "ir" (mask), "m" (*mem),			      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "ir" (mask), "m" (*mem));			      \
     else								      \
       __atomic_link_error ();						      \
   } while (0)
 
 #define __arch_cprefix \
-  "cmpl $0, %%" SEG_REG ":%P3\n\tje 0f\n\tlock\n0:\t"
+  "lock\n\t"
 
 #define atomic_and(mem, mask) __arch_and_body (LOCK_PREFIX, mem, mask)
 
@@ -516,23 +486,19 @@ typedef uintmax_t uatomic_max_t;
     if (sizeof (*mem) == 1)						      \
       __asm __volatile (lock "orb %b1, %0"				      \
 			: "=m" (*mem)					      \
-			: IBR_CONSTRAINT (mask), "m" (*mem),		      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: IBR_CONSTRAINT (mask), "m" (*mem));		      \
     else if (sizeof (*mem) == 2)					      \
       __asm __volatile (lock "orw %w1, %0"				      \
 			: "=m" (*mem)					      \
-			: "ir" (mask), "m" (*mem),			      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "ir" (mask), "m" (*mem));			      \
     else if (sizeof (*mem) == 4)					      \
       __asm __volatile (lock "orl %1, %0"				      \
 			: "=m" (*mem)					      \
-			: "ir" (mask), "m" (*mem),			      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "ir" (mask), "m" (*mem));			      \
     else if (__HAVE_64B_ATOMICS)					      \
       __asm __volatile (lock "orq %q1, %0"				      \
 			: "=m" (*mem)					      \
-			: "ir" (mask), "m" (*mem),			      \
-			  "i" (offsetof (tcbhead_t, multiple_threads)));      \
+			: "ir" (mask), "m" (*mem));			      \
     else								      \
       __atomic_link_error ();						      \
   } while (0)
