From: Thomas Schwinge <thomas@schwinge.name>
Subject: [PATCH] tls-threadvar

replace the custom threadvar mechanism with generic TLS.
That will fix sigaltstack.

---
 hurd/Versions                               |  7 +--
 hurd/hurd/signal.h                          | 24 +++++----
 hurd/hurd/threadvar.h                       | 81 +++--------------------------
 hurd/hurdsig.c                              | 52 +++++++++++-------
 hurd/hurdstartup.c                          |  1 -
 hurd/sigunwind.c                            |  4 +-
 include/errno.h                     |  2 +-
 sysdeps/mach/hurd/Versions                  |  6 ++-
 sysdeps/mach/hurd/bits/libc-lock.h          |  5 +-
 sysdeps/mach/hurd/bits/libc-tsd.h           | 34 ------------
 sysdeps/mach/hurd/cthreads.c                |  3 ++
 sysdeps/mach/hurd/dl-sysdep.c               | 19 -------
 sysdeps/mach/hurd/errno-loc.c               | 24 ++++++---
 sysdeps/mach/hurd/errno.c           |  1 -
 sysdeps/mach/hurd/fork.c                    |  7 ++-
 sysdeps/mach/hurd/i386/init-first.c         | 35 -------------
 sysdeps/mach/hurd/i386/makecontext-helper.c |  2 +
 sysdeps/mach/hurd/i386/makecontext.S        |  2 +-
 sysdeps/mach/hurd/i386/sigreturn.c          |  3 +-
 sysdeps/mach/hurd/i386/tls.h                | 26 +++++++++
 sysdeps/mach/hurd/mig-reply.c               | 39 ++++----------
 sysdeps/mach/hurd/profil.c                  |  6 +--
 sysdeps/mach/hurd/tls.h             |  1 +
 libpthread/libpthread.a             |    3 +++
 24 files changed, 135 insertions(+), 252 deletions(-)

--- a/hurd/Versions
+++ b/hurd/Versions
@@ -25,20 +25,16 @@ libc {
 
     # weak refs to libthreads functions that libc calls iff libthreads in use
     cthread_fork; cthread_detach;
+    pthread_getattr_np; pthread_attr_getstack;
 %endif
 
     # necessary for the Hurd brk implementation
     _end;
 
     # variables used in macros & inline functions
-    __hurd_sigthread_stack_base; __hurd_sigthread_stack_end;
-    __hurd_sigthread_variables;
     __hurd_threadvar_max;
     __hurd_threadvar_stack_mask; __hurd_threadvar_stack_offset;
 
-    # functions used in macros & inline functions
-    __hurd_errno_location;
-
     # functions used in libmachuser and libhurduser
     _S_catch_exception_raise;
     _S_catch_exception_raise_state;
@@ -161,6 +157,7 @@ libc {
   HURD_CTHREADS_0.3 {
     # weak refs to libthreads functions that libc calls iff libthreads in use
     cthread_fork; cthread_detach;
+    pthread_getattr_np; pthread_attr_getstack;
 
     # variables used for detecting cthreads
     _cthread_exit_routine; _cthread_init_routine;
--- a/hurd/hurd/signal.h
+++ b/hurd/hurd/signal.h
@@ -40,7 +40,6 @@
 #include <cthreads.h>		/* For `struct mutex'.  */
 #include <setjmp.h>		/* For `jmp_buf'.  */
 #include <spin-lock.h>
-#include <hurd/threadvar.h>	/* We cache sigstate in a threadvar.  */
 struct hurd_signal_preemptor;	/* <hurd/sigpreempt.h> */
 
 
@@ -129,11 +128,9 @@ extern struct hurd_sigstate *_hurd_self_
 _HURD_SIGNAL_H_EXTERN_INLINE struct hurd_sigstate *
 _hurd_self_sigstate (void)
 {
-  struct hurd_sigstate **location =
-    (void *) __hurd_threadvar_location (_HURD_THREADVAR_SIGSTATE);
-  if (*location == NULL)
-    *location = _hurd_thread_sigstate (__mach_thread_self ());
-  return *location;
+  if (THREAD_SELF->_hurd_sigstate == NULL)
+    THREAD_SELF->_hurd_sigstate = _hurd_thread_sigstate (__mach_thread_self ());
+  return THREAD_SELF->_hurd_sigstate;
 }
 
 /* Thread listening on our message port; also called the "signal thread".  */
@@ -167,16 +165,22 @@ extern int _hurd_core_limit;
 _HURD_SIGNAL_H_EXTERN_INLINE void *
 _hurd_critical_section_lock (void)
 {
-  struct hurd_sigstate **location =
-    (void *) __hurd_threadvar_location (_HURD_THREADVAR_SIGSTATE);
-  struct hurd_sigstate *ss = *location;
+  struct hurd_sigstate *ss;
+
+#ifdef __LIBC_NO_TLS
+  if (__LIBC_NO_TLS())
+    /* TLS is currently initializing, no need to enter critical section.  */
+    return NULL;
+#endif
+
+  ss = THREAD_SELF->_hurd_sigstate;
   if (ss == NULL)
     {
       /* The thread variable is unset; this must be the first time we've
 	 asked for it.  In this case, the critical section flag cannot
 	 possible already be set.  Look up our sigstate structure the slow
 	 way; this locks the sigstate lock.  */
-      ss = *location = _hurd_thread_sigstate (__mach_thread_self ());
+      ss = THREAD_SELF->_hurd_sigstate = _hurd_thread_sigstate (__mach_thread_self ());
       __spin_unlock (&ss->lock);
     }
 
--- a/hurd/hurd/threadvar.h
+++ b/hurd/hurd/threadvar.h
@@ -20,6 +20,7 @@
 #define	_HURD_THREADVAR_H
 
 #include <features.h>
+#include <tls.h>
 
 /* The per-thread variables are found by ANDing this mask
    with the value of the stack pointer and then adding this offset.
@@ -30,87 +31,24 @@
    __hurd_threadvar_stack_offset to a small offset that skips the data
    cthreads itself maintains at the base of each thread's stack.
 
-   In the single-threaded case, __hurd_threadvar_stack_mask is zero, so the
-   stack pointer is ignored; and __hurd_threadvar_stack_offset gives the
-   address of a small allocated region which contains the variables for the
-   single thread.  */
+   In the single-threaded or libpthread case, __hurd_threadvar_stack_mask is
+   zero, so the stack pointer is ignored. */
 
 extern unsigned long int __hurd_threadvar_stack_mask;
 extern unsigned long int __hurd_threadvar_stack_offset;
 
-/* A special case must always be made for the signal thread.  Even when there
-   is only one user thread and an allocated region can be used for the user
-   thread's variables, the signal thread needs to have its own location for
-   per-thread variables.  The variables __hurd_sigthread_stack_base and
+/* The variables __hurd_sigthread_stack_base and
    __hurd_sigthread_stack_end define the bounds of the stack used by the
    signal thread, so that thread can always be specifically identified.  */
 
 extern unsigned long int __hurd_sigthread_stack_base;
 extern unsigned long int __hurd_sigthread_stack_end;
-extern unsigned long int *__hurd_sigthread_variables;
 
 
-/* At the location described by the two variables above,
-   there are __hurd_threadvar_max `unsigned long int's of per-thread data.  */
+/* We do not use threadvars any more, this is kept as zero for compatibility with cthreads */
 extern unsigned int __hurd_threadvar_max;
 
-/* These values are the indices for the standard per-thread variables.  */
-enum __hurd_threadvar_index
-  {
-    _HURD_THREADVAR_MIG_REPLY,	/* Reply port for MiG user stub functions.  */
-    _HURD_THREADVAR_ERRNO,	/* `errno' value for this thread.  */
-    _HURD_THREADVAR_SIGSTATE,	/* This thread's `struct hurd_sigstate'.  */
-    _HURD_THREADVAR_DYNAMIC_USER, /* Dynamically-assigned user variables.  */
-    _HURD_THREADVAR_MALLOC,	/* For use of malloc.  */
-    _HURD_THREADVAR_DL_ERROR,	/* For use of -ldl and dynamic linker.  */
-    _HURD_THREADVAR_RPC_VARS,	/* For state of RPC functions.  */
-    _HURD_THREADVAR_LOCALE,	/* For thread-local locale setting.  */
-    _HURD_THREADVAR_CTYPE_B,	/* Cache of thread-local locale data.  */
-    _HURD_THREADVAR_CTYPE_TOLOWER, /* Cache of thread-local locale data.  */
-    _HURD_THREADVAR_CTYPE_TOUPPER, /* Cache of thread-local locale data.  */
-    _HURD_THREADVAR_MAX		/* Default value for __hurd_threadvar_max.  */
-  };
-
-
-#ifndef _HURD_THREADVAR_H_EXTERN_INLINE
-#define _HURD_THREADVAR_H_EXTERN_INLINE __extern_inline
-#endif
-
-/* Return the location of the value for the per-thread variable with index
-   INDEX used by the thread whose stack pointer is SP.  */
-
-extern unsigned long int *__hurd_threadvar_location_from_sp
-  (enum __hurd_threadvar_index __index, void *__sp);
-_HURD_THREADVAR_H_EXTERN_INLINE unsigned long int *
-__hurd_threadvar_location_from_sp (enum __hurd_threadvar_index __index,
-				   void *__sp)
-{
-  unsigned long int __stack = (unsigned long int) __sp;
-  return &((__stack >= __hurd_sigthread_stack_base &&
-	    __stack < __hurd_sigthread_stack_end)
-	   ? __hurd_sigthread_variables
-	   : (unsigned long int *) ((__stack & __hurd_threadvar_stack_mask) +
-				    __hurd_threadvar_stack_offset))[__index];
-}
-
-#include <machine-sp.h>		/* Define __thread_stack_pointer.  */
-
-/* Return the location of the current thread's value for the
-   per-thread variable with index INDEX.  */
-
-extern unsigned long int *
-__hurd_threadvar_location (enum __hurd_threadvar_index __index) __THROW
-     /* This declaration tells the compiler that the value is constant
-	given the same argument.  We assume this won't be called twice from
-	the same stack frame by different threads.  */
-     __attribute__ ((__const__));
-
-_HURD_THREADVAR_H_EXTERN_INLINE unsigned long int *
-__hurd_threadvar_location (enum __hurd_threadvar_index __index)
-{
-  return __hurd_threadvar_location_from_sp (__index,
-					    __thread_stack_pointer ());
-}
-
+extern mach_port_t __hurd_reply_port0;
+#define __hurd_local_reply_port (*(__LIBC_NO_TLS() ? &__hurd_reply_port0 : &THREAD_SELF->reply_port))
 
 #endif	/* hurd/threadvar.h */
--- a/hurd/hurdsig.c
+++ b/hurd/hurdsig.c
@@ -20,6 +20,7 @@
 #include <string.h>
 
 #include <cthreads.h>		/* For `struct mutex'.  */
+#include <pthread.h>
 #include <mach.h>
 #include <mach/thread_switch.h>
 
@@ -48,7 +49,6 @@ thread_t _hurd_sigthread;
 /* These are set up by _hurdsig_init.  */
 unsigned long int __hurd_sigthread_stack_base;
 unsigned long int __hurd_sigthread_stack_end;
-unsigned long int *__hurd_sigthread_variables;
 
 /* Linked-list of per-thread signal state.  */
 struct hurd_sigstate *_hurd_sigstates;
@@ -234,11 +237,11 @@ abort_thread (struct hurd_sigstate *ss,
    that this location can be set without faulting, or else return NULL.  */
 
 static mach_port_t *
-interrupted_reply_port_location (struct machine_thread_all_state *thread_state,
+interrupted_reply_port_location (thread_t thread,
+				 struct machine_thread_all_state *thread_state,
 				 int sigthread)
 {
-  mach_port_t *portloc = (mach_port_t *) __hurd_threadvar_location_from_sp
-    (_HURD_THREADVAR_MIG_REPLY, (void *) thread_state->basic.SP);
+  mach_port_t *portloc = &THREAD_TCB(thread, thread_state)->reply_port;
 
   if (sigthread && _hurdsig_catch_memory_fault (portloc))
     /* Faulted trying to read the stack.  */
@@ -323,7 +326,8 @@ _hurdsig_abort_rpcs (struct hurd_sigstat
 	   our nonzero return tells the trampoline code to finish the message
 	   receive operation before running the handler.  */
 
-	mach_port_t *reply = interrupted_reply_port_location (state,
+	mach_port_t *reply = interrupted_reply_port_location (ss->thread,
+							      state,
 							      sigthread);
 	error_t err = __interrupt_operation (intr_port, _hurdsig_interrupt_timeout);
 
@@ -835,7 +839,8 @@ _hurd_internal_post_signal (struct hurd_
 
 	    if (! machine_get_basic_state (ss->thread, &thread_state))
 	      goto sigbomb;
-	    loc = interrupted_reply_port_location (&thread_state, 1);
+	    loc = interrupted_reply_port_location (ss->thread,
+						   &thread_state, 1);
 	    if (loc && *loc != MACH_PORT_NULL)
 	      /* This is the reply port for the context which called
 		 sigreturn.  Since we are abandoning that context entirely
@@ -901,7 +906,8 @@ _hurd_internal_post_signal (struct hurd_
 	{
 	  /* Fetch the thread variable for the MiG reply port,
 	     and set it to MACH_PORT_NULL.  */
-	  mach_port_t *loc = interrupted_reply_port_location (&thread_state,
+	  mach_port_t *loc = interrupted_reply_port_location (ss->thread,
+							      &thread_state,
 							      1);
 	  if (loc)
 	    {
@@ -1255,7 +1261,11 @@ _hurdsig_init (const int *intarray, size
 
   /* Start the signal thread listening on the message port.  */
 
-  if (__hurd_threadvar_stack_mask == 0)
+#pragma weak cthread_fork
+#pragma weak cthread_detach
+#pragma weak pthread_getattr_np
+#pragma weak pthread_attr_getstack
+  if (!cthread_fork)
     {
       err = __thread_create (__mach_task_self (), &_hurd_msgport_thread);
       assert_perror (err);
@@ -1270,14 +1280,6 @@ _hurdsig_init (const int *intarray, size
       assert_perror (err);
 
       __hurd_sigthread_stack_end = __hurd_sigthread_stack_base + stacksize;
-      __hurd_sigthread_variables =
-	malloc (__hurd_threadvar_max * sizeof (unsigned long int));
-      if (__hurd_sigthread_variables == NULL)
-	__libc_fatal ("hurd: Can't allocate threadvars for signal thread\n");
-      memset (__hurd_sigthread_variables, 0,
-	      __hurd_threadvar_max * sizeof (unsigned long int));
-      __hurd_sigthread_variables[_HURD_THREADVAR_LOCALE]
-	= (unsigned long int) &_nl_global_locale;
 
       /* Reinitialize the MiG support routines so they will use a per-thread
 	 variable for the cached reply port.  */
@@ -1288,6 +1290,7 @@ _hurdsig_init (const int *intarray, size
     }
   else
     {
+      cthread_t thread;
       /* When cthreads is being used, we need to make the signal thread a
          proper cthread.  Otherwise it cannot use mutex_lock et al, which
          will be the cthreads versions.  Various of the message port RPC
@@ -1297,9 +1300,20 @@ _hurdsig_init (const int *intarray, size
          we'll let the signal thread's per-thread variables be found as for
          any normal cthread, and just leave the magic __hurd_sigthread_*
          values all zero so they'll be ignored.  */
-#pragma weak cthread_fork
-#pragma weak cthread_detach
-      cthread_detach (cthread_fork ((cthread_fn_t) &_hurd_msgport_receive, 0));
+      cthread_detach (thread = cthread_fork ((cthread_fn_t) &_hurd_msgport_receive, 0));
+
+      if (pthread_getattr_np)
+	{
+	  /* Record stack layout for fork() */
+	  pthread_attr_t attr;
+	  void *addr;
+	  size_t size;
+
+	  pthread_getattr_np ((pthread_t) thread, &attr);
+	  pthread_attr_getstack (&attr, &addr, &size);
+	  __hurd_sigthread_stack_base = (uintptr_t) addr;
+	  __hurd_sigthread_stack_end = __hurd_sigthread_stack_base + size;
+	}
 
       /* XXX We need the thread port for the signal thread further on
          in this thread (see hurdfault.c:_hurdsigfault_init).
--- a/hurd/hurdstartup.c
+++ b/hurd/hurdstartup.c
@@ -23,7 +23,6 @@
 #include <hurd.h>
 #include <hurd/exec_startup.h>
 #include <sysdep.h>
-#include <hurd/threadvar.h>
 #include <unistd.h>
 #include <elf.h>
 #include <set-hooks.h>
--- a/hurd/sigunwind.c
+++ b/hurd/sigunwind.c
@@ -18,6 +18,7 @@
 
 #include <hurd.h>
 #include <thread_state.h>
+#include <hurd/threadvar.h>
 #include <jmpbuf-unwind.h>
 #include <assert.h>
 #include <stdint.h>
@@ -38,8 +39,7 @@ _hurdsig_longjmp_from_handler (void *dat
     {
       /* Destroy the MiG reply port used by the signal handler, and restore
 	 the reply port in use by the thread when interrupted.  */
-      mach_port_t *reply_port =
-	(mach_port_t *) __hurd_threadvar_location (_HURD_THREADVAR_MIG_REPLY);
+      mach_port_t *reply_port = &__hurd_local_reply_port;
       if (*reply_port)
 	{
 	  mach_port_t port = *reply_port;
--- a/include/errno.h
+++ b/include/errno.h
@@ -21,7 +21,7 @@ extern int rtld_errno attribute_hidden;
 
 #  include <tls.h>
 
-#  if !defined(__GNU__)
+#  if !(defined(__GNU__) && defined IS_IN_rtld)
 #   undef  errno
 #   ifndef NOT_IN_libc
 #    define errno __libc_errno
--- a/sysdeps/mach/hurd/Versions
+++ b/sysdeps/mach/hurd/Versions
@@ -6,6 +6,7 @@ libc {
   GLIBC_PRIVATE {
     # Functions shared with the dynamic linker
     __libc_read; __libc_write; __libc_lseek64;
+    __libc_lock_self0; __libc_get_lock_self;
 
     _dl_init_first;
   }
@@ -14,8 +16,6 @@ libc {
 ld {
   GLIBC_2.0 {
     # variables that must be shared with libc
-    __hurd_sigthread_stack_base; __hurd_sigthread_stack_end;
-    __hurd_sigthread_variables;
     __hurd_threadvar_stack_mask;  __hurd_threadvar_stack_offset;
 
     # functions that must be shared with libc
@@ -33,5 +33,6 @@ ld {
 
     # functions that must be shared with libc
     __libc_read; __libc_write; __libc_lseek64;
+    __libc_lock_self0; __libc_get_lock_self;
   }
 }
--- a/sysdeps/mach/hurd/bits/libc-lock.h
+++ b/sysdeps/mach/hurd/bits/libc-lock.h
@@ -24,7 +24,6 @@
 #include <tls.h>
 #endif
 #include <cthreads.h>
-#include <hurd/threadvar.h>
 
 typedef struct mutex __libc_lock_t;
 typedef struct
@@ -35,7 +34,12 @@ typedef struct
 } __libc_lock_recursive_t;
 typedef __libc_lock_recursive_t __rtld_lock_recursive_t;
 
-#define __libc_lock_owner_self() ((void *) __hurd_threadvar_location (0))
+extern char __libc_lock_self0[0];
+/* We have to hide the __libc_lock_self access behind a function call,
+   otherwise gcc >= 4.9 would try to prefetch the TLS dereference even before
+   the __LIBC_NO_TLS test is finished... */
+extern void *__libc_get_lock_self(void);
+#define __libc_lock_owner_self() (__LIBC_NO_TLS() ? &__libc_lock_self0 : __libc_get_lock_self())
 
 #else
 typedef struct __libc_lock_opaque__ __libc_lock_t;
--- a/sysdeps/mach/hurd/bits/libc-tsd.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* libc-internal interface for thread-specific data.  Hurd version.
-   Copyright (C) 1998-2014 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _BITS_LIBC_TSD_H
-#define _BITS_LIBC_TSD_H 1
-
-#include <hurd/threadvar.h>
-
-#define __libc_tsd_define(CLASS, TYPE, KEY) /* nothing, always have threadvars */
-
-#define __libc_tsd_address(TYPE, KEY) \
-  ((TYPE *) __hurd_threadvar_location (_HURD_THREADVAR_##KEY))
-
-#define __libc_tsd_get(TYPE, KEY) \
-  (*__libc_tsd_address (TYPE, KEY))
-#define __libc_tsd_set(TYPE, KEY, VALUE) \
-  (*__libc_tsd_address (TYPE, KEY) = (VALUE))
-
-#endif	/* bits/libc-tsd.h */
--- a/sysdeps/mach/hurd/cthreads.c
+++ b/sysdeps/mach/hurd/cthreads.c
@@ -19,6 +19,15 @@
 #include <errno.h>
 #include <stdlib.h>
 
+char __libc_lock_self0[0];
+static __thread char __libc_lock_self[0];
+
+void *
+__libc_get_lock_self(void)
+{
+  return (void*) &__libc_lock_self;
+}
+
 /* Placeholder for key creation routine from Hurd cthreads library.  */
 int
 weak_function
--- a/sysdeps/mach/hurd/dl-sysdep.c
+++ b/sysdeps/mach/hurd/dl-sysdep.c
@@ -64,25 +64,6 @@ hp_timing_t _dl_cpuclock_offset;
 
 struct hurd_startup_data *_dl_hurd_data;
 
-/* This is used only within ld.so, via dl-minimal.c's __errno_location.  */
-#undef errno
-int errno attribute_hidden;
-
-/* Defining these variables here avoids the inclusion of hurdsig.c.  */
-unsigned long int __hurd_sigthread_stack_base;
-unsigned long int __hurd_sigthread_stack_end;
-unsigned long int *__hurd_sigthread_variables;
-
-/* Defining these variables here avoids the inclusion of init-first.c.
-   We need to provide temporary storage for the per-thread variables
-   of the main user thread here, since it is used for storing the
-   `errno' variable.  Note that this information is lost once we
-   relocate the dynamic linker.  */
-static unsigned long int threadvars[_HURD_THREADVAR_MAX];
-unsigned long int __hurd_threadvar_stack_offset
-  = (unsigned long int) &threadvars;
-unsigned long int __hurd_threadvar_stack_mask;
-
 #define FMH defined(__i386__)
 #if ! FMH
 # define fmh()		((void)0)
--- a/sysdeps/mach/hurd/errno-loc.c
+++ b/sysdeps/mach/hurd/errno-loc.c
@@ -16,13 +16,21 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <errno.h>
-#include <hurd/threadvar.h>
-
-int *
+#ifdef IS_IN_rtld
+/*
+ * rtld can not access TLS too early, thus rtld_errno.
+ *
+ * Instead of making __open/__close pass errno from TLS to rtld_errno, simply
+ * use a weak __errno_location using rtld_errno, which will be overriden by the
+ * libc definition.
+ */
+static int rtld_errno;
+int * weak_function
 __errno_location (void)
 {
-  return (int *) __hurd_threadvar_location (_HURD_THREADVAR_ERRNO);
+  return &rtld_errno;
 }
-strong_alias (__errno_location, __hurd_errno_location)
-libc_hidden_def (__errno_location)
+libc_hidden_weak (__errno_location)
+#else
+#include <../../../csu/errno-loc.c>
+#endif
--- a/sysdeps/mach/hurd/errno.c
+++ /dev/null
@@ -1 +0,0 @@
-/* No definition of `errno' variable on the Hurd.  */
--- a/sysdeps/mach/hurd/fork.c
+++ b/sysdeps/mach/hurd/fork.c
@@ -19,6 +19,7 @@
 #include <unistd.h>
 #include <hurd.h>
 #include <hurd/signal.h>
+#include <hurd/threadvar.h>
 #include <setjmp.h>
 #include <thread_state.h>
 #include <sysdep.h>		/* For stack growth direction.  */
@@ -504,19 +505,17 @@ __fork (void)
 				    (natural_t *) &state, &statecount))
 	LOSE;
 #if STACK_GROWTH_UP
-#define THREADVAR_SPACE (__hurd_threadvar_max \
-			 * sizeof *__hurd_sightread_variables)
       if (__hurd_sigthread_stack_base == 0)
 	{
 	  state.SP &= __hurd_threadvar_stack_mask;
-	  state.SP += __hurd_threadvar_stack_offset + THREADVAR_SPACE;
+	  state.SP += __hurd_threadvar_stack_offset;
 	}
       else
 	state.SP = __hurd_sigthread_stack_base;
 #else
       if (__hurd_sigthread_stack_end == 0)
 	{
-	  /* The signal thread has a normal stack assigned by cthreads.
+	  /* The signal thread has a stack assigned by cthreads.
 	     The threadvar_stack variables conveniently tell us how
 	     to get to the highest address in the stack, just below
 	     the per-thread variables.  */
--- a/sysdeps/mach/hurd/i386/init-first.c
+++ b/sysdeps/mach/hurd/i386/init-first.c
@@ -152,15 +152,6 @@ init (int *data)
   char **argv = (void *) (data + 1);
   char **envp = &argv[argc + 1];
   struct hurd_startup_data *d;
-  unsigned long int threadvars[_HURD_THREADVAR_MAX];
-
-  /* Provide temporary storage for thread-specific variables on the
-     startup stack so the cthreads initialization code can use them
-     for malloc et al, or so we can use malloc below for the real
-     threadvars array.  */
-  memset (threadvars, 0, sizeof threadvars);
-  threadvars[_HURD_THREADVAR_LOCALE] = (unsigned long int) &_nl_global_locale;
-  __hurd_threadvar_stack_offset = (unsigned long int) threadvars;
 
   /* Since the cthreads initialization code uses malloc, and the
      malloc initialization code needs to get at the environment, make
@@ -206,13 +197,6 @@ init (int *data)
   __pthread_initialize_minimal();
 #endif
 
-  /* The user might have defined a value for this, to get more variables.
-     Otherwise it will be zero on startup.  We must make sure it is set
-     properly before before cthreads initialization, so cthreads can know
-     how much space to leave for thread variables.  */
-  if (__hurd_threadvar_max < _HURD_THREADVAR_MAX)
-    __hurd_threadvar_max = _HURD_THREADVAR_MAX;
-
 
   /* After possibly switching stacks, call `init1' (above) with the user
      code as the return address, and the argument data immediately above
@@ -228,11 +212,6 @@ init (int *data)
 
       __libc_stack_end = newsp;
 
-      /* Copy per-thread variables from that temporary
-	 area onto the new cthread stack.  */
-      memcpy (__hurd_threadvar_location_from_sp (0, newsp),
-	      threadvars, sizeof threadvars);
-
       /* Copy the argdata from the old stack to the new one.  */
       newsp = memcpy (newsp - ((char *) &d[1] - (char *) data), data,
 		      (char *) d - (char *) data);
@@ -273,25 +252,11 @@ init (int *data)
     }
   else
     {
-      /* We are not using cthreads, so we will have just a single allocated
-	 area for the per-thread variables of the main user thread.  */
-      unsigned long int *array;
       unsigned int i;
       int usercode;
 
       void call_init1 (void);
 
-      array = malloc (__hurd_threadvar_max * sizeof (unsigned long int));
-      if (array == NULL)
-	__libc_fatal ("Can't allocate single-threaded thread variables.");
-
-      /* Copy per-thread variables from the temporary array into the
-	 newly malloc'd space.  */
-      memcpy (array, threadvars, sizeof threadvars);
-      __hurd_threadvar_stack_offset = (unsigned long int) array;
-      for (i = _HURD_THREADVAR_MAX; i < __hurd_threadvar_max; ++i)
-	array[i] = 0;
-
       /* The argument data is just above the stack frame we will unwind by
 	 returning.  Mutate our own return address to run the code below.  */
       /* The following expression would typically be written as
--- a/sysdeps/mach/hurd/i386/makecontext-helper.c
+++ b/sysdeps/mach/hurd/i386/makecontext-helper.c
@@ -22,6 +22,7 @@
 #include <string.h>
 #include <ucontext.h>
 
+#if 0
 
 void
 __makecontext_helper (ucontext_t *ucp)
@@ -67,3 +68,4 @@ __makecontext_helper (ucontext_t *ucp)
       ucp->uc_stack.ss_size -= t_size;
     }
 }
+#endif
--- a/sysdeps/mach/hurd/i386/makecontext.S
+++ b/sysdeps/mach/hurd/i386/makecontext.S
@@ -27,7 +27,7 @@ ENTRY(__makecontext)
 	subl	$4, %esp
 	cfi_adjust_cfa_offset (4)
 	movl	%eax, (%esp)
-	call	HIDDEN_JUMPTARGET (__makecontext_helper)
+	/* call	HIDDEN_JUMPTARGET (__makecontext_helper) */
 	addl	$4, %esp
 	cfi_adjust_cfa_offset (-4)
 
--- a/sysdeps/mach/hurd/i386/sigreturn.c
+++ b/sysdeps/mach/hurd/i386/sigreturn.c
@@ -77,8 +77,7 @@ __sigreturn (struct sigcontext *scp)
 
   /* Destroy the MiG reply port used by the signal handler, and restore the
      reply port in use by the thread when interrupted.  */
-  reply_port =
-    (mach_port_t *) __hurd_threadvar_location (_HURD_THREADVAR_MIG_REPLY);
+  reply_port = &__hurd_local_reply_port;
   if (*reply_port)
     {
       mach_port_t port = *reply_port;
--- a/sysdeps/mach/hurd/i386/tls.h
+++ b/sysdeps/mach/hurd/i386/tls.h
@@ -47,6 +47,8 @@ typedef struct
   void *__private_tm[4];
   /* GCC split stack support.  */
   void *__private_ss;
+  mach_port_t reply_port;      /* This thread's reply port.  */
+  struct hurd_sigstate *_hurd_sigstate;
 } tcbhead_t;
 #endif
 
@@ -53,6 +53,17 @@
       | (((unsigned int) (tcb)) & 0xff000000) /* base 24..31 */		      \
     }
 
+# define HURD_DESC_TLS(desc)						      \
+  ({									      \
+   (tcbhead_t *) (   (desc->low_word >> 16)				      \
+                  | ((desc->high_word & 0xff) << 16)			      \
+                  |  (desc->high_word & 0xff000000)			      \
+     );})
+
+#define __LIBC_NO_TLS()							      \
+  ({ unsigned short ds, gs;						      \
+     asm ("movw %%ds,%w0; movw %%gs,%w1" : "=q" (ds), "=q" (gs));	      \
+     ds == gs; })
 
 static inline const char * __attribute__ ((unused))
 _hurd_tls_init (tcbhead_t *tcb, int secondcall)
@@ -126,6 +137,20 @@ _hurd_tls_init (tcbhead_t *tcb, int seco
 	      : "i" (offsetof (tcbhead_t, tcb)));			      \
      __tcb;})
 
+/* Return the TCB address of a thread given its state.  */
+# define THREAD_TCB(thread, thread_state)				      \
+  ({ int __sel = (thread_state)->basic.gs;				      \
+     struct descriptor __desc, *___desc = &__desc;			      \
+     unsigned int __count = 1;						      \
+     kern_return_t __err;						      \
+     if (__builtin_expect (__sel, 0x48) & 4) /* LDT selector */		      \
+       __err = __i386_get_ldt ((thread), __sel, 1, &___desc, &__count);	      \
+     else								      \
+       __err = __i386_get_gdt ((thread), __sel, &__desc);		      \
+     assert_perror (__err);						      \
+     assert (__count == 1);						      \
+     HURD_DESC_TLS(___desc);})
+
 /* Install new dtv for current thread.  */
 # define INSTALL_NEW_DTV(dtvp)						      \
   ({ asm volatile ("movl %0,%%gs:%P1"					      \
--- a/sysdeps/mach/hurd/mig-reply.c
+++ b/sysdeps/mach/hurd/mig-reply.c
@@ -18,26 +18,20 @@
 #include <mach.h>
 #include <hurd/threadvar.h>
 
-#define GETPORT \
-  mach_port_t *portloc = \
-    (mach_port_t *) __hurd_threadvar_location (_HURD_THREADVAR_MIG_REPLY)
-#define reply_port (*(use_threadvar ? portloc : &global_reply_port))
-
-static int use_threadvar;
-static mach_port_t global_reply_port;
-
 /* These functions are called by MiG-generated code.  */
 
+mach_port_t __hurd_reply_port0;
+
 /* Called by MiG to get a reply port.  */
 mach_port_t
 __mig_get_reply_port (void)
 {
-  GETPORT;
-
-  if (reply_port == MACH_PORT_NULL)
-    reply_port = __mach_reply_port ();
+  if (__hurd_local_reply_port == MACH_PORT_NULL ||
+      (&__hurd_local_reply_port != &__hurd_reply_port0
+       && __hurd_local_reply_port == __hurd_reply_port0))
+    __hurd_local_reply_port = __mach_reply_port ();
 
-  return reply_port;
+  return __hurd_local_reply_port;
 }
 weak_alias (__mig_get_reply_port, mig_get_reply_port)
 
@@ -45,12 +39,8 @@ weak_alias (__mig_get_reply_port, mig_ge
 void
 __mig_dealloc_reply_port (mach_port_t arg)
 {
-  mach_port_t port;
-
-  GETPORT;
-
-  port = reply_port;
-  reply_port = MACH_PORT_NULL;	/* So the mod_refs RPC won't use it.  */
+  mach_port_t port = __hurd_local_reply_port;
+  __hurd_local_reply_port = MACH_PORT_NULL;	/* So the mod_refs RPC won't use it.  */
 
   if (MACH_PORT_VALID (port))
     __mach_port_mod_refs (__mach_task_self (), port,
@@ -73,15 +63,6 @@ weak_alias (__mig_put_reply_port, mig_pu
 void
 __mig_init (void *stack)
 {
-  use_threadvar = stack != 0;
-
-  if (use_threadvar)
-    {
-      /* Recycle the reply port used before multithreading was enabled.  */
-      mach_port_t *portloc = (mach_port_t *)
-	__hurd_threadvar_location_from_sp (_HURD_THREADVAR_MIG_REPLY, stack);
-      *portloc = global_reply_port;
-      global_reply_port = MACH_PORT_NULL;
-    }
+  /* Do nothing.  */
 }
 weak_alias (__mig_init, mig_init)
--- a/sysdeps/mach/hurd/profil.c
+++ b/sysdeps/mach/hurd/profil.c
@@ -139,7 +139,7 @@ __profil (u_short *sample_buffer, size_t
 weak_alias (__profil, profil)
 
 /* Fetch PC samples.  This function must be very careful not to depend
-   on Hurd threadvar variables.  We arrange that by using a special
+   on Hurd TLS variables.  We arrange that by using a special
    stub arranged for at the end of this file. */
 static void
 fetch_samples (void)
@@ -175,7 +175,7 @@ fetch_samples (void)
 }
 
 
-/* This function must be very careful not to depend on Hurd threadvar
+/* This function must be very careful not to depend on Hurd TLS
    variables.  We arrange that by using special stubs arranged for at the
    end of this file. */
 static void
@@ -267,7 +267,7 @@ text_set_element (_hurd_fork_child_hook,
    are fatal in profile_waiter anyhow. */
 #define __mig_put_reply_port(foo)
 
-/* Use our static variable instead of the usual threadvar mechanism for
+/* Use our static variable instead of the usual TLS mechanism for
    this. */
 #define __mig_get_reply_port() profil_reply_port
 
