/*
* Copyright (c) 2000-2005 by Hewlett-Packard Company. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
* OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program
* for any purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is granted,
* provided the above notices are retained, and a notice that the code was
* modified is included with the above copyright notice.
*/
#include "private/gc_priv.h"
#if defined(THREAD_LOCAL_ALLOC)
#ifndef THREADS
# error "invalid config - THREAD_LOCAL_ALLOC requires GC_THREADS"
#endif
#include "private/thread_local_alloc.h"
#include <stdlib.h>
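/* GC_thread_key associates each registered thread with its GC_tlfs    */
/* structure, which holds that thread's local free lists.  When a      */
/* compiler TLS qualifier is available, the key variable itself lives  */
/* in fast thread-local storage; the matching GC_key_create,           */
/* GC_setspecific and GC_getspecific variants are selected in          */
/* private/thread_local_alloc.h.                                       */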
#if defined(USE_COMPILER_TLS)
__thread GC_ATTR_TLS_FAST
#elif defined(USE_WIN32_COMPILER_TLS)
__declspec(thread) GC_ATTR_TLS_FAST
#endif
GC_key_t GC_thread_key;
static GC_bool keys_initialized;
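/* Note: a thread-local free-list entry holds either a pointer to the  */
/* first free object of that size (a value above HBLKSIZE) or a small  */
/* count/marker used while the thread still allocates such objects     */
/* through the global allocator; hence the (word) comparisons against  */
/* HBLKSIZE throughout this file.  See private/thread_local_alloc.h    */
/* for the exact encoding.                                             */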
/* Return a single nonempty freelist fl to the global one pointed to */
/* by gfl. */
static void return_single_freelist(void *fl, void **gfl)
{
if (*gfl == 0) {
*gfl = fl;
} else {
void *q, **qptr;
GC_ASSERT(GC_size(fl) == GC_size(*gfl));
/* Concatenate: */
qptr = &(obj_link(fl));
while ((word)(q = *qptr) >= HBLKSIZE)
qptr = &(obj_link(q));
GC_ASSERT(0 == q);
*qptr = *gfl;
*gfl = fl;
}
}
/* Recover the contents of the freelist array fl into the global one gfl.*/
/* We hold the allocator lock. */
static void return_freelists(void **fl, void **gfl)
{
int i;
for (i = 1; i < TINY_FREELISTS; ++i) {
if ((word)(fl[i]) >= HBLKSIZE) {
return_single_freelist(fl[i], &gfl[i]);
}
/* Clear fl[i], since the thread structure may hang around. */
/* Do it in a way that is likely to trap if we access it. */
fl[i] = (ptr_t)HBLKSIZE;
}
/* The 0 granule freelist really contains 1 granule objects. */
# ifdef GC_GCJ_SUPPORT
if (fl[0] == ERROR_FL) return;
# endif
if ((word)(fl[0]) >= HBLKSIZE) {
return_single_freelist(fl[0], &gfl[1]);
}
}
#ifdef USE_PTHREAD_SPECIFIC
/* Re-set the TLS value on thread cleanup to allow thread-local */
/* allocations to happen in the TLS destructors. */
/* GC_unregister_my_thread (and similar routines) will finally set */
/* the GC_thread_key to NULL preventing this destructor from being */
/* called repeatedly. */
static void reset_thread_key(void* v) {
pthread_setspecific(GC_thread_key, v);
}
#else
# define reset_thread_key 0
#endif
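/* In the remaining configurations no TLS destructor is required, so   */
/* GC_key_create() below simply receives a null cleanup function.      */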
/* Each thread structure must be initialized. */
/* This call must be made from the new thread. */
GC_INNER void GC_init_thread_local(GC_tlfs p)
{
int i, j, res;
GC_ASSERT(I_HOLD_LOCK());
if (!EXPECT(keys_initialized, TRUE)) {
# ifdef USE_CUSTOM_SPECIFIC
/* Ensure proper alignment of a "pushed" GC symbol. */
GC_ASSERT((word)&GC_thread_key % sizeof(word) == 0);
# endif
res = GC_key_create(&GC_thread_key, reset_thread_key);
if (COVERT_DATAFLOW(res) != 0) {
ABORT("Failed to create key for local allocator");
}
keys_initialized = TRUE;
}
res = GC_setspecific(GC_thread_key, p);
if (COVERT_DATAFLOW(res) != 0) {
ABORT("Failed to set thread specific allocation pointers");
}
for (j = 0; j < TINY_FREELISTS; ++j) {
for (i = 0; i < THREAD_FREELISTS_KINDS; ++i) {
p -> _freelists[i][j] = (void *)(word)1;
}
# ifdef GC_GCJ_SUPPORT
p -> gcj_freelists[j] = (void *)(word)1;
# endif
}
/* The size 0 free lists are handled like the regular free lists, */
/* to ensure that the explicit deallocation works. However, */
/* allocation of a size 0 "gcj" object is always an error. */
# ifdef GC_GCJ_SUPPORT
p -> gcj_freelists[0] = ERROR_FL;
# endif
}
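/* Return all of the thread's local free lists (including the "gcj"    */
/* ones, if configured) to the corresponding global ones; called from  */
/* the thread-support code when the thread is unregistered, before     */
/* its GC_tlfs structure is discarded.                                 */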
/* We hold the allocator lock. */
GC_INNER void GC_destroy_thread_local(GC_tlfs p)
{
int k;
/* We currently only do this from the thread itself. */
GC_STATIC_ASSERT(THREAD_FREELISTS_KINDS <= MAXOBJKINDS);
for (k = 0; k < THREAD_FREELISTS_KINDS; ++k) {
if (k == (int)GC_n_kinds)
break; /* kind is not created */
return_freelists(p -> _freelists[k], GC_obj_kinds[k].ok_freelist);
}
# ifdef GC_GCJ_SUPPORT
return_freelists(p -> gcj_freelists, (void **)GC_gcjobjfreelist);
# endif
}
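/* Allocate an object of the given kind, preferring the calling        */
/* thread's local free lists and falling back to                       */
/* GC_malloc_kind_global() whenever thread-local allocation cannot be  */
/* used (a kind with no thread-local lists, key not yet created, or    */
/* no thread-specific data set up yet).  For the predefined kinds      */
/* this is roughly what the classic entry points amount to;            */
/* illustrative only:                                                  */
/*   void *p = GC_malloc_kind(lb, NORMAL);   (like GC_malloc(lb))      */
/*   void *q = GC_malloc_kind(lb, PTRFREE);  (like GC_malloc_atomic)   */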
GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc_kind(size_t bytes, int kind)
{
size_t granules;
void *tsd;
void *result;
# if MAXOBJKINDS > THREAD_FREELISTS_KINDS
if (EXPECT(kind >= THREAD_FREELISTS_KINDS, FALSE)) {
return GC_malloc_kind_global(bytes, kind);
}
# endif
# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
{
GC_key_t k = GC_thread_key;
if (EXPECT(0 == k, FALSE)) {
/* We haven't yet run GC_init_parallel. That means */
/* we also aren't locking, so this is fairly cheap. */
return GC_malloc_kind_global(bytes, kind);
}
tsd = GC_getspecific(k);
}
# else
if (!EXPECT(keys_initialized, TRUE))
return GC_malloc_kind_global(bytes, kind);
tsd = GC_getspecific(GC_thread_key);
# endif
# if !defined(USE_COMPILER_TLS) && !defined(USE_WIN32_COMPILER_TLS)
if (EXPECT(0 == tsd, FALSE)) {
return GC_malloc_kind_global(bytes, kind);
}
# endif
GC_ASSERT(GC_is_initialized);
GC_ASSERT(GC_is_thread_tsd_valid(tsd));
granules = ROUNDED_UP_GRANULES(bytes);
# if defined(CPPCHECK)
# define MALLOC_KIND_PTRFREE_INIT (void*)1
# else
# define MALLOC_KIND_PTRFREE_INIT NULL
# endif
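/* Pop an object of the requested granule count from this thread's */
/* free list for the given kind if one is immediately available; */
/* otherwise the macro either refills the list or falls back to the */
/* GC_malloc_kind_global() expression supplied below. The last */
/* argument clears the free-list link word of the result for */
/* pointer-containing kinds; MALLOC_KIND_PTRFREE_INIT above exists */
/* only to keep cppcheck quiet in the PTRFREE case. */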
GC_FAST_MALLOC_GRANS(result, granules,
((GC_tlfs)tsd) -> _freelists[kind], DIRECT_GRANULES,
kind, GC_malloc_kind_global(bytes, kind),
(void)(kind == PTRFREE ? MALLOC_KIND_PTRFREE_INIT
: (obj_link(result) = 0)));
# ifdef LOG_ALLOCS
GC_log_printf("GC_malloc_kind(%lu, %d) returned %p, recent GC #%lu\n",
(unsigned long)bytes, kind, result,
(unsigned long)GC_gc_no);
# endif
return result;
}
#ifdef GC_GCJ_SUPPORT
# include "gc_gcj.h"
/* Gcj-style allocation without locks is extremely tricky. The */
/* fundamental issue is that we may end up marking a free list, which */
/* has freelist links instead of "vtable" pointers. That is usually */
/* OK, since the next object on the free list will be cleared, and */
/* will thus be interpreted as containing a zero descriptor. That's */
/* fine if the object has not yet been initialized. But there are */
/* interesting potential races. */
/* In the case of incremental collection, this seems hopeless, since */
/* the marker may run asynchronously, and may pick up the pointer to */
/* the next freelist entry (which it thinks is a vtable pointer), get */
/* suspended for a while, and then see an allocated object instead */
/* of the vtable. This may be avoidable with either a handshake with */
/* the collector or, probably more easily, by moving the free list */
/* links to the second word of each object. The latter isn't a */
/* universal win, since on architectures like Itanium, nonzero offsets */
/* are not necessarily free. And there may be cache fill order issues. */
/* For now, we punt with incremental GC. This probably means that */
/* incremental GC should be enabled before we fork a second thread. */
/* Unlike the other thread local allocation calls, we assume that the */
/* collector has been explicitly initialized. */
GC_API GC_ATTR_MALLOC void * GC_CALL GC_gcj_malloc(size_t bytes,
void * ptr_to_struct_containing_descr)
{
if (EXPECT(GC_incremental, FALSE)) {
return GC_core_gcj_malloc(bytes, ptr_to_struct_containing_descr);
} else {
size_t granules = ROUNDED_UP_GRANULES(bytes);
void *result;
void **tiny_fl;
GC_ASSERT(GC_gcjobjfreelist != NULL);
tiny_fl = ((GC_tlfs)GC_getspecific(GC_thread_key))->gcj_freelists;
GC_FAST_MALLOC_GRANS(result, granules, tiny_fl, DIRECT_GRANULES,
GC_gcj_kind,
GC_core_gcj_malloc(bytes,
ptr_to_struct_containing_descr),
{AO_compiler_barrier();
*(void **)result = ptr_to_struct_containing_descr;});
/* This forces the initialization of the "method ptr". */
/* This is necessary to ensure some very subtle properties */
/* required if a GC is run in the middle of such an allocation. */
/* Here we implicitly also assume atomicity for the free list */
/* and method pointer assignments. */
/* We must update the freelist before we store the pointer. */
/* Otherwise a GC at this point would see a corrupted */
/* free list. */
/* A real memory barrier is not needed, since the */
/* action of stopping this thread will cause prior writes */
/* to complete. */
/* We assert that any concurrent marker will stop us. */
/* Thus it is impossible for a mark procedure to see the */
/* allocation of the next object, but to see this object */
/* still containing a free list pointer. Otherwise the */
/* marker, by misinterpreting the freelist link as a vtable */
/* pointer, might find a random "mark descriptor" in the next */
/* object. */
return result;
}
}
#endif /* GC_GCJ_SUPPORT */
/* The thread support layer must arrange to mark thread-local */
/* free lists explicitly, since the link field is often */
/* invisible to the marker. It knows how to find all threads; */
/* we take care of an individual thread freelist structure. */
GC_INNER void GC_mark_thread_local_fls_for(GC_tlfs p)
{
ptr_t q;
int i, j;
for (j = 0; j < TINY_FREELISTS; ++j) {
for (i = 0; i < THREAD_FREELISTS_KINDS; ++i) {
/* Load the pointer atomically as it might be updated */
/* concurrently by GC_FAST_MALLOC_GRANS. */
q = (ptr_t)AO_load((volatile AO_t *)&p->_freelists[i][j]);
if ((word)q > HBLKSIZE)
GC_set_fl_marks(q);
}
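/* Skip gcj_freelists[0]: it holds ERROR_FL rather than a real */
/* free-list pointer (see GC_init_thread_local above). */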
# ifdef GC_GCJ_SUPPORT
if (EXPECT(j > 0, TRUE)) {
q = (ptr_t)AO_load((volatile AO_t *)&p->gcj_freelists[j]);
if ((word)q > HBLKSIZE)
GC_set_fl_marks(q);
}
# endif
}
}
#if defined(GC_ASSERTIONS)
/* Check that all thread-local free-lists in p are completely marked. */
void GC_check_tls_for(GC_tlfs p)
{
int i, j;
for (j = 1; j < TINY_FREELISTS; ++j) {
for (i = 0; i < THREAD_FREELISTS_KINDS; ++i) {
GC_check_fl_marks(&p->_freelists[i][j]);
}
# ifdef GC_GCJ_SUPPORT
GC_check_fl_marks(&p->gcj_freelists[j]);
# endif
}
}
#endif /* GC_ASSERTIONS */
#endif /* THREAD_LOCAL_ALLOC */