File: speedy-tls.h

package info (click to toggle)
spring 0.81.2.1%2Bdfsg1-6
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 28,496 kB
  • ctags: 37,096
  • sloc: cpp: 238,659; ansic: 13,784; java: 12,175; awk: 3,428; python: 1,159; xml: 738; perl: 405; sh: 297; makefile: 267; pascal: 228; objc: 192
file content (151 lines) | stat: -rw-r--r-- 9,552 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
//Copyright (C) 2008 Kevin Hoffman. See LICENSE for use and warranty disclaimer.
//Speedy TLS 1.0. Latest version at http://www.kevinjhoffman.com/
//Contains macros that can be used to very quickly (one instruction) access thread-local memory.

#ifndef __SPEEDY_TLS_H__
#define __SPEEDY_TLS_H__

//Allocates numBytes of memory; the amount is rounded up to the nearest page.
//Call this at most once per thread: the result of a second call on the same thread is undefined.
//Returns 0 on success, otherwise a system error.
extern int speedy_tls_init(int numBytes);

//Makes the memory region starting at addr the thread-local storage area.
//numBytes is the size of that region and must be a multiple of the page size.
//Call this at most once per thread: the result of a second call on the same thread is undefined.
//Returns 0 on success, otherwise a system error.
extern int speedy_tls_init_foraddr(void *addr, int numBytes);

//Returns the base address of the thread-local storage area or NULL if not initialized.
//Declared with (void) so that C compilers treat this as a real prototype and reject stray arguments
//(an empty parameter list leaves the arguments unchecked in C).
void* speedy_tls_get_base(void);

//Per-architecture setup: marks the target as x86-family (__intel__), names the segment register
//used by the TLS macros, and provides the pointer <-> 32-bit integer conversion helpers.
//The register name is spelled with "%%" because it is pasted into extended-asm templates.
#if defined(__x86_64__)

//64-bit x86: the TLS macros address memory through gs.
#define __intel__
#define __speedy_tls_reg__ "%%gs"

//The intermediate long long int cast avoids converting directly between a pointer and int.
#define speedy_tls_ptr_to_int32(x) ((int)(long long int)(x))
#define speedy_tls_int32_to_ptr(x) ((void*)(long long int)(x))

#elif defined(__i386__)

//32-bit x86: the TLS macros address memory through fs.
#define __intel__
#define __speedy_tls_reg__ "%%fs"

#define speedy_tls_ptr_to_int32(x) ((int)(x))
#define speedy_tls_int32_to_ptr(x) ((void*)(x))

#else
#error Fast TLS operations have not yet been implemented for this architecture. Please contribute.
#endif

//We can have a common assembly implementation for x86 architecture (uses fs on x86 and gs on x64).
#ifdef __intel__

//Informational only; declared only for Intel (x86 / x86-64) targets.
//Declared with (void) so that C compilers treat this as a real prototype and reject stray arguments.
int speedy_tls_get_number_ldt_entries(void);

//-------------------------------------------------------------------------------------------------------------------------------------
//MACROS TO GET AND SET TLS VALUES (SAFE FOR SMP)
//-------------------------------------------------------------------------------------------------------------------------------------

//Loads the 8/16/32-bit value at <seg>:(base + index*scale) into output, where <seg> is __speedy_tls_reg__.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//output receives a register named according to its own C type, so give it the width named by the macro.
//The int8 form uses the "q" constraint so that a byte-addressable register is chosen on 32-bit x86.
//The "memory" clobber tells the compiler that the asm reads memory not listed as an operand.
//Note: each expansion already ends in ';'.
#define speedy_tls_get_int8(base, index, scale, output) \
	__asm__ __volatile__ ( "movb " __speedy_tls_reg__ ":(%1,%2," #scale "), %0" \
		: "=q"(output) : "r"(base), "r"(index) : "memory" );
#define speedy_tls_get_int16(base, index, scale, output) \
	__asm__ __volatile__ ( "movw " __speedy_tls_reg__ ":(%1,%2," #scale "), %0" \
		: "=r"(output) : "r"(base), "r"(index) : "memory" );
#define speedy_tls_get_int32(base, index, scale, output) \
	__asm__ __volatile__ ( "movl " __speedy_tls_reg__ ":(%1,%2," #scale "), %0" \
		: "=r"(output) : "r"(base), "r"(index) : "memory" );

//Stores the 8/16/32-bit value input at <seg>:(base + index*scale), where <seg> is __speedy_tls_reg__.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//input is passed in a register named according to its own C type, so give it the width named by the macro.
//The int8 form uses the "q" constraint so that a byte-addressable register is chosen on 32-bit x86.
//The "memory" clobber tells the compiler that the asm writes memory not listed as an operand.
//Note: each expansion already ends in ';'.
#define speedy_tls_put_int8(base, index, scale, input) \
	__asm__ __volatile__ ( "movb %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "q"(input), "r"(base), "r"(index) : "memory" );
#define speedy_tls_put_int16(base, index, scale, input) \
	__asm__ __volatile__ ( "movw %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "r"(input), "r"(base), "r"(index) : "memory" );
#define speedy_tls_put_int32(base, index, scale, input) \
	__asm__ __volatile__ ( "movl %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "r"(input), "r"(base), "r"(index) : "memory" );

//-------------------------------------------------------------------------------------------------------------------------------------
//MACROS THAT USE TLS VALUES (NOT SAFE FOR SMP)
//-------------------------------------------------------------------------------------------------------------------------------------

//Adds input to the 8/16/32-bit value at <seg>:(base + index*scale), where <seg> is __speedy_tls_reg__.
//No lock prefix is used, which is why these are listed as not safe for SMP.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//input is passed in a register named according to its own C type, so give it the width named by the macro.
//The int8 form uses the "q" constraint so that a byte-addressable register is chosen on 32-bit x86.
//The "memory" clobber tells the compiler that the asm modifies memory not listed as an operand.
//Note: each expansion already ends in ';'.
#define speedy_tls_add_int8(base, index, scale, input) \
	__asm__ __volatile__ ( "addb %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "q"(input), "r"(base), "r"(index) : "memory" );
#define speedy_tls_add_int16(base, index, scale, input) \
	__asm__ __volatile__ ( "addw %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "r"(input), "r"(base), "r"(index) : "memory" );
#define speedy_tls_add_int32(base, index, scale, input) \
	__asm__ __volatile__ ( "addl %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "r"(input), "r"(base), "r"(index) : "memory" );

//Increments the 8/16/32-bit value at <seg>:(base + index*scale), where <seg> is __speedy_tls_reg__.
//No lock prefix is used, which is why these are listed as not safe for SMP.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//The "memory" clobber tells the compiler that the asm modifies memory not listed as an operand.
//Note: each expansion already ends in ';'.
#define speedy_tls_inc_int8(base, index, scale) \
	__asm__ __volatile__ ( "incb " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );
#define speedy_tls_inc_int16(base, index, scale) \
	__asm__ __volatile__ ( "incw " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );
#define speedy_tls_inc_int32(base, index, scale) \
	__asm__ __volatile__ ( "incl " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );

//Decrements the 8/16/32-bit value at <seg>:(base + index*scale), where <seg> is __speedy_tls_reg__.
//No lock prefix is used, which is why these are listed as not safe for SMP.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//The "memory" clobber tells the compiler that the asm modifies memory not listed as an operand.
//Note: each expansion already ends in ';'.
#define speedy_tls_dec_int8(base, index, scale) \
	__asm__ __volatile__ ( "decb " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );
#define speedy_tls_dec_int16(base, index, scale) \
	__asm__ __volatile__ ( "decw " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );
#define speedy_tls_dec_int32(base, index, scale) \
	__asm__ __volatile__ ( "decl " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );

//-------------------------------------------------------------------------------------------------------------------------------------
//MACROS THAT USE TLS VALUES (SAFE FOR SMP)
//-------------------------------------------------------------------------------------------------------------------------------------

//Adds input to the 8/16/32-bit value at <seg>:(base + index*scale) using a lock-prefixed add,
//where <seg> is __speedy_tls_reg__.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//input is passed in a register named according to its own C type, so give it the width named by the macro.
//The int8 form uses the "q" constraint so that a byte-addressable register is chosen on 32-bit x86.
//The "memory" clobber tells the compiler that the asm modifies memory not listed as an operand,
//so it does not cache or reorder memory accesses across the locked operation.
//Note: each expansion already ends in ';'.
#define speedy_tls_atomic_add_int8(base, index, scale, input) \
	__asm__ __volatile__ ( "lock; addb %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "q"(input), "r"(base), "r"(index) : "memory" );
#define speedy_tls_atomic_add_int16(base, index, scale, input) \
	__asm__ __volatile__ ( "lock; addw %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "r"(input), "r"(base), "r"(index) : "memory" );
#define speedy_tls_atomic_add_int32(base, index, scale, input) \
	__asm__ __volatile__ ( "lock; addl %0, " __speedy_tls_reg__ ":(%1,%2," #scale ")" \
		: : "r"(input), "r"(base), "r"(index) : "memory" );

//Increments the 8/16/32-bit value at <seg>:(base + index*scale) using a lock-prefixed inc,
//where <seg> is __speedy_tls_reg__.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//The "memory" clobber tells the compiler that the asm modifies memory not listed as an operand,
//so it does not cache or reorder memory accesses across the locked operation.
//Note: each expansion already ends in ';'.
#define speedy_tls_atomic_inc_int8(base, index, scale) \
	__asm__ __volatile__ ( "lock; incb " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );
#define speedy_tls_atomic_inc_int16(base, index, scale) \
	__asm__ __volatile__ ( "lock; incw " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );
#define speedy_tls_atomic_inc_int32(base, index, scale) \
	__asm__ __volatile__ ( "lock; incl " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );

//Decrements the 8/16/32-bit value at <seg>:(base + index*scale) using a lock-prefixed dec,
//where <seg> is __speedy_tls_reg__.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//The "memory" clobber tells the compiler that the asm modifies memory not listed as an operand,
//so it does not cache or reorder memory accesses across the locked operation.
//Note: each expansion already ends in ';'.
#define speedy_tls_atomic_dec_int8(base, index, scale) \
	__asm__ __volatile__ ( "lock; decb " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );
#define speedy_tls_atomic_dec_int16(base, index, scale) \
	__asm__ __volatile__ ( "lock; decw " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );
#define speedy_tls_atomic_dec_int32(base, index, scale) \
	__asm__ __volatile__ ( "lock; decl " __speedy_tls_reg__ ":(%0,%1," #scale ")" \
		: : "r"(base), "r"(index) : "memory" );

//Gets value of TLS integer and saves in original_value, and then adds input to the TLS integer.
//Both steps are done by one lock-prefixed xadd on <seg>:(base + index*scale), where <seg> is __speedy_tls_reg__.
//scale is stringized into the addressing mode, so it must be a literal x86 scale factor (1, 2, 4 or 8).
//base and index are passed in general registers and must have the same register width.
//input and original_value share one register (matching constraint "0"), so they should have the same
//C type, of the width named by the macro.
//The int8 form uses the "q" constraint so that a byte-addressable register is chosen on 32-bit x86.
//The "memory" clobber tells the compiler that the asm modifies memory not listed as an operand,
//so it does not cache or reorder memory accesses across the locked operation.
//Note: each expansion already ends in ';'.
#define speedy_tls_atomic_get_and_add_int8(base, index, scale, input, original_value) \
	__asm__ __volatile__ ( "lock; xaddb %0, " __speedy_tls_reg__ ":(%2,%3," #scale ")" \
		: "=q"(original_value) : "0"(input), "r"(base), "r"(index) : "memory" );
#define speedy_tls_atomic_get_and_add_int16(base, index, scale, input, original_value) \
	__asm__ __volatile__ ( "lock; xaddw %0, " __speedy_tls_reg__ ":(%2,%3," #scale ")" \
		: "=r"(original_value) : "0"(input), "r"(base), "r"(index) : "memory" );
#define speedy_tls_atomic_get_and_add_int32(base, index, scale, input, original_value) \
	__asm__ __volatile__ ( "lock; xaddl %0, " __speedy_tls_reg__ ":(%2,%3," #scale ")" \
		: "=r"(original_value) : "0"(input), "r"(base), "r"(index) : "memory" );

//-------------------------------------------------------------------------------------------------------------------------------------
//MACROS THAT USE LOCAL VARIABLES INSTEAD OF TLS (NOT SMP SAFE)
//-------------------------------------------------------------------------------------------------------------------------------------

//Saves value of var in original_value and then adds num to var and saves in var.
//Implemented as a single xadd on var's memory without a lock prefix, which is why these are
//listed as not SMP safe.
//num and original_value share one register (matching constraint "0"), so they should have the same
//C type, of the width named by the macro; var must be an lvalue of that width in memory.
//The int8 form uses the "q" constraint so that a byte-addressable register is chosen on 32-bit x86.
//Note: each expansion already ends in ';'.
#define speedy_local_get_and_add_int8(var, num, original_value) \
        __asm__ __volatile__ ( "xaddb %0,%1" \
                : "=q" (original_value), "+m" (var) : "0" (num) : "memory" );
#define speedy_local_get_and_add_int16(var, num, original_value) \
        __asm__ __volatile__ ( "xaddw %0,%1" \
                : "=r" (original_value), "+m" (var) : "0" (num) : "memory" );
#define speedy_local_get_and_add_int32(var, num, original_value) \
        __asm__ __volatile__ ( "xaddl %0,%1" \
                : "=r" (original_value), "+m" (var) : "0" (num) : "memory" );

//-------------------------------------------------------------------------------------------------------------------------------------
//MACROS THAT USE LOCAL VARIABLES INSTEAD OF TLS (SMP SAFE)
//-------------------------------------------------------------------------------------------------------------------------------------

//Saves value of var in original_value and then adds num to var and saves in var.
//Implemented as a single lock-prefixed xadd on var's memory.
//num and original_value share one register (matching constraint "0"), so they should have the same
//C type, of the width named by the macro; var must be an lvalue of that width in memory.
//The int8 form uses the "q" constraint so that a byte-addressable register is chosen on 32-bit x86.
//Note: each expansion already ends in ';'.
#define speedy_local_atomic_get_and_add_int8(var, num, original_value) \
        __asm__ __volatile__ ( "lock; xaddb %0,%1" \
                : "=q" (original_value), "+m" (var) : "0" (num) : "memory" );
#define speedy_local_atomic_get_and_add_int16(var, num, original_value) \
        __asm__ __volatile__ ( "lock; xaddw %0,%1" \
                : "=r" (original_value), "+m" (var) : "0" (num) : "memory" );
#define speedy_local_atomic_get_and_add_int32(var, num, original_value) \
        __asm__ __volatile__ ( "lock; xaddl %0,%1" \
                : "=r" (original_value), "+m" (var) : "0" (num) : "memory" );


#endif //__intel__

#endif //__SPEEDY_TLS_H__