File: tcb.c

package info (click to toggle)
optee-os 4.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 31,560 kB
  • sloc: ansic: 441,914; asm: 12,903; python: 3,719; makefile: 1,676; sh: 238
file content (207 lines) | stat: -rw-r--r-- 6,141 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
// SPDX-License-Identifier: BSD-2-Clause
/*
 * Copyright (c) 2020, Huawei Technologies Co., Ltd
 */
/*
 * Support for Thread-Local Storage (TLS) ABIs for ARMv7/Aarch32 and Aarch64.
 *
 * TAs are currently single-threaded, so the only benefit of implementing these
 * ABIs is to support toolchains that need them even when the target program is
 * single-threaded. One example is the g++ compiler from the GCC toolchain
 * targeting a "Posix thread" Linux runtime, which OP-TEE has been using for
 * quite some time (arm-linux-gnueabihf-* and aarch64-linux-gnu-*). This allows
 * building C++ TAs without having to build a specific toolchain with
 * --disable-threads.
 *
 * This implementation is based on [1].
 *
 *  - "TLS data structures variant 1" (section 3): the AArch64 compiler uses the
 *    TPIDR_EL0 to access TLS data directly. This assumes a specific layout for
 *    the TCB, and (for shared objects) the use of R_AARCH64_TLS_TPREL
 *    relocations.
 *  - The "General Dynamic access model" (section 4.1): the ARMv7/Aarch32
 *    compiler inserts calls to the __tls_get_addr() function which has to be
 *    implemented by the runtime library. The function takes a module ID and an
 *    offset parameter, which are provided thanks to R_ARM_TLS_DTPMOD32 and
 *    R_ARM_TLS_DTPOFF32 relocations.
 *
 * In addition, dl_iterate_phdr() is implemented here, because it is used by the
 * g++ Aarch64 exception handling and it does use the TCB to provide TLS
 * information to the caller.
 *
 * [1] "ELF Handling For Thread-Local Storage"
 *     https://www.akkadia.org/drepper/tls.pdf
 */

#include <arm64_user_sysreg.h>
#include <assert.h>
#include <link.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>
#include "user_ta_header.h"

/* DTV - Dynamic Thread Vector
 *
 * Maintains an array of pointers to TLS data for each module in the TCB. Each
 * module that has a TLS segment has an entry (and consequently, some space in
 * the tcb_head::tls buffer). The index is the "module ID".
 * dtv[0].size is the number of ELF modules described by the vector, as
 *             stored by __utee_tcb_init()
 * dtv[1].tls points to TLS for the main executable (may be NULL)
 * dtv[2 .. size].tls are for shared libraries
 *
 * Element 0 is only ever accessed through the 'size' member and all other
 * elements only through the 'tls' member.
 */
union dtv {
	unsigned long size;
	uint8_t *tls;
};

/*
 * Allocation size in bytes of a DTV: one leading union dtv element plus
 * @nbytes additional bytes for the remaining elements.
 */
#define DTV_SIZE(nbytes) (sizeof(union dtv) + (nbytes))

/*
 * Thread Control Block
 *
 * Allocated as one buffer of TCB_SIZE(<total TLS size>) bytes: the two-word
 * header below immediately followed by the TLS data.
 */
struct tcb_head {
	/* Two words are reserved as per the "TLS variant 1" ABI */
	/* First word: pointer to the separately allocated Dynamic Thread Vector */
	union dtv *dtv;
	/* Second word: not used by the code in this file */
	unsigned long reserved;
	/*
	 * The rest of the structure contains the TLS blocks for each ELF module
	 * having a PT_TLS segment. Each block is a copy of the .tdata section
	 * plus some zero-initialized space for .tbss.
	 */
	uint8_t tls[];
};

/*
 * Since TAs are single threaded, only one TCB is needed. This would need to
 * change if multi-threading is introduced.
 *
 * _tcb stays NULL until __utee_tcb_init() finds at least one TLS segment.
 */
static struct tcb_head *_tcb;
/*
 * Number of bytes of _tcb->tls that have been set up so far, i.e. the total
 * TLS size computed by the last call to __utee_tcb_init() that (re-)allocated
 * the TCB.
 */
static size_t _tls_size;

/*
 * Allocation size in bytes of a TCB: the fixed header followed by @nbytes of
 * TLS data.
 */
#define TCB_SIZE(nbytes) (sizeof(*_tcb) + (nbytes))

/*
 * Initialize or update the TCB.
 * Called on application initialization and when additional shared objects are
 * loaded via dlopen().
 *
 * The TLS blocks of all the ELF modules that have a PT_TLS segment are packed
 * back to back in tcb_head::tls, in module order. Only the first PT_TLS
 * segment of a module is considered. If the total TLS size is the same as on
 * the previous call, nothing is done. Otherwise the TCB and the DTV are
 * (re-)allocated, every DTV entry is pointed at its block in the current TCB
 * buffer, and the TLS image of each module that was not covered by the
 * previous call is copied in. Blocks set up by a previous call are not
 * re-initialized; this relies on the reallocation preserving the existing
 * buffer contents.
 *
 * Calls abort() if memory cannot be allocated.
 */
void __utee_tcb_init(void)
{
	struct dl_phdr_info *dlpi = NULL;
	const Elf_Phdr *phdr = NULL;
	size_t total_size = 0;
	size_t size = 0;
	size_t i = 0;
	size_t j = 0;

	/* Compute the size needed for all the TLS blocks */
	for (i = 0; i < __elf_phdr_info.count; i++) {
		dlpi = __elf_phdr_info.dlpi + i;
		for (j = 0; j < dlpi->dlpi_phnum; j++) {
			phdr = dlpi->dlpi_phdr + j;
			if (phdr->p_type == PT_TLS) {
				total_size += phdr->p_memsz;
				break;
			}
		}
	}

	/* ELF modules currently cannot be unmapped */
	assert(total_size >= _tls_size);

	if (total_size == _tls_size)
		return;

	/* (Re-)allocate the TCB */
	_tcb = malloc_flags(MAF_ZERO_INIT, _tcb, 1, TCB_SIZE(total_size));
	if (!_tcb) {
		EMSG("TCB allocation failed (%zu bytes)", TCB_SIZE(total_size));
		abort();
	}

	/* (Re-)allocate the DTV. + 1 since dtv[0] holds the size */
	size = DTV_SIZE((__elf_phdr_info.count + 1) * sizeof(union dtv));
	_tcb->dtv = malloc_flags(MAF_ZERO_INIT, _tcb->dtv, 1, size);
	if (!_tcb->dtv) {
		EMSG("DTV allocation failed (%zu bytes)", size);
		abort();
	}

	/*
	 * Set up the DTV and copy the TLS data of the new modules to the TCB.
	 * From here on, 'size' is the offset in _tcb->tls of the block of the
	 * module being processed.
	 */
	size = 0;
	for (i = 0; i < __elf_phdr_info.count; i++) {
		dlpi = __elf_phdr_info.dlpi + i;
		for (j = 0; j < dlpi->dlpi_phnum; j++) {
			phdr = dlpi->dlpi_phdr + j;
			if (phdr->p_type != PT_TLS)
				continue;
			/*
			 * Always refresh the DTV entry, including for modules
			 * handled by a previous call: the TCB may have been
			 * moved by the reallocation above, in which case the
			 * old entry would point into the released buffer.
			 */
			_tcb->dtv[i + 1].tls = _tcb->tls + size;
			if (size + phdr->p_memsz > _tls_size) {
				/* New module: copy .tdata */
				memcpy(_tcb->tls + size,
				       (void *)(dlpi->dlpi_addr + phdr->p_vaddr),
				       phdr->p_filesz);
				/* Initialize .tbss */
				memset(_tcb->tls + size + phdr->p_filesz, 0,
				       phdr->p_memsz - phdr->p_filesz);
			}
			/*
			 * Step over this block whether or not it was copied,
			 * so that the next module gets the right offset.
			 */
			size += phdr->p_memsz;
			/* Only the first PT_TLS segment was accounted for */
			break;
		}
	}
	_tcb->dtv[0].size = i;

	_tls_size = total_size;
#ifdef ARM64
	/*
	 * Aarch64 ABI requirement: the thread pointer shall point to the
	 * thread's TCB. ARMv7 and Aarch32 access the TCB via __tls_get_addr().
	 */
	write_tpidr_el0((vaddr_t)_tcb);
#endif
}

/*
 * Argument of __tls_get_addr(): identifies one thread-local variable.
 */
struct tls_index {
	/* Module ID, used as the index into the DTV */
	unsigned long module;
	/* Byte offset of the variable from the start of the module's TLS block */
	unsigned long offset;
};

/* Declared here so that the definition below has a visible prototype */
void *__tls_get_addr(struct tls_index *ti);

/*
 * Return the address of the thread-local variable described by @ti: the start
 * of the TLS block recorded in the DTV for ti->module, advanced by ti->offset
 * bytes. No validation is performed on the module ID or on the offset.
 */
void *__tls_get_addr(struct tls_index *ti)
{
	uint8_t *module_tls = _tcb->dtv[ti->module].tls;

	return module_tls + ti->offset;
}

/*
 * Walk the list of ELF modules, calling @callback once for each of them.
 *
 * @callback is passed a private copy of the module's struct dl_phdr_info, the
 * size of that structure and @data. In the copy, dlpi_tls_data is set to the
 * module's TLS block as recorded in the DTV when the module has a non-zero TLS
 * module ID, and to NULL otherwise. The copy is only valid for the duration of
 * the call.
 *
 * Iteration stops as soon as @callback returns a non-zero value, as specified
 * for dl_iterate_phdr(). Returns the value returned by the last call to
 * @callback, or 0 if there was no module to report.
 *
 * Calls abort() if the temporary struct dl_phdr_info cannot be allocated.
 */
int dl_iterate_phdr(int (*callback)(struct dl_phdr_info *, size_t, void *),
		    void *data)
{
	struct dl_phdr_info *dlpi = NULL;
	size_t id = 0;
	size_t i = 0;
	int st = 0;

	/*
	 * dlpi_tls_data is thread-specific so if we were to support
	 * multi-threading, we would need one copy of struct dl_phdr_info per
	 * thread. Could be a pre-allocated area, or could be allocated on the
	 * heap. Doing the latter here so that it would at least work if/when we
	 * add thread support. Further optimization can always come later.
	 */
	dlpi = calloc(1, sizeof(*dlpi));
	if (!dlpi) {
		EMSG("dl_phdr_info allocation failed");
		abort();
	}

	for (i = 0; i < __elf_phdr_info.count; i++) {
		memcpy(dlpi, __elf_phdr_info.dlpi + i, sizeof(*dlpi));
		dlpi->dlpi_tls_data = NULL;
		id = dlpi->dlpi_tls_modid;
		if (id)
			dlpi->dlpi_tls_data = _tcb->dtv[id].tls;
		st = callback(dlpi, sizeof(*dlpi), data);
		/* A non-zero return value is a request to stop iterating */
		if (st)
			break;
	}

	free(dlpi);
	return st;
}