File: addr-map.c

package info (click to toggle)
systemtap 5.1-5
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 47,964 kB
  • sloc: cpp: 80,838; ansic: 54,757; xml: 49,725; exp: 43,665; sh: 11,527; python: 5,003; perl: 2,252; tcl: 1,312; makefile: 1,006; javascript: 149; lisp: 105; awk: 101; asm: 91; java: 70; sed: 16
file content (371 lines) | stat: -rw-r--r-- 11,036 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/* -*- linux-c -*- 
 * Map of addresses to disallow.
 * Copyright (C) 2005-2019 Red Hat Inc.
 *
 * This file is part of systemtap, and is free software.  You can
 * redistribute it and/or modify it under the terms of the GNU General
 * Public License (GPL); either version 2, or (at your option) any
 * later version.
 */

#ifndef _STAPLINUX_ADDR_MAP_C_
#define _STAPLINUX_ADDR_MAP_C_ 1

/** @file addr-map
 * @brief Implements functions used by the deref macro to blocklist
 * certain addresses.
 */

// PR12970 (populate runtime/bad-addr database) hasn't been
// fixed. Until we're ready to populate the bad address database,
// let's provide a "junior" version of lookup_bad_addr(), since that
// will allow us to skip grabbing the lock and searching the empty
// list.
//
// See <https://sourceware.org/bugzilla/show_bug.cgi?id=12970> for
// more details.

#include <linux/runtime.h>
#include <linux/uaccess.h>

#ifndef PR12970

#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
#include <asm/processor.h> /* For TASK_SIZE */
#endif


#ifdef STAPCONF_ASM_ACCESS_OK
/* access_ok() is designed for user context only and calling it from
 * perf event etc probe contexts would result in kernel warnings
 * on debug kernels. see upstream kernel commit d319f34456 for more details.
 */
#define stp_access_ok(x,y,z) __access_ok(y,z)
#elif defined(STAPCONF_ACCESS_OK_2ARGS)
#define stp_access_ok(x,y,z) access_ok(y,z)
#else
#define stp_access_ok(x,y,z) access_ok(x,y,z)
#endif

#ifdef STAPCONF_NMI_UACCESS_OKAY
#include <linux/mm.h>
#include <asm/tlbflush.h>
#endif

static inline bool
stp_nmi_uaccess_okay(void)
{
#ifdef STAPCONF_NMI_UACCESS_OKAY
    /* older kernels (like 5.0) still have this as an inline function in
     * their arch-specific headers */
    return nmi_uaccess_okay();
#else
    /* newer kernels (since 5.18) no longer export nmi_uaccess_okay()
     * but we can still get it from the kernel symbol tables. */
    if (kallsyms_nmi_uaccess_okay != NULL)
      return ibt_wrapper(bool, ((bool (*)(void)) kallsyms_nmi_uaccess_okay)());

    return true;  /* don't bother */
#endif
}

static __always_inline int
lookup_bad_addr_user(const int type, const unsigned long addr, const size_t size)
{
  /* Is this a valid memory access? */

  /* We must call stp_nmi_uaccess_okay() to protect races against CR3
   * context switching on X86. See upstream kernel commits 4012e77a90
   * and d319f344561de for more details.
   */
  if (!stp_nmi_uaccess_okay())
      return 1;

  /* Note we're using stp_access_ok() here. This is only supposed to be
   * called when we're in task context. Occasionally lookup_bad_addr()
   * gets called when not in task context. If in_task() exists, only
   * call stp_access_ok() when we're in task context (otherwise we'll get
   * a kernel warning). If we aren't in task context, we'll just do a
   * range check.
   */
#if !defined(in_task) || LINUX_VERSION_CODE >= KERNEL_VERSION(5,18,0)
  /* NB access_ok() checks TASK_SIZE_MAX (used to be user_addr_max()) on
   * all architectures since kernel 5.18 */
  if (size == 0 || ULONG_MAX - addr < size - 1
      || !stp_access_ok(type, (void *)addr, size))
    return 1;
#else
  if (size == 0 || ULONG_MAX - addr < size - 1
      || (in_task() && !stp_access_ok(type, (void *)addr, size))
      || (!in_task()
#if defined(user_addr_max)
	  && ((user_addr_max() - size) < addr)
#elif defined(TASK_SIZE_MAX)
	  && ((TASK_SIZE_MAX - size) < addr)
#endif
	      ))
    return 1;
#endif

#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
  /* Unprivileged users must not access memory while the context
     does not refer to their own process.  */
  if (! is_myproc ())
    return 1;
  /* Unprivileged users must not access kernel space memory.  */
  if (addr + size > TASK_SIZE)
    return 1;
#endif

  return 0;
}

static __always_inline int
lookup_bad_addr(const int type, const unsigned long addr, const size_t size,
                const stp_mm_segment_t seg)
{
  /* Is this a valid memory access?
   *
   * PR26811: Since kernel 5.10 due to set_fs() removal we need to
   * distinguish kernel- and user-space addresses when asking this
   * question.
   */
  if (stp_is_user_ds(seg))
    return lookup_bad_addr_user(type, addr, size);

  /* For kernel addr, skip the in_task() portion of the address checks: */
  if (size == 0 || ULONG_MAX - addr < size - 1)
    return 1;

  /* XXX: Kernel reads call copy_from_kernel_nofault() which also
     checks copy_from_kernel_fault_allowed(),
     currently a stub function returning true -- not worth invoking
     ourselves through kallsyms. Keep an eye on this, though. */

#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
  /* Unprivileged users must not access memory while the context
     does not refer to their own process.  */
  if (! is_myproc ())
    return 1;
  /* Unprivileged users must not access kernel space memory.  */
  if (addr + size > TASK_SIZE)
    return 1;
#endif

  return 0;
}

#else /* PR12970 */

struct addr_map_entry
{
  unsigned long min;
  unsigned long max;
};

struct addr_map
{
  size_t size;
  struct addr_map_entry entries[0];
};

static STP_DEFINE_RWLOCK(addr_map_lock);
static struct addr_map* blockmap;

/* Find address of entry where we can insert a new one. */
static size_t
upper_bound(unsigned long addr, struct addr_map* map)
{
  size_t start = 0;
  size_t end = map->size;
  struct addr_map_entry *entry = 0;

  if (end == 0)
    return 0;
  do
    {
      size_t new_idx;
      if (addr < map->entries[start].min)
        return start;
      if (addr >= map->entries[end-1].max)
        return end;
      new_idx = (end + start) / 2;
      entry = &map->entries[new_idx];
      if (addr < entry->min)
        end = new_idx;
      else
        start = new_idx + 1;
    } while (end != start);
  return entry - &map->entries[0];
}

/* Search for an entry in the given map which overlaps the range specified by
   addr and size.  */
static struct addr_map_entry*
lookup_addr_aux(unsigned long addr, size_t size, struct addr_map* map)
{
  size_t start = 0;
  size_t end;

  /* No map? No matching entry. */
  if (!map)
    return 0;
  /* Empty map? No matching entry.  */
  if (map->size == 0)
    return 0;

  /* Use binary search on the sorted map entries.  */
  end = map->size;
  do
    {
      int entry_idx;
      struct addr_map_entry *entry = 0;
      /* If the target range is beyond those of the remaining entries in the
	 map, then a matching entry will not be found  */
      if (addr + size <= map->entries[start].min ||
	  addr >= map->entries[end - 1].max)
        return 0;
      /* Choose the map entry at the mid point of the remaining entries.  */
      entry_idx = (end + start) / 2;
      entry = &map->entries[entry_idx];
      /* If our range overlaps the range of this entry, then we have a match. */
      if (addr + size > entry->min && addr < entry->max)
        return entry;
      /* If our range is below the range of this entry, then cull the entries
	 above this entry, otherwise cull the entries below it.  */
      if (addr + size <= entry->min)
        end = entry_idx;
      else
        start = entry_idx + 1;
    } while (start < end);
  return 0;
}

#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
#include <asm/processor.h> /* For TASK_SIZE */
#endif

static int
lookup_bad_addr(const int type, const unsigned long addr, const size_t size)
{
  struct addr_map_entry* result = 0;
  unsigned long flags;

  /* Is this a valid memory access?  */
  if (size == 0 || ULONG_MAX - addr < size - 1
      || !stp_access_ok(type, (void *)addr, size))
    return 1;

#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
    ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
  /* Unprivileged users must not access memory while the context
     does not refer to their own process.  */
  if (! is_myproc ())
    return 1;
  /* Unprivileged users must not access kernel space memory.  */
  if (addr + size > TASK_SIZE)
    return 1;
#endif

  /* Search for the given range in the block-listed map.  */
  stp_read_lock_irqsave(&addr_map_lock, flags);
  result = lookup_addr_aux(addr, size, blockmap);
  stp_read_unlock_irqrestore(&addr_map_lock, flags);
  if (result)
    return 1;
  else
    return 0;
}


static int
add_bad_addr_entry(unsigned long min_addr, unsigned long max_addr,
                   struct addr_map_entry** existing_min,
                   struct addr_map_entry** existing_max)
{
  struct addr_map* new_map = 0;
  struct addr_map* old_map = 0;
  struct addr_map_entry* min_entry = 0;
  struct addr_map_entry* max_entry = 0;
  struct addr_map_entry* new_entry = 0;
  size_t existing = 0;
  unsigned long flags;

  /* Loop allocating memory for a new entry in the map. */
  while (1)
    {
      size_t old_size = 0;
      stp_write_lock_irqsave(&addr_map_lock, flags);
      old_map = blockmap;
      if (old_map)
        old_size = old_map->size;
      /* Either this is the first time through the loop, or we
         allocated a map previous time, but someone has come in and
         added an entry while we were sleeping. */
      if (!new_map || (new_map && new_map->size < old_size + 1))
        {
          stp_write_unlock_irqrestore(&addr_map_lock, flags);
          if (new_map)
            {
	      _stp_kfree(new_map);
              new_map = 0;
            }
          new_map = _stp_kmalloc(sizeof(*new_map)
				 + sizeof(*new_entry) * (old_size + 1));
          if (!new_map)
            return -ENOMEM;
          new_map->size = old_size + 1;
        }
      else
        break;
    }
  if (!blockmap)
    {
      existing = 0;
    }
  else
    {
      min_entry = lookup_addr_aux(min_addr, 1, blockmap);
      max_entry = lookup_addr_aux(max_addr, 1, blockmap);
      if (min_entry || max_entry)
        {
          if (existing_min)
            *existing_min = min_entry;
          if (existing_max)
            *existing_max = max_entry;
          stp_write_unlock_irqrestore(&addr_map_lock, flags);
          _stp_kfree(new_map);
          return 1;
        }
      existing = upper_bound(min_addr, old_map);
    }
  new_entry = &new_map->entries[existing];
  new_entry->min = min_addr;
  new_entry->max = max_addr;
  if (old_map)
    {
      memcpy(&new_map->entries, old_map->entries,
             existing * sizeof(*new_entry));
      if (old_map->size > existing)
        memcpy(new_entry + 1, &old_map->entries[existing],
               (old_map->size - existing) * sizeof(*new_entry));
    }
  blockmap = new_map;
  stp_write_unlock_irqrestore(&addr_map_lock, flags);
  if (old_map)
    _stp_kfree(old_map);
  return 0;
}

static void
delete_bad_addr_entry(struct addr_map_entry* entry)
{
}

#endif /* PR12970 */

#endif /* _STAPLINUX_ADDR_MAP_C_ */