/* -*- linux-c -*-
* Map of addresses to disallow.
* Copyright (C) 2005-2019 Red Hat Inc.
*
* This file is part of systemtap, and is free software. You can
* redistribute it and/or modify it under the terms of the GNU General
* Public License (GPL); either version 2, or (at your option) any
* later version.
*/
#ifndef _STAPLINUX_ADDR_MAP_C_
#define _STAPLINUX_ADDR_MAP_C_ 1
/** @file addr-map
* @brief Implements functions used by the deref macro to blocklist
* certain addresses.
*/
// PR12970 (populate runtime/bad-addr database) hasn't been
// fixed. Until we're ready to populate the bad address database,
// let's provide a "junior" version of lookup_bad_addr(), since that
// will allow us to skip grabbing the lock and searching the empty
// list.
//
// See <https://sourceware.org/bugzilla/show_bug.cgi?id=12970> for
// more details.
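// Illustrative caller sketch (not the literal macro expansion, which lives
// in loc2c-runtime.h and varies by kernel version; argument names are
// purely illustrative and follow the non-PR12970 variant below): the deref
// machinery is expected to reject an access roughly like
//
//   if (lookup_bad_addr(type, (unsigned long) addr, size, seg))
//     /* ...take the deref-fault path... */;
//   else
//     /* ...perform the actual copy... */;
//
// so lookup_bad_addr() must stay cheap enough to run on every dereference.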
#include <linux/runtime.h>
#include <linux/uaccess.h>
#ifndef PR12970
#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
#include <asm/processor.h> /* For TASK_SIZE */
#endif
#ifdef STAPCONF_ASM_ACCESS_OK
/* access_ok() is designed for use from task (user) context only; calling
 * it from perf-event and other non-task probe contexts would trigger
 * kernel warnings on debug kernels. See upstream kernel commit
 * d319f34456 for more details.
 */
#define stp_access_ok(x,y,z) __access_ok(y,z)
#elif defined(STAPCONF_ACCESS_OK_2ARGS)
#define stp_access_ok(x,y,z) access_ok(y,z)
#else
#define stp_access_ok(x,y,z) access_ok(x,y,z)
#endif
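/* Example of the shim above: on a kernel whose access_ok() takes only
 * (addr, size), a call such as
 *   stp_access_ok(type, (void *) addr, size)
 * expands to access_ok((void *) addr, size) and the type argument is
 * simply discarded; on older three-argument kernels all three arguments
 * are passed through unchanged. */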
#ifdef STAPCONF_NMI_UACCESS_OKAY
#include <linux/mm.h>
#include <asm/tlbflush.h>
#endif
static inline bool
stp_nmi_uaccess_okay(void)
{
#ifdef STAPCONF_NMI_UACCESS_OKAY
/* older kernels (like 5.0) still have this as an inline function in
* their arch-specific headers */
return nmi_uaccess_okay();
#else
/* newer kernels (since 5.18) no longer export nmi_uaccess_okay()
* but we can still get it from the kernel symbol tables. */
if (kallsyms_nmi_uaccess_okay != NULL)
return ibt_wrapper(bool, ((bool (*)(void)) kallsyms_nmi_uaccess_okay)());
return true; /* don't bother */
#endif
}
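/* Sketch (assumption, not shown in this file): kallsyms_nmi_uaccess_okay
 * is expected to be resolved elsewhere in the runtime before any probe can
 * reach this path, conceptually something like
 *   kallsyms_nmi_uaccess_okay =
 *     (void *) kallsyms_lookup_name("nmi_uaccess_okay");
 * If that lookup fails and the pointer stays NULL, the fallback above
 * optimistically reports that user access is okay. */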
static __always_inline int
lookup_bad_addr_user(const int type, const unsigned long addr, const size_t size)
{
/* Is this a valid memory access? */
/* We must call stp_nmi_uaccess_okay() to protect against races with CR3
 * context switching on x86. See upstream kernel commits 4012e77a90
 * and d319f344561de for more details.
 */
if (!stp_nmi_uaccess_okay())
return 1;
/* Note we're using stp_access_ok() here, which is only supposed to be
 * called from task context, yet lookup_bad_addr() occasionally gets
 * called when we're not in task context. If in_task() exists, only
 * call stp_access_ok() when we're actually in task context (otherwise
 * we'd get a kernel warning); when we aren't, fall back to a plain
 * range check against the user address limit.
 */
#if !defined(in_task) || LINUX_VERSION_CODE >= KERNEL_VERSION(5,18,0)
/* NB access_ok() checks TASK_SIZE_MAX (used to be user_addr_max()) on
* all architectures since kernel 5.18 */
if (size == 0 || ULONG_MAX - addr < size - 1
|| !stp_access_ok(type, (void *)addr, size))
return 1;
#else
if (size == 0 || ULONG_MAX - addr < size - 1
|| (in_task() && !stp_access_ok(type, (void *)addr, size))
|| (!in_task()
#if defined(user_addr_max)
&& ((user_addr_max() - size) < addr)
#elif defined(TASK_SIZE_MAX)
&& ((TASK_SIZE_MAX - size) < addr)
#endif
))
return 1;
#endif
#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
/* Unprivileged users must not access memory while the context
does not refer to their own process. */
if (! is_myproc ())
return 1;
/* Unprivileged users must not access kernel space memory. */
if (addr + size > TASK_SIZE)
return 1;
#endif
return 0;
}
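/* Worked example of the wraparound check above: with addr = ULONG_MAX - 3
 * and size = 8, ULONG_MAX - addr == 3 while size - 1 == 7, so the range
 * [addr, addr + size) would wrap past the top of the address space and the
 * access is rejected before access_ok() is ever consulted. */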
static __always_inline int
lookup_bad_addr(const int type, const unsigned long addr, const size_t size,
const stp_mm_segment_t seg)
{
/* Is this a valid memory access?
*
* PR26811: Since kernel 5.10 due to set_fs() removal we need to
* distinguish kernel- and user-space addresses when asking this
* question.
*/
if (stp_is_user_ds(seg))
return lookup_bad_addr_user(type, addr, size);
/* For kernel addr, skip the in_task() portion of the address checks: */
if (size == 0 || ULONG_MAX - addr < size - 1)
return 1;
/* XXX: Kernel reads go through copy_from_kernel_nofault(), which also
   checks copy_from_kernel_nofault_allowed() -- a weak stub returning
   true on most architectures -- so it is not worth invoking that check
   ourselves through kallsyms. Keep an eye on this, though. */
#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
/* Unprivileged users must not access memory while the context
does not refer to their own process. */
if (! is_myproc ())
return 1;
/* Unprivileged users must not access kernel space memory. */
if (addr + size > TASK_SIZE)
return 1;
#endif
return 0;
}
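/* Illustrative calls (assuming the runtime's usual STP_USER_DS /
 * STP_KERNEL_DS segment values; the type argument is shown generically):
 *   lookup_bad_addr(type, uaddr, len, STP_USER_DS);   // user path above
 *   lookup_bad_addr(type, kaddr, len, STP_KERNEL_DS); // overflow and
 *                                                     // privilege checks only
 * A non-zero return means the deref must fault instead of touching memory. */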
#else /* PR12970 */
struct addr_map_entry
{
unsigned long min;
unsigned long max;
};
struct addr_map
{
size_t size;
struct addr_map_entry entries[0];
};
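/* Layout note: a map holding N entries lives in a single allocation of
 * sizeof(struct addr_map) + N * sizeof(struct addr_map_entry) bytes, which
 * is exactly how add_bad_addr_entry() below sizes its _stp_kmalloc() call;
 * entries are kept sorted by address so both searches below can bisect. */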
static STP_DEFINE_RWLOCK(addr_map_lock);
static struct addr_map* blockmap;
/* Find address of entry where we can insert a new one. */
static size_t
upper_bound(unsigned long addr, struct addr_map* map)
{
size_t start = 0;
size_t end = map->size;
struct addr_map_entry *entry = 0;
if (end == 0)
return 0;
do
{
size_t new_idx;
if (addr < map->entries[start].min)
return start;
if (addr >= map->entries[end-1].max)
return end;
new_idx = (end + start) / 2;
entry = &map->entries[new_idx];
if (addr < entry->min)
end = new_idx;
else
start = new_idx + 1;
} while (end != start);
return entry - &map->entries[0];
}
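/* Worked example: with two entries covering [10,20) and [30,40),
 * upper_bound(5)  returns 0 (insert before both),
 * upper_bound(25) returns 1 (insert between them), and
 * upper_bound(45) returns 2 (append at the end). */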
/* Search for an entry in the given map which overlaps the range specified by
addr and size. */
static struct addr_map_entry*
lookup_addr_aux(unsigned long addr, size_t size, struct addr_map* map)
{
size_t start = 0;
size_t end;
/* No map? No matching entry. */
if (!map)
return 0;
/* Empty map? No matching entry. */
if (map->size == 0)
return 0;
/* Use binary search on the sorted map entries. */
end = map->size;
do
{
int entry_idx;
struct addr_map_entry *entry = 0;
/* If the target range lies entirely outside the ranges of the remaining
entries in the map, then no matching entry will be found. */
if (addr + size <= map->entries[start].min ||
addr >= map->entries[end - 1].max)
return 0;
/* Choose the map entry at the mid point of the remaining entries. */
entry_idx = (end + start) / 2;
entry = &map->entries[entry_idx];
/* If our range overlaps the range of this entry, then we have a match. */
if (addr + size > entry->min && addr < entry->max)
return entry;
/* If our range is below the range of this entry, then cull the entries
above this entry, otherwise cull the entries below it. */
if (addr + size <= entry->min)
end = entry_idx;
else
start = entry_idx + 1;
} while (start < end);
return 0;
}
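/* Worked example: with entries [10,20) and [30,40), a query for addr=18,
 * size=4 (i.e. the range [18,22)) first probes the [30,40) entry, finds no
 * overlap, culls the upper half, then matches [10,20) because 22 > 10 and
 * 18 < 20; a query for [22,28) overlaps neither entry and returns 0. */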
#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
#include <asm/processor.h> /* For TASK_SIZE */
#endif
static int
lookup_bad_addr(const int type, const unsigned long addr, const size_t size)
{
struct addr_map_entry* result = 0;
unsigned long flags;
/* Is this a valid memory access? */
if (size == 0 || ULONG_MAX - addr < size - 1
|| !stp_access_ok(type, (void *)addr, size))
return 1;
#if ! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPDEV) && \
! STP_PRIVILEGE_CONTAINS (STP_PRIVILEGE, STP_PR_STAPSYS)
/* Unprivileged users must not access memory while the context
does not refer to their own process. */
if (! is_myproc ())
return 1;
/* Unprivileged users must not access kernel space memory. */
if (addr + size > TASK_SIZE)
return 1;
#endif
/* Search for the given range in the block-listed map. */
stp_read_lock_irqsave(&addr_map_lock, flags);
result = lookup_addr_aux(addr, size, blockmap);
stp_read_unlock_irqrestore(&addr_map_lock, flags);
if (result)
return 1;
else
return 0;
}
static int
add_bad_addr_entry(unsigned long min_addr, unsigned long max_addr,
struct addr_map_entry** existing_min,
struct addr_map_entry** existing_max)
{
struct addr_map* new_map = 0;
struct addr_map* old_map = 0;
struct addr_map_entry* min_entry = 0;
struct addr_map_entry* max_entry = 0;
struct addr_map_entry* new_entry = 0;
size_t existing = 0;
unsigned long flags;
/* Loop allocating memory for a new entry in the map. */
while (1)
{
size_t old_size = 0;
stp_write_lock_irqsave(&addr_map_lock, flags);
old_map = blockmap;
if (old_map)
old_size = old_map->size;
/* Either this is the first time through the loop, or we
   allocated a map on a previous iteration but someone came in and
   added an entry while we had dropped the lock. */
if (!new_map || (new_map && new_map->size < old_size + 1))
{
stp_write_unlock_irqrestore(&addr_map_lock, flags);
if (new_map)
{
_stp_kfree(new_map);
new_map = 0;
}
new_map = _stp_kmalloc(sizeof(*new_map)
+ sizeof(*new_entry) * (old_size + 1));
if (!new_map)
return -ENOMEM;
new_map->size = old_size + 1;
}
else
break;
}
if (!blockmap)
{
existing = 0;
}
else
{
min_entry = lookup_addr_aux(min_addr, 1, blockmap);
max_entry = lookup_addr_aux(max_addr, 1, blockmap);
if (min_entry || max_entry)
{
if (existing_min)
*existing_min = min_entry;
if (existing_max)
*existing_max = max_entry;
stp_write_unlock_irqrestore(&addr_map_lock, flags);
_stp_kfree(new_map);
return 1;
}
existing = upper_bound(min_addr, old_map);
}
new_entry = &new_map->entries[existing];
new_entry->min = min_addr;
new_entry->max = max_addr;
if (old_map)
{
memcpy(&new_map->entries, old_map->entries,
existing * sizeof(*new_entry));
if (old_map->size > existing)
memcpy(new_entry + 1, &old_map->entries[existing],
(old_map->size - existing) * sizeof(*new_entry));
}
blockmap = new_map;
stp_write_unlock_irqrestore(&addr_map_lock, flags);
if (old_map)
_stp_kfree(old_map);
return 0;
}
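/* Usage sketch (caller and address purely hypothetical): to blocklist one
 * page starting at bad_page one would call
 *   rc = add_bad_addr_entry(bad_page, bad_page + PAGE_SIZE, NULL, NULL);
 * where rc == 0 on success, 1 if the range already overlaps existing
 * entries (returned via existing_min/existing_max), and -ENOMEM if the
 * enlarged map could not be allocated. */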
/* Stub: removing individual blocklist entries is not implemented. */
static void
delete_bad_addr_entry(struct addr_map_entry* entry)
{
}
#endif /* PR12970 */
#endif /* _STAPLINUX_ADDR_MAP_C_ */