// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
extern unsigned int next_early_pgt;
static inline bool check_la57_support(void)
{
        /*
         * 5-level paging is detected and enabled at kernel decompression
         * stage. Only check if it has been enabled there.
         */
        if (!(native_read_cr4() & X86_CR4_LA57))
                return false;

        __pgtable_l5_enabled = 1;
        pgdir_shift = 48;
        ptrs_per_p4d = 512;

        return true;
}
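
/*
 * Encrypt the kernel in place when SME is active and strip the encryption
 * mask from the .bss..decrypted mapping.  Returns the SME mask so the caller
 * can apply it to the initial page table entry programmed into CR3.
 */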
static unsigned long __init sme_postprocess_startup(struct boot_params *bp,
                                                    pmdval_t *pmd,
                                                    unsigned long p2v_offset)
{
        unsigned long paddr, paddr_end;
        int i;

        /* Encrypt the kernel and related (if SME is active) */
        sme_encrypt_kernel(bp);

        /*
         * Clear the memory encryption mask from the .bss..decrypted section.
         * The bss section will be memset to zero later in the initialization so
         * there is no need to zero it after changing the memory encryption
         * attribute.
         */
        if (sme_get_me_mask()) {
                paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
                paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

                for (; paddr < paddr_end; paddr += PMD_SIZE) {
                        /*
                         * On SNP, transition the page to shared in the RMP table so that
                         * it is consistent with the page table attribute change.
                         *
                         * __start_bss_decrypted has a virtual address in the high range
                         * mapping (kernel .text). PVALIDATE, by way of
                         * early_snp_set_memory_shared(), requires a valid virtual
                         * address but the kernel is currently running off of the identity
                         * mapping so use the PA to get a *currently* valid virtual address.
                         */
                        early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

                        i = pmd_index(paddr - p2v_offset);
                        pmd[i] -= sme_get_me_mask();
                }
        }

        /*
         * Return the SME encryption mask (if SME is active) to be used as a
         * modifier for the initial pgdir entry programmed into CR3.
         */
        return sme_get_me_mask();
}

/*
 * This code is compiled using PIC codegen because it will execute from the
 * early 1:1 mapping of memory, which deviates from the mapping expected by the
 * linker. Due to this deviation, taking the address of a global variable will
 * produce an ambiguous result when using the plain & operator. Instead,
 * rip_rel_ptr() must be used, which will return the RIP-relative address in
 * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
 * subtracting p2v_offset from the RIP-relative address.
 */
unsigned long __init __startup_64(unsigned long p2v_offset,
                                  struct boot_params *bp)
{
        pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
        unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
        unsigned long va_text, va_end;
        unsigned long pgtable_flags;
        unsigned long load_delta;
        pgdval_t *pgd;
        p4dval_t *p4d;
        pudval_t *pud;
        pmdval_t *pmd, pmd_entry;
        bool la57;
        int i;

        la57 = check_la57_support();

        /* Is the address too large? */
        if (physaddr >> MAX_PHYSMEM_BITS)
                for (;;);

        /*
         * Compute the delta between the address I am compiled to run at
         * and the address I am actually running at.
         */
        phys_base = load_delta = __START_KERNEL_map + p2v_offset;

        /* Is the address not 2M aligned? */
        if (load_delta & ~PMD_MASK)
                for (;;);
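
        /* Kernel-virtual start and end of the image, derived via p2v_offset */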
        va_text = physaddr - p2v_offset;
        va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

        /* Include the SME encryption mask in the fixup value */
        load_delta += sme_get_me_mask();

        /* Fixup the physical addresses in the page table */
        pgd = rip_rel_ptr(early_top_pgt);
        pgd[pgd_index(__START_KERNEL_map)] += load_delta;
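
        /*
         * With 5-level paging the kernel mapping hangs off level4_kernel_pgt
         * (the p4d level): fix up the physical address in its last entry and
         * re-point the top-level PGD slot at it.
         */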
        if (la57) {
                p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
                p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

                pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
        }
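
        /*
         * The last two PUD entries of level3_kernel_pgt carry the kernel text
         * and fixmap mappings (level2_kernel_pgt and level2_fixmap_pgt);
         * adjust their physical pointers as well.
         */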
        level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
        level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;
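
        /* The top entries of level2_fixmap_pgt point at the level1 fixmap tables */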
        for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
                level2_fixmap_pgt[i].pmd += load_delta;

        /*
         * Set up the identity mapping for the switchover. These
         * entries should *NOT* have the global bit set! This also
         * creates a bunch of nonsense entries but that is fine --
         * it avoids problems around wraparound.
         */
        pud = &early_pgts[0]->pmd;
        pmd = &early_pgts[1]->pmd;
        next_early_pgt = 2;

        pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
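
        /*
         * Two consecutive entries are populated at each level so that the
         * identity mapping still covers the image if it happens to straddle
         * a PGD/P4D/PUD boundary.
         */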
        if (la57) {
                p4d = &early_pgts[next_early_pgt++]->pmd;

                i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
                pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
                pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

                i = physaddr >> P4D_SHIFT;
                p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
                p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
        } else {
                i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
                pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
                pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
        }

        i = physaddr >> PUD_SHIFT;
        pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
        pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
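
        /*
         * Map the image with 2M (PMD-sized) pages at its physical load
         * address: non-global, executable, and carrying the SME mask when
         * memory encryption is active.
         */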
        pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
        pmd_entry += sme_get_me_mask();
        pmd_entry += physaddr;

        for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
                int idx = i + (physaddr >> PMD_SHIFT);

                pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
        }

        /*
         * Fixup the kernel text+data virtual addresses. Note that we might
         * write invalid pmds when the kernel is relocated; cleanup_highmap()
         * fixes this up along with the mappings beyond _end.
         *
         * Only the region occupied by the kernel image has so far
         * been checked against the table of usable memory regions
         * provided by the firmware, so invalidate pages outside that
         * region. A page table entry that maps to a reserved area of
         * memory would allow processor speculation into that area,
         * and on some hardware (particularly the UV platform) even
         * speculative access to some reserved areas is caught as an
         * error, causing the BIOS to halt the system.
         */
        pmd = rip_rel_ptr(level2_kernel_pgt);

        /* invalidate pages before the kernel image */
        for (i = 0; i < pmd_index(va_text); i++)
                pmd[i] &= ~_PAGE_PRESENT;

        /* fixup pages that are part of the kernel image */
        for (; i <= pmd_index(va_end); i++)
                if (pmd[i] & _PAGE_PRESENT)
                        pmd[i] += load_delta;

        /* invalidate pages after the kernel image */
        for (; i < PTRS_PER_PMD; i++)
                pmd[i] &= ~_PAGE_PRESENT;

        return sme_postprocess_startup(bp, pmd, p2v_offset);
}