x86/mm: Add support to encrypt the kernel in-place
Add support for encrypting the kernel in-place. This is done by creating
new page mappings for the kernel: a decrypted, write-protected mapping
and an encrypted mapping. The kernel is encrypted by copying it through
a temporary buffer.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/c039bf9412ef95e1e6bf4fdf8facab95e00c717b.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tom Lendacky authored and Ingo Molnar committed Jul 18, 2017
1 parent db51699 commit 6ebcb06
Showing 4 changed files with 466 additions and 0 deletions.
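For illustration only (this sketch is not part of the commit): a minimal, hypothetical user-space C program modelling the copy-through-a-buffer flow the commit message describes. The image is walked in chunks; each chunk is read through the plain (decrypted) view into an intermediate buffer and written back through the "encrypted" view. A XOR stands in for the transparent encryption the memory controller performs under SME, and all names and sizes below are illustrative, not the kernel's.

/* Toy model of encrypt-in-place: a XOR stands in for the SME hardware. */
#include <stdio.h>
#include <string.h>

#define CHUNK_SIZE 8            /* stand-in for PMD_PAGE_SIZE (2MB) */
#define IMAGE_SIZE 32           /* stand-in for the kernel image length */

/* Write through the "encrypted" view: the hardware would encrypt here. */
static void write_encrypted(unsigned char *dst, const unsigned char *src,
                            size_t len)
{
        for (size_t i = 0; i < len; i++)
                dst[i] = src[i] ^ 0xA5; /* toy cipher */
}

int main(void)
{
        unsigned char image[IMAGE_SIZE];        /* the in-place "kernel" */
        unsigned char buf[CHUNK_SIZE];          /* intermediate copy buffer */

        memset(image, 0x11, sizeof(image));

        /*
         * Encrypt in place, one chunk at a time:
         *   1. read the chunk through the decrypted view,
         *   2. stash it in the intermediate buffer,
         *   3. write it back through the encrypted view.
         */
        for (size_t off = 0; off < IMAGE_SIZE; off += CHUNK_SIZE) {
                memcpy(buf, image + off, CHUNK_SIZE);
                write_encrypted(image + off, buf, CHUNK_SIZE);
        }

        printf("first byte after toy encryption: 0x%02x\n", image[0]);
        return 0;
}

In the actual patch the two views are the encrypted and decrypted kernel mappings built in sme_encrypt_kernel() below, and the intermediate buffer lives in the workarea passed to sme_encrypt_execute().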
6 changes: 6 additions & 0 deletions arch/x86/include/asm/mem_encrypt.h
@@ -21,6 +21,12 @@

extern unsigned long sme_me_mask;

void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
                         unsigned long decrypted_kernel_vaddr,
                         unsigned long kernel_len,
                         unsigned long encryption_wa,
                         unsigned long encryption_pgd);

void __init sme_early_encrypt(resource_size_t paddr,
                              unsigned long size);
void __init sme_early_decrypt(resource_size_t paddr,
1 change: 1 addition & 0 deletions arch/x86/mm/Makefile
@@ -40,3 +40,4 @@ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o

obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
310 changes: 310 additions & 0 deletions arch/x86/mm/mem_encrypt.c
@@ -21,6 +21,8 @@
#include <asm/setup.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>

/*
 * Since SME related variables are set early in the boot process they must

@@ -199,8 +201,316 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
        set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
}

static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
                                 unsigned long end)
{
        unsigned long pgd_start, pgd_end, pgd_size;
        pgd_t *pgd_p;

        pgd_start = start & PGDIR_MASK;
        pgd_end = end & PGDIR_MASK;

        pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
        pgd_size *= sizeof(pgd_t);

        pgd_p = pgd_base + pgd_index(start);

        memset(pgd_p, 0, pgd_size);
}

#define PGD_FLAGS _KERNPG_TABLE_NOENC
#define P4D_FLAGS _KERNPG_TABLE_NOENC
#define PUD_FLAGS _KERNPG_TABLE_NOENC
#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)

static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
                                     unsigned long vaddr, pmdval_t pmd_val)
{
        pgd_t *pgd_p;
        p4d_t *p4d_p;
        pud_t *pud_p;
        pmd_t *pmd_p;

        pgd_p = pgd_base + pgd_index(vaddr);
        if (native_pgd_val(*pgd_p)) {
                if (IS_ENABLED(CONFIG_X86_5LEVEL))
                        p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
                else
                        pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
        } else {
                pgd_t pgd;

                if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                        p4d_p = pgtable_area;
                        memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
                        pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;

                        pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
                } else {
                        pud_p = pgtable_area;
                        memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
                        pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

                        pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
                }
                native_set_pgd(pgd_p, pgd);
        }

        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_p += p4d_index(vaddr);
                if (native_p4d_val(*p4d_p)) {
                        pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
                } else {
                        p4d_t p4d;

                        pud_p = pgtable_area;
                        memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
                        pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;

                        p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
                        native_set_p4d(p4d_p, p4d);
                }
        }

        pud_p += pud_index(vaddr);
        if (native_pud_val(*pud_p)) {
                if (native_pud_val(*pud_p) & _PAGE_PSE)
                        goto out;

                pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
        } else {
                pud_t pud;

                pmd_p = pgtable_area;
                memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
                pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;

                pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
                native_set_pud(pud_p, pud);
        }

        pmd_p += pmd_index(vaddr);
        if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
                native_set_pmd(pmd_p, native_make_pmd(pmd_val));

out:
        return pgtable_area;
}

static unsigned long __init sme_pgtable_calc(unsigned long len)
{
        unsigned long p4d_size, pud_size, pmd_size;
        unsigned long total;

        /*
         * Perform a relatively simplistic calculation of the pagetable
         * entries that are needed. Those mappings will be covered by 2MB
         * PMD entries so we can conservatively calculate the required
         * number of P4D, PUD and PMD structures needed to perform the
         * mappings. Incrementing the count for each covers the case where
         * the addresses cross entries.
         */
        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
                p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
                pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        } else {
                p4d_size = 0;
                pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        }
        pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
        pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

        total = p4d_size + pud_size + pmd_size;

        /*
         * Now calculate the added pagetable structures needed to populate
         * the new pagetables.
         */
        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
                p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
                pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        } else {
                p4d_size = 0;
                pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
                pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
        }
        pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
        pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;

        total += p4d_size + pud_size + pmd_size;

        return total;
}

void __init sme_encrypt_kernel(void)
{
        unsigned long workarea_start, workarea_end, workarea_len;
        unsigned long execute_start, execute_end, execute_len;
        unsigned long kernel_start, kernel_end, kernel_len;
        unsigned long pgtable_area_len;
        unsigned long paddr, pmd_flags;
        unsigned long decrypted_base;
        void *pgtable_area;
        pgd_t *pgd;

        if (!sme_active())
                return;

        /*
         * Prepare for encrypting the kernel by building new pagetables with
         * the necessary attributes needed to encrypt the kernel in place.
         *
         *   One range of virtual addresses will map the memory occupied
         *   by the kernel as encrypted.
         *
         *   Another range of virtual addresses will map the memory occupied
         *   by the kernel as decrypted and write-protected.
         *
         *     The use of the write-protect attribute will prevent any of the
         *     memory from being cached.
         */

        /* Physical addresses give us the identity-mapped virtual addresses */
        kernel_start = __pa_symbol(_text);
        kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
        kernel_len = kernel_end - kernel_start;

        /* Set the encryption workarea to be immediately after the kernel */
        workarea_start = kernel_end;

        /*
         * Calculate required number of workarea bytes needed:
         *   executable encryption area size:
         *     stack page (PAGE_SIZE)
         *     encryption routine page (PAGE_SIZE)
         *     intermediate copy buffer (PMD_PAGE_SIZE)
         *   pagetable structures for the encryption of the kernel
         *   pagetable structures for workarea (in case not currently mapped)
         */
        execute_start = workarea_start;
        execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
        execute_len = execute_end - execute_start;

        /*
         * One PGD for both encrypted and decrypted mappings and a set of
         * PUDs and PMDs for each of the encrypted and decrypted mappings.
         */
        pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
        pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;

        /* PUDs and PMDs needed in the current pagetables for the workarea */
        pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);

        /*
         * The total workarea includes the executable encryption area and
         * the pagetable area.
         */
        workarea_len = execute_len + pgtable_area_len;
        workarea_end = workarea_start + workarea_len;

        /*
         * Set the address to the start of where newly created pagetable
         * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
         * structures are created when the workarea is added to the current
         * pagetables and when the new encrypted and decrypted kernel
         * mappings are populated.
         */
        pgtable_area = (void *)execute_end;

        /*
         * Make sure the current pagetable structure has entries for
         * addressing the workarea.
         */
        pgd = (pgd_t *)native_read_cr3_pa();
        paddr = workarea_start;
        while (paddr < workarea_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + PMD_FLAGS);

                paddr += PMD_PAGE_SIZE;
        }

        /* Flush the TLB - no globals so cr3 is enough */
        native_write_cr3(__native_read_cr3());

        /*
         * A new pagetable structure is being built to allow for the kernel
         * to be encrypted. It starts with an empty PGD that will then be
         * populated with new PUDs and PMDs as the encrypted and decrypted
         * kernel mappings are created.
         */
        pgd = pgtable_area;
        memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
        pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;

        /* Add encrypted kernel (identity) mappings */
        pmd_flags = PMD_FLAGS | _PAGE_ENC;
        paddr = kernel_start;
        while (paddr < kernel_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + pmd_flags);

                paddr += PMD_PAGE_SIZE;
        }

        /*
         * A different PGD index/entry must be used to get different
         * pagetable entries for the decrypted mapping. Choose the next
         * PGD index and convert it to a virtual address to be used as
         * the base of the mapping.
         */
        decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
        decrypted_base <<= PGDIR_SHIFT;

        /* Add decrypted, write-protected kernel (non-identity) mappings */
        pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
        paddr = kernel_start;
        while (paddr < kernel_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr + decrypted_base,
                                                paddr + pmd_flags);

                paddr += PMD_PAGE_SIZE;
        }

        /* Add decrypted workarea mappings to both kernel mappings */
        paddr = workarea_start;
        while (paddr < workarea_end) {
                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr,
                                                paddr + PMD_FLAGS);

                pgtable_area = sme_populate_pgd(pgd, pgtable_area,
                                                paddr + decrypted_base,
                                                paddr + PMD_FLAGS);

                paddr += PMD_PAGE_SIZE;
        }

        /* Perform the encryption */
        sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
                            kernel_len, workarea_start, (unsigned long)pgd);

        /*
         * At this point we are running encrypted. Remove the mappings for
         * the decrypted areas - all that is needed for this is to remove
         * the PGD entry/entries.
         */
        sme_clear_pgd(pgd, kernel_start + decrypted_base,
                      kernel_end + decrypted_base);

        sme_clear_pgd(pgd, workarea_start + decrypted_base,
                      workarea_end + decrypted_base);

        /* Flush the TLB - no globals so cr3 is enough */
        native_write_cr3(__native_read_cr3());
}

void __init sme_enable(void)
