Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "An unfortunately larger set of fixes, but a large portion is
  selftests:

   - Fix the missing clusterid initialization for x2apic cluster
     management which caused boot failures due to IPIs being sent to the
     wrong cluster

   - Drop TS_COMPAT when a 64bit executable is exec()'ed from a compat
     task

   - Wrap access to __supported_pte_mask in __startup_64(), where clang
     generates a non-PC-relative access that crashes at boot

   - Two fixes for 5 level paging fallout in the decompressor:

      - Handle GOT correctly for paging_prepare() and
        cleanup_trampoline()

      - Fix the page table handling in cleanup_trampoline() to avoid
        page table corruption.

   - Stop special casing protection key 0 as this is inconsistent with
     the manpage and also inconsistent with the allocation map handling.

   - Override the protection key when moving away from PROT_EXEC to
     prevent inaccessible memory (see the sketch below)

   - Fix and update the protection key selftests to address breakage and
     to cover the above issue

   - Add a MOV SS self test"

[ Part of the x86 fixes were in the earlier core pull due to dependencies ]
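As a concrete illustration of the PROT_EXEC override fix: a mapping that picked up the execute-only protection key could keep it after protections were changed away from PROT_EXEC, leaving memory that faulted on every access. The sketch below shows the kind of sequence affected; it is a minimal userspace illustration assuming pkeys-capable hardware, not code from the patch:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* A PROT_EXEC-only mapping is transparently tagged with the
	 * execute-only protection key on pkeys-capable hardware. */
	char *p = mmap(NULL, 4096, PROT_EXEC,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Move away from PROT_EXEC.  Before the fix, the mapping could
	 * keep the execute-only pkey across this sequence, so the read
	 * below took a protection-key fault despite PROT_READ. */
	mprotect(p, 4096, PROT_NONE);
	mprotect(p, 4096, PROT_READ);

	printf("first byte: %d\n", p[0]);
	return 0;
}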

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/mm: Drop TS_COMPAT on 64-bit exec() syscall
  x86/apic/x2apic: Initialize cluster ID properly
  x86/boot/compressed/64: Fix moving page table out of trampoline memory
  x86/boot/compressed/64: Set up GOT for paging_prepare() and cleanup_trampoline()
  x86/pkeys: Do not special case protection key 0
  x86/pkeys/selftests: Add a test for pkey 0
  x86/pkeys/selftests: Save off 'prot' for allocations
  x86/pkeys/selftests: Fix pointer math
  x86/pkeys: Override pkey when moving away from PROT_EXEC
  x86/pkeys/selftests: Fix pkey exhaustion test off-by-one
  x86/pkeys/selftests: Add PROT_EXEC test
  x86/pkeys/selftests: Factor out "instruction page"
  x86/pkeys/selftests: Allow faults on unknown keys
  x86/pkeys/selftests: Avoid printf-in-signal deadlocks
  x86/pkeys/selftests: Remove dead debugging code, fix dprint_in_signal
  x86/pkeys/selftests: Stop using assert()
  x86/pkeys/selftests: Give better unexpected fault error messages
  x86/selftests: Add mov_to_ss test
  x86/mpx/selftests: Adjust the self-test to fresh distros that export the MPX ABI
  x86/pkeys/selftests: Adjust the self-test to fresh distros that export the pkeys ABI
  ...
Linus Torvalds committed May 20, 2018
2 parents b9aad92 + acf4602 commit 8a6bd2f
Showing 13 changed files with 585 additions and 129 deletions.
79 changes: 66 additions & 13 deletions arch/x86/boot/compressed/head_64.S
@@ -305,6 +305,25 @@ ENTRY(startup_64)
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp

/*
* paging_prepare() and cleanup_trampoline() below can have GOT
* references. Adjust the table with address we are running at.
*
* Zero RAX for adjust_got: the GOT was not adjusted before;
* there's no adjustment to undo.
*/
xorq %rax, %rax

/*
* Calculate the address the binary is loaded at and use it as
* a GOT adjustment.
*/
call 1f
1: popq %rdi
subq $1b, %rdi

call adjust_got

/*
* At this point we are in long mode with 4-level paging enabled,
* but we might want to enable 5-level paging or vice versa.
@@ -370,17 +389,36 @@ trampoline_return:
/*
* cleanup_trampoline() would restore trampoline memory.
*
* RDI is address of the page table to use instead of page table
* in trampoline memory (if required).
*
* RSI holds real mode data and needs to be preserved across
* this function call.
*/
pushq %rsi
leaq top_pgtable(%rbx), %rdi
call cleanup_trampoline
popq %rsi

/* Zero EFLAGS */
pushq $0
popfq

/*
* Previously we've adjusted the GOT with address the binary was
* loaded at. Now we need to re-adjust for relocation address.
*
* Calculate the address the binary is loaded at, so that we can
* undo the previous GOT adjustment.
*/
call 1f
1: popq %rax
subq $1b, %rax

/* The new adjustment is the relocation address */
movq %rbx, %rdi
call adjust_got

/*
* Copy the compressed kernel to the end of our buffer
* where decompression in place becomes safe.
@@ -481,19 +519,6 @@ relocated:
shrq $3, %rcx
rep stosq

/*
* Adjust our own GOT
*/
leaq _got(%rip), %rdx
leaq _egot(%rip), %rcx
1:
cmpq %rcx, %rdx
jae 2f
addq %rbx, (%rdx)
addq $8, %rdx
jmp 1b
2:

/*
* Do the extraction, and jump to the new kernel..
*/
@@ -512,6 +537,27 @@
*/
jmp *%rax

/*
* Adjust the global offset table
*
* RAX is the previous adjustment of the table to undo (use 0 if it's the
* first time we touch GOT).
* RDI is the new adjustment to apply.
*/
adjust_got:
/* Walk through the GOT adding the address to the entries */
leaq _got(%rip), %rdx
leaq _egot(%rip), %rcx
1:
cmpq %rcx, %rdx
jae 2f
subq %rax, (%rdx) /* Undo previous adjustment */
addq %rdi, (%rdx) /* Apply the new adjustment */
addq $8, %rdx
jmp 1b
2:
ret

.code32
/*
* This is the 32-bit trampoline that will be copied over to low memory.
@@ -649,3 +695,10 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0

/*
* The page table is going to be used instead of page table in the trampoline
* memory.
*/
top_pgtable:
.fill PAGE_SIZE, 1, 0
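To make the two-pass scheme above concrete, here is a C model of what adjust_got does; this is an illustrative sketch, not code from the patch. Every 8-byte GOT entry has the previous bias removed (RAX, zero on the first pass) and the new bias applied (RDI):

#include <stdint.h>

/* Model of adjust_got: re-bias each 8-byte entry between _got and
 * _egot.  'prev' is the bias applied by the previous pass (0 if the
 * table has never been adjusted), 'next' is the bias to apply now. */
static void adjust_got_model(uint64_t *got, uint64_t *egot,
			     uint64_t prev, uint64_t next)
{
	for (uint64_t *entry = got; entry < egot; entry++)
		*entry = *entry - prev + next;
}

The first pass runs with prev = 0 and next = the load address discovered via the call/pop sequence; the second pass undoes that (prev = the recomputed load address in RAX) and applies the relocation address passed in RDI from %rbx.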
14 changes: 3 additions & 11 deletions arch/x86/boot/compressed/pgtable_64.c
@@ -22,14 +22,6 @@ struct paging_config {
/* Buffer to preserve trampoline memory */
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];

/*
* The page table is going to be used instead of page table in the trampoline
* memory.
*
* It must not be in BSS as BSS is cleared after cleanup_trampoline().
*/
static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);

/*
* Trampoline address will be printed by extract_kernel() for debugging
* purposes.
@@ -134,7 +126,7 @@ struct paging_config paging_prepare(void)
return paging_config;
}

void cleanup_trampoline(void)
void cleanup_trampoline(void *pgtable)
{
void *trampoline_pgtable;

@@ -145,8 +137,8 @@ void cleanup_trampoline(void)
* if it's there.
*/
if ((void *)__native_read_cr3() == trampoline_pgtable) {
memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
native_write_cr3((unsigned long)top_pgtable);
memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
native_write_cr3((unsigned long)pgtable);
}

/* Restore trampoline memory */
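The reason top_pgtable moves from pgtable_64.c into head_64.S: cleanup_trampoline() used to reach the table through a C global, which the compiler may address via the GOT, and the GOT is re-biased to the relocation address after cleanup_trampoline() has already loaded CR3. Passing the address explicitly (computed from %rbx in head_64.S) removes that indirection. A toy numeric model of the mismatch, with made-up addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t link_addr  = 0x1000;   /* link-time address of the table */
	uint64_t load_bias  = 0x100000; /* first GOT adjustment           */
	uint64_t reloc_bias = 0x800000; /* second GOT adjustment          */

	/* CR3 was loaded while the GOT carried the load-address bias... */
	uint64_t cr3 = link_addr + load_bias;

	/* ...but adjust_got then re-biases the entry for the symbol. */
	uint64_t got_entry = link_addr + load_bias - load_bias + reloc_bias;

	printf("CR3=%#llx, symbol resolves to %#llx: mismatch\n",
	       (unsigned long long)cr3, (unsigned long long)got_entry);
	return 0;
}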
2 changes: 1 addition & 1 deletion arch/x86/include/asm/mmu_context.h
@@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk,

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
/* pkey 0 is the default and always allocated */
/* pkey 0 is the default and allocated implicitly */
mm->context.pkey_allocation_map = 0x1;
/* -1 means unallocated or invalid */
mm->context.execute_only_pkey = -1;
18 changes: 14 additions & 4 deletions arch/x86/include/asm/pkeys.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_PKEYS_H
#define _ASM_X86_PKEYS_H

#define ARCH_DEFAULT_PKEY 0

#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)

extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
@@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
static inline int execute_only_pkey(struct mm_struct *mm)
{
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return 0;
return ARCH_DEFAULT_PKEY;

return __execute_only_pkey(mm);
}
@@ -49,13 +51,21 @@
{
/*
* "Allocated" pkeys are those that have been returned
* from pkey_alloc(). pkey 0 is special, and never
* returned from pkey_alloc().
* from pkey_alloc() or pkey 0 which is allocated
* implicitly when the mm is created.
*/
if (pkey <= 0)
if (pkey < 0)
return false;
if (pkey >= arch_max_pkey())
return false;
/*
* The exec-only pkey is set in the allocation map, but
* is not available to any of the user interfaces like
* mprotect_pkey().
*/
if (pkey == mm->context.execute_only_pkey)
return false;

return mm_pkey_allocation_map(mm) & (1U << pkey);
}

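A brief userspace illustration of the semantics this header change establishes: pkey 0 is allocated implicitly with the mm, pkey_alloc() never returns it, and naming it explicitly in pkey_mprotect() is now accepted, matching the manpage. A minimal sketch assuming pkeys-capable hardware and glibc 2.27+ wrappers:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	/* Never returns 0: the default key is implicitly allocated. */
	int pkey = pkey_alloc(0, 0);
	printf("pkey_alloc() -> %d\n", pkey);

	/* Explicitly passing pkey 0 used to fail with EINVAL because
	 * mm_pkey_is_allocated() rejected it; it now succeeds. */
	if (pkey_mprotect(p, 4096, PROT_READ, 0) == 0)
		puts("pkey_mprotect(..., 0) succeeded");
	return 0;
}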
1 change: 1 addition & 0 deletions arch/x86/kernel/apic/x2apic_cluster.c
@@ -116,6 +116,7 @@ static void init_x2apic_ldr(void)
goto update;
}
cmsk = cluster_hotplug_mask;
cmsk->clusterid = cluster;
cluster_hotplug_mask = NULL;
update:
this_cpu_write(cluster_masks, cmsk);
10 changes: 9 additions & 1 deletion arch/x86/kernel/head64.c
@@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr)
}
#endif

/* Code in __startup_64() can be relocated during execution, but the compiler
* doesn't have to generate PC-relative relocations when accessing globals from
* that function. Clang actually does not generate them, which leads to
* boot-time crashes. To work around this problem, every global pointer must
* be adjusted using fixup_pointer().
*/
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
@@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
p4dval_t *p4d;
pudval_t *pud;
pmdval_t *pmd, pmd_entry;
pteval_t *mask_ptr;
bool la57;
int i;
unsigned int *next_pgt_ptr;
@@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr,

pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
/* Filter out unsupported __PAGE_KERNEL_* bits: */
pmd_entry &= __supported_pte_mask;
mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr);
pmd_entry &= *mask_ptr;
pmd_entry += sme_get_me_mask();
pmd_entry += physaddr;

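The fixup_pointer() helper used above is defined earlier in head64.c; it re-biases a link-time symbol address to the physical address the kernel is actually running at. Roughly (paraphrased; the __head annotation and kernel types are elided):

#include <stdint.h>

extern char _text[];	/* link-time start of the kernel image */

/* Shift a link-time pointer by the delta between where the kernel was
 * linked (_text) and where it was loaded (physaddr). */
static void *fixup_pointer(void *ptr, unsigned long physaddr)
{
	return (void *)((uintptr_t)ptr - (uintptr_t)_text + physaddr);
}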
1 change: 1 addition & 0 deletions arch/x86/kernel/process_64.c
@@ -542,6 +542,7 @@ void set_personality_64bit(void)
clear_thread_flag(TIF_X32);
/* Pretend that this comes from a 64bit execve */
task_pt_regs(current)->orig_ax = __NR_execve;
current_thread_info()->status &= ~TS_COMPAT;

/* Ensure the corresponding mm is not marked. */
if (current->mm)
21 changes: 11 additions & 10 deletions arch/x86/mm/pkeys.c
@@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
*/
if (pkey != -1)
return pkey;
/*
* Look for a protection-key-drive execute-only mapping
* which is now being given permissions that are not
* execute-only. Move it back to the default pkey.
*/
if (vma_is_pkey_exec_only(vma) &&
(prot & (PROT_READ|PROT_WRITE))) {
return 0;
}

/*
* The mapping is execute-only. Go try to get the
* execute-only protection key. If we fail to do that,
* fall through as if we do not have execute-only
* support.
* support in this mm.
*/
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(vma->vm_mm);
if (pkey > 0)
return pkey;
} else if (vma_is_pkey_exec_only(vma)) {
/*
* Protections are *not* PROT_EXEC, but the mapping
* is using the exec-only pkey. This mapping was
* PROT_EXEC and will no longer be. Move back to
* the default pkey.
*/
return ARCH_DEFAULT_PKEY;
}

/*
* This is a vanilla, non-pkey mprotect (or we failed to
* setup execute-only), inherit the pkey from the VMA we
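As a compact restatement of the decision the rewritten block makes, here is an illustrative C model (names are hypothetical, not kernel code) of which pkey an mprotect() without an explicit key ends up with:

#define ARCH_DEFAULT_PKEY 0

/* Model of the fixed override logic: prefer the exec-only pkey for
 * PROT_EXEC-only protections, drop it when moving away from
 * PROT_EXEC, otherwise inherit the VMA's existing pkey. */
static int override_pkey_model(int prot_is_exec_only,
			       int vma_uses_exec_only_pkey,
			       int exec_only_pkey, int vma_pkey)
{
	if (prot_is_exec_only && exec_only_pkey > 0)
		return exec_only_pkey;
	if (vma_uses_exec_only_pkey)
		return ARCH_DEFAULT_PKEY;
	return vma_pkey;
}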
2 changes: 1 addition & 1 deletion tools/testing/selftests/x86/Makefile
@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)

TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
protection_keys test_vdso test_vsyscall
protection_keys test_vdso test_vsyscall mov_ss_trap
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
