Skip to content

Commit

Permalink
s390/smp: rework absolute lowcore access
Browse files Browse the repository at this point in the history
Temporary unsetting of the prefix page in memcpy_absolute() routine
poses a risk of executing code path with unexpectedly disabled prefix
page. This rework avoids the prefix page uninstalling and disabling
of normal and machine check interrupts when accessing the absolute
zero memory.

Although memcpy_absolute() routine can access the whole memory, it is
only used to update the absolute zero lowcore. This rework therefore
introduces a new mechanism for the absolute zero lowcore access and
scraps memcpy_absolute() routine for good.

Instead, an area is reserved in the virtual memory that is used for
the absolute lowcore access only. That area holds an array of 8KB
virtual mappings - one per CPU. Whenever a CPU is brought online, the
corresponding item is mapped to the real address of the previously
installed prefix page.

The absolute zero lowcore access works like this: a CPU calls the
new primitive get_abs_lowcore() to obtain its 8KB mapping as a
pointer to the struct lowcore. Virtual address references to that
pointer get translated to the real addresses of the prefix page,
which in turn gets swapped with the absolute zero memory addresses
due to prefixing. Once the pointer is not needed it must be released
with put_abs_lowcore() primitive:

	struct lowcore *abs_lc;
	unsigned long flags;

	abs_lc = get_abs_lowcore(&flags);
	abs_lc->... = ...;
	put_abs_lowcore(abs_lc, flags);

To ensure the described mechanism works large segment- and region-
table entries must be avoided for the 8KB mappings. Failure to do
so results in usage of Region-Frame Absolute Address (RFAA) or
Segment-Frame Absolute Address (SFAA) large page fields. In that
case absolute addresses would be used to address the prefix page
instead of the real ones and the prefixing would get bypassed.

Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
  • Loading branch information
Alexander Gordeev committed Jul 28, 2022
1 parent 2e2493c commit 7d06fed
Show file tree
Hide file tree
Showing 14 changed files with 294 additions and 83 deletions.
5 changes: 4 additions & 1 deletion arch/s390/boot/startup.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
#include <asm/sclp.h>
#include <asm/diag.h>
#include <asm/uv.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"

unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END);
Expand Down Expand Up @@ -180,7 +182,8 @@ static void setup_kernel_memory_layout(void)
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
#endif
MODULES_END = vmax;
__abs_lowcore = round_down(vmax - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore));
MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;

Expand Down
17 changes: 17 additions & 0 deletions arch/s390/include/asm/abs_lowcore.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_ABS_LOWCORE_H
#define _ASM_S390_ABS_LOWCORE_H

#include <asm/lowcore.h>

#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))

extern unsigned long __abs_lowcore;
extern bool abs_lowcore_mapped;

struct lowcore *get_abs_lowcore(unsigned long *flags);
void put_abs_lowcore(struct lowcore *lc, unsigned long flags);
int abs_lowcore_map(int cpu, struct lowcore *lc);
void abs_lowcore_unmap(int cpu);

#endif /* _ASM_ABS_S390_LOWCORE_H */
2 changes: 2 additions & 0 deletions arch/s390/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -1781,6 +1781,8 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)

extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern void vmem_remove_mapping(unsigned long start, unsigned long size);
extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
extern void vmem_unmap_4k_page(unsigned long addr);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);
Expand Down
15 changes: 0 additions & 15 deletions arch/s390/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -307,21 +307,6 @@ static __always_inline void __noreturn disabled_wait(void)
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL

extern int memcpy_real(void *, unsigned long, size_t);
extern void memcpy_absolute(void *, void *, size_t);

#define put_abs_lowcore(member, x) do { \
unsigned long __abs_address = offsetof(struct lowcore, member); \
__typeof__(((struct lowcore *)0)->member) __tmp = (x); \
\
memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp)); \
} while (0)

#define get_abs_lowcore(x, member) do { \
unsigned long __abs_address = offsetof(struct lowcore, member); \
__typeof__(((struct lowcore *)0)->member) *__ptr = &(x); \
\
memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr)); \
} while (0)

extern int s390_isolate_bp(void);
extern int s390_isolate_bp_guest(void);
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
obj-y += smp.o text_amode31.o stacktrace.o
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o

extra-y += head64.o vmlinux.lds

Expand Down
88 changes: 88 additions & 0 deletions arch/s390/kernel/abs_lowcore.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/pgtable.h>
#include <asm/pgtable.h>
#include <asm/abs_lowcore.h>

#define ABS_LOWCORE_UNMAPPED 1
#define ABS_LOWCORE_LAP_ON 2
#define ABS_LOWCORE_IRQS_ON 4

unsigned long __bootdata_preserved(__abs_lowcore);
bool __ro_after_init abs_lowcore_mapped;

int abs_lowcore_map(int cpu, struct lowcore *lc)
{
unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
unsigned long phys = __pa(lc);
int rc, i;

for (i = 0; i < LC_PAGES; i++) {
rc = vmem_map_4k_page(addr, phys, PAGE_KERNEL);
if (rc) {
for (--i; i >= 0; i--) {
addr -= PAGE_SIZE;
vmem_unmap_4k_page(addr);
}
return rc;
}
addr += PAGE_SIZE;
phys += PAGE_SIZE;
}
return 0;
}

void abs_lowcore_unmap(int cpu)
{
unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
int i;

for (i = 0; i < LC_PAGES; i++) {
vmem_unmap_4k_page(addr);
addr += PAGE_SIZE;
}
}

struct lowcore *get_abs_lowcore(unsigned long *flags)
{
unsigned long irq_flags;
union ctlreg0 cr0;
int cpu;

*flags = 0;
cpu = get_cpu();
if (abs_lowcore_mapped) {
return ((struct lowcore *)__abs_lowcore) + cpu;
} else {
if (cpu != 0)
panic("Invalid unmapped absolute lowcore access\n");
local_irq_save(irq_flags);
if (!irqs_disabled_flags(irq_flags))
*flags |= ABS_LOWCORE_IRQS_ON;
__ctl_store(cr0.val, 0, 0);
if (cr0.lap) {
*flags |= ABS_LOWCORE_LAP_ON;
__ctl_clear_bit(0, 28);
}
*flags |= ABS_LOWCORE_UNMAPPED;
return lowcore_ptr[0];
}
}

void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
{
if (abs_lowcore_mapped) {
if (flags)
panic("Invalid mapped absolute lowcore release\n");
} else {
if (smp_processor_id() != 0)
panic("Invalid mapped absolute lowcore access\n");
if (!(flags & ABS_LOWCORE_UNMAPPED))
panic("Invalid unmapped absolute lowcore release\n");
if (flags & ABS_LOWCORE_LAP_ON)
__ctl_set_bit(0, 28);
if (flags & ABS_LOWCORE_IRQS_ON)
local_irq_enable();
}
put_cpu();
}
9 changes: 7 additions & 2 deletions arch/s390/kernel/ipl.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <asm/sclp.h>
#include <asm/checksum.h>
#include <asm/debug.h>
#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
Expand Down Expand Up @@ -1642,12 +1643,16 @@ static struct shutdown_action __refdata dump_action = {
static void dump_reipl_run(struct shutdown_trigger *trigger)
{
unsigned long ipib = (unsigned long) reipl_block_actual;
struct lowcore *abs_lc;
unsigned long flags;
unsigned int csum;

csum = (__force unsigned int)
csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
put_abs_lowcore(ipib, ipib);
put_abs_lowcore(ipib_checksum, csum);
abs_lc = get_abs_lowcore(&flags);
abs_lc->ipib = ipib;
abs_lc->ipib_checksum = csum;
put_abs_lowcore(abs_lc, flags);
dump_run(trigger);
}

Expand Down
8 changes: 7 additions & 1 deletion arch/s390/kernel/machine_kexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
#include <asm/stacktrace.h>
Expand Down Expand Up @@ -222,13 +223,18 @@ void machine_kexec_cleanup(struct kimage *image)

void arch_crash_save_vmcoreinfo(void)
{
struct lowcore *abs_lc;
unsigned long flags;

VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
abs_lc = get_abs_lowcore(&flags);
abs_lc->vmcore_info = paddr_vmcoreinfo_note();
put_abs_lowcore(abs_lc, flags);
}

void machine_shutdown(void)
Expand Down
9 changes: 6 additions & 3 deletions arch/s390/kernel/os_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/checksum.h>
#include <asm/lowcore.h>
#include <asm/abs_lowcore.h>
#include <asm/os_info.h>
#include <asm/asm-offsets.h>

Expand Down Expand Up @@ -57,13 +57,16 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
*/
void __init os_info_init(void)
{
void *ptr = &os_info;
struct lowcore *abs_lc;
unsigned long flags;

os_info.version_major = OS_INFO_VERSION_MAJOR;
os_info.version_minor = OS_INFO_VERSION_MINOR;
os_info.magic = OS_INFO_MAGIC;
os_info.csum = os_info_csum(&os_info);
put_abs_lowcore(os_info, __pa(ptr));
abs_lc = get_abs_lowcore(&flags);
abs_lc->os_info = __pa(&os_info);
put_abs_lowcore(abs_lc, flags);
}

#ifdef CONFIG_CRASH_DUMP
Expand Down
34 changes: 21 additions & 13 deletions arch/s390/kernel/setup.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/lowcore.h>
#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
Expand Down Expand Up @@ -411,8 +411,9 @@ void __init arch_call_rest_init(void)
static void __init setup_lowcore_dat_off(void)
{
unsigned long int_psw_mask = PSW_KERNEL_BITS;
struct lowcore *abs_lc, *lc;
unsigned long mcck_stack;
struct lowcore *lc;
unsigned long flags;

if (IS_ENABLED(CONFIG_KASAN))
int_psw_mask |= PSW_MASK_DAT;
Expand Down Expand Up @@ -474,11 +475,13 @@ static void __init setup_lowcore_dat_off(void)
lc->restart_data = 0;
lc->restart_source = -1U;

put_abs_lowcore(restart_stack, lc->restart_stack);
put_abs_lowcore(restart_fn, lc->restart_fn);
put_abs_lowcore(restart_data, lc->restart_data);
put_abs_lowcore(restart_source, lc->restart_source);
put_abs_lowcore(restart_psw, lc->restart_psw);
abs_lc = get_abs_lowcore(&flags);
abs_lc->restart_stack = lc->restart_stack;
abs_lc->restart_fn = lc->restart_fn;
abs_lc->restart_data = lc->restart_data;
abs_lc->restart_source = lc->restart_source;
abs_lc->restart_psw = lc->restart_psw;
put_abs_lowcore(abs_lc, flags);

mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
Expand All @@ -499,8 +502,8 @@ static void __init setup_lowcore_dat_off(void)

static void __init setup_lowcore_dat_on(void)
{
struct lowcore *lc = lowcore_ptr[0];
int cr;
struct lowcore *abs_lc;
unsigned long flags;

__ctl_clear_bit(0, 28);
S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
Expand All @@ -509,10 +512,15 @@ static void __init setup_lowcore_dat_on(void)
S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
__ctl_set_bit(0, 28);
__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
put_abs_lowcore(program_new_psw, lc->program_new_psw);
for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
abs_lc = get_abs_lowcore(&flags);
abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
abs_lc->program_new_psw = S390_lowcore.program_new_psw;
memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
sizeof(abs_lc->cregs_save_area));
put_abs_lowcore(abs_lc, flags);
if (abs_lowcore_map(0, lowcore_ptr[0]))
panic("Couldn't setup absolute lowcore");
abs_lowcore_mapped = true;
}

static struct resource code_resource = {
Expand Down
Loading

0 comments on commit 7d06fed

Please sign in to comment.