From e70452c4ba2ce1e24a3fdc18bd623edb7b56013c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 29 Nov 2024 01:49:26 +0100 Subject: [PATCH 01/53] s390/mm: Allow large pages for KASAN shadow mapping Commit c98d2ecae08f ("s390/mm: Uncouple physical vs virtual address spaces") introduced a large_allowed() helper that restricts which mapping modes can use large pages. This change unintentionally prevented KASAN shadow mappings from using large pages, despite there being no reason to avoid them. In fact, large pages are preferred for performance. Since commit d8073dc6bc04 ("s390/mm: Allow large pages only for aligned physical addresses"), both can_large_pud() and can_large_pmd() call _pa() to check if large page physical addresses are aligned. However, _pa() has a side effect: it allocates memory in POPULATE_KASAN_MAP_SHADOW mode. Rename large_allowed() to large_page_mapping_allowed() and add POPULATE_KASAN_MAP_SHADOW to the allowed list to restore large page mappings for KASAN shadows. While large_page_mapping_allowed() isn't strictly necessary with current mapping modes since disallowed modes either don't map anything or fail alignment and size checks, keep it for clarity. Rename _pa() to resolve_pa_may_alloc() for clarity and to emphasize existing side effect. Rework can_large_pud()/can_large_pmd() to take the side effect into consideration and actually return physical address instead of just checking conditions. Fixes: c98d2ecae08f ("s390/mm: Uncouple physical vs virtual address spaces") Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/boot/vmem.c | 74 +++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index a0c97cde5146d..d755e5340fcd6 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -13,6 +13,7 @@ #include "decompressor.h" #include "boot.h" +#define INVALID_PHYS_ADDR (~(phys_addr_t)0) struct ctlreg __bootdata_preserved(s390_invalid_asce); #ifdef CONFIG_PROC_FS @@ -228,11 +229,12 @@ static pte_t *boot_pte_alloc(void) return pte; } -static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) +static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size, + enum populate_mode mode) { switch (mode) { case POPULATE_NONE: - return -1; + return INVALID_PHYS_ADDR; case POPULATE_DIRECT: return addr; case POPULATE_LOWCORE: @@ -250,33 +252,55 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return addr; #endif default: - return -1; + return INVALID_PHYS_ADDR; } } -static bool large_allowed(enum populate_mode mode) +static bool large_page_mapping_allowed(enum populate_mode mode) { - return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL); + switch (mode) { + case POPULATE_DIRECT: + case POPULATE_IDENTITY: + case POPULATE_KERNEL: +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: +#endif + return true; + default: + return false; + } } -static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; + + if (!machine.has_edat2 || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE)) + return INVALID_PHYS_ADDR; - return machine.has_edat2 && large_allowed(mode) && - IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE); + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PUD_SIZE)) + return INVALID_PHYS_ADDR; + + return pa; } -static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end, - enum populate_mode mode) +static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end, + enum populate_mode mode) { - unsigned long size = end - addr; + unsigned long pa, size = end - addr; + + if (!machine.has_edat1 || !large_page_mapping_allowed(mode) || + !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE)) + return INVALID_PHYS_ADDR; + + pa = resolve_pa_may_alloc(addr, size, mode); + if (!IS_ALIGNED(pa, PMD_SIZE)) + return INVALID_PHYS_ADDR; - return machine.has_edat1 && large_allowed(mode) && - IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) && - IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE); + return pa; } static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, @@ -290,7 +314,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e if (pte_none(*pte)) { if (kasan_pte_populate_zero_shadow(pte, mode)) continue; - entry = __pte(_pa(addr, PAGE_SIZE, mode)); + entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); set_pte(pte, entry); pages++; @@ -303,7 +327,7 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pmd_t *pmd, entry; pte_t *pte; @@ -313,8 +337,9 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (pmd_none(*pmd)) { if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) continue; - if (can_large_pmd(pmd, addr, next, mode)) { - entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); + pa = try_get_large_pmd_pa(pmd, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pmd(pa); entry = set_pmd_bit(entry, SEGMENT_KERNEL); set_pmd(pmd, entry); pages++; @@ -334,7 +359,7 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next, pages = 0; + unsigned long pa, next, pages = 0; pud_t *pud, entry; pmd_t *pmd; @@ -344,8 +369,9 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (pud_none(*pud)) { if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) continue; - if (can_large_pud(pud, addr, next, mode)) { - entry = __pud(_pa(addr, _REGION3_SIZE, mode)); + pa = try_get_large_pud_pa(pud, addr, next, mode); + if (pa != INVALID_PHYS_ADDR) { + entry = __pud(pa); entry = set_pud_bit(entry, REGION3_KERNEL); set_pud(pud, entry); pages++; From 02415f1cf435ac1403743156c21b79f52adee6ab Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 13 Dec 2024 00:59:26 +0100 Subject: [PATCH 02/53] s390/boot: Rename physmem_alloc_top_down() to physmem_alloc_or_die() The new name better reflects the function's behavior, emphasizing that it will terminate execution if allocation fails. Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 4 ++-- arch/s390/boot/ipl_report.c | 2 +- arch/s390/boot/kaslr.c | 2 +- arch/s390/boot/physmem_info.c | 4 ++-- arch/s390/boot/startup.c | 2 +- arch/s390/boot/vmem.c | 6 +++--- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 56244fe78182c..f269026246501 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -47,8 +47,8 @@ void physmem_set_usable_limit(unsigned long limit); void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); void physmem_free(enum reserved_range_type type); /* for continuous/multiple allocations per type */ -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align); +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align); /* for single allocations, 1 per type */ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, unsigned long align, unsigned long min, unsigned long max, diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index d00898852a88a..aea22e268c65d 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -155,7 +155,7 @@ void save_ipl_cert_comp_list(void) return; size = get_cert_comp_list_size(); - early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + early_ipl_comp_list_addr = physmem_alloc_or_die(RR_CERT_COMP_LIST, size, sizeof(int)); ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; copy_components_bootdata(); diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index f864d2bff7754..d6761e1e2f8d4 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -168,7 +168,7 @@ static unsigned long iterate_valid_positions(unsigned long size, unsigned long a * cannot have chains. * * On the other hand, "dynamic" or "repetitive" allocations are done via - * physmem_alloc_top_down(). These allocations are tightly packed together + * physmem_alloc_or_die(). These allocations are tightly packed together * top down from the end of online memory. physmem_alloc_pos represents * current position where those allocations start. * diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 7617aa2d2f7e9..34d310a6d8a52 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -343,8 +343,8 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s return addr; } -unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, - unsigned long align) +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align) { struct reserved_range *range = &physmem_info.reserved[type]; struct reserved_range *new_range; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index e00aed22d0d40..70f90a19007a0 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -143,7 +143,7 @@ static void rescue_initrd(unsigned long min, unsigned long max) return; old_addr = addr; physmem_free(RR_INITRD); - addr = physmem_alloc_top_down(RR_INITRD, size, 0); + addr = physmem_alloc_or_die(RR_INITRD, size, 0); memmove((void *)addr, (void *)old_addr, size); } diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index d755e5340fcd6..3e69e4405d70b 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -201,7 +201,7 @@ static void *boot_crst_alloc(unsigned long val) unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size); crst_table_init(table, val); __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); return table; @@ -217,7 +217,7 @@ static pte_t *boot_pte_alloc(void) * during POPULATE_KASAN_MAP_SHADOW when EDAT is off */ if (!pte_leftover) { - pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); + pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; __arch_set_page_dat(pte, 1); } else { @@ -247,7 +247,7 @@ static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size return __identity_pa(addr); #ifdef CONFIG_KASAN case POPULATE_KASAN_MAP_SHADOW: - addr = physmem_alloc_top_down(RR_VMEM, size, size); + addr = physmem_alloc_or_die(RR_VMEM, size, size); memset((void *)addr, 0, size); return addr; #endif From aeb4358a52365269e6ca658505a6410ed85570b2 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 29 Nov 2024 02:26:19 +0100 Subject: [PATCH 03/53] s390/boot: Add physmem_alloc() Add physmem_alloc() as a variant of physmem_alloc_or_die() that can return an error instead of triggering a panic on OOM. This allows callers to implement alternative fallback paths. Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 2 ++ arch/s390/boot/physmem_info.c | 28 +++++++++++++++++++--------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index f269026246501..ce8422d8f9a89 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -49,6 +49,8 @@ void physmem_free(enum reserved_range_type type); /* for continuous/multiple allocations per type */ unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, unsigned long align); +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom); /* for single allocations, 1 per type */ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, unsigned long align, unsigned long min, unsigned long max, diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 34d310a6d8a52..f585f015abfe2 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -343,8 +343,8 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s return addr; } -unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, - unsigned long align) +unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, + unsigned long align, bool die_on_oom) { struct reserved_range *range = &physmem_info.reserved[type]; struct reserved_range *new_range; @@ -352,18 +352,22 @@ unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long unsigned long addr; addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, - &ranges_left, true); + &ranges_left, die_on_oom); + if (!addr) + return 0; /* if not a consecutive allocation of the same type or first allocation */ if (range->start != addr + size) { if (range->end) { - physmem_alloc_pos = __physmem_alloc_range( - sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, - physmem_alloc_ranges, &ranges_left, true); - new_range = (struct reserved_range *)physmem_alloc_pos; + addr = __physmem_alloc_range(sizeof(struct reserved_range), 0, 0, + physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + new_range = (struct reserved_range *)addr; + addr = __physmem_alloc_range(size, align, 0, addr, ranges_left, + &ranges_left, die_on_oom); + if (!addr) + return 0; *new_range = *range; range->chain = new_range; - addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, - ranges_left, &ranges_left, true); } range->end = addr + size; } @@ -373,6 +377,12 @@ unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long return addr; } +unsigned long physmem_alloc_or_die(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + return physmem_alloc(type, size, align, true); +} + unsigned long get_physmem_alloc_pos(void) { return physmem_alloc_pos; From 490a5e99ead57b16045a8e968fd8bc5fdcbfc90c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Dec 2024 11:18:32 +0100 Subject: [PATCH 04/53] s390/boot: Allow KASAN mapping to fallback to small pages For KASAN shadow mappings, switch from physmem_alloc_or_die() to physmem_alloc() and return INVALID_PHYS_ADDR on allocation failure. This allows gracefully falling back from large pages to smaller pages (1MB or 4KB) if the allocation of 2GB size/aligned pages cannot be fulfilled. Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/boot/vmem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 3e69e4405d70b..47011b5a9e3d2 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -247,9 +247,13 @@ static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size return __identity_pa(addr); #ifdef CONFIG_KASAN case POPULATE_KASAN_MAP_SHADOW: - addr = physmem_alloc_or_die(RR_VMEM, size, size); - memset((void *)addr, 0, size); - return addr; + /* Allow to fail large page allocations, this will fall back to 1mb/4k pages */ + addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE); + if (addr) { + memset((void *)addr, 0, size); + return addr; + } + return INVALID_PHYS_ADDR; #endif default: return INVALID_PHYS_ADDR; From dccac2b22b1cabb37b141f7df5bebbcf10a2b44d Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 15:58:56 +0100 Subject: [PATCH 05/53] s390/boot: Add %% support to boot_printk() Add "%%" support for the boot_printk() format string. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/printk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 35f18f2b936e0..313b11daa353c 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -94,7 +94,11 @@ void boot_printk(const char *fmt, ...) *p++ = *fmt; continue; } - pad = isdigit(*++fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + if (*++fmt == '%') { + *p++ = '%'; + continue; + } + pad = isdigit(*fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; switch (*fmt) { case 's': p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); From 9291d572fe5b887ae1d93e9b40d8fd6ad10c2f30 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 16:10:55 +0100 Subject: [PATCH 06/53] s390/boot: Add length modifiers to boot_printk() Add support for the 'l', 'h', 'hh', and 'z' length modifiers. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/printk.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 313b11daa353c..7379fdc9831f8 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -80,6 +80,13 @@ static noinline char *strsym(void *ip) return buf; } +#define va_arg_len_type(args, lenmod, typemod) \ + ((lenmod == 'l') ? va_arg(args, typemod long) : \ + (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ + (lenmod == 'H') ? (typemod char)va_arg(args, typemod int) : \ + (lenmod == 'z') ? va_arg(args, typemod long) : \ + va_arg(args, typemod int)) + void boot_printk(const char *fmt, ...) { char buf[1024] = { 0 }; @@ -87,6 +94,7 @@ void boot_printk(const char *fmt, ...) unsigned long pad; char *p = buf; va_list args; + char lenmod; va_start(args, fmt); for (; p < end && *fmt; fmt++) { @@ -99,24 +107,26 @@ void boot_printk(const char *fmt, ...) continue; } pad = isdigit(*fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0; + if (lenmod == 'h' && *fmt == 'h') { + lenmod = 'H'; + fmt++; + } switch (*fmt) { case 's': + if (lenmod) + goto out; p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); break; case 'p': - if (*++fmt != 'S') + if (*++fmt != 'S' || lenmod) goto out; p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); break; - case 'l': - if (*++fmt != 'x' || end - p <= max(sizeof(long) * 2, pad)) - goto out; - p = as_hex(p, va_arg(args, unsigned long), pad); - break; case 'x': - if (end - p <= max(sizeof(int) * 2, pad)) + if (end - p <= max(sizeof(long) * 2, pad)) goto out; - p = as_hex(p, va_arg(args, unsigned int), pad); + p = as_hex(p, va_arg_len_type(args, lenmod, unsigned), pad); break; default: goto out; From 3d846daafb2fe89e9f1ecf48d2a5d8fec52189f9 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 28 Nov 2024 02:22:54 +0100 Subject: [PATCH 07/53] s390/boot: Add field width and padding handling to boot_printk() Enhance boot_printk() to support field width and padding across all argument types for better formatting. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/printk.c | 68 ++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 7379fdc9831f8..2f5d699768afc 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -13,13 +13,37 @@ const char hex_asc[] = "0123456789abcdef"; +#define MAX_NUMLEN 17 static char *as_hex(char *dst, unsigned long val, int pad) { - char *p, *end = p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); + char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); - for (*p-- = 0; p >= dst; val >>= 4) + for (*p-- = '\0'; p >= dst; val >>= 4) *p-- = hex_asc[val & 0x0f]; - return end; + return dst; +} + +static ssize_t strpad(char *dst, size_t dst_size, const char *src, + int _pad, bool zero_pad) +{ + ssize_t len = strlen(src), pad = _pad; + char *p = dst; + + if (max(len, abs(pad)) >= dst_size) + return -E2BIG; + + if (pad > len) { + memset(p, zero_pad ? '0' : ' ', pad - len); + p += pad - len; + } + memcpy(p, src, len); + p += len; + if (pad < 0 && -pad > len) { + memset(p, ' ', -pad - len); + p += -pad - len; + } + *p = '\0'; + return p - dst; } static char *symstart(char *p) @@ -58,22 +82,22 @@ static noinline char *findsym(unsigned long ip, unsigned short *off, unsigned sh return NULL; } -static noinline char *strsym(void *ip) +#define MAX_SYMLEN 64 +static noinline char *strsym(char *buf, void *ip) { - static char buf[64]; unsigned short off; unsigned short len; char *p; p = findsym((unsigned long)ip, &off, &len); if (p) { - strncpy(buf, p, sizeof(buf)); + strncpy(buf, p, MAX_SYMLEN); /* reserve 15 bytes for offset/len in symbol+0x1234/0x1234 */ - p = buf + strnlen(buf, sizeof(buf) - 15); + p = buf + strnlen(buf, MAX_SYMLEN - 15); strcpy(p, "+0x"); - p = as_hex(p + 3, off, 0); - strcpy(p, "/0x"); - as_hex(p + 3, len, 0); + as_hex(p + 3, off, 0); + strcat(p, "/0x"); + as_hex(p + strlen(p), len, 0); } else { as_hex(buf, (unsigned long)ip, 16); } @@ -91,10 +115,13 @@ void boot_printk(const char *fmt, ...) { char buf[1024] = { 0 }; char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ - unsigned long pad; - char *p = buf; + bool zero_pad; + char *strval, *p = buf; + char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)]; va_list args; char lenmod; + ssize_t len; + int pad; va_start(args, fmt); for (; p < end && *fmt; fmt++) { @@ -106,7 +133,8 @@ void boot_printk(const char *fmt, ...) *p++ = '%'; continue; } - pad = isdigit(*fmt) ? simple_strtol(fmt, (char **)&fmt, 10) : 0; + zero_pad = (*fmt == '0'); + pad = simple_strtol(fmt, (char **)&fmt, 10); lenmod = (*fmt == 'h' || *fmt == 'l' || *fmt == 'z') ? *fmt++ : 0; if (lenmod == 'h' && *fmt == 'h') { lenmod = 'H'; @@ -116,21 +144,25 @@ void boot_printk(const char *fmt, ...) case 's': if (lenmod) goto out; - p = buf + strlcat(buf, va_arg(args, char *), sizeof(buf)); + strval = va_arg(args, char *); + zero_pad = false; break; case 'p': if (*++fmt != 'S' || lenmod) goto out; - p = buf + strlcat(buf, strsym(va_arg(args, void *)), sizeof(buf)); + strval = strsym(valbuf, va_arg(args, void *)); + zero_pad = false; break; case 'x': - if (end - p <= max(sizeof(long) * 2, pad)) - goto out; - p = as_hex(p, va_arg_len_type(args, lenmod, unsigned), pad); + strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; default: goto out; } + len = strpad(p, end - p, strval, pad, zero_pad); + if (len == -E2BIG) + break; + p += len; } out: va_end(args); From 92b712fa7d5b67ffa3d1ee6147643e9f60154c10 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 28 Nov 2024 02:35:17 +0100 Subject: [PATCH 08/53] s390/boot: Add decimal conversion specifiers to boot_printk() Enable the boot_printk() function to print decimal values. Add the 'd', 'i', and 'u' conversion specifiers support. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/printk.c | 44 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 2f5d699768afc..41e576da1a4f3 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -13,7 +13,6 @@ const char hex_asc[] = "0123456789abcdef"; -#define MAX_NUMLEN 17 static char *as_hex(char *dst, unsigned long val, int pad) { char *p = dst + max(pad, (int)__fls(val | 1) / 4 + 1); @@ -23,8 +22,30 @@ static char *as_hex(char *dst, unsigned long val, int pad) return dst; } +#define MAX_NUMLEN 21 +static char *as_dec(char *buf, unsigned long val, bool is_signed) +{ + bool negative = false; + char *p = buf + MAX_NUMLEN; + + if (is_signed && (long)val < 0) { + val = (val == LONG_MIN ? LONG_MIN : -(long)val); + negative = true; + } + + *--p = '\0'; + do { + *--p = '0' + (val % 10); + val /= 10; + } while (val); + + if (negative) + *--p = '-'; + return p; +} + static ssize_t strpad(char *dst, size_t dst_size, const char *src, - int _pad, bool zero_pad) + int _pad, bool zero_pad, bool decimal) { ssize_t len = strlen(src), pad = _pad; char *p = dst; @@ -33,6 +54,12 @@ static ssize_t strpad(char *dst, size_t dst_size, const char *src, return -E2BIG; if (pad > len) { + if (decimal && zero_pad && *src == '-') { + *p++ = '-'; + src++; + len--; + pad--; + } memset(p, zero_pad ? '0' : ' ', pad - len); p += pad - len; } @@ -115,7 +142,7 @@ void boot_printk(const char *fmt, ...) { char buf[1024] = { 0 }; char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ - bool zero_pad; + bool zero_pad, decimal; char *strval, *p = buf; char valbuf[MAX(MAX_SYMLEN, MAX_NUMLEN)]; va_list args; @@ -140,6 +167,7 @@ void boot_printk(const char *fmt, ...) lenmod = 'H'; fmt++; } + decimal = false; switch (*fmt) { case 's': if (lenmod) @@ -153,13 +181,21 @@ void boot_printk(const char *fmt, ...) strval = strsym(valbuf, va_arg(args, void *)); zero_pad = false; break; + case 'd': + case 'i': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, signed), 1); + decimal = true; + break; + case 'u': + strval = as_dec(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); + break; case 'x': strval = as_hex(valbuf, va_arg_len_type(args, lenmod, unsigned), 0); break; default: goto out; } - len = strpad(p, end - p, strval, pad, zero_pad); + len = strpad(p, end - p, strval, pad, zero_pad, decimal); if (len == -E2BIG) break; p += len; From d538fdc49a7d7dd068fc29bcc6093e8dd45abbd9 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 16:56:12 +0100 Subject: [PATCH 09/53] s390/boot: Add support for boot messages loglevels Add message severity levels for boot messages, similar to the main kernel. Support command-line options that control console output verbosity, including "loglevel," "ignore_loglevel," "debug," and "quiet". Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 12 ++++++++++++ arch/s390/boot/ipl_parm.c | 11 +++++++++++ arch/s390/boot/printk.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index ce8422d8f9a89..80e86387bee55 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,6 +8,7 @@ #ifndef __ASSEMBLY__ +#include #include struct machine_info { @@ -76,7 +77,18 @@ void print_stacktrace(unsigned long sp); void error(char *m); int get_random(unsigned long limit, unsigned long *value); +#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG fmt, ##__VA_ARGS__) +#define boot_alert(fmt, ...) boot_printk(KERN_ALERT fmt, ##__VA_ARGS__) +#define boot_crit(fmt, ...) boot_printk(KERN_CRIT fmt, ##__VA_ARGS__) +#define boot_err(fmt, ...) boot_printk(KERN_ERR fmt, ##__VA_ARGS__) +#define boot_warn(fmt, ...) boot_printk(KERN_WARNING fmt, ##__VA_ARGS__) +#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE fmt, ##__VA_ARGS__) +#define boot_info(fmt, ...) boot_printk(KERN_INFO fmt, ##__VA_ARGS__) +#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG fmt, ##__VA_ARGS__) + extern struct machine_info machine; +extern int boot_console_loglevel; +extern bool boot_ignore_loglevel; /* Symbols defined by linker scripts */ extern const char kernel_version[]; diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 557462e62cd73..b3fb0b6729688 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -313,5 +313,16 @@ void parse_boot_command_line(void) #endif if (!strcmp(param, "relocate_lowcore") && test_facility(193)) relocate_lowcore = 1; + if (!strcmp(param, "debug")) + boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + if (!strcmp(param, "quiet")) + boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; + if (!strcmp(param, "ignore_loglevel")) + boot_ignore_loglevel = true; + if (!strcmp(param, "loglevel")) { + boot_console_loglevel = simple_strtoull(val, NULL, 10); + if (boot_console_loglevel < CONSOLE_LOGLEVEL_MIN) + boot_console_loglevel = CONSOLE_LOGLEVEL_MIN; + } } } diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 41e576da1a4f3..7eeb43821cd0c 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -11,6 +11,9 @@ #include #include "boot.h" +int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; +bool boot_ignore_loglevel; + const char hex_asc[] = "0123456789abcdef"; static char *as_hex(char *dst, unsigned long val, int pad) @@ -131,6 +134,25 @@ static noinline char *strsym(char *buf, void *ip) return buf; } +static inline int printk_loglevel(const char *buf) +{ + if (buf[0] == KERN_SOH_ASCII && buf[1]) { + switch (buf[1]) { + case '0' ... '7': + return buf[1] - '0'; + } + } + return MESSAGE_LOGLEVEL_DEFAULT; +} + +static void boot_console_earlyprintk(const char *buf) +{ + int level = printk_loglevel(buf); + + if (boot_ignore_loglevel || level < boot_console_loglevel) + sclp_early_printk(printk_skip_level(buf)); +} + #define va_arg_len_type(args, lenmod, typemod) \ ((lenmod == 'l') ? va_arg(args, typemod long) : \ (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ @@ -150,6 +172,11 @@ void boot_printk(const char *fmt, ...) ssize_t len; int pad; + if (!printk_get_level(fmt)) { + *p++ = KERN_SOH_ASCII; + *p++ = '0' + MESSAGE_LOGLEVEL_DEFAULT; + } + va_start(args, fmt); for (; p < end && *fmt; fmt++) { if (*fmt != '%') { @@ -202,5 +229,5 @@ void boot_printk(const char *fmt, ...) } out: va_end(args); - sclp_early_printk(buf); + boot_console_earlyprintk(buf); } From 42fc158c7e789ccb8f9d42e5b18d00ca00d93cfe Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 17:07:56 +0100 Subject: [PATCH 10/53] s390/boot: Replace boot_printk() with loglevel-specific helpers Replaces boot_printk() calls with appropriate loglevel-specific helpers such as boot_emerg(), boot_warn(), and boot_debug(). Using functions with explicit log levels improves log clarity and aligns the boot code with standard kernel logging practices. This makes it easier to filter and manage boot-time messages based on their severity. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/als.c | 10 +++---- arch/s390/boot/ipl_parm.c | 2 +- arch/s390/boot/kaslr.c | 2 +- arch/s390/boot/pgm_check_info.c | 51 ++++++++++++++++----------------- arch/s390/boot/physmem_info.c | 26 ++++++++--------- arch/s390/boot/startup.c | 5 ++-- 6 files changed, 48 insertions(+), 48 deletions(-) diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c index 11e0c3d5dbc8c..79afb5fa7f1f3 100644 --- a/arch/s390/boot/als.c +++ b/arch/s390/boot/als.c @@ -46,7 +46,7 @@ void print_missing_facilities(void) * z/VM adds a four character prefix. */ if (strlen(als_str) > 70) { - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); *als_str = '\0'; } u16_to_decimal(val_str, i * BITS_PER_LONG + j); @@ -54,7 +54,7 @@ void print_missing_facilities(void) first = 0; } } - boot_printk("%s\n", als_str); + boot_emerg("%s\n", als_str); } static void facility_mismatch(void) @@ -62,10 +62,10 @@ static void facility_mismatch(void) struct cpuid id; get_cpu_id(&id); - boot_printk("The Linux kernel requires more recent processor hardware\n"); - boot_printk("Detected machine-type number: %4x\n", id.machine); + boot_emerg("The Linux kernel requires more recent processor hardware\n"); + boot_emerg("Detected machine-type number: %4x\n", id.machine); print_missing_facilities(); - boot_printk("See Principles of Operations for facility bits\n"); + boot_emerg("See Principles of Operations for facility bits\n"); disabled_wait(); } diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index b3fb0b6729688..3f775bd6d9a23 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -215,7 +215,7 @@ static void check_cleared_facilities(void) for (i = 0; i < ARRAY_SIZE(als); i++) { if ((stfle_fac_list[i] & als[i]) != als[i]) { - boot_printk("Warning: The Linux kernel requires facilities cleared via command line option\n"); + boot_emerg("The Linux kernel requires facilities cleared via command line option\n"); print_missing_facilities(); break; } diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index d6761e1e2f8d4..941f4c9e27cca 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -32,7 +32,7 @@ struct prng_parm { static int check_prng(void) { if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) { - boot_printk("KASLR disabled: CPU has no PRNG\n"); + boot_warn("KASLR disabled: CPU has no PRNG\n"); return 0; } if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 5abe59fb3bc08..0d20336e170b7 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -17,13 +17,14 @@ void print_stacktrace(unsigned long sp) (unsigned long)_stack_end }; bool first = true; - boot_printk("Call Trace:\n"); + boot_emerg("Call Trace:\n"); while (!(sp & 0x7) && on_stack(&boot_stack, sp, sizeof(struct stack_frame))) { struct stack_frame *sf = (struct stack_frame *)sp; - boot_printk(first ? "(sp:%016lx [<%016lx>] %pS)\n" : - " sp:%016lx [<%016lx>] %pS\n", - sp, sf->gprs[8], (void *)sf->gprs[8]); + if (first) + boot_emerg("(sp:%016lx [<%016lx>] %pS)\n", sp, sf->gprs[8], (void *)sf->gprs[8]); + else + boot_emerg(" sp:%016lx [<%016lx>] %pS\n", sp, sf->gprs[8], (void *)sf->gprs[8]); if (sf->back_chain <= sp) break; sp = sf->back_chain; @@ -36,30 +37,28 @@ void print_pgm_check_info(void) unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area; struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area); - boot_printk("Linux version %s\n", kernel_version); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Kernel fault: interruption code %04x ilc:%x\n", - get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Kernel fault: interruption code %04x ilc:%x\n", + get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); if (kaslr_enabled()) { - boot_printk("Kernel random base: %lx\n", __kaslr_offset); - boot_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys); + boot_emerg("Kernel random base: %lx\n", __kaslr_offset); + boot_emerg("Kernel random base phys: %lx\n", __kaslr_offset_phys); } - boot_printk("PSW : %016lx %016lx (%pS)\n", - get_lowcore()->psw_save_area.mask, - get_lowcore()->psw_save_area.addr, - (void *)get_lowcore()->psw_save_area.addr); - boot_printk( - " R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", - psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, - psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, - psw->eaba); - boot_printk("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); - boot_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); + boot_emerg("PSW : %016lx %016lx (%pS)\n", + get_lowcore()->psw_save_area.mask, + get_lowcore()->psw_save_area.addr, + (void *)get_lowcore()->psw_save_area.addr); + boot_emerg(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n", + psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck, + psw->wait, psw->pstate, psw->as, psw->cc, psw->pm, psw->ri, psw->eaba); + boot_emerg("GPRS: %016lx %016lx %016lx %016lx\n", gpregs[0], gpregs[1], gpregs[2], gpregs[3]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[4], gpregs[5], gpregs[6], gpregs[7]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[8], gpregs[9], gpregs[10], gpregs[11]); + boot_emerg(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); print_stacktrace(get_lowcore()->gpregs_save_area[15]); - boot_printk("Last Breaking-Event-Address:\n"); - boot_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, - (void *)get_lowcore()->pgm_last_break); + boot_emerg("Last Breaking-Event-Address:\n"); + boot_emerg(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break, + (void *)get_lowcore()->pgm_last_break); } diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index f585f015abfe2..4cfd5ef1af4f7 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -241,27 +241,27 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min, enum reserved_range_type t; int i; - boot_printk("Linux version %s\n", kernel_version); + boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) - boot_printk("Kernel command line: %s\n", early_command_line); - boot_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", - size, align, min, max); - boot_printk("Reserved memory ranges:\n"); + boot_emerg("Kernel command line: %s\n", early_command_line); + boot_emerg("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", + size, align, min, max); + boot_emerg("Reserved memory ranges:\n"); for_each_physmem_reserved_range(t, range, &start, &end) { - boot_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); total_reserved_mem += end - start; } - boot_printk("Usable online memory ranges (info source: %s [%x]):\n", - get_physmem_info_source(), physmem_info.info_source); + boot_emerg("Usable online memory ranges (info source: %s [%x]):\n", + get_physmem_info_source(), physmem_info.info_source); for_each_physmem_usable_range(i, &start, &end) { - boot_printk("%016lx %016lx\n", start, end); + boot_emerg("%016lx %016lx\n", start, end); total_mem += end - start; } - boot_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", - total_mem, total_reserved_mem, - total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + boot_emerg("Usable online memory total: %lx Reserved: %lx Free: %lx\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); print_stacktrace(current_frame_address()); - boot_printk("\n\n -- System halted\n"); + boot_emerg(" -- System halted\n"); disabled_wait(); } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 70f90a19007a0..e9ae589c61694 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -42,7 +42,8 @@ struct machine_info machine; void error(char *x) { - boot_printk("\n\n%s\n\n -- System halted", x); + boot_emerg("%s\n", x); + boot_emerg(" -- System halted\n"); disabled_wait(); } @@ -305,7 +306,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); - boot_printk("The kernel base address is forced to %lx\n", kernel_start); + boot_debug("The kernel base address is forced to %lx\n", kernel_start); } else { kernel_start = __NO_KASLR_START_KERNEL; } From bbbaf061237dbf5817a46f2e209a663750277e3b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 24 Nov 2024 14:47:58 +0100 Subject: [PATCH 11/53] s390/boot: Use decimal format specifiers in boot messages Now that boot_printk() supports decimal specifiers, update boot_emerg() messages to use %d and %lu instead of %x and %lx where appropriate. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/pgm_check_info.c | 2 +- arch/s390/boot/physmem_info.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 0d20336e170b7..e8a728c3db052 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -40,7 +40,7 @@ void print_pgm_check_info(void) boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) boot_emerg("Kernel command line: %s\n", early_command_line); - boot_emerg("Kernel fault: interruption code %04x ilc:%x\n", + boot_emerg("Kernel fault: interruption code %04x ilc:%d\n", get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1); if (kaslr_enabled()) { boot_emerg("Kernel random base: %lx\n", __kaslr_offset); diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 4cfd5ef1af4f7..37d201986261c 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -244,20 +244,20 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min, boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) boot_emerg("Kernel command line: %s\n", early_command_line); - boot_emerg("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", + boot_emerg("Out of memory allocating %lu bytes 0x%lx aligned in range %lx:%lx\n", size, align, min, max); boot_emerg("Reserved memory ranges:\n"); for_each_physmem_reserved_range(t, range, &start, &end) { boot_emerg("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); total_reserved_mem += end - start; } - boot_emerg("Usable online memory ranges (info source: %s [%x]):\n", + boot_emerg("Usable online memory ranges (info source: %s [%d]):\n", get_physmem_info_source(), physmem_info.info_source); for_each_physmem_usable_range(i, &start, &end) { boot_emerg("%016lx %016lx\n", start, end); total_mem += end - start; } - boot_emerg("Usable online memory total: %lx Reserved: %lx Free: %lx\n", + boot_emerg("Usable online memory total: %lu Reserved: %lu Free: %lu\n", total_mem, total_reserved_mem, total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); print_stacktrace(current_frame_address()); From 816b5feaed13946b06fb7c63b7343fbfd83e520f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 20:10:50 +0100 Subject: [PATCH 12/53] s390/boot: Introduce ring buffer for boot messages Collect all boot messages into a ring buffer independent of the current log level. This allows to retain all boot-time messages, which is particularly useful for analyzing early crashes. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/printk.c | 20 +++++++++++++++++++- arch/s390/include/asm/boot_data.h | 13 +++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 7eeb43821cd0c..e49a9be6d555e 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -13,6 +13,20 @@ int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; bool boot_ignore_loglevel; +char boot_rb[PAGE_SIZE * 2]; +size_t boot_rb_off; + +static void boot_rb_add(const char *str, size_t len) +{ + /* leave double '\0' in the end */ + size_t avail = sizeof(boot_rb) - boot_rb_off - 1; + + /* store strings separated by '\0' */ + if (len + 1 > avail) + boot_rb_off = 0; + strcpy(boot_rb + boot_rb_off, str); + boot_rb_off += len + 1; +} const char hex_asc[] = "0123456789abcdef"; @@ -229,5 +243,9 @@ void boot_printk(const char *fmt, ...) } out: va_end(args); - boot_console_earlyprintk(buf); + len = strlen(buf); + if (len) { + boot_rb_add(buf, len); + boot_console_earlyprintk(buf); + } } diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index f7eed27b3220f..2cc35e968ff53 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -15,4 +15,17 @@ extern unsigned long ipl_cert_list_size; extern unsigned long early_ipl_comp_list_addr; extern unsigned long early_ipl_comp_list_size; +extern char boot_rb[PAGE_SIZE * 2]; +extern size_t boot_rb_off; + +#define boot_rb_foreach(cb) \ + do { \ + size_t off = boot_rb_off + strlen(boot_rb + boot_rb_off) + 1; \ + size_t len; \ + for (; off < sizeof(boot_rb) && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + for (off = 0; off < boot_rb_off && (len = strlen(boot_rb + off)); off += len + 1) \ + cb(boot_rb + off); \ + } while (0) + #endif /* _ASM_S390_BOOT_DATA_H */ From 847e5a4c713747e8c0e9e25bda00aea782d1062b Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 20:36:46 +0100 Subject: [PATCH 13/53] s390/boot: Make boot_printk() return int Modify boot_printk() to return int, aligning it with printk(). Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 2 +- arch/s390/boot/printk.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 80e86387bee55..dbfba6822d6fd 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -72,7 +72,7 @@ void print_pgm_check_info(void); unsigned long randomize_within_range(unsigned long size, unsigned long align, unsigned long min, unsigned long max); void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit); -void __printf(1, 2) boot_printk(const char *fmt, ...); +int __printf(1, 2) boot_printk(const char *fmt, ...); void print_stacktrace(unsigned long sp); void error(char *m); int get_random(unsigned long limit, unsigned long *value); diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index e49a9be6d555e..311fb45c097c5 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -174,7 +174,7 @@ static void boot_console_earlyprintk(const char *buf) (lenmod == 'z') ? va_arg(args, typemod long) : \ va_arg(args, typemod int)) -void boot_printk(const char *fmt, ...) +int boot_printk(const char *fmt, ...) { char buf[1024] = { 0 }; char *end = buf + sizeof(buf) - 1; /* make sure buf is 0 terminated */ @@ -248,4 +248,5 @@ void boot_printk(const char *fmt, ...) boot_rb_add(buf, len); boot_console_earlyprintk(buf); } + return len; } From c09f8d0ad673afdfd5d097d95d2026a84fa4926e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 21:46:17 +0100 Subject: [PATCH 14/53] s390/boot: Defer boot messages when earlyprintk is not enabled When earlyprintk is not specified, boot messages are only stored in a ring buffer to be printed later by printk when console driver is registered. Critical messages from boot_emerg() are always printed immediately, even without earlyprintk. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/ipl_parm.c | 2 ++ arch/s390/boot/printk.c | 9 +++++++-- arch/s390/include/asm/boot_data.h | 1 + arch/s390/kernel/setup.c | 18 ++++++++++++++++++ 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 3f775bd6d9a23..fd4874ab30cb7 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -313,6 +313,8 @@ void parse_boot_command_line(void) #endif if (!strcmp(param, "relocate_lowcore") && test_facility(193)) relocate_lowcore = 1; + if (!strcmp(param, "earlyprintk")) + boot_earlyprintk = true; if (!strcmp(param, "debug")) boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; if (!strcmp(param, "quiet")) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 311fb45c097c5..7b7203f078c80 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -13,8 +14,9 @@ int boot_console_loglevel = CONFIG_CONSOLE_LOGLEVEL_DEFAULT; bool boot_ignore_loglevel; -char boot_rb[PAGE_SIZE * 2]; -size_t boot_rb_off; +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); static void boot_rb_add(const char *str, size_t len) { @@ -163,6 +165,9 @@ static void boot_console_earlyprintk(const char *buf) { int level = printk_loglevel(buf); + /* always print emergency messages */ + if (level > LOGLEVEL_EMERG && !boot_earlyprintk) + return; if (boot_ignore_loglevel || level < boot_console_loglevel) sclp_early_printk(printk_skip_level(buf)); } diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index 2cc35e968ff53..e7ef7524b847e 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -16,6 +16,7 @@ extern unsigned long early_ipl_comp_list_addr; extern unsigned long early_ipl_comp_list_size; extern char boot_rb[PAGE_SIZE * 2]; +extern bool boot_earlyprintk; extern size_t boot_rb_off; #define boot_rb_foreach(cb) \ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0ce550faf0733..feff1bb9ac2da 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -157,6 +157,10 @@ u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); struct oldmem_data __bootdata_preserved(oldmem_data); +char __bootdata(boot_rb)[PAGE_SIZE * 2]; +bool __bootdata(boot_earlyprintk); +size_t __bootdata(boot_rb_off); + unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); @@ -877,6 +881,17 @@ static void __init log_component_list(void) } } +/* + * Print avoiding interpretation of % in buf + */ +static void __init print_rb_entry(char *buf) +{ + char fmt[] = KERN_SOH "0boot: %s"; + + fmt[1] = printk_get_level(buf); + printk(fmt, printk_skip_level(buf)); +} + /* * Setup function called from init/main.c just after the banner * was printed. @@ -896,6 +911,9 @@ void __init setup_arch(char **cmdline_p) pr_info("Linux is running natively in 64-bit mode\n"); else pr_info("Linux is running as a guest in 64-bit mode\n"); + /* Print decompressor messages if not already printed */ + if (!boot_earlyprintk) + boot_rb_foreach(print_rb_entry); if (have_relocated_lowcore()) pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); From d20d8e51338fc39aedf98786a4e9b71d4af4bda7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 22:23:48 +0100 Subject: [PATCH 15/53] s390/boot: Add bootdebug option to control debug messages Suppress decompressor debug messages by default, similar to regular kernel debug messages that require 'DEBUG' or 'dyndbg' to be enabled (depending on CONFIG_DYNAMIC_DEBUG). Introduce a 'bootdebug' option to enable printing these messages when needed. All messages are still stored in the boot ring buffer regardless. To enable boot debug messages: bootdebug debug Or combine with 'earlyprintk' to print them without delay: bootdebug debug earlyprintk Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/ipl_parm.c | 2 ++ arch/s390/boot/printk.c | 4 ++++ arch/s390/include/asm/boot_data.h | 1 + arch/s390/kernel/early.c | 1 + arch/s390/kernel/setup.c | 10 ++++++++-- 5 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index fd4874ab30cb7..397046f0c1dfe 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -317,6 +317,8 @@ void parse_boot_command_line(void) boot_earlyprintk = true; if (!strcmp(param, "debug")) boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; + if (!strcmp(param, "bootdebug")) + bootdebug = true; if (!strcmp(param, "quiet")) boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; if (!strcmp(param, "ignore_loglevel")) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 7b7203f078c80..4c60245697ab0 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -17,6 +17,7 @@ bool boot_ignore_loglevel; char __bootdata(boot_rb)[PAGE_SIZE * 2]; bool __bootdata(boot_earlyprintk); size_t __bootdata(boot_rb_off); +bool __bootdata(bootdebug); static void boot_rb_add(const char *str, size_t len) { @@ -168,6 +169,9 @@ static void boot_console_earlyprintk(const char *buf) /* always print emergency messages */ if (level > LOGLEVEL_EMERG && !boot_earlyprintk) return; + /* print debug messages only when bootdebug is enabled */ + if (level == LOGLEVEL_DEBUG && !bootdebug) + return; if (boot_ignore_loglevel || level < boot_console_loglevel) sclp_early_printk(printk_skip_level(buf)); } diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index e7ef7524b847e..da5527c50738e 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -18,6 +18,7 @@ extern unsigned long early_ipl_comp_list_size; extern char boot_rb[PAGE_SIZE * 2]; extern bool boot_earlyprintk; extern size_t boot_rb_off; +extern bool bootdebug; #define boot_rb_foreach(cb) \ do { \ diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 62f8f5a750a30..4cdf3bf33052e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -50,6 +50,7 @@ decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); decompressor_handled_param(relocate_lowcore); +decompressor_handled_param(bootdebug); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index feff1bb9ac2da..dd0979182890e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -160,6 +160,7 @@ struct oldmem_data __bootdata_preserved(oldmem_data); char __bootdata(boot_rb)[PAGE_SIZE * 2]; bool __bootdata(boot_earlyprintk); size_t __bootdata(boot_rb_off); +bool __bootdata(bootdebug); unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); @@ -882,13 +883,18 @@ static void __init log_component_list(void) } /* - * Print avoiding interpretation of % in buf + * Print avoiding interpretation of % in buf and taking bootdebug option + * into consideration. */ static void __init print_rb_entry(char *buf) { char fmt[] = KERN_SOH "0boot: %s"; + int level = printk_get_level(buf); - fmt[1] = printk_get_level(buf); + if (level == KERN_DEBUG[1] && !bootdebug) + return; + + fmt[1] = level; printk(fmt, printk_skip_level(buf)); } From b79015ae63d3c91f06ab78b6f06abba94fd88e12 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sat, 23 Nov 2024 00:02:25 +0100 Subject: [PATCH 16/53] s390/boot: Add prefix filtering to bootdebug messages Enhance boot debugging by allowing the "bootdebug" kernel parameter to accept an optional comma-separated list of prefixes. Only debug messages starting with these prefixes will be printed during boot. For example: bootdebug=startup,vmem Not specifying a filter for the "bootdebug" parameter prints all debug messages. The `boot_fmt` macro can be defined to set a common prefix: #define boot_fmt(fmt) "startup: " fmt Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 20 ++++++++++++-------- arch/s390/boot/ipl_parm.c | 5 ++++- arch/s390/boot/printk.c | 6 ++++-- arch/s390/include/asm/boot_data.h | 25 +++++++++++++++++++++++++ arch/s390/kernel/setup.c | 8 +++++--- 5 files changed, 50 insertions(+), 14 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index dbfba6822d6fd..688fa75f86516 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -77,14 +77,18 @@ void print_stacktrace(unsigned long sp); void error(char *m); int get_random(unsigned long limit, unsigned long *value); -#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG fmt, ##__VA_ARGS__) -#define boot_alert(fmt, ...) boot_printk(KERN_ALERT fmt, ##__VA_ARGS__) -#define boot_crit(fmt, ...) boot_printk(KERN_CRIT fmt, ##__VA_ARGS__) -#define boot_err(fmt, ...) boot_printk(KERN_ERR fmt, ##__VA_ARGS__) -#define boot_warn(fmt, ...) boot_printk(KERN_WARNING fmt, ##__VA_ARGS__) -#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE fmt, ##__VA_ARGS__) -#define boot_info(fmt, ...) boot_printk(KERN_INFO fmt, ##__VA_ARGS__) -#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG fmt, ##__VA_ARGS__) +#ifndef boot_fmt +#define boot_fmt(fmt) fmt +#endif + +#define boot_emerg(fmt, ...) boot_printk(KERN_EMERG boot_fmt(fmt), ##__VA_ARGS__) +#define boot_alert(fmt, ...) boot_printk(KERN_ALERT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_crit(fmt, ...) boot_printk(KERN_CRIT boot_fmt(fmt), ##__VA_ARGS__) +#define boot_err(fmt, ...) boot_printk(KERN_ERR boot_fmt(fmt), ##__VA_ARGS__) +#define boot_warn(fmt, ...) boot_printk(KERN_WARNING boot_fmt(fmt), ##__VA_ARGS__) +#define boot_notice(fmt, ...) boot_printk(KERN_NOTICE boot_fmt(fmt), ##__VA_ARGS__) +#define boot_info(fmt, ...) boot_printk(KERN_INFO boot_fmt(fmt), ##__VA_ARGS__) +#define boot_debug(fmt, ...) boot_printk(KERN_DEBUG boot_fmt(fmt), ##__VA_ARGS__) extern struct machine_info machine; extern int boot_console_loglevel; diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 397046f0c1dfe..d3731f2983b77 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -317,8 +317,11 @@ void parse_boot_command_line(void) boot_earlyprintk = true; if (!strcmp(param, "debug")) boot_console_loglevel = CONSOLE_LOGLEVEL_DEBUG; - if (!strcmp(param, "bootdebug")) + if (!strcmp(param, "bootdebug")) { bootdebug = true; + if (val) + strncpy(bootdebug_filter, val, sizeof(bootdebug_filter) - 1); + } if (!strcmp(param, "quiet")) boot_console_loglevel = CONSOLE_LOGLEVEL_QUIET; if (!strcmp(param, "ignore_loglevel")) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 4c60245697ab0..092114e469376 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -17,6 +17,7 @@ bool boot_ignore_loglevel; char __bootdata(boot_rb)[PAGE_SIZE * 2]; bool __bootdata(boot_earlyprintk); size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; bool __bootdata(bootdebug); static void boot_rb_add(const char *str, size_t len) @@ -169,11 +170,12 @@ static void boot_console_earlyprintk(const char *buf) /* always print emergency messages */ if (level > LOGLEVEL_EMERG && !boot_earlyprintk) return; + buf = printk_skip_level(buf); /* print debug messages only when bootdebug is enabled */ - if (level == LOGLEVEL_DEBUG && !bootdebug) + if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(buf))) return; if (boot_ignore_loglevel || level < boot_console_loglevel) - sclp_early_printk(printk_skip_level(buf)); + sclp_early_printk(buf); } #define va_arg_len_type(args, lenmod, typemod) \ diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index da5527c50738e..73c441f5e99ae 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_S390_BOOT_DATA_H +#include #include #include @@ -18,6 +19,7 @@ extern unsigned long early_ipl_comp_list_size; extern char boot_rb[PAGE_SIZE * 2]; extern bool boot_earlyprintk; extern size_t boot_rb_off; +extern char bootdebug_filter[128]; extern bool bootdebug; #define boot_rb_foreach(cb) \ @@ -30,4 +32,27 @@ extern bool bootdebug; cb(boot_rb + off); \ } while (0) +/* + * bootdebug_filter is a comma separated list of strings, + * where each string can be a prefix of the message. + */ +static inline bool bootdebug_filter_match(const char *buf) +{ + char *p = bootdebug_filter, *s; + char *end; + + if (!*p) + return true; + + end = p + strlen(p); + while (p < end) { + p = skip_spaces(p); + s = memscan(p, ',', end - p); + if (!strncmp(p, buf, s - p)) + return true; + p = s + 1; + } + return false; +} + #endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index dd0979182890e..74e2ee75b1896 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -160,6 +160,7 @@ struct oldmem_data __bootdata_preserved(oldmem_data); char __bootdata(boot_rb)[PAGE_SIZE * 2]; bool __bootdata(boot_earlyprintk); size_t __bootdata(boot_rb_off); +char __bootdata(bootdebug_filter)[128]; bool __bootdata(bootdebug); unsigned long __bootdata_preserved(VMALLOC_START); @@ -886,16 +887,17 @@ static void __init log_component_list(void) * Print avoiding interpretation of % in buf and taking bootdebug option * into consideration. */ -static void __init print_rb_entry(char *buf) +static void __init print_rb_entry(const char *buf) { char fmt[] = KERN_SOH "0boot: %s"; int level = printk_get_level(buf); - if (level == KERN_DEBUG[1] && !bootdebug) + buf = printk_skip_level(buf); + if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf))) return; fmt[1] = level; - printk(fmt, printk_skip_level(buf)); + printk(fmt, buf); } /* From b2a992a55fb60395d1ab9a4d4b2ea3783e3b7ad9 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 20 Nov 2024 20:30:10 +0100 Subject: [PATCH 17/53] s390/boot: Dump message ring buffer on crash with bootdebug Dump the boot message ring buffer when a crash occurs during boot, but only if bootdebug is enabled. This helps assist in analyzing boot-time issues by providing additional debugging information. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 1 + arch/s390/boot/pgm_check_info.c | 2 ++ arch/s390/boot/printk.c | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 688fa75f86516..0cfa84a42ec17 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -76,6 +76,7 @@ int __printf(1, 2) boot_printk(const char *fmt, ...); void print_stacktrace(unsigned long sp); void error(char *m); int get_random(unsigned long limit, unsigned long *value); +void boot_rb_dump(void); #ifndef boot_fmt #define boot_fmt(fmt) fmt diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index e8a728c3db052..633f11600aab3 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -37,6 +37,8 @@ void print_pgm_check_info(void) unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area; struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area); + if (bootdebug) + boot_rb_dump(); boot_emerg("Linux version %s\n", kernel_version); if (!is_prot_virt_guest() && early_command_line[0]) boot_emerg("Kernel command line: %s\n", early_command_line); diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index 092114e469376..abfa05cf95624 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -32,6 +32,24 @@ static void boot_rb_add(const char *str, size_t len) boot_rb_off += len + 1; } +static void print_rb_entry(const char *str) +{ + sclp_early_printk(printk_skip_level(str)); +} + +static bool debug_messages_printed(void) +{ + return boot_earlyprintk && (boot_ignore_loglevel || boot_console_loglevel > LOGLEVEL_DEBUG); +} + +void boot_rb_dump(void) +{ + if (debug_messages_printed()) + return; + sclp_early_printk("Boot messages ring buffer:\n"); + boot_rb_foreach(print_rb_entry); +} + const char hex_asc[] = "0123456789abcdef"; static char *as_hex(char *dst, unsigned long val, int pad) From 70309dc77699dd598ade67ca7fca5a17ac3275c1 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 29 Nov 2024 12:56:48 +0100 Subject: [PATCH 18/53] s390/boot: Add timestamps to early boot messages When CONFIG_PRINTK_TIME is enabled, add timestamps to boot messages in the same format as regular printk. Timestamps appear only with earlyprintk and are stored in the boot messages ring buffer, but are not propagated to main kernel messages (if earlyprintk is not enabled). This prevents double timestamps in the output. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/printk.c | 27 ++++++++++++++++++++++----- arch/s390/include/asm/boot_data.h | 11 +++++++++++ arch/s390/kernel/setup.c | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/arch/s390/boot/printk.c b/arch/s390/boot/printk.c index abfa05cf95624..b4c66fa667d54 100644 --- a/arch/s390/boot/printk.c +++ b/arch/s390/boot/printk.c @@ -190,12 +190,29 @@ static void boot_console_earlyprintk(const char *buf) return; buf = printk_skip_level(buf); /* print debug messages only when bootdebug is enabled */ - if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(buf))) + if (level == LOGLEVEL_DEBUG && (!bootdebug || !bootdebug_filter_match(skip_timestamp(buf)))) return; if (boot_ignore_loglevel || level < boot_console_loglevel) sclp_early_printk(buf); } +static char *add_timestamp(char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + union tod_clock *boot_clock = (union tod_clock *)&get_lowcore()->boot_clock; + unsigned long ns = tod_to_ns(get_tod_clock() - boot_clock->tod); + char ts[MAX_NUMLEN]; + + *buf++ = '['; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, ns / NSEC_PER_SEC, 0), 5, 0, 0); + *buf++ = '.'; + buf += strpad(buf, MAX_NUMLEN, as_dec(ts, (ns % NSEC_PER_SEC) / NSEC_PER_USEC, 0), 6, 1, 0); + *buf++ = ']'; + *buf++ = ' '; +#endif + return buf; +} + #define va_arg_len_type(args, lenmod, typemod) \ ((lenmod == 'l') ? va_arg(args, typemod long) : \ (lenmod == 'h') ? (typemod short)va_arg(args, typemod int) : \ @@ -215,10 +232,10 @@ int boot_printk(const char *fmt, ...) ssize_t len; int pad; - if (!printk_get_level(fmt)) { - *p++ = KERN_SOH_ASCII; - *p++ = '0' + MESSAGE_LOGLEVEL_DEFAULT; - } + *p++ = KERN_SOH_ASCII; + *p++ = printk_get_level(fmt) ?: '0' + MESSAGE_LOGLEVEL_DEFAULT; + p = add_timestamp(p); + fmt = printk_skip_level(fmt); va_start(args, fmt); for (; p < end && *fmt; fmt++) { diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h index 73c441f5e99ae..f55f8227058ef 100644 --- a/arch/s390/include/asm/boot_data.h +++ b/arch/s390/include/asm/boot_data.h @@ -55,4 +55,15 @@ static inline bool bootdebug_filter_match(const char *buf) return false; } +static inline const char *skip_timestamp(const char *buf) +{ +#ifdef CONFIG_PRINTK_TIME + const char *p = memchr(buf, ']', strlen(buf)); + + if (p && p[1] == ' ') + return p + 2; +#endif + return buf; +} + #endif /* _ASM_S390_BOOT_DATA_H */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 74e2ee75b1896..20fc68bdefa13 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -892,7 +892,7 @@ static void __init print_rb_entry(const char *buf) char fmt[] = KERN_SOH "0boot: %s"; int level = printk_get_level(buf); - buf = printk_skip_level(buf); + buf = skip_timestamp(printk_skip_level(buf)); if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf))) return; From d2ebe06bf5a18714b74cda0eea60bbb10553477e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 29 Nov 2024 12:57:44 +0100 Subject: [PATCH 19/53] s390: Use pr_info for "KernelAddressSanitizer initialized" message sclp_early_printk() ignores boot console debug settings and prints unconditionally. It also prints message without any timestamp or formatting. Convert it to pr_info(). Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/early.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4cdf3bf33052e..2fa25164df7d1 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -59,7 +59,7 @@ static void __init kasan_early_init(void) { #ifdef CONFIG_KASAN init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized\n"); #endif } From 9688b17b4a3e608c35b22028e37641d3512faafe Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Dec 2024 11:06:19 +0100 Subject: [PATCH 20/53] s390/boot: Add physmem tracking debug support Introduce boot_debug() calls to track memory detection, online ranges, reserved areas, and allocations (except for VMEM allocations, which are too frequent). Instead introduce dump_physmem_reserved() function which prints out full memory tracking information. This helps in debugging early boot memory handling. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 1 + arch/s390/boot/physmem_info.c | 49 +++++++++++++++++++++++++--- arch/s390/boot/startup.c | 1 + arch/s390/include/asm/physmem_info.h | 4 +-- arch/s390/kernel/setup.c | 4 +-- 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 0cfa84a42ec17..69f261566a64a 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -57,6 +57,7 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s unsigned long align, unsigned long min, unsigned long max, bool die_on_oom); unsigned long get_physmem_alloc_pos(void); +void dump_physmem_reserved(void); bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, unsigned long *intersection_start); bool is_ipl_block_dump(void); diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c index 37d201986261c..aa096ef68e8c2 100644 --- a/arch/s390/boot/physmem_info.c +++ b/arch/s390/boot/physmem_info.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "physmem: " fmt #include #include #include @@ -28,7 +29,7 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) return &physmem_info.online[n]; if (unlikely(!physmem_info.online_extended)) { physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( - RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + RR_MEM_DETECT_EXT, ENTRIES_EXTENDED_MAX, sizeof(long), 0, physmem_alloc_pos, true); } return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; @@ -207,11 +208,16 @@ unsigned long detect_max_physmem_end(void) max_physmem_end = search_mem_end(); physmem_info.info_source = MEM_DETECT_BIN_SEARCH; } + boot_debug("Max physical memory: 0x%016lx (info source: %s)\n", max_physmem_end, + get_physmem_info_source()); return max_physmem_end; } void detect_physmem_online_ranges(unsigned long max_physmem_end) { + unsigned long start, end; + int i; + if (!sclp_early_read_storage_info()) { physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; } else if (physmem_info.info_source == MEM_DETECT_DIAG500_STOR_LIMIT) { @@ -226,12 +232,16 @@ void detect_physmem_online_ranges(unsigned long max_physmem_end) } else if (max_physmem_end) { add_physmem_online_range(0, max_physmem_end); } + boot_debug("Online memory ranges (info source: %s):\n", get_physmem_info_source()); + for_each_physmem_online_range(i, &start, &end) + boot_debug(" online [%d]: 0x%016lx-0x%016lx\n", i, start, end); } void physmem_set_usable_limit(unsigned long limit) { physmem_info.usable = limit; physmem_alloc_pos = limit; + boot_debug("Usable memory limit: 0x%016lx\n", limit); } static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) @@ -265,14 +275,23 @@ static void die_oom(unsigned long size, unsigned long align, unsigned long min, disabled_wait(); } -void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +static void _physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) { physmem_info.reserved[type].start = addr; physmem_info.reserved[type].end = addr + size; } +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Reserve:", addr, addr + size, + get_rr_type_name(type)); +} + void physmem_free(enum reserved_range_type type) { + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Free:", physmem_info.reserved[type].start, + physmem_info.reserved[type].end, get_rr_type_name(type)); physmem_info.reserved[type].start = 0; physmem_info.reserved[type].end = 0; } @@ -339,7 +358,9 @@ unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long s max = min(max, physmem_alloc_pos); addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); if (addr) - physmem_reserve(type, addr, size); + _physmem_reserve(type, addr, size); + boot_debug("%-14s 0x%016lx-0x%016lx %s\n", "Alloc range:", addr, addr + size, + get_rr_type_name(type)); return addr; } @@ -347,7 +368,7 @@ unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, unsigned long align, bool die_on_oom) { struct reserved_range *range = &physmem_info.reserved[type]; - struct reserved_range *new_range; + struct reserved_range *new_range = NULL; unsigned int ranges_left; unsigned long addr; @@ -371,6 +392,10 @@ unsigned long physmem_alloc(enum reserved_range_type type, unsigned long size, } range->end = addr + size; } + if (type != RR_VMEM) { + boot_debug("%-14s 0x%016lx-0x%016lx %-20s align 0x%lx split %d\n", "Alloc topdown:", + addr, addr + size, get_rr_type_name(type), align, !!new_range); + } range->start = addr; physmem_alloc_pos = addr; physmem_alloc_ranges = ranges_left; @@ -387,3 +412,19 @@ unsigned long get_physmem_alloc_pos(void) { return physmem_alloc_pos; } + +void dump_physmem_reserved(void) +{ + struct reserved_range *range; + enum reserved_range_type t; + unsigned long start, end; + + boot_debug("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + if (end) { + boot_debug("%-14s 0x%016lx-0x%016lx @%012lx chain %012lx\n", + get_rr_type_name(t), start, end, (unsigned long)range, + (unsigned long)range->chain); + } + } +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index e9ae589c61694..86693d838e974 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -525,6 +525,7 @@ void startup_kernel(void) __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); + dump_physmem_reserved(); copy_bootdata(); __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, (struct alt_instr *)_vmlinux_info.alt_instructions_end, diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h index 51b68a43e1957..7ef3bbec98b08 100644 --- a/arch/s390/include/asm/physmem_info.h +++ b/arch/s390/include/asm/physmem_info.h @@ -26,7 +26,7 @@ enum reserved_range_type { RR_AMODE31, RR_IPLREPORT, RR_CERT_COMP_LIST, - RR_MEM_DETECT_EXTENDED, + RR_MEM_DETECT_EXT, RR_VMEM, RR_MAX }; @@ -128,7 +128,7 @@ static inline const char *get_rr_type_name(enum reserved_range_type t) RR_TYPE_NAME(AMODE31); RR_TYPE_NAME(IPLREPORT); RR_TYPE_NAME(CERT_COMP_LIST); - RR_TYPE_NAME(MEM_DETECT_EXTENDED); + RR_TYPE_NAME(MEM_DETECT_EXT); RR_TYPE_NAME(VMEM); default: return "UNKNOWN"; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 20fc68bdefa13..0fcb2a4e3f5ee 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -702,7 +702,7 @@ static void __init reserve_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_reserve(addr, size); } @@ -710,7 +710,7 @@ static void __init free_physmem_info(void) { unsigned long addr, size; - if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size)) memblock_phys_free(addr, size); } From a56827e5a5fa02a5241eab7342f51c840d99ee15 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Dec 2024 11:59:50 +0100 Subject: [PATCH 21/53] s390/boot: Move command line parsing earlier Reorder the store_ipl_parmblock(), uv_query_info(), and command line setup calls to occur earlier. This ensures debug printing covers all memory tracking activities from the start. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/startup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 86693d838e974..89f1d12f1c100 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -411,6 +411,10 @@ void startup_kernel(void) psw_t psw; setup_lpp(); + store_ipl_parmblock(); + uv_query_info(); + setup_boot_command_line(); + parse_boot_command_line(); /* * Non-randomized kernel physical start address must be _SEGMENT_SIZE @@ -430,12 +434,8 @@ void startup_kernel(void) oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - store_ipl_parmblock(); read_ipl_report(); - uv_query_info(); sclp_early_read_info(); - setup_boot_command_line(); - parse_boot_command_line(); detect_facilities(); cmma_init(); sanitize_prot_virt_host(); From 418b4d5e617ad0a4c532496ea86f7fb28de9fd95 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Dec 2024 12:11:04 +0100 Subject: [PATCH 22/53] s390/boot: Add vmem debugging support Introduce boot_debug() calls in vmem code to log page table mappings including KASAN shadow mappings for improved early boot debugging. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/vmem.c | 47 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 47011b5a9e3d2..c8a2d51751143 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "vmem: " fmt #include #include #include @@ -32,12 +33,42 @@ enum populate_mode { POPULATE_IDENTITY, POPULATE_KERNEL, #ifdef CONFIG_KASAN + /* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */ POPULATE_KASAN_MAP_SHADOW, POPULATE_KASAN_ZERO_SHADOW, POPULATE_KASAN_SHALLOW #endif }; +#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t +static inline const char *get_populate_mode_name(enum populate_mode t) +{ + switch (t) { + POPULATE_MODE_NAME(NONE); + POPULATE_MODE_NAME(DIRECT); + POPULATE_MODE_NAME(LOWCORE); + POPULATE_MODE_NAME(ABS_LOWCORE); + POPULATE_MODE_NAME(IDENTITY); + POPULATE_MODE_NAME(KERNEL); +#ifdef CONFIG_KASAN + POPULATE_MODE_NAME(KASAN_MAP_SHADOW); + POPULATE_MODE_NAME(KASAN_ZERO_SHADOW); + POPULATE_MODE_NAME(KASAN_SHALLOW); +#endif + default: + return "UNKNOWN"; + } +} + +static bool is_kasan_populate_mode(enum populate_mode mode) +{ +#ifdef CONFIG_KASAN + return mode >= POPULATE_KASAN_MAP_SHADOW; +#else + return false; +#endif +} + static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); #ifdef CONFIG_KASAN @@ -53,9 +84,12 @@ static pte_t pte_z; static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode) { - start = PAGE_ALIGN_DOWN(__sha(start)); - end = PAGE_ALIGN(__sha(end)); - pgtable_populate(start, end, mode); + unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start)); + unsigned long sha_end = PAGE_ALIGN(__sha(end)); + + boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode), + start, end, sha_start, sha_end); + pgtable_populate(sha_start, sha_end, mode); } static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end) @@ -418,6 +452,13 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat pgd_t *pgd; p4d_t *p4d; + if (!is_kasan_populate_mode(mode)) { + boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n", + get_populate_mode_name(mode), addr, end, + resolve_pa_may_alloc(addr, 0, mode), + resolve_pa_may_alloc(end - 1, 0, mode) + 1); + } + pgd = pgd_offset(&init_mm, addr); for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); From ec6f9f7e5bbf983323d53b66e8a76d699d26736c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Dec 2024 16:57:21 +0100 Subject: [PATCH 23/53] s390/boot: Add startup debugging support Add boot_debug() calls to log various memory layout decisions and randomization details during early startup, improving debugging capabilities. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/startup.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 89f1d12f1c100..807594cf246c1 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define boot_fmt(fmt) "startup: " fmt #include #include #include @@ -223,12 +224,16 @@ static void setup_ident_map_size(unsigned long max_physmem_end) if (oldmem_data.start) { __kaslr_enabled = 0; ident_map_size = min(ident_map_size, oldmem_data.size); + boot_debug("kdump memory limit: 0x%016lx\n", oldmem_data.size); } else if (ipl_block_valid && is_ipl_block_dump()) { __kaslr_enabled = 0; - if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) + if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size) { ident_map_size = min(ident_map_size, hsa_size); + boot_debug("Stand-alone dump limit: 0x%016lx\n", hsa_size); + } } #endif + boot_debug("Identity map size: 0x%016lx\n", ident_map_size); } #define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)) @@ -266,6 +271,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); + boot_debug("vmem size estimated: 0x%016lx\n", vsize); if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE || (vsize > _REGION2_SIZE && kaslr_enabled())) { asce_limit = _REGION1_SIZE; @@ -289,8 +295,10 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) * otherwise asce_limit and rte_size would have been adjusted. */ vmax = adjust_to_uv_max(asce_limit); + boot_debug("%d level paging 0x%016lx vmax\n", vmax == _REGION1_SIZE ? 4 : 3, vmax); #ifdef CONFIG_KASAN BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START); + boot_debug("KASAN shadow area: 0x%016lx-0x%016lx\n", KASAN_SHADOW_START, KASAN_SHADOW_END); /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif @@ -304,19 +312,27 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) pos = 0; kernel_end = vmax - pos * THREAD_SIZE; kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE); + boot_debug("Randomization range: 0x%016lx-0x%016lx\n", vmax - kaslr_len, vmax); + boot_debug("kernel image: 0x%016lx-0x%016lx (kaslr)\n", kernel_start, + kernel_size + kernel_size); } else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) { kernel_start = round_down(vmax - kernel_size, THREAD_SIZE); - boot_debug("The kernel base address is forced to %lx\n", kernel_start); + boot_debug("kernel image: 0x%016lx-0x%016lx (constrained)\n", kernel_start, + kernel_start + kernel_size); } else { kernel_start = __NO_KASLR_START_KERNEL; + boot_debug("kernel image: 0x%016lx-0x%016lx (nokaslr)\n", kernel_start, + kernel_start + kernel_size); } __kaslr_offset = kernel_start; + boot_debug("__kaslr_offset: 0x%016lx\n", __kaslr_offset); MODULES_END = round_down(kernel_start, _SEGMENT_SIZE); MODULES_VADDR = MODULES_END - MODULES_LEN; VMALLOC_END = MODULES_VADDR; if (IS_ENABLED(CONFIG_KMSAN)) VMALLOC_END -= MODULES_LEN * 2; + boot_debug("modules area: 0x%016lx-0x%016lx\n", MODULES_VADDR, MODULES_END); /* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */ vsize = (VMALLOC_END - FIXMAP_SIZE) / 2; @@ -328,10 +344,15 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) VMALLOC_END -= vmalloc_size * 2; } VMALLOC_START = VMALLOC_END - vmalloc_size; + boot_debug("vmalloc area: 0x%016lx-0x%016lx\n", VMALLOC_START, VMALLOC_END); __memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE); + boot_debug("memcpy real area: 0x%016lx-0x%016lx\n", __memcpy_real_area, + __memcpy_real_area + MEMCPY_REAL_SIZE); __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); + boot_debug("abs lowcore: 0x%016lx-0x%016lx\n", __abs_lowcore, + __abs_lowcore + ABS_LOWCORE_MAP_SIZE); /* split remaining virtual space between 1:1 mapping & vmemmap array */ pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page)); @@ -353,6 +374,8 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) max_mappable = min(max_mappable, vmemmap_start); if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) __identity_base = round_down(vmemmap_start - max_mappable, rte_size); + boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base, + __identity_base + ident_map_size); return asce_limit; } @@ -542,5 +565,6 @@ void startup_kernel(void) */ psw.addr = __kaslr_offset + vmlinux.entry; psw.mask = PSW_KERNEL_BITS; + boot_debug("Starting kernel at: 0x%016lx\n", psw.addr); __load_psw(psw); } From 9af310ef10dedc2f8d400d26fdc99d771537b2fd Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 12 Dec 2024 16:39:05 +0100 Subject: [PATCH 24/53] s390/boot: Improve decompression error reporting Currently, decompression error messages can be very uninformative: [ 0.029853] startup: read error [ 0.040507] startup: -- System halted Improve these messages to make it clear that the error originates from the decompression code. Additionally, on decompression failures, if bootdebug is enabled, dump the message ring buffer before halting. This provides more context for diagnosing startup issues. Signed-off-by: Vasily Gorbik Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/decompressor.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/decompressor.c b/arch/s390/boot/decompressor.c index f478e8e9cbda4..03500b9d9fb9a 100644 --- a/arch/s390/boot/decompressor.c +++ b/arch/s390/boot/decompressor.c @@ -9,6 +9,7 @@ #include #include +#include #include #include "decompressor.h" #include "boot.h" @@ -63,6 +64,15 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #include "../../../../lib/decompress_unzstd.c" #endif +static void decompress_error(char *m) +{ + if (bootdebug) + boot_rb_dump(); + boot_emerg("Decompression error: %s\n", m); + boot_emerg(" -- System halted\n"); + disabled_wait(); +} + unsigned long mem_safe_offset(void) { return ALIGN(free_mem_end_ptr, PAGE_SIZE); @@ -71,5 +81,5 @@ unsigned long mem_safe_offset(void) void deploy_kernel(void *output) { __decompress(_compressed_start, _compressed_end - _compressed_start, - NULL, NULL, output, vmlinux.image_size, NULL, error); + NULL, NULL, output, vmlinux.image_size, NULL, decompress_error); } From d7bebcb4a898bd214bbd71107f975b7b9f0bde32 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Dec 2024 12:29:40 +0100 Subject: [PATCH 25/53] s390: Optimize __pa/__va when RANDOMIZE_IDENTITY_BASE is off Use a zero identity base when CONFIG_RANDOMIZE_IDENTITY_BASE is off, slightly optimizing __pa/__va calculations. Signed-off-by: Vasily Gorbik Acked-by: Alexander Gordeev Signed-off-by: Alexander Gordeev --- arch/s390/boot/startup.c | 5 +++-- arch/s390/include/asm/page.h | 4 ++++ arch/s390/kernel/setup.c | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 807594cf246c1..58e56aed5f7de 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -372,8 +372,9 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); max_mappable = max(ident_map_size, MAX_DCSS_ADDR); max_mappable = min(max_mappable, vmemmap_start); - if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) - __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); +#endif boot_debug("identity map: 0x%016lx-0x%016lx\n", __identity_base, __identity_base + ident_map_size); diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 4f43cdd9835b6..1ff145f7b52b1 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -184,7 +184,11 @@ extern struct vm_layout vm_layout; #define __kaslr_offset vm_layout.kaslr_offset #define __kaslr_offset_phys vm_layout.kaslr_offset_phys +#ifdef CONFIG_RANDOMIZE_IDENTITY_BASE #define __identity_base vm_layout.identity_base +#else +#define __identity_base 0UL +#endif #define ident_map_size vm_layout.identity_size static inline unsigned long kaslr_offset(void) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 0fcb2a4e3f5ee..f6765ea4f8281 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -740,7 +740,7 @@ static void __init reserve_lowcore(void) void *lowcore_end = lowcore_start + sizeof(struct lowcore); void *start, *end; - if ((void *)__identity_base < lowcore_end) { + if (absolute_pointer(__identity_base) < lowcore_end) { start = max(lowcore_start, (void *)__identity_base); end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); memblock_reserve(__pa(start), __pa(end)); From f684b7954ea6b29662caa25e0a4941231c05e413 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 11 Dec 2024 12:33:45 +0100 Subject: [PATCH 26/53] s390/ipl_report: Remove unused 'size' variable Remove unused since commit f913a6600491 ("s390/boot: rework decompressor reserved tracking") 'size' variable from get_cert_comp_list_size(). Signed-off-by: Vasily Gorbik Acked-by: Alexander Gordeev Signed-off-by: Alexander Gordeev --- arch/s390/boot/ipl_report.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index aea22e268c65d..f73cd757a5f73 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -30,7 +30,6 @@ static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; - size_t size; /* * Find the length for the IPL report boot data From ae02615b7fcea9ce9a4ec40b3c5b5dafd322b179 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Jan 2025 11:52:17 +0100 Subject: [PATCH 27/53] s390/fpu: Add fpc exception handler / remove fixup section again The fixup section was added again by mistake when test_fp_ctl() was removed. The reason for the removal of the fixup section is described in commit 484a8ed8b7d1 ("s390/extable: add dedicated uaccess handler"). Remove it again for the same reason. Add an exception handler which handles exceptions when the floating point control register is attempted to be set to invalid values. The exception handler sets the floating point control register to zero and continues execution at the specified address. The new sfpc inline assembly is open-coded to make back porting a bit easier. Fixes: 702644249d3e ("s390/fpu: get rid of test_fp_ctl()") Cc: stable@vger.kernel.org Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/asm-extable.h | 4 ++++ arch/s390/include/asm/fpu-insn.h | 15 ++++----------- arch/s390/kernel/vmlinux.lds.S | 1 - arch/s390/mm/extable.c | 9 +++++++++ 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 4a6b0a8b6412f..00a67464c4453 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -14,6 +14,7 @@ #define EX_TYPE_UA_LOAD_REG 5 #define EX_TYPE_UA_LOAD_REGPAIR 6 #define EX_TYPE_ZEROPAD 7 +#define EX_TYPE_FPC 8 #define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -84,4 +85,7 @@ #define EX_TABLE_ZEROPAD(_fault, _target, _regdata, _regaddr) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_ZEROPAD, _regdata, _regaddr, 0) +#define EX_TABLE_FPC(_fault, _target) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_FPC, __stringify(%%r0), __stringify(%%r0), 0) + #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index de510c9f6efa9..0f1b59eb4c5ae 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -100,19 +100,12 @@ static __always_inline void fpu_lfpc(unsigned int *fpc) */ static inline void fpu_lfpc_safe(unsigned int *fpc) { - u32 tmp; - instrument_read(fpc, sizeof(*fpc)); asm_inline volatile( - "0: lfpc %[fpc]\n" - "1: nopr %%r7\n" - ".pushsection .fixup, \"ax\"\n" - "2: lghi %[tmp],0\n" - " sfpc %[tmp]\n" - " jg 1b\n" - ".popsection\n" - EX_TABLE(1b, 2b) - : [tmp] "=d" (tmp) + " lfpc %[fpc]\n" + "0: nopr %%r7\n" + EX_TABLE_FPC(0b, 0b) + : : [fpc] "Q" (*fpc) : "memory"); } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 377b9aaf8c924..ff1ddba96352a 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -52,7 +52,6 @@ SECTIONS SOFTIRQENTRY_TEXT FTRACE_HOTPATCH_TRAMPOLINES_TEXT *(.text.*_indirect_*) - *(.fixup) *(.gnu.warning) . = ALIGN(PAGE_SIZE); _etext = .; /* End of text section */ diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 0a0738a473af0..812ec5be12916 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -77,6 +77,13 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt return true; } +static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs) +{ + asm volatile("sfpc %[val]\n" : : [val] "d" (0)); + regs->psw.addr = extable_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -99,6 +106,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_ua_load_reg(ex, true, regs); case EX_TYPE_ZEROPAD: return ex_handler_zeropad(ex, regs); + case EX_TYPE_FPC: + return ex_handler_fpc(ex, regs); } panic("invalid exception table entry"); } From 1d7a9719ba2dacac866bfa4fd17212d4dfc03bbe Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 10 Jan 2025 11:52:18 +0100 Subject: [PATCH 28/53] s390/extable: Replace open-coded sfpc inline assembly with fpu_sfpc() Use fpc_sfpc() instead of open-coding. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/mm/extable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 812ec5be12916..6e9a5e366ff39 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -7,6 +7,7 @@ #include #include #include +#include const struct exception_table_entry *s390_search_extables(unsigned long addr) { @@ -79,7 +80,7 @@ static bool ex_handler_zeropad(const struct exception_table_entry *ex, struct pt static bool ex_handler_fpc(const struct exception_table_entry *ex, struct pt_regs *regs) { - asm volatile("sfpc %[val]\n" : : [val] "d" (0)); + fpu_sfpc(0); regs->psw.addr = extable_fixup(ex); return true; } From 579a6633b8f720ed2c17e3445012e3834a8535a0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:36:55 +0100 Subject: [PATCH 29/53] s390: Rename GCC_ASM_FLAG_OUTPUT_BROKEN to CC_ASM_FLAG_OUTPUT_BROKEN Config options which can be used to check for compiler bugs and features have the compiler independent CC prefix in order to avoid duplicating and having to check config options for multiple compilers. Therefore rename the config option accordingly. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 2 +- arch/s390/include/asm/asm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e4931a32b8960..621d70ec4bd01 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -52,7 +52,7 @@ config KASAN_SHADOW_OFFSET depends on KASAN default 0x1C000000000000 -config GCC_ASM_FLAG_OUTPUT_BROKEN +config CC_ASM_FLAG_OUTPUT_BROKEN def_bool CC_IS_GCC && GCC_VERSION < 140200 help GCC versions before 14.2.0 may die with an internal diff --git a/arch/s390/include/asm/asm.h b/arch/s390/include/asm/asm.h index ec011b94af2ab..e9062b01e2a24 100644 --- a/arch/s390/include/asm/asm.h +++ b/arch/s390/include/asm/asm.h @@ -28,7 +28,7 @@ * [var] also contains the program mask. CC_TRANSFORM() moves the condition * code to the two least significant bits and sets all other bits to zero. */ -#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_GCC_ASM_FLAG_OUTPUT_BROKEN)) +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_ASM_FLAG_OUTPUT_BROKEN)) #define __HAVE_ASM_FLAG_OUTPUTS__ From f340d270491dc6b4c2da036c33642fbbf1de47af Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:36:56 +0100 Subject: [PATCH 30/53] s390: Introduce CC_HAS_ASM_AOR_FORMAT_FLAGS Kconfig option Introduce CC_HAS_ASM_AOR_FORMAT_FLAGS Kconfig option. Use this option for inline assemblies where the A, O, or R format flags are used. Those flags are not available for Clang versions before 19.1.0. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 621d70ec4bd01..4e9bf698dc4d5 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -59,6 +59,12 @@ config CC_ASM_FLAG_OUTPUT_BROKEN compiler error in some configurations if flag output operands are used within inline assemblies. +config CC_HAS_ASM_AOR_FORMAT_FLAGS + def_bool !(CC_IS_CLANG && CLANG_VERSION < 190100) + help + Clang versions before 19.1.0 do not support A, + O, and R inline assembly format flags. + config S390 def_bool y # From 836968fa0a85f01eeeb1f1144dfa87e3ab94a3d7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:36:57 +0100 Subject: [PATCH 31/53] s390/fpu: Use CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS instead of CONFIG_CC_IS_CLANG Use the more precise CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS to tell if the compiler has support for the A, O, and R inline assembly format flags. This allows recent Clang compilers to generate better code. Move code around so the good (aka better) case at the top of each ifdef construct. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/fpu-insn.h | 168 +++++++++++++++---------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index 0f1b59eb4c5ae..f668bffd6dd3d 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -176,7 +176,19 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vl(u8 v1, const void *vxr) +{ + instrument_read(vxr, sizeof(__vector128)); + asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" + : + : [vxr] "Q" (*(__vector128 *)vxr), + [v1] "I" (v1) + : "memory"); +} + +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vl(u8 v1, const void *vxr) { @@ -190,19 +202,7 @@ static __always_inline void fpu_vl(u8 v1, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ - -static __always_inline void fpu_vl(u8 v1, const void *vxr) -{ - instrument_read(vxr, sizeof(__vector128)); - asm volatile("VL %[v1],%O[vxr],,%R[vxr]\n" - : - : [vxr] "Q" (*(__vector128 *)vxr), - [v1] "I" (v1) - : "memory"); -} - -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vleib(u8 v, s16 val, u8 index) { @@ -231,7 +231,7 @@ static __always_inline u64 fpu_vlgvf(u8 v, u16 index) return val; } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -239,17 +239,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile( - " la 1,%[vxr]\n" - " VLL %[v1],%[index],0,1\n" - : - : [vxr] "R" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory", "1"); + asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" + : + : [vxr] "Q" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory"); } -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) { @@ -257,17 +255,19 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("VLL %[v1],%[index],%O[vxr],%R[vxr]\n" - : - : [vxr] "Q" (*(u8 *)vxr), - [index] "d" (index), - [v1] "I" (v1) - : "memory"); + asm volatile( + " la 1,%[vxr]\n" + " VLL %[v1],%[index],0,1\n" + : + : [vxr] "R" (*(u8 *)vxr), + [index] "d" (index), + [v1] "I" (v1) + : "memory", "1"); } -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -277,17 +277,15 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile( \ - " la 1,%[vxrs]\n" \ - " VLM %[v1],%[v3],0,1\n" \ - : \ - : [vxrs] "R" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : \ + : [vxrs] "Q" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vlm(_v1, _v3, _vxrs) \ ({ \ @@ -297,15 +295,17 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("VLM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : \ - : [vxrs] "Q" (*_v), \ - [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VLM %[v1],%[v3],0,1\n" \ + : \ + : [vxrs] "R" (*_v), \ + [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vlr(u8 v1, u8 v2) { @@ -355,7 +355,18 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) : "memory"); } -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vst(u8 v1, const void *vxr) +{ + instrument_write(vxr, sizeof(__vector128)); + asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" + : [vxr] "=Q" (*(__vector128 *)vxr) + : [v1] "I" (v1) + : "memory"); +} + +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vst(u8 v1, const void *vxr) { @@ -368,20 +379,23 @@ static __always_inline void fpu_vst(u8 v1, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -static __always_inline void fpu_vst(u8 v1, const void *vxr) +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS + +static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { - instrument_write(vxr, sizeof(__vector128)); - asm volatile("VST %[v1],%O[vxr],,%R[vxr]\n" - : [vxr] "=Q" (*(__vector128 *)vxr) - : [v1] "I" (v1) + unsigned int size; + + size = min(index + 1, sizeof(__vector128)); + instrument_write(vxr, size); + asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" + : [vxr] "=Q" (*(u8 *)vxr) + : [index] "d" (index), [v1] "I" (v1) : "memory"); } -#endif /* CONFIG_CC_IS_CLANG */ - -#ifdef CONFIG_CC_IS_CLANG +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) { @@ -397,23 +411,9 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) : "memory", "1"); } -#else /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) -{ - unsigned int size; - - size = min(index + 1, sizeof(__vector128)); - instrument_write(vxr, size); - asm volatile("VSTL %[v1],%[index],%O[vxr],%R[vxr]\n" - : [vxr] "=Q" (*(u8 *)vxr) - : [index] "d" (index), [v1] "I" (v1) - : "memory"); -} - -#endif /* CONFIG_CC_IS_CLANG */ - -#ifdef CONFIG_CC_IS_CLANG +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -423,16 +423,14 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile( \ - " la 1,%[vxrs]\n" \ - " VSTM %[v1],%[v3],0,1\n" \ - : [vxrs] "=R" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory", "1"); \ + asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ + : [vxrs] "=Q" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory"); \ (_v3) - (_v1) + 1; \ }) -#else /* CONFIG_CC_IS_CLANG */ +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define fpu_vstm(_v1, _v3, _vxrs) \ ({ \ @@ -442,14 +440,16 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("VSTM %[v1],%[v3],%O[vxrs],%R[vxrs]\n" \ - : [vxrs] "=Q" (*_v) \ - : [v1] "I" (_v1), [v3] "I" (_v3) \ - : "memory"); \ + asm volatile( \ + " la 1,%[vxrs]\n" \ + " VSTM %[v1],%[v3],0,1\n" \ + : [vxrs] "=R" (*_v) \ + : [v1] "I" (_v1), [v3] "I" (_v3) \ + : "memory", "1"); \ (_v3) - (_v1) + 1; \ }) -#endif /* CONFIG_CC_IS_CLANG */ +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ static __always_inline void fpu_vupllf(u8 v1, u8 v2) { From 722926ecbcb36c41e3cf3f051aa6cfbfaebe5959 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:36:58 +0100 Subject: [PATCH 32/53] s390/extable: Rename EX_TABLE_UA_STORE to EX_TABLE_UA_FAULT Rename EX_TABLE_UA_STORE to a more generic EX_TABLE_UA_FAULT name. This allows to use the extable type also for uaccess inline assemblies which read from userspace, without causing confusion. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/asm-extable.h | 6 +++--- arch/s390/include/asm/uaccess.h | 8 ++++---- arch/s390/mm/extable.c | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 00a67464c4453..5f47569c908ab 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -9,7 +9,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 -#define EX_TYPE_UA_STORE 3 +#define EX_TYPE_UA_FAULT 3 #define EX_TYPE_UA_LOAD_MEM 4 #define EX_TYPE_UA_LOAD_REG 5 #define EX_TYPE_UA_LOAD_REGPAIR 6 @@ -70,8 +70,8 @@ #define EX_TABLE_AMODE31(_fault, _target) \ __EX_TABLE(.amode31.ex_table, _fault, _target, EX_TYPE_FIXUP, __stringify(%%r0), __stringify(%%r0), 0) -#define EX_TABLE_UA_STORE(_fault, _target, _regerr) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_STORE, _regerr, _regerr, 0) +#define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \ + __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0) #define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a81f897a81ceb..0b724b98ba56f 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -103,8 +103,8 @@ __put_user_##type##_noinstr(unsigned type __user *to, \ "0: mvcos %[_to],%[_from],%[_size]\n" \ "1: xr %[rc],%[rc]\n" \ "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \ : [_size] "d" (size), [_from] "Q" (*(from)), \ [spec] "d" (__oac_spec.val) \ @@ -351,8 +351,8 @@ int __noreturn __put_kernel_bad(void); "0: " insn " %[_val],%[_to]\n" \ "1: xr %[rc],%[rc]\n" \ "2:\n" \ - EX_TABLE_UA_STORE(0b, 2b, %[rc]) \ - EX_TABLE_UA_STORE(1b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \ : [_val] "d" (val) \ : "cc"); \ diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 6e9a5e366ff39..37a3e6d46e0fb 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -27,7 +27,7 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, struct pt_r return true; } -static bool ex_handler_ua_store(const struct exception_table_entry *ex, struct pt_regs *regs) +static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct pt_regs *regs) { unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); @@ -97,8 +97,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); - case EX_TYPE_UA_STORE: - return ex_handler_ua_store(ex, regs); + case EX_TYPE_UA_FAULT: + return ex_handler_ua_fault(ex, regs); case EX_TYPE_UA_LOAD_MEM: return ex_handler_ua_load_mem(ex, regs); case EX_TYPE_UA_LOAD_REG: From 5b629e64e5dd22d2f68f730f0c341342d9b83dda Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:36:59 +0100 Subject: [PATCH 33/53] s390/uaccess: Implement __get_kernel_nofault()/__put_kernel_nofault() with mvc Use the mvc instruction in order to implement __get_kernel_nofault() and __put_kernel_nofault(). Both functions have a source and destination address where the code is supposed to read from and write to. Use the mvc instruction to copy from source to destination instead of lg/stg like instructions. This generates slightly better code. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 122 ++++++++++++-------------------- 1 file changed, 44 insertions(+), 78 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 0b724b98ba56f..d598cdcaa9faa 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -341,109 +341,75 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size) return __s390_kernel_write(dst, src, size); } -int __noreturn __put_kernel_bad(void); +void __noreturn __mvc_kernel_nofault_bad(void); -#define __put_kernel_asm(val, to, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_to]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ - EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ - : [rc] "=d" (__rc), [_to] "+Q" (*(to)) \ - : [_val] "d" (val) \ - : "cc"); \ - __rc; \ -}) +#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS -#define __put_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - unsigned long __x = (unsigned long)(*((type *)(src))); \ - int __pk_err; \ + int __rc; \ \ switch (sizeof(type)) { \ case 1: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \ - break; \ case 2: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \ - break; \ case 4: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \ - break; \ case 8: \ - __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \ + asm_inline volatile( \ + "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \ + "1: lhi %[_rc],0\n" \ + "2:\n" \ + EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \ + : [_rc] "=d" (__rc), \ + [_dst] "=Q" (*(type *)dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_len] "I" (sizeof(type))); \ + if (__rc) \ + goto err_label; \ break; \ default: \ - __pk_err = __put_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__pk_err)) \ - goto err_label; \ } while (0) -int __noreturn __get_kernel_bad(void); - -#define __get_kernel_asm(val, from, insn) \ -({ \ - int __rc; \ - \ - asm volatile( \ - "0: " insn " %[_val],%[_from]\n" \ - "1: xr %[rc],%[rc]\n" \ - "2:\n" \ - EX_TABLE_UA_LOAD_REG(0b, 2b, %[rc], %[_val]) \ - EX_TABLE_UA_LOAD_REG(1b, 2b, %[rc], %[_val]) \ - : [rc] "=d" (__rc), [_val] "=d" (val) \ - : [_from] "Q" (*(from)) \ - : "cc"); \ - __rc; \ -}) +#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ -#define __get_kernel_nofault(dst, src, type, err_label) \ +#define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - int __gk_err; \ + type *(__dst) = (type *)(dst); \ + int __rc; \ \ switch (sizeof(type)) { \ - case 1: { \ - unsigned char __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 2: { \ - unsigned short __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 4: { \ - unsigned int __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \ - *((type *)(dst)) = (type)__x; \ - break; \ - }; \ - case 8: { \ - unsigned long __x; \ - \ - __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \ - *((type *)(dst)) = (type)__x; \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + asm_inline volatile( \ + "0: mvc 0(%[_len],%[_dst]),%[_src]\n" \ + "1: lhi %[_rc],0\n" \ + "2:\n" \ + EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \ + : [_rc] "=d" (__rc), \ + "=m" (*__dst) \ + : [_src] "Q" (*(type *)(src)), \ + [_dst] "a" (__dst), \ + [_len] "I" (sizeof(type))); \ + if (__rc) \ + goto err_label; \ break; \ - }; \ default: \ - __gk_err = __get_kernel_bad(); \ + __mvc_kernel_nofault_bad(); \ break; \ } \ - if (unlikely(__gk_err)) \ - goto err_label; \ } while (0) +#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ + +#define __get_kernel_nofault __mvc_kernel_nofault +#define __put_kernel_nofault __mvc_kernel_nofault + void __cmpxchg_user_key_called_with_bad_pointer(void); #define CMPXCHG_USER_KEY_MAX_LOOPS 128 From c7474da9fef537c0fc89cef552897116456a2e41 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:37:00 +0100 Subject: [PATCH 34/53] s390/uaccess: Use asm goto for __mvc_kernel_nofault() Use asm goto for __mvc_kernel_nofault() if available. This generates slightly better code, since the error checking happens implicitly with the goto (aka exception) and the good path comes without any checks and branches. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d598cdcaa9faa..b96bda0f12756 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -343,29 +343,25 @@ static inline void *s390_kernel_write(void *dst, const void *src, size_t size) void __noreturn __mvc_kernel_nofault_bad(void); -#ifdef CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS +#if defined(CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && defined(CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS) #define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ - int __rc; \ - \ switch (sizeof(type)) { \ case 1: \ case 2: \ case 4: \ case 8: \ - asm_inline volatile( \ + asm goto( \ "0: mvc %O[_dst](%[_len],%R[_dst]),%[_src]\n" \ - "1: lhi %[_rc],0\n" \ - "2:\n" \ - EX_TABLE_UA_FAULT(0b, 2b, %[_rc]) \ - EX_TABLE_UA_FAULT(1b, 2b, %[_rc]) \ - : [_rc] "=d" (__rc), \ - [_dst] "=Q" (*(type *)dst) \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[err_label]) \ + EX_TABLE(1b, %l[err_label]) \ + : [_dst] "=Q" (*(type *)dst) \ : [_src] "Q" (*(type *)(src)), \ - [_len] "I" (sizeof(type))); \ - if (__rc) \ - goto err_label; \ + [_len] "I" (sizeof(type)) \ + : \ + : err_label); \ break; \ default: \ __mvc_kernel_nofault_bad(); \ @@ -373,7 +369,7 @@ do { \ } \ } while (0) -#else /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT) && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define __mvc_kernel_nofault(dst, src, type, err_label) \ do { \ @@ -405,7 +401,7 @@ do { \ } \ } while (0) -#endif /* CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT && CONFIG_CC_HAS_ASM_AOR_FORMAT_FLAGS */ #define __get_kernel_nofault __mvc_kernel_nofault #define __put_kernel_nofault __mvc_kernel_nofault From 5e680e265f46b28bebc652638bc246cf815ed582 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:37:01 +0100 Subject: [PATCH 35/53] s390/uaccess: Move put_user() / __put_user() close to put_user() asm code Keep put_user() and get_user() code separated. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 60 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index b96bda0f12756..1bd60e365125b 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -160,6 +160,36 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon return rc; } +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + */ +#define __put_user(x, ptr) \ +({ \ + __typeof__(*(ptr)) __x = (x); \ + int __pu_err = -EFAULT; \ + \ + __chk_user_ptr(ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + case 2: \ + case 4: \ + case 8: \ + __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \ + break; \ + default: \ + __put_user_bad(); \ + break; \ + } \ + __builtin_expect(__pu_err, 0); \ +}) + +#define put_user(x, ptr) \ +({ \ + might_fault(); \ + __put_user(x, ptr); \ +}) + int __noreturn __get_user_bad(void); #define DEFINE_GET_USER(type) \ @@ -237,36 +267,6 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign return rc; } -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - */ -#define __put_user(x, ptr) \ -({ \ - __typeof__(*(ptr)) __x = (x); \ - int __pu_err = -EFAULT; \ - \ - __chk_user_ptr(ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - case 2: \ - case 4: \ - case 8: \ - __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \ - break; \ - default: \ - __put_user_bad(); \ - break; \ - } \ - __builtin_expect(__pu_err, 0); \ -}) - -#define put_user(x, ptr) \ -({ \ - might_fault(); \ - __put_user(x, ptr); \ -}) - #define __get_user(x, ptr) \ ({ \ int __gu_err = -EFAULT; \ From 718056f9d96872580f7c7cd73be1928a91924eb5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:37:02 +0100 Subject: [PATCH 36/53] s390/uaccess: Remove __put_user_fn()/__get_user_fn() wrappers The __put_user_fn() and __get_user_fn() wrappers are leftovers from the time where the kernel was compiled with or without mvcos support plus they were later used to workaround the problems that came with asm register constructs. Both reasons do not exist anymore, therefore remove the wrappers and shorten the code. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 113 +++++++++----------------------- 1 file changed, 32 insertions(+), 81 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 1bd60e365125b..95cb9dc956afc 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -128,60 +128,38 @@ DEFINE_PUT_USER(short); DEFINE_PUT_USER(int); DEFINE_PUT_USER(long); -static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) -{ - int rc; - - switch (size) { - case 1: - rc = __put_user_char((unsigned char __user *)ptr, - (unsigned char *)x, - size); - break; - case 2: - rc = __put_user_short((unsigned short __user *)ptr, - (unsigned short *)x, - size); - break; - case 4: - rc = __put_user_int((unsigned int __user *)ptr, - (unsigned int *)x, - size); - break; - case 8: - rc = __put_user_long((unsigned long __user *)ptr, - (unsigned long *)x, - size); - break; - default: - __put_user_bad(); - break; - } - return rc; -} - -/* - * These are the main single-value transfer routines. They automatically - * use the right size if we just have the right pointer type. - */ #define __put_user(x, ptr) \ ({ \ __typeof__(*(ptr)) __x = (x); \ - int __pu_err = -EFAULT; \ + int __prc; \ \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: \ + __prc = __put_user_char((unsigned char __user *)(ptr), \ + (unsigned char *)&__x, \ + sizeof(*(ptr))); \ + break; \ case 2: \ + __prc = __put_user_short((unsigned short __user *)(ptr),\ + (unsigned short *)&__x, \ + sizeof(*(ptr))); \ + break; \ case 4: \ + __prc = __put_user_int((unsigned int __user *)(ptr), \ + (unsigned int *)&__x, \ + sizeof(*(ptr))); \ + break; \ case 8: \ - __pu_err = __put_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __prc = __put_user_long((unsigned long __user *)(ptr), \ + (unsigned long *)&__x, \ + sizeof(*(ptr))); \ break; \ default: \ - __put_user_bad(); \ + __prc = __put_user_bad(); \ break; \ } \ - __builtin_expect(__pu_err, 0); \ + __builtin_expect(__prc, 0); \ }) #define put_user(x, ptr) \ @@ -195,7 +173,7 @@ int __noreturn __get_user_bad(void); #define DEFINE_GET_USER(type) \ static get_put_user_noinstr_attributes int \ __get_user_##type##_noinstr(unsigned type *to, \ - unsigned type __user *from, \ + const unsigned type __user *from, \ unsigned long size) \ { \ union oac __oac_spec = { \ @@ -220,7 +198,7 @@ __get_user_##type##_noinstr(unsigned type *to, \ } \ \ static __always_inline int \ -__get_user_##type(unsigned type *to, unsigned type __user *from, \ +__get_user_##type(unsigned type *to, const unsigned type __user *from, \ unsigned long size) \ { \ int rc; \ @@ -235,77 +213,50 @@ DEFINE_GET_USER(short); DEFINE_GET_USER(int); DEFINE_GET_USER(long); -static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) -{ - int rc; - - switch (size) { - case 1: - rc = __get_user_char((unsigned char *)x, - (unsigned char __user *)ptr, - size); - break; - case 2: - rc = __get_user_short((unsigned short *)x, - (unsigned short __user *)ptr, - size); - break; - case 4: - rc = __get_user_int((unsigned int *)x, - (unsigned int __user *)ptr, - size); - break; - case 8: - rc = __get_user_long((unsigned long *)x, - (unsigned long __user *)ptr, - size); - break; - default: - __get_user_bad(); - break; - } - return rc; -} - #define __get_user(x, ptr) \ ({ \ - int __gu_err = -EFAULT; \ + const __user void *____guptr = (ptr); \ + int __grc; \ \ __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ + const unsigned char __user *__guptr = ____guptr; \ unsigned char __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_char(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 2: { \ + const unsigned short __user *__guptr = ____guptr; \ unsigned short __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_short(&__x, __guptr, sizeof(*(ptr)));\ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 4: { \ + const unsigned int __user *__guptr = ____guptr; \ unsigned int __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_int(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ case 8: { \ + const unsigned long __user *__guptr = ____guptr; \ unsigned long __x; \ \ - __gu_err = __get_user_fn(&__x, ptr, sizeof(*(ptr))); \ + __grc = __get_user_long(&__x, __guptr, sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *)&__x; \ break; \ }; \ default: \ - __get_user_bad(); \ + __grc = __get_user_bad(); \ break; \ } \ - __builtin_expect(__gu_err, 0); \ + __builtin_expect(__grc, 0); \ }) #define get_user(x, ptr) \ From 67e959af254fe842c7b2ba4396c931985b289ef9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:37:03 +0100 Subject: [PATCH 37/53] s390/uaccess: Cleanup noinstr __put_user()/__get_user() inline assembly constraints Remove superfluous underscores, brackets, and early clobber to make the code a bit more readable. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 95cb9dc956afc..8f6170852410a 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -99,14 +99,14 @@ __put_user_##type##_noinstr(unsigned type __user *to, \ int rc; \ \ asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos %[_to],%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + " lr %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ - : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ + : [rc] "=d" (rc), [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ [spec] "d" (__oac_spec.val) \ : "cc", "0"); \ return rc; \ @@ -183,16 +183,16 @@ __get_user_##type##_noinstr(unsigned type *to, \ int rc; \ \ asm volatile( \ - " lr 0,%[spec]\n" \ - "0: mvcos 0(%[_to]),%[_from],%[_size]\n" \ - "1: xr %[rc],%[rc]\n" \ + " lr %%r0,%[spec]\n" \ + "0: mvcos 0(%[to]),%[from],%[size]\n" \ + "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \ - EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \ - : [rc] "=&d" (rc), "=Q" (*(to)) \ - : [_size] "d" (size), [_from] "Q" (*(from)), \ - [spec] "d" (__oac_spec.val), [_to] "a" (to), \ - [_ksize] "K" (size) \ + EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[to], %[ksize]) \ + EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[to], %[ksize]) \ + : [rc] "=d" (rc), "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "d" (__oac_spec.val), [to] "a" (to), \ + [ksize] "K" (size) \ : "cc", "0"); \ return rc; \ } \ From dc7ff4b8cb782901c0c6809c708502293165a780 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:37:04 +0100 Subject: [PATCH 38/53] s390/uaccess: Replace EX_TABLE_UA_LOAD_MEM exception handling Remove EX_TABLE_UA_LOAD_MEM exception handling and replace it with EX_TABLE_UA_FAULT. Open code the return code check, and also open code setting of the destination to zero in case of an error. In almost all cases the compiler is able to optimize the open coded checks away, since all users of get_users() must check the return code, and are not supposed to use the result in case of an error. In addition this allows to change the get_user() inline assembly so that the "Q" constraint can be used for the destination, instead of only an "a" constraint. This generates slightly better code. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/asm-extable.h | 4 ---- arch/s390/include/asm/uaccess.h | 14 ++++++++------ arch/s390/mm/extable.c | 14 -------------- 3 files changed, 8 insertions(+), 24 deletions(-) diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index 5f47569c908ab..2e829c16fd8ad 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -10,7 +10,6 @@ #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 #define EX_TYPE_UA_FAULT 3 -#define EX_TYPE_UA_LOAD_MEM 4 #define EX_TYPE_UA_LOAD_REG 5 #define EX_TYPE_UA_LOAD_REGPAIR 6 #define EX_TYPE_ZEROPAD 7 @@ -73,9 +72,6 @@ #define EX_TABLE_UA_FAULT(_fault, _target, _regerr) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_FAULT, _regerr, _regerr, 0) -#define EX_TABLE_UA_LOAD_MEM(_fault, _target, _regerr, _regmem, _len) \ - __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_MEM, _regerr, _regmem, _len) - #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \ __EX_TABLE(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 8f6170852410a..db22aefd452c5 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -184,16 +184,18 @@ __get_user_##type##_noinstr(unsigned type *to, \ \ asm volatile( \ " lr %%r0,%[spec]\n" \ - "0: mvcos 0(%[to]),%[from],%[size]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ "1: lhi %[rc],0\n" \ "2:\n" \ - EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[to], %[ksize]) \ - EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[to], %[ksize]) \ - : [rc] "=d" (rc), "=Q" (*to) \ + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ + : [rc] "=d" (rc), [to] "=Q" (*to) \ : [size] "d" (size), [from] "Q" (*from), \ - [spec] "d" (__oac_spec.val), [to] "a" (to), \ - [ksize] "K" (size) \ + [spec] "d" (__oac_spec.val) \ : "cc", "0"); \ + if (likely(!rc)) \ + return 0; \ + *to = 0; \ return rc; \ } \ \ diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 37a3e6d46e0fb..a046be1715cf0 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -36,18 +36,6 @@ static bool ex_handler_ua_fault(const struct exception_table_entry *ex, struct p return true; } -static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struct pt_regs *regs) -{ - unsigned int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); - unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); - size_t len = FIELD_GET(EX_DATA_LEN, ex->data); - - regs->gprs[reg_err] = -EFAULT; - memset((void *)regs->gprs[reg_addr], 0, len); - regs->psw.addr = extable_fixup(ex); - return true; -} - static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, bool pair, struct pt_regs *regs) { @@ -99,8 +87,6 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_bpf(ex, regs); case EX_TYPE_UA_FAULT: return ex_handler_ua_fault(ex, regs); - case EX_TYPE_UA_LOAD_MEM: - return ex_handler_ua_load_mem(ex, regs); case EX_TYPE_UA_LOAD_REG: return ex_handler_ua_load_reg(ex, false, regs); case EX_TYPE_UA_LOAD_REGPAIR: From 636d35aec55d1e9062ce81cf0da66d05ed561096 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:37:05 +0100 Subject: [PATCH 39/53] s390/uaccess: Remove usage of the oac specifier Remove usage of the operand access control specifier for put_user() and get_user() (again). Instead hardcode the specifier for both inline assemblies. This saves one instruction. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index db22aefd452c5..bdf46510b214e 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -92,14 +92,10 @@ __put_user_##type##_noinstr(unsigned type __user *to, \ unsigned type *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac1.as = PSW_BITS_AS_SECONDARY, \ - .oac1.a = 1, \ - }; \ int rc; \ \ asm volatile( \ - " lr %%r0,%[spec]\n" \ + " llilh %%r0,%[spec]\n" \ "0: mvcos %[to],%[from],%[size]\n" \ "1: lhi %[rc],0\n" \ "2:\n" \ @@ -107,7 +103,7 @@ __put_user_##type##_noinstr(unsigned type __user *to, \ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ : [rc] "=d" (rc), [to] "+Q" (*to) \ : [size] "d" (size), [from] "Q" (*from), \ - [spec] "d" (__oac_spec.val) \ + [spec] "I" (0x81) \ : "cc", "0"); \ return rc; \ } \ @@ -176,14 +172,10 @@ __get_user_##type##_noinstr(unsigned type *to, \ const unsigned type __user *from, \ unsigned long size) \ { \ - union oac __oac_spec = { \ - .oac2.as = PSW_BITS_AS_SECONDARY, \ - .oac2.a = 1, \ - }; \ int rc; \ \ asm volatile( \ - " lr %%r0,%[spec]\n" \ + " lhi %%r0,%[spec]\n" \ "0: mvcos %[to],%[from],%[size]\n" \ "1: lhi %[rc],0\n" \ "2:\n" \ @@ -191,7 +183,7 @@ __get_user_##type##_noinstr(unsigned type *to, \ EX_TABLE_UA_FAULT(1b, 2b, %[rc]) \ : [rc] "=d" (rc), [to] "=Q" (*to) \ : [size] "d" (size), [from] "Q" (*from), \ - [spec] "d" (__oac_spec.val) \ + [spec] "I" (0x81) \ : "cc", "0"); \ if (likely(!rc)) \ return 0; \ From ea5ae3a7f0a95cea70bd8afd165c95e19404a216 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2025 13:37:06 +0100 Subject: [PATCH 40/53] s390/uaccess: Use asm goto for put_user()/get_user() Use asm goto if available for put_user() and get_user(). This generates slightly better code. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 83 ++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index bdf46510b214e..3734d0a02b8dc 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -86,7 +86,34 @@ int __noreturn __put_user_bad(void); #define get_put_user_noinstr_attributes __always_inline #endif -#define DEFINE_PUT_USER(type) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define DEFINE_PUT_USER_NOINSTR(type) \ +static get_put_user_noinstr_attributes int \ +__put_user_##type##_noinstr(unsigned type __user *to, \ + unsigned type *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " llilh %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "+Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_PUT_USER_NOINSTR(type) \ static get_put_user_noinstr_attributes int \ __put_user_##type##_noinstr(unsigned type __user *to, \ unsigned type *from, \ @@ -106,8 +133,16 @@ __put_user_##type##_noinstr(unsigned type __user *to, \ [spec] "I" (0x81) \ : "cc", "0"); \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_PUT_USER_NOINSTR(char); +DEFINE_PUT_USER_NOINSTR(short); +DEFINE_PUT_USER_NOINSTR(int); +DEFINE_PUT_USER_NOINSTR(long); + +#define DEFINE_PUT_USER(type) \ static __always_inline int \ __put_user_##type(unsigned type __user *to, unsigned type *from, \ unsigned long size) \ @@ -166,7 +201,35 @@ DEFINE_PUT_USER(long); int __noreturn __get_user_bad(void); -#define DEFINE_GET_USER(type) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT + +#define DEFINE_GET_USER_NOINSTR(type) \ +static get_put_user_noinstr_attributes int \ +__get_user_##type##_noinstr(unsigned type *to, \ + const unsigned type __user *from, \ + unsigned long size) \ +{ \ + asm goto( \ + " lhi %%r0,%[spec]\n" \ + "0: mvcos %[to],%[from],%[size]\n" \ + "1: nopr %%r7\n" \ + EX_TABLE(0b, %l[Efault]) \ + EX_TABLE(1b, %l[Efault]) \ + : [to] "=Q" (*to) \ + : [size] "d" (size), [from] "Q" (*from), \ + [spec] "I" (0x81) \ + : "cc", "0" \ + : Efault \ + ); \ + return 0; \ +Efault: \ + *to = 0; \ + return -EFAULT; \ +} + +#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +#define DEFINE_GET_USER_NOINSTR(type) \ static get_put_user_noinstr_attributes int \ __get_user_##type##_noinstr(unsigned type *to, \ const unsigned type __user *from, \ @@ -189,8 +252,16 @@ __get_user_##type##_noinstr(unsigned type *to, \ return 0; \ *to = 0; \ return rc; \ -} \ - \ +} + +#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ + +DEFINE_GET_USER_NOINSTR(char); +DEFINE_GET_USER_NOINSTR(short); +DEFINE_GET_USER_NOINSTR(int); +DEFINE_GET_USER_NOINSTR(long); + +#define DEFINE_GET_USER(type) \ static __always_inline int \ __get_user_##type(unsigned type *to, const unsigned type __user *from, \ unsigned long size) \ From 884f0582b232f521333c8ad1899c977d580dd755 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Jan 2025 16:24:31 +0100 Subject: [PATCH 41/53] s390/uaccess: Remove INLINE_COPY_FROM_USER and INLINE_COPY_TO_USER The s390 implementations of raw_copy_from_user() and raw_copy_to_user() are never inlined. However INLINE_COPY_FROM_USER and INLINE_COPY_TO_USER are still set. This leads to the odd situation that only the error handling (memset to zero of the not copied bytes) of copy_from_user() is inlined, while the actual fast path code is out-of-line. This would make sense if raw_copy_from_user() and raw_copy_to_user() were implemented in assembler files, where inlining is not possible. But the current s390 setup does not make any sense. Address this by moving the raw uaccess copy inline assemblies to the uaccess header file, and remove INLINE_COPY_FROM_USER and INLINE_COPY_TO_USER definitions. This way the uaccess code, but now including error handling, is still out-of-line with the common code _copy_from_user() and _copy_to_user() variants, which inline the raw uaccess functions via _inline_copy_from_user() and _inline_copy_to_user(). This reduces the size of the kernel image by ~17kb. (defconfig, gcc 14.2.0) Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 139 +++++++++++++++++++++++++------- arch/s390/lib/uaccess.c | 90 --------------------- 2 files changed, 109 insertions(+), 120 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 3734d0a02b8dc..680bb5ce95ed5 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -22,16 +22,117 @@ void debug_user_asce(int exit); -unsigned long __must_check -raw_copy_from_user(void *to, const void __user *from, unsigned long n); +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; -unsigned long __must_check -raw_copy_to_user(void __user *to, const void *from, unsigned long n); +static __always_inline __must_check unsigned long +raw_copy_from_user_key(void *to, const void __user *from, unsigned long size, unsigned long key) +{ + unsigned long rem; + union oac spec = { + .oac2.key = key, + .oac2.as = PSW_BITS_AS_SECONDARY, + .oac2.k = 1, + .oac2.a = 1, + }; -#ifndef CONFIG_KASAN -#define INLINE_COPY_FROM_USER -#define INLINE_COPY_TO_USER -#endif + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[from]),%[size]\n" + "1: jz 5f\n" + " algr %[size],%[val]\n" + " slgr %[from],%[val]\n" + " slgr %[to],%[val]\n" + " j 0b\n" + "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ + " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ + " slgr %[rem],%[from]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ + " jnh 6f\n" + "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" + "4: slgr %[size],%[rem]\n" + " j 6f\n" + "5: lghi %[size],0\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(4b, 6b) + : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [spec] "d" (spec.val) + : "cc", "memory", "0"); + return size; +} + +static __always_inline __must_check unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return raw_copy_from_user_key(to, from, n, 0); +} + +static __always_inline __must_check unsigned long +raw_copy_to_user_key(void __user *to, const void *from, unsigned long size, unsigned long key) +{ + unsigned long rem; + union oac spec = { + .oac1.key = key, + .oac1.as = PSW_BITS_AS_SECONDARY, + .oac1.k = 1, + .oac1.a = 1, + }; + + asm_inline volatile( + " lr %%r0,%[spec]\n" + "0: mvcos 0(%[to]),0(%[from]),%[size]\n" + "1: jz 5f\n" + " algr %[size],%[val]\n" + " slgr %[to],%[val]\n" + " slgr %[from],%[val]\n" + " j 0b\n" + "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ + " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ + " slgr %[rem],%[to]\n" + " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ + " jnh 6f\n" + "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" + "4: slgr %[size],%[rem]\n" + " j 6f\n" + "5: lghi %[size],0\n" + "6:\n" + EX_TABLE(0b, 2b) + EX_TABLE(1b, 2b) + EX_TABLE(3b, 6b) + EX_TABLE(4b, 6b) + : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) + : [val] "a" (-4096UL), [spec] "d" (spec.val) + : "cc", "memory", "0"); + return size; +} + +static __always_inline __must_check unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return raw_copy_to_user_key(to, from, n, 0); +} unsigned long __must_check _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); @@ -55,28 +156,6 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo return n; } -union oac { - unsigned int val; - struct { - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac1; - struct { - unsigned short key : 4; - unsigned short : 4; - unsigned short as : 2; - unsigned short : 4; - unsigned short k : 1; - unsigned short a : 1; - } oac2; - }; -}; - int __noreturn __put_user_bad(void); #ifdef CONFIG_KMSAN diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index c7c269d5c491d..f977b7c37efc7 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -31,51 +31,6 @@ void debug_user_asce(int exit) } #endif /*CONFIG_DEBUG_ENTRY */ -static unsigned long raw_copy_from_user_key(void *to, const void __user *from, - unsigned long size, unsigned long key) -{ - unsigned long rem; - union oac spec = { - .oac2.key = key, - .oac2.as = PSW_BITS_AS_SECONDARY, - .oac2.k = 1, - .oac2.a = 1, - }; - - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[from],%[val]\n" - " slgr %[to],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[from])\n" /* rem = from + 4095 */ - " nr %[rem],%[val]\n" /* rem = (from + 4095) & -4096 */ - " slgr %[rem],%[from]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [from] "+&a" (from), [to] "+&a" (to), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return raw_copy_from_user_key(to, from, n, 0); -} -EXPORT_SYMBOL(raw_copy_from_user); - unsigned long _copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) { @@ -93,51 +48,6 @@ unsigned long _copy_from_user_key(void *to, const void __user *from, } EXPORT_SYMBOL(_copy_from_user_key); -static unsigned long raw_copy_to_user_key(void __user *to, const void *from, - unsigned long size, unsigned long key) -{ - unsigned long rem; - union oac spec = { - .oac1.key = key, - .oac1.as = PSW_BITS_AS_SECONDARY, - .oac1.k = 1, - .oac1.a = 1, - }; - - asm volatile( - " lr 0,%[spec]\n" - "0: mvcos 0(%[to]),0(%[from]),%[size]\n" - "1: jz 5f\n" - " algr %[size],%[val]\n" - " slgr %[to],%[val]\n" - " slgr %[from],%[val]\n" - " j 0b\n" - "2: la %[rem],4095(%[to])\n" /* rem = to + 4095 */ - " nr %[rem],%[val]\n" /* rem = (to + 4095) & -4096 */ - " slgr %[rem],%[to]\n" - " clgr %[size],%[rem]\n" /* copy crosses next page boundary? */ - " jnh 6f\n" - "3: mvcos 0(%[to]),0(%[from]),%[rem]\n" - "4: slgr %[size],%[rem]\n" - " j 6f\n" - "5: slgr %[size],%[size]\n" - "6:\n" - EX_TABLE(0b, 2b) - EX_TABLE(1b, 2b) - EX_TABLE(3b, 6b) - EX_TABLE(4b, 6b) - : [size] "+&a" (size), [to] "+&a" (to), [from] "+&a" (from), [rem] "=&a" (rem) - : [val] "a" (-4096UL), [spec] "d" (spec.val) - : "cc", "memory", "0"); - return size; -} - -unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - return raw_copy_to_user_key(to, from, n, 0); -} -EXPORT_SYMBOL(raw_copy_to_user); - unsigned long _copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) { From 9e8f72f7730a403b66b3cf4b8e34244d08f74db5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Jan 2025 15:55:00 +0100 Subject: [PATCH 42/53] s390/futex: Generate futex atomic op functions Cleanup the futex atomic op inline assembly and generate a function for each futex atomic op. This makes the code hopefully a bit more readable. Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/futex.h | 76 +++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 752a2310f0d6c..84fedd7beaec5 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -8,56 +8,64 @@ #include #include -#define __futex_atomic_op(insn, ret, oldval, newval, uaddr, oparg) \ - asm volatile( \ - " sacf 256\n" \ - "0: l %1,0(%6)\n" \ - "1:"insn \ - "2: cs %1,%2,0(%6)\n" \ - "3: jl 1b\n" \ - " lhi %0,0\n" \ - "4: sacf 768\n" \ - EX_TABLE(0b,4b) EX_TABLE(1b,4b) \ - EX_TABLE(2b,4b) EX_TABLE(3b,4b) \ - : "=d" (ret), "=&d" (oldval), "=&d" (newval), \ - "=m" (*uaddr) \ - : "0" (-EFAULT), "d" (oparg), "a" (uaddr), \ - "m" (*uaddr) : "cc"); +#define FUTEX_OP_FUNC(name, insn) \ +static inline int \ +__futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ +{ \ + int rc, new; \ + \ + asm_inline volatile( \ + " sacf 256\n" \ + "0: l %[old],%[uaddr]\n" \ + "1:"insn \ + "2: cs %[old],%[new],%[uaddr]\n" \ + "3: jl 1b\n" \ + " lhi %[rc],0\n" \ + "4: sacf 768\n" \ + EX_TABLE_UA_FAULT(0b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(1b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(2b, 4b, %[rc]) \ + EX_TABLE_UA_FAULT(3b, 4b, %[rc]) \ + : [rc] "=d" (rc), [old] "=&d" (*old), \ + [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \ + : [oparg] "d" (oparg) \ + : "cc"); \ + return rc; \ +} + +FUTEX_OP_FUNC(set, "lr %[new],%[oparg]\n") +FUTEX_OP_FUNC(add, "lr %[new],%[old]\n ar %[new],%[oparg]\n") +FUTEX_OP_FUNC(or, "lr %[new],%[old]\n or %[new],%[oparg]\n") +FUTEX_OP_FUNC(and, "lr %[new],%[old]\n nr %[new],%[oparg]\n") +FUTEX_OP_FUNC(xor, "lr %[new],%[old]\n xr %[new],%[oparg]\n") -static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, - u32 __user *uaddr) +static inline +int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { - int oldval = 0, newval, ret; + int old, rc; switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("lr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_set(oparg, &old, uaddr); break; case FUTEX_OP_ADD: - __futex_atomic_op("lr %2,%1\nar %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_add(oparg, &old, uaddr); break; case FUTEX_OP_OR: - __futex_atomic_op("lr %2,%1\nor %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_or(oparg, &old, uaddr); break; case FUTEX_OP_ANDN: - __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, ~oparg); + rc = __futex_atomic_and(~oparg, &old, uaddr); break; case FUTEX_OP_XOR: - __futex_atomic_op("lr %2,%1\nxr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + rc = __futex_atomic_xor(oparg, &old, uaddr); break; default: - ret = -ENOSYS; + rc = -ENOSYS; } - - if (!ret) - *oval = oldval; - - return ret; + if (!rc) + *oval = old; + return rc; } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, From 554f8842dcd9555cfd91517062c31f229b309ba1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Jan 2025 15:55:01 +0100 Subject: [PATCH 43/53] s390/futex: Cleanup futex_atomic_cmpxchg_inatomic() Cleanup the futex_atomic_cmpxchg_inatomic() inline assembly to make it a bit more readable. Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/futex.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 84fedd7beaec5..6aa2544e5ff48 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -71,19 +71,20 @@ int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { - int ret; + int rc; - asm volatile( - " sacf 256\n" - "0: cs %1,%4,0(%5)\n" - "1: la %0,0\n" - "2: sacf 768\n" - EX_TABLE(0b,2b) EX_TABLE(1b,2b) - : "=d" (ret), "+d" (oldval), "=m" (*uaddr) - : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) + asm_inline volatile( + " sacf 256\n" + "0: cs %[old],%[new],%[uaddr]\n" + "1: lhi %[rc],0\n" + "2: sacf 768\n" + EX_TABLE_UA_FAULT(0b, 2b, %[rc]) + EX_TABLE_UA_FAULT(1b, 2b, %[rc]) + : [rc] "=d" (rc), [old] "+d" (oldval), [uaddr] "+Q" (*uaddr) + : [new] "d" (newval) : "cc", "memory"); *uval = oldval; - return ret; + return rc; } #endif /* _ASM_S390_FUTEX_H */ From c4891f4599410e50a1e42b8e76ca769cac83d846 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Jan 2025 15:55:02 +0100 Subject: [PATCH 44/53] s390/uaccess: Rename get_put_user_noinstr_attributes to uaccess_kmsan_or_inline Rename get_put_user_noinstr_attributes to a more generic uaccess_kmsan_or_inline name. This allows to use it for other non put_user()/get_user() uaccess functions withour causing confusion. Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/uaccess.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 680bb5ce95ed5..f5920163ee974 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -159,16 +159,15 @@ copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned lo int __noreturn __put_user_bad(void); #ifdef CONFIG_KMSAN -#define get_put_user_noinstr_attributes \ - noinline __maybe_unused __no_sanitize_memory +#define uaccess_kmsan_or_inline noinline __maybe_unused __no_sanitize_memory #else -#define get_put_user_noinstr_attributes __always_inline +#define uaccess_kmsan_or_inline __always_inline #endif #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define DEFINE_PUT_USER_NOINSTR(type) \ -static get_put_user_noinstr_attributes int \ +static uaccess_kmsan_or_inline int \ __put_user_##type##_noinstr(unsigned type __user *to, \ unsigned type *from, \ unsigned long size) \ @@ -193,7 +192,7 @@ Efault: \ #else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ #define DEFINE_PUT_USER_NOINSTR(type) \ -static get_put_user_noinstr_attributes int \ +static uaccess_kmsan_or_inline int \ __put_user_##type##_noinstr(unsigned type __user *to, \ unsigned type *from, \ unsigned long size) \ @@ -283,7 +282,7 @@ int __noreturn __get_user_bad(void); #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT #define DEFINE_GET_USER_NOINSTR(type) \ -static get_put_user_noinstr_attributes int \ +static uaccess_kmsan_or_inline int \ __get_user_##type##_noinstr(unsigned type *to, \ const unsigned type __user *from, \ unsigned long size) \ @@ -309,7 +308,7 @@ Efault: \ #else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ #define DEFINE_GET_USER_NOINSTR(type) \ -static get_put_user_noinstr_attributes int \ +static uaccess_kmsan_or_inline int \ __get_user_##type##_noinstr(unsigned type *to, \ const unsigned type __user *from, \ unsigned long size) \ From d8eebb11e933b784f6e5071da93d977f9187b2b5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 21 Jan 2025 15:55:03 +0100 Subject: [PATCH 45/53] s390/futex: Avoid KMSAN instrumention for user pointers Similar to commit eb6efdfeaeca ("s390/uaccess: add KMSAN support to put_user() and get_user()") disable KMSAN instrumention for futex inline assemblies, which contain dereferenced user pointers. With KMSAN instrumentation this would lead to accesses of shadows for user pointers, which should not happen. Handle the futex operations like they copy a value (old) from user space to kernel space. Acked-by: Ilya Leoshkevich Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/futex.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 6aa2544e5ff48..f5781794356ba 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -2,6 +2,7 @@ #ifndef _ASM_S390_FUTEX_H #define _ASM_S390_FUTEX_H +#include #include #include #include @@ -9,11 +10,12 @@ #include #define FUTEX_OP_FUNC(name, insn) \ -static inline int \ +static uaccess_kmsan_or_inline int \ __futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ { \ int rc, new; \ \ + instrument_copy_from_user_before(old, uaddr, sizeof(*old)); \ asm_inline volatile( \ " sacf 256\n" \ "0: l %[old],%[uaddr]\n" \ @@ -30,6 +32,8 @@ __futex_atomic_##name(int oparg, int *old, u32 __user *uaddr) \ [new] "=&d" (new), [uaddr] "+Q" (*uaddr) \ : [oparg] "d" (oparg) \ : "cc"); \ + if (!rc) \ + instrument_copy_from_user_after(old, uaddr, sizeof(*old), 0); \ return rc; \ } @@ -68,11 +72,12 @@ int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) return rc; } -static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) +static uaccess_kmsan_or_inline +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { int rc; + instrument_copy_from_user_before(uval, uaddr, sizeof(*uval)); asm_inline volatile( " sacf 256\n" "0: cs %[old],%[new],%[uaddr]\n" @@ -84,6 +89,7 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : [new] "d" (newval) : "cc", "memory"); *uval = oldval; + instrument_copy_from_user_after(uval, uaddr, sizeof(*uval), 0); return rc; } From dc287e4c9149ab54a5003b4d4da007818b5fda3d Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 22 Jan 2025 14:36:01 +0100 Subject: [PATCH 46/53] s390/pci: Fix SR-IOV for PFs initially in standby Since commit 25f39d3dcb48 ("s390/pci: Ignore RID for isolated VFs") PFs which are not initially configured but in standby are considered isolated. That is they create only a single function PCI domain. Due to the PCI domains being created on discovery, this means that even if they are configured later on, sibling PFs and their child VFs will not be added to their PCI domain breaking SR-IOV expectations. The reason the referenced commit ignored standby PFs for the creation of multi-function PCI subhierarchies, was to work around a PCI domain renumbering scenario on reboot. The renumbering would occur after removing a previously in standby PF, whose domain number is used for its configured sibling PFs and their child VFs, but which itself remained in standby. When this is followed by a reboot, the sibling PF is used instead to determine the PCI domain number of it and its child VFs. In principle it is not possible to know which standby PFs will be configured later and which may be removed. The PCI domain and root bus are pre-requisites for hotplug slots so the decision of which functions belong to which domain can not be postponed. With the renumbering occurring only in rare circumstances and being generally benign, accept it as an oddity and fix SR-IOV for initially standby PFs simply by allowing them to create PCI domains. Cc: stable@vger.kernel.org Reviewed-by: Gerd Bayer Fixes: 25f39d3dcb48 ("s390/pci: Ignore RID for isolated VFs") Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/pci/pci_bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index d5ace00d10f04..857afbc4828f0 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -171,7 +171,6 @@ void zpci_bus_scan_busses(void) static bool zpci_bus_is_multifunction_root(struct zpci_dev *zdev) { return !s390_pci_no_rid && zdev->rid_available && - zpci_is_device_configured(zdev) && !zdev->vfn; } From 0a89123deec34698720ccb1a2aba2d7df49076b3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 23 Jan 2025 09:14:15 +0100 Subject: [PATCH 47/53] s390/bitops: Use correct constraint for arch_test_bit() inline assembly Use the "Q" instead of "R" constraint to correctly reflect the instruction format of the tm instruction: the first operand is a memory reference without index register and short displacement. The "R" constraint indicates a memory reference with index register instead. This may lead to compile errors like: arch/s390/include/asm/bitops.h: Assembler messages: arch/s390/include/asm/bitops.h:60: Error: operand 1: syntax error; missing ')' after base register arch/s390/include/asm/bitops.h:60: Error: operand 2: syntax error; ')' not allowed here arch/s390/include/asm/bitops.h:60: Error: junk at end of line: `,4' Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/r/20250122-s390-fix-std-for-gcc-15-v1-1-8b00cadee083@kernel.org Fixes: b2bc1b1a77c0 ("s390/bitops: Provide optimized arch_test_bit()") Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/bitops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 15aa64e3020e5..d5125296ade25 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -60,7 +60,7 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig asm volatile( " tm %[addr],%[mask]\n" : "=@cc" (cc) - : [addr] "R" (*addr), [mask] "I" (mask) + : [addr] "Q" (*addr), [mask] "I" (mask) ); return cc == 3; } From 3b8b80e993766dc96d1a1c01c62f5d15fafc79b9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 22 Jan 2025 19:54:27 -0700 Subject: [PATCH 48/53] s390: Add '-std=gnu11' to decompressor and purgatory CFLAGS GCC changed the default C standard dialect from gnu17 to gnu23, which should not have impacted the kernel because it explicitly requests the gnu11 standard in the main Makefile. However, there are certain places in the s390 code that use their own CFLAGS without a '-std=' value, which break with this dialect change because of the kernel's own definitions of bool, false, and true conflicting with the C23 reserved keywords. include/linux/stddef.h:11:9: error: cannot use keyword 'false' as enumeration constant 11 | false = 0, | ^~~~~ include/linux/stddef.h:11:9: note: 'false' is a keyword with '-std=c23' onwards include/linux/types.h:35:33: error: 'bool' cannot be defined via 'typedef' 35 | typedef _Bool bool; | ^~~~ include/linux/types.h:35:33: note: 'bool' is a keyword with '-std=c23' onwards Add '-std=gnu11' to the decompressor and purgatory CFLAGS to eliminate these errors and make the C standard version of these areas match the rest of the kernel. Cc: stable@vger.kernel.org Signed-off-by: Nathan Chancellor Tested-by: Heiko Carstens Link: https://lore.kernel.org/r/20250122-s390-fix-std-for-gcc-15-v1-1-8b00cadee083@kernel.org Signed-off-by: Alexander Gordeev --- arch/s390/Makefile | 2 +- arch/s390/purgatory/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 3f25498dac656..5fae311203c26 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -22,7 +22,7 @@ KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__ ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) endif -KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack +KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2 -mpacked-stack -std=gnu11 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY KBUILD_CFLAGS_DECOMPRESSOR += -D__DECOMPRESSOR KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbackchain diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 24eccaa293371..bdcf2a3b6c41b 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -13,7 +13,7 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes +KBUILD_CFLAGS := -std=gnu11 -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common From b05d66c882994fb63e8435bf64f262ef44c51874 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Mon, 27 Jan 2025 15:23:02 +0100 Subject: [PATCH 49/53] s390/vmem: Fix null-pointer-arithmetic warning in vmem_map_init() Fixes the following clang warning introduced by commit d7bebcb4a898 ("s390: Optimize __pa/__va when RANDOMIZE_IDENTITY_BASE is off") arch/s390/mm/vmem.c:665:36: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 665 | __set_memory_4k(__va(0), __va(0) + ident_map_size); | ~~~~~~~ ^ Fixes: d7bebcb4a898 ("s390: Optimize __pa/__va when RANDOMIZE_IDENTITY_BASE is off") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501270309.HzsVNo3o-lkp@intel.com/ Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/mm/vmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 7c684c54e7216..8ead999e340b6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -662,7 +662,7 @@ void __init vmem_map_init(void) if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); if (debug_pagealloc_enabled()) - __set_memory_4k(__va(0), __va(0) + ident_map_size); + __set_memory_4k(__va(0), absolute_pointer(__va(0)) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } From 7cbae7ea3de5c17b47aeb34a770446aba1eaa904 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jan 2025 14:51:52 +0100 Subject: [PATCH 50/53] s390/tools: Use array instead of string initializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The in-kernel disassembler intentionally uses nun-null terminated strings in order to keep the arrays which contain mnemonics as small as possible. GCC 15 however warns about this: ./arch/s390/include/generated/asm/dis-defs.h:1662:71: error: initializer-string for array of ‘char’ is too long [-Werror=unterminated-string-initialization] 1662 | [1261] = { .opfrag = 0xea, .format = INSTR_SS_L0RDRD, .name = "unpka" }, \ Get rid of this warning by using array initializers. Reviewed-by: Jens Remus Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/tools/gen_opcode_table.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index a1bc02b29c813..7d76c417f83f5 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -201,6 +201,17 @@ static int cmp_long_insn(const void *a, const void *b) return strcmp(((struct insn *)a)->name, ((struct insn *)b)->name); } +static void print_insn_name(const char *name) +{ + size_t i, len; + + len = strlen(name); + printf("{"); + for (i = 0; i < len; i++) + printf(" \'%c\',", name[i]); + printf(" }"); +} + static void print_long_insn(struct gen_opcode *desc) { struct insn *insn; @@ -223,7 +234,9 @@ static void print_long_insn(struct gen_opcode *desc) insn = &desc->insn[i]; if (insn->name_len < 6) continue; - printf("\t[LONG_INSN_%s] = \"%s\", \\\n", insn->upper, insn->name); + printf("\t[LONG_INSN_%s] = ", insn->upper); + print_insn_name(insn->name); + printf(", \\\n"); } printf("}\n\n"); } @@ -236,11 +249,13 @@ static void print_opcode(struct insn *insn, int nr) if (insn->type->byte != 0) opcode += 2; printf("\t[%4d] = { .opfrag = 0x%s, .format = INSTR_%s, ", nr, opcode, insn->format); - if (insn->name_len < 6) - printf(".name = \"%s\" ", insn->name); - else - printf(".offset = LONG_INSN_%s ", insn->upper); - printf("}, \\\n"); + if (insn->name_len < 6) { + printf(".name = "); + print_insn_name(insn->name); + } else { + printf(".offset = LONG_INSN_%s", insn->upper); + } + printf(" }, \\\n"); } static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) From 3bcc8a1af581af152d43e42b53db3534018301b5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Jan 2025 11:53:42 +0100 Subject: [PATCH 51/53] s390/sclp: Initialize sclp subsystem via arch_cpu_finalize_init() With the switch to GENERIC_CPU_DEVICES an early call to the sclp subsystem was added to smp_prepare_cpus(). This will usually succeed since the sclp subsystem is implicitly initialized early enough if an sclp based console is present. If no such console is present the initialization happens with an arch_initcall(); in such cases calls to the sclp subsystem will fail. For CPU detection this means that the fallback sigp loop will be used permanently to detect CPUs instead of the preferred READ_CPU_INFO sclp request. Fix this by adding an explicit early sclp_init() call via arch_cpu_finalize_init(). Reported-by: Sheshu Ramanandan Fixes: 4a39f12e753d ("s390/smp: Switch to GENERIC_CPU_DEVICES") Reviewed-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 1 + arch/s390/include/asm/sclp.h | 1 + arch/s390/kernel/setup.c | 5 +++++ drivers/s390/char/sclp.c | 12 ++---------- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4e9bf698dc4d5..f49ca2b485f6f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -78,6 +78,7 @@ config S390 select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 4da3b29562854..18f37dff03c99 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -172,6 +172,7 @@ void sclp_early_printk(const char *s); void __sclp_early_printk(const char *s, unsigned int len); void sclp_emergency_printk(const char *s); +int sclp_init(void); int sclp_early_get_memsize(unsigned long *mem); int sclp_early_get_hsa_size(unsigned long *hsa_size); int _sclp_get_core_info(struct sclp_core_info *info); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f6765ea4f8281..4758ad8829a28 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1025,3 +1025,8 @@ void __init setup_arch(char **cmdline_p) /* Add system specific data to the random pool */ setup_randomness(); } + +void __init arch_cpu_finalize_init(void) +{ + sclp_init(); +} diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index fbffd451031fd..45bd001206a2b 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -245,7 +245,6 @@ static void sclp_request_timeout(bool force_restart); static void sclp_process_queue(void); static void __sclp_make_read_req(void); static int sclp_init_mask(int calculate); -static int sclp_init(void); static void __sclp_queue_read_req(void) @@ -1251,8 +1250,7 @@ static struct platform_driver sclp_pdrv = { /* Initialize SCLP driver. Return zero if driver is operational, non-zero * otherwise. */ -static int -sclp_init(void) +int sclp_init(void) { unsigned long flags; int rc = 0; @@ -1305,13 +1303,7 @@ sclp_init(void) static __init int sclp_initcall(void) { - int rc; - - rc = platform_driver_register(&sclp_pdrv); - if (rc) - return rc; - - return sclp_init(); + return platform_driver_register(&sclp_pdrv); } arch_initcall(sclp_initcall); From 76bda8a16d77063cffea9fc4e3ead033afba9d6d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jan 2025 14:51:53 +0100 Subject: [PATCH 52/53] s390/vmlogrdr: Use internal_name for error messages Use the internal_name member of vmlogrdr_priv_t to print error messages instead of the system_service member. The system_service member is not a string, but a non-null terminated eight byte character array, which contains the ASCII representation of a z/VM system service. Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- drivers/s390/char/vmlogrdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 3dd50ac9c5b0d..374f06c77a864 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -356,7 +356,7 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp) if (connect_rc) { pr_err("vmlogrdr: iucv connection to %s " "failed with rc %i \n", - logptr->system_service, connect_rc); + logptr->internal_name, connect_rc); goto out_path; } From 87f017d0792befa83722b99cb21f9c8f574cd71d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jan 2025 14:51:54 +0100 Subject: [PATCH 53/53] s390/vmlogrdr: Use array instead of string initializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling vmlogrdr with GCC 15 generates this warning: CC [M] drivers/s390/char/vmlogrdr.o drivers/s390/char/vmlogrdr.c:126:29: error: initializer-string for array of ‘char’ is too long [-Werror=unterminated-string-initialization] 126 | { .system_service = "*LOGREC ", Given that the system_service array intentionally contains a non-null terminated string use an array initializer, instead of string initializer to get rid of this warning. Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- drivers/s390/char/vmlogrdr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 374f06c77a864..b2d93a6e36c41 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -123,7 +123,7 @@ static DECLARE_WAIT_QUEUE_HEAD(read_wait_queue); */ static struct vmlogrdr_priv_t sys_ser[] = { - { .system_service = "*LOGREC ", + { .system_service = { '*', 'L', 'O', 'G', 'R', 'E', 'C', ' ' }, .internal_name = "logrec", .recording_name = "EREP", .minor_num = 0, @@ -132,7 +132,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .autorecording = 1, .autopurge = 1, }, - { .system_service = "*ACCOUNT", + { .system_service = { '*', 'A', 'C', 'C', 'O', 'U', 'N', 'T' }, .internal_name = "account", .recording_name = "ACCOUNT", .minor_num = 1, @@ -141,7 +141,7 @@ static struct vmlogrdr_priv_t sys_ser[] = { .autorecording = 1, .autopurge = 1, }, - { .system_service = "*SYMPTOM", + { .system_service = { '*', 'S', 'Y', 'M', 'P', 'T', 'O', 'M' }, .internal_name = "symptom", .recording_name = "SYMPTOM", .minor_num = 2,