From b064904c509decf9e038f29f903a2304851a913b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 11:16:51 +0200 Subject: [PATCH 01/85] s390/checksum: provide csum_ipv6_magic() This implementation needs only ~30% of the time to calculate the checksum compared to the generic variant. In addition the compiler also generates only ~30% of the instructions compared to the generic variant (on z14, compiled with march=z196). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 6d01c96aeb5c4..f4b42db5d007d 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -13,6 +13,7 @@ #define _S390_CHECKSUM_H #include +#include /* * computes the checksum of a memory block at buff, length len, @@ -115,6 +116,25 @@ static inline __sum16 ip_compute_csum(const void *buff, int len) return csum_fold(csum_partial(buff, len, 0)); } -#endif /* _S390_CHECKSUM_H */ - +#define _HAVE_ARCH_IPV6_CSUM +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + __u64 sum = (__force __u64)csum; + + sum += (__force __u32)saddr->s6_addr32[0]; + sum += (__force __u32)saddr->s6_addr32[1]; + sum += (__force __u32)saddr->s6_addr32[2]; + sum += (__force __u32)saddr->s6_addr32[3]; + sum += (__force __u32)daddr->s6_addr32[0]; + sum += (__force __u32)daddr->s6_addr32[1]; + sum += (__force __u32)daddr->s6_addr32[2]; + sum += (__force __u32)daddr->s6_addr32[3]; + sum += len; + sum += proto; + sum += (sum >> 32) | (sum << 32); + return csum_fold((__force __wsum)(sum >> 32)); +} +#endif /* _S390_CHECKSUM_H */ From bb4644b14accb05663847277002e3efa9fa3cd3b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 15:30:30 +0200 Subject: [PATCH 02/85] s390/checksum: rewrite csum_tcpudp_nofold() Rewrite csum_tcpudp_nofold() so that the generated code will not contain branches. The old implementation was also optimized for machines which came with "add logical with carry" instructions, however the compiler doesn't generate them anymore. This is most likely because those instructions are slower. However with the old code the compiler generates a lot of branches, which isn't too helpful usually. Therefore rewrite the code. In a tight loop this doesn't make any difference since the branch prediction unit does its job. 
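For reference, the branch-free fold used here and in csum_ipv6_magic() above (accumulate into 64 bits, then add the swapped halves and take the upper half) computes the same end-around carry as the old compare-and-increment sequence. A minimal user-space sketch, illustrative only and not part of this patch; the helper names are made up:

#include <stdint.h>

/* branch-free variant, as used in the patch */
static uint32_t fold64(uint64_t sum)
{
	sum += (sum >> 32) | (sum << 32);	/* add both halves; the carry ends up in the upper half */
	return sum >> 32;
}

/* old-style variant with an explicit end-around carry */
static uint32_t fold64_carry(uint64_t sum)
{
	uint32_t lo = sum, hi = sum >> 32;

	lo += hi;
	if (lo < hi)		/* 32-bit overflow, wrap the carry around */
		lo++;
	return lo;
}

Both helpers return the same 32-bit partial sum for every 64-bit input, which is why the conditional "csum++" statements can simply be dropped.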
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index f4b42db5d007d..961c25c5124b6 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -73,25 +73,17 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) * computes the checksum of the TCP/UDP pseudo-header * returns a 32-bit checksum */ -static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, - __wsum sum) +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { - __u32 csum = (__force __u32)sum; + __u64 csum = (__force __u64)sum; csum += (__force __u32)saddr; - if (csum < (__force __u32)saddr) - csum++; - csum += (__force __u32)daddr; - if (csum < (__force __u32)daddr) - csum++; - - csum += len + proto; - if (csum < len + proto) - csum++; - - return (__force __wsum)csum; + csum += len; + csum += proto; + csum += (csum >> 32) | (csum << 32); + return (__force __wsum)(csum >> 32); } /* From 614b4f5d0fa3f622cfcc899491d8a3e6af3d4dc5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 16:36:26 +0200 Subject: [PATCH 03/85] s390/checksum: make ip_fast_csum() faster Convert ip_fast_csum() so it doesn't call csum_partial(), but instead open code the checksum calculation. The problem with csum_partial() is that it makes use of the cksm instruction, which has high startup costs and therefore is only very fast if used on larger memory regions. IPv4 headers however are small in size (5-16 32-bit words). The open coded variant calculates the checksum in ~30% of the time compared to the old variant (z14, march=z196). Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 961c25c5124b6..8bc6bed4715b7 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -66,7 +66,18 @@ static inline __sum16 csum_fold(__wsum sum) */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { - return csum_fold(csum_partial(iph, ihl*4, 0)); + __u64 csum = 0; + __u32 *ptr = (u32 *)iph; + + csum += *ptr++; + csum += *ptr++; + csum += *ptr++; + csum += *ptr++; + ihl -= 4; + while (ihl--) + csum += *ptr++; + csum += (csum >> 32) | (csum << 32); + return csum_fold((__force __wsum)(csum >> 32)); } /* From 612ad0785dd5161dc311b10bd26038553a378386 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 16:37:33 +0200 Subject: [PATCH 04/85] s390/checksum: have consistent calculations Use "|" instead of "+" within csum_fold() for consistency reasons, like in the rest of the file. 
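The substitution is safe because (csum >> 16) and (csum << 16) occupy disjoint bit positions of a 32-bit value, so OR and ADD produce the same rotate-by-16 result and no carry can occur. A small illustrative check, not part of this patch:

#include <assert.h>
#include <stdint.h>

static void rotate_forms_agree(uint32_t csum)
{
	uint32_t by_or  = (csum >> 16) | (csum << 16);
	uint32_t by_add = (csum >> 16) + (csum << 16);

	/* upper and lower halves never overlap, so this always holds */
	assert(by_or == by_add);
}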
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 8bc6bed4715b7..de97ae6910605 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -54,7 +54,7 @@ static inline __sum16 csum_fold(__wsum sum) { u32 csum = (__force u32) sum; - csum += (csum >> 16) + (csum << 16); + csum += (csum >> 16) | (csum << 16); csum >>= 16; return (__force __sum16) ~csum; } From 98ad45fb58c14ebef6da27f91905e5b8fcff8686 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Aug 2020 16:41:27 +0200 Subject: [PATCH 05/85] s390/checksum: coding style changes Add some coding style changes which hopefully make the code look a bit less odd. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/checksum.h | 50 ++++++++++++++------------------ 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index de97ae6910605..c401a5fd3ad2d 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -16,19 +16,18 @@ #include /* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit). * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic + * Returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic. * - * this function must be called with even lengths, except - * for the last fragment, which may be odd + * This function must be called with even lengths, except + * for the last fragment, which may be odd. * - * it's best to have buff aligned on a 32-bit boundary + * It's best to have buff aligned on a 32-bit boundary. */ -static inline __wsum -csum_partial(const void *buff, int len, __wsum sum) +static inline __wsum csum_partial(const void *buff, int len, __wsum sum) { register unsigned long reg2 asm("2") = (unsigned long) buff; register unsigned long reg3 asm("3") = (unsigned long) len; @@ -40,15 +39,15 @@ csum_partial(const void *buff, int len, __wsum sum) return sum; } -static inline __wsum -csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) +static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) { - memcpy(dst,src,len); + memcpy(dst, src, len); return csum_partial(dst, len, sum); } /* - * Fold a partial checksum without adding pseudo headers + * Fold a partial checksum without adding pseudo headers. */ static inline __sum16 csum_fold(__wsum sum) { @@ -60,9 +59,8 @@ static inline __sum16 csum_fold(__wsum sum) } /* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. - * + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksums on 4 octet boundaries. */ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { @@ -81,8 +79,8 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } /* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 32-bit checksum + * Computes the checksum of the TCP/UDP pseudo-header. + * Returns a 32-bit checksum. 
*/ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) @@ -98,22 +96,18 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, } /* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented + * Computes the checksum of the TCP/UDP pseudo-header. + * Returns a 16-bit checksum, already complemented. */ - -static inline __sum16 -csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, - __wsum sum) +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } /* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c + * Used for miscellaneous IP-like checksums, mainly icmp. */ - static inline __sum16 ip_compute_csum(const void *buff, int len) { return csum_fold(csum_partial(buff, len, 0)); From 4bff8cb5450287e246d365b719148b2d9364c292 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 28 Apr 2020 09:52:23 +0200 Subject: [PATCH 06/85] s390: convert to GENERIC_VDSO Convert s390 to generic vDSO. There are a few special things on s390: - vDSO can be called without a stack frame - glibc did this in the past. So we need to allocate a stackframe on our own. - The former assembly code used stcke to get the TOD clock and applied time steering to it. We need to do the same in the new code. This is done in the architecture specific __arch_get_hw_counter function. The steering information is stored in an architecure specific area in the vDSO data. - CPUCLOCK_VIRT is now handled with a syscall fallback, which might be slower/less accurate than the old implementation. The getcpu() function stays as an assembly function because there is no generic implementation and the code is just a few lines. Performance number from my system do 100 mio gettimeofday() calls: Plain syscall: 8.6s Generic VDSO: 1.3s old ASM VDSO: 1s So it's a bit slower but still much faster than syscalls. 
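The numbers above come from a loop issuing 100 million gettimeofday() calls; a rough user-space harness along these lines is enough to reproduce that kind of measurement (illustrative only, the exact test program is not part of this patch):

#include <stdio.h>
#include <sys/time.h>

int main(void)
{
	struct timeval tv;
	long i;

	for (i = 0; i < 100000000L; i++)
		gettimeofday(&tv, NULL);	/* served by the vDSO when it is mapped */
	return 0;
}

Timing the program (for example with the shell's time builtin) on kernels with the old assembly vDSO, the generic vDSO and with the vDSO disabled gives the three cases compared above.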
Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 3 + arch/s390/include/asm/clocksource.h | 7 + arch/s390/include/asm/vdso.h | 25 +-- arch/s390/include/asm/vdso/clocksource.h | 8 + arch/s390/include/asm/vdso/data.h | 13 ++ arch/s390/include/asm/vdso/gettimeofday.h | 71 +++++++++ arch/s390/include/asm/vdso/processor.h | 7 + arch/s390/include/asm/vdso/vdso.h | 0 arch/s390/include/asm/vdso/vsyscall.h | 26 ++++ arch/s390/kernel/asm-offsets.c | 20 --- arch/s390/kernel/entry.S | 6 - arch/s390/kernel/setup.c | 1 - arch/s390/kernel/time.c | 66 ++------ arch/s390/kernel/vdso.c | 29 +--- arch/s390/kernel/vdso64/Makefile | 19 ++- arch/s390/kernel/vdso64/clock_getres.S | 50 ------ arch/s390/kernel/vdso64/clock_gettime.S | 163 -------------------- arch/s390/kernel/vdso64/gettimeofday.S | 71 --------- arch/s390/kernel/vdso64/vdso64_generic.c | 18 +++ arch/s390/kernel/vdso64/vdso_user_wrapper.S | 38 +++++ 20 files changed, 221 insertions(+), 420 deletions(-) create mode 100644 arch/s390/include/asm/clocksource.h create mode 100644 arch/s390/include/asm/vdso/clocksource.h create mode 100644 arch/s390/include/asm/vdso/data.h create mode 100644 arch/s390/include/asm/vdso/gettimeofday.h create mode 100644 arch/s390/include/asm/vdso/processor.h create mode 100644 arch/s390/include/asm/vdso/vdso.h create mode 100644 arch/s390/include/asm/vdso/vsyscall.h delete mode 100644 arch/s390/kernel/vdso64/clock_getres.S delete mode 100644 arch/s390/kernel/vdso64/clock_gettime.S delete mode 100644 arch/s390/kernel/vdso64/gettimeofday.S create mode 100644 arch/s390/kernel/vdso64/vdso64_generic.c create mode 100644 arch/s390/kernel/vdso64/vdso_user_wrapper.S diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3d86e12e8e3c2..8a6121f937092 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -73,6 +73,7 @@ config S390 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_VDSO_DATA select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH @@ -118,6 +119,7 @@ config S390 select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT + select GENERIC_GETTIMEOFDAY select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HAVE_ALIGNED_STRUCT_PAGE if SLUB @@ -149,6 +151,7 @@ config S390 select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS + select HAVE_GENERIC_VDSO select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/arch/s390/include/asm/clocksource.h b/arch/s390/include/asm/clocksource.h new file mode 100644 index 0000000000000..03434369fce48 --- /dev/null +++ b/arch/s390/include/asm/clocksource.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* s390-specific clocksource additions */ + +#ifndef _ASM_S390_CLOCKSOURCE_H +#define _ASM_S390_CLOCKSOURCE_H + +#endif /* _ASM_S390_CLOCKSOURCE_H */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 0cd085cdeb4f2..82f86b3c394b9 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -2,6 +2,8 @@ #ifndef __S390_VDSO_H__ #define __S390_VDSO_H__ +#include + /* Default link addresses for the vDSOs */ #define VDSO32_LBASE 0 #define VDSO64_LBASE 0 @@ -18,30 +20,7 @@ * itself and may change without notice. 
*/ -struct vdso_data { - __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */ - __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ - __u64 xtime_clock_sec; /* Kernel time 0x10 */ - __u64 xtime_clock_nsec; /* 0x18 */ - __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */ - __u64 xtime_coarse_nsec; /* 0x28 */ - __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */ - __u64 wtom_clock_nsec; /* 0x38 */ - __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */ - __u64 wtom_coarse_nsec; /* 0x48 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */ - __u32 tz_dsttime; /* Type of dst correction 0x54 */ - __u32 ectg_available; /* ECTG instruction present 0x58 */ - __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */ - __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ - __u32 ts_dir; /* TOD steering direction 0x64 */ - __u64 ts_end; /* TOD steering end 0x68 */ - __u32 hrtimer_res; /* hrtimer resolution 0x70 */ -}; - struct vdso_per_cpu_data { - __u64 ectg_timer_base; - __u64 ectg_user_time; /* * Note: node_id and cpu_nr must be at adjacent memory locations. * VDSO userspace must read both values with a single instruction. diff --git a/arch/s390/include/asm/vdso/clocksource.h b/arch/s390/include/asm/vdso/clocksource.h new file mode 100644 index 0000000000000..a93eda0ce7bb4 --- /dev/null +++ b/arch/s390/include/asm/vdso/clocksource.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_CLOCKSOURCE_H +#define __ASM_VDSO_CLOCKSOURCE_H + +#define VDSO_ARCH_CLOCKMODES \ + VDSO_CLOCKMODE_TOD + +#endif /* __ASM_VDSO_CLOCKSOURCE_H */ diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h new file mode 100644 index 0000000000000..7b3cdb4a5f481 --- /dev/null +++ b/arch/s390/include/asm/vdso/data.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __S390_ASM_VDSO_DATA_H +#define __S390_ASM_VDSO_DATA_H + +#include +#include + +struct arch_vdso_data { + __u64 tod_steering_delta; + __u64 tod_steering_end; +}; + +#endif /* __S390_ASM_VDSO_DATA_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h new file mode 100644 index 0000000000000..bf123065ad3b8 --- /dev/null +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_VDSO_GETTIMEOFDAY_H +#define ASM_VDSO_GETTIMEOFDAY_H + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +#include +#include +#include +#include + +#define vdso_calc_delta __arch_vdso_calc_delta +static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + return (cycles - last) * mult; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return _vdso_data; +} + +static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) +{ + const struct vdso_data *vdso = __arch_get_vdso_data(); + u64 adj, now; + + now = get_tod_clock(); + adj = vdso->arch_data.tod_steering_end - now; + if (unlikely((s64) adj > 0)) + now += (vdso->arch_data.tod_steering_delta < 0) ? 
(adj >> 15) : -(adj >> 15); + return now; +} + +static __always_inline +long clock_gettime_fallback(clockid_t clkid, struct __kernel_timespec *ts) +{ + register unsigned long r1 __asm__("r1") = __NR_clock_gettime; + register unsigned long r2 __asm__("r2") = (unsigned long)clkid; + register void *r3 __asm__("r3") = ts; + + asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); + return r2; +} + +static __always_inline +long gettimeofday_fallback(register struct __kernel_old_timeval *tv, + register struct timezone *tz) +{ + register unsigned long r1 __asm__("r1") = __NR_gettimeofday; + register unsigned long r2 __asm__("r2") = (unsigned long)tv; + register void *r3 __asm__("r3") = tz; + + asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); + return r2; +} + +static __always_inline +long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) +{ + register unsigned long r1 __asm__("r1") = __NR_clock_getres; + register unsigned long r2 __asm__("r2") = (unsigned long)clkid; + register void *r3 __asm__("r3") = ts; + + asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory"); + return r2; +} + +#endif diff --git a/arch/s390/include/asm/vdso/processor.h b/arch/s390/include/asm/vdso/processor.h new file mode 100644 index 0000000000000..cfcc3e117c4c9 --- /dev/null +++ b/arch/s390/include/asm/vdso/processor.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_VDSO_PROCESSOR_H +#define __ASM_VDSO_PROCESSOR_H + +#define cpu_relax() barrier() + +#endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/s390/include/asm/vdso/vdso.h b/arch/s390/include/asm/vdso/vdso.h new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h new file mode 100644 index 0000000000000..6c67c08cefdd6 --- /dev/null +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. 
+ */ + +static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) +{ + return vdso_data; +} +#define __arch_get_k_vdso_data __s390_get_k_vdso_data + +/* The asm-generic header needs to be included after the definitions above */ +#include + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 5d8cc1864566d..ece58f2217cbe 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -59,26 +59,6 @@ int main(void) OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]); OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]); BLANK(); - /* timeval/timezone offsets for use by vdso */ - OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count); - OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp); - OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec); - OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec); - OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec); - OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec); - OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec); - OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec); - OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec); - OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec); - OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest); - OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available); - OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult); - OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); - OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); - OFFSET(__VDSO_TS_END, vdso_data, ts_end); - OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); - OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); - OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); BLANK(); /* constants used by the vdso */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 23edf196d3dcf..86235919c2d15 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -435,10 +435,8 @@ ENTRY(system_call) jz .Lsysc_skip_fpu brasl %r14,load_fpu_regs .Lsysc_skip_fpu: - lg %r14,__LC_VDSO_PER_CPU mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE @@ -797,13 +795,11 @@ ENTRY(io_int_handler) TRACE_IRQS_ON 0: #endif - lg %r14,__LC_VDSO_PER_CPU mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER .Lio_exit_kernel: lmg %r0,%r15,__PT_R0(%r11) b __LC_RETURN_LPSWE @@ -1213,14 +1209,12 @@ ENTRY(mcck_int_handler) brasl %r14,s390_handle_mcck TRACE_IRQS_ON .Lmcck_return: - lg %r14,__LC_VDSO_PER_CPU lmg %r0,%r10,__PT_R0(%r11) mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? 
jno 0f BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP stpt __LC_EXIT_TIMER - mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) b __LC_RETURN_MCCK_LPSWE diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e600f6953d7ce..dfa45027cb473 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -402,7 +402,6 @@ static void __init setup_lowcore_dat_off(void) memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list, sizeof(lc->alt_stfle_fac_list)); nmi_alloc_boot_cpu(lc); - vdso_alloc_boot_cpu(lc); lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; lc->exit_timer = S390_lowcore.exit_timer; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 513e59d08a55c..bc806e1547d6a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -41,6 +41,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -84,7 +87,7 @@ void __init time_early_init(void) /* Initialize TOD steering parameters */ tod_steering_end = *(unsigned long long *) &tod_clock_base[1]; - vdso_data->ts_end = tod_steering_end; + vdso_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -257,6 +260,7 @@ static struct clocksource clocksource_tod = { .mult = 1000, .shift = 12, .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .vdso_clock_mode = VDSO_CLOCKMODE_TOD, }; struct clocksource * __init clocksource_default_clock(void) @@ -264,56 +268,6 @@ struct clocksource * __init clocksource_default_clock(void) return &clocksource_tod; } -void update_vsyscall(struct timekeeper *tk) -{ - u64 nsecps; - - if (tk->tkr_mono.clock != &clocksource_tod) - return; - - /* Make userspace gettimeofday spin until we're done. */ - ++vdso_data->tb_update_count; - smp_wmb(); - vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last; - vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->wtom_clock_sec = - tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec + - + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); - nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift; - while (vdso_data->wtom_clock_nsec >= nsecps) { - vdso_data->wtom_clock_nsec -= nsecps; - vdso_data->wtom_clock_sec++; - } - - vdso_data->xtime_coarse_sec = tk->xtime_sec; - vdso_data->xtime_coarse_nsec = - (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); - vdso_data->wtom_coarse_sec = - vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_coarse_nsec = - vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec; - while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) { - vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC; - vdso_data->wtom_coarse_sec++; - } - - vdso_data->tk_mult = tk->tkr_mono.mult; - vdso_data->tk_shift = tk->tkr_mono.shift; - vdso_data->hrtimer_res = hrtimer_resolution; - smp_wmb(); - ++vdso_data->tb_update_count; -} - -extern struct timezone sys_tz; - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; -} - /* * Initialize the TOD clock and the CPU timer of * the boot cpu. @@ -431,7 +385,6 @@ static void clock_sync_global(unsigned long long delta) /* Epoch overflow */ tod_clock_base[0]++; /* Adjust TOD steering parameters. 
*/ - vdso_data->tb_update_count++; now = get_tod_clock(); adj = tod_steering_end - now; if (unlikely((s64) adj >= 0)) @@ -443,9 +396,8 @@ static void clock_sync_global(unsigned long long delta) panic("TOD clock sync offset %lli is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - vdso_data->ts_dir = (tod_steering_delta < 0) ? 0 : 1; - vdso_data->ts_end = tod_steering_end; - vdso_data->tb_update_count++; + vdso_data->arch_data.tod_steering_end = tod_steering_end; + /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) lpar_offset = qto.tod_epoch_difference; @@ -586,7 +538,7 @@ void stp_queue_work(void) static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; - unsigned long long clock_delta; + unsigned long long clock_delta, flags; static int first; int rc; @@ -599,6 +551,7 @@ static int stp_sync_clock(void *data) if (stp_info.todoff[0] || stp_info.todoff[1] || stp_info.todoff[2] || stp_info.todoff[3] || stp_info.tmd != 2) { + flags = vdso_update_begin(); rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta); if (rc == 0) { @@ -609,6 +562,7 @@ static int stp_sync_clock(void *data) if (rc == 0 && stp_info.tmd != 2) rc = -EAGAIN; } + vdso_update_end(flags); } sync->in_sync = rc ? -EAGAIN : 1; xchg(&first, 0); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index c4baefaa6e34f..f9da5b1491418 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include #include @@ -96,35 +98,12 @@ static union { struct vdso_data data; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; - -/* - * Setup vdso data page. - */ -static void __init vdso_init_data(struct vdso_data *vd) -{ - vd->ectg_available = test_facility(31); -} - +struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data; /* * Allocate/free per cpu vdso data. */ #define SEGMENT_ORDER 2 -/* - * The initial vdso_data structure for the boot CPU. Eventually - * it is replaced with a properly allocated structure in vdso_init. - * This is necessary because a valid S390_lowcore.vdso_per_cpu_data - * pointer is required to be able to return from an interrupt or - * program check. See the exit paths in entry.S. 
- */ -struct vdso_data boot_vdso_data __initdata; - -void __init vdso_alloc_boot_cpu(struct lowcore *lowcore) -{ - lowcore->vdso_per_cpu_data = (unsigned long) &boot_vdso_data; -} - int vdso_alloc_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; @@ -246,8 +225,6 @@ static int __init vdso_init(void) { int i; - vdso_init_data(vdso_data); - /* Calculate the size of the 64 bit vDSO */ vdso64_pages = ((&vdso64_end - &vdso64_start + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 4a66a1cb919b1..f1c0570780d15 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -1,14 +1,20 @@ # SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso, has to be asm only for now +# List of files in the vdso KCOV_INSTRUMENT := n +ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE +ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT -obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o +include $(srctree)/lib/vdso/Makefile +obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o +obj-cvdso64 = vdso64_generic.o +CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) # Build rules -targets := $(obj-vdso64) vdso64.so vdso64.so.dbg +targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) +obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64)) KBUILD_AFLAGS += -DBUILD_VDSO KBUILD_CFLAGS += -DBUILD_VDSO @@ -37,7 +43,7 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE $(call if_changed,ld) # strip rule for the .so file @@ -49,9 +55,14 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) +$(obj-cvdso64): %.o: %.c FORCE + $(call if_changed_dep,vdso64cc) + # actual build commands quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso64cc = VDSO64C $@ + cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $< # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S deleted file mode 100644 index 0c79caa32b592..0000000000000 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of clock_getres() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 
2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_clock_getres - .type __kernel_clock_getres,@function -__kernel_clock_getres: - CFI_STARTPROC - larl %r1,3f - lg %r0,0(%r1) - cghi %r2,__CLOCK_REALTIME_COARSE - je 0f - cghi %r2,__CLOCK_MONOTONIC_COARSE - je 0f - larl %r1,_vdso_data - llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) - cghi %r2,__CLOCK_REALTIME - je 0f - cghi %r2,__CLOCK_MONOTONIC - je 0f - cghi %r2,__CLOCK_THREAD_CPUTIME_ID - je 0f - cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ - jne 2f - larl %r5,_vdso_data - icm %r0,15,__LC_ECTG_OK(%r5) - jz 2f -0: ltgr %r3,%r3 - jz 1f /* res == NULL */ - xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ - stg %r0,8(%r3) /* store tp->tv_usec */ -1: lghi %r2,0 - br %r14 -2: lghi %r1,__NR_clock_getres /* fallback to svc */ - svc 0 - br %r14 - CFI_ENDPROC -3: .quad __CLOCK_COARSE_RES - .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S deleted file mode 100644 index 9d2ee79b90f25..0000000000000 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ /dev/null @@ -1,163 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of clock_gettime() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_clock_gettime - .type __kernel_clock_gettime,@function -__kernel_clock_gettime: - CFI_STARTPROC - aghi %r15,-16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD - larl %r5,_vdso_data - cghi %r2,__CLOCK_REALTIME_COARSE - je 4f - cghi %r2,__CLOCK_REALTIME - je 5f - cghi %r2,-3 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ - je 9f - cghi %r2,__CLOCK_MONOTONIC_COARSE - je 3f - cghi %r2,__CLOCK_MONOTONIC - jne 12f - - /* CLOCK_MONOTONIC */ -0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 0b - stcke 0(%r15) /* Store TOD clock */ - lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - lg %r0,__VDSO_WTOM_SEC(%r5) - lg %r1,1(%r15) - sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ - alg %r1,__VDSO_WTOM_NSEC(%r5) - srlg %r1,%r1,0(%r2) /* >> tk->shift */ - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 0b - larl %r5,13f -1: clg %r1,0(%r5) - jl 2f - slg %r1,0(%r5) - aghi %r0,1 - j 1b -2: stg %r0,0(%r3) /* store tp->tv_sec */ - stg %r1,8(%r3) /* store tp->tv_nsec */ - lghi %r2,0 - aghi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* CLOCK_MONOTONIC_COARSE */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 3b - lg %r0,__VDSO_WTOM_CRS_SEC(%r5) - lg %r1,__VDSO_WTOM_CRS_NSEC(%r5) - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 3b - j 2b - - /* CLOCK_REALTIME_COARSE */ -4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? 
loop */ - jnz 4b - lg %r0,__VDSO_XTIME_CRS_SEC(%r5) - lg %r1,__VDSO_XTIME_CRS_NSEC(%r5) - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 4b - j 7f - - /* CLOCK_REALTIME */ -5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? loop */ - jnz 5b - stcke 0(%r15) /* Store TOD clock */ - lg %r1,1(%r15) - lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */ - slgr %r0,%r1 /* now - ts_steering_end */ - ltgr %r0,%r0 /* past end of steering ? */ - jm 17f - srlg %r0,%r0,15 /* 1 per 2^16 */ - tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */ - jz 18f - lcgr %r0,%r0 /* negative TOD offset */ -18: algr %r1,%r0 /* add steering offset */ -17: lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ - srlg %r1,%r1,0(%r2) /* >> tk->shift */ - lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 5b - larl %r5,13f -6: clg %r1,0(%r5) - jl 7f - slg %r1,0(%r5) - aghi %r0,1 - j 6b -7: stg %r0,0(%r3) /* store tp->tv_sec */ - stg %r1,8(%r3) /* store tp->tv_nsec */ - lghi %r2,0 - aghi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* CPUCLOCK_VIRT for this thread */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -9: lghi %r4,0 - icm %r0,15,__VDSO_ECTG_OK(%r5) - jz 12f - sacf 256 /* Magic ectg instruction */ - .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 - sacf 0 - algr %r1,%r0 /* r1 = cputime as TOD value */ - mghi %r1,1000 /* convert to nanoseconds */ - srlg %r1,%r1,12 /* r1 = cputime in nanosec */ - lgr %r4,%r1 - larl %r5,13f - srlg %r1,%r1,9 /* divide by 1000000000 */ - mlg %r0,8(%r5) - srlg %r0,%r0,11 /* r0 = tv_sec */ - stg %r0,0(%r3) - msg %r0,0(%r5) /* calculate tv_nsec */ - slgr %r4,%r0 /* r4 = tv_nsec */ - stg %r4,8(%r3) - lghi %r2,0 - aghi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* Fallback to system call */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -12: lghi %r1,__NR_clock_gettime - svc 0 - aghi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - CFI_ENDPROC - -13: .quad 1000000000 -14: .quad 19342813113834067 - .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S deleted file mode 100644 index aebe10dc7c99a..0000000000000 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of gettimeofday() for 64 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include -#include -#include -#include -#include - - .text - .align 4 - .globl __kernel_gettimeofday - .type __kernel_gettimeofday,@function -__kernel_gettimeofday: - CFI_STARTPROC - aghi %r15,-16 - CFI_ADJUST_CFA_OFFSET 16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD - larl %r5,_vdso_data -0: ltgr %r3,%r3 /* check if tz is NULL */ - je 1f - mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) -1: ltgr %r2,%r2 /* check if tv is NULL */ - je 4f - lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ - tmll %r4,0x0001 /* pending update ? 
loop */ - jnz 0b - stcke 0(%r15) /* Store TOD clock */ - lg %r1,1(%r15) - lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */ - slgr %r0,%r1 /* now - ts_steering_end */ - ltgr %r0,%r0 /* past end of steering ? */ - jm 6f - srlg %r0,%r0,15 /* 1 per 2^16 */ - tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */ - jz 7f - lcgr %r0,%r0 /* negative TOD offset */ -7: algr %r1,%r0 /* add steering offset */ -6: sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ - alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ - lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */ - clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ - jne 0b - lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - srlg %r1,%r1,0(%r5) /* >> tk->shift */ - larl %r5,5f -2: clg %r1,0(%r5) - jl 3f - slg %r1,0(%r5) - aghi %r0,1 - j 2b -3: stg %r0,0(%r2) /* store tv->tv_sec */ - slgr %r0,%r0 /* tv_nsec -> tv_usec */ - ml %r0,8(%r5) - srlg %r0,%r0,6 - stg %r0,8(%r2) /* store tv->tv_usec */ -4: lghi %r2,0 - aghi %r15,16 - CFI_ADJUST_CFA_OFFSET -16 - CFI_RESTORE 15 - br %r14 - CFI_ENDPROC -5: .quad 1000000000 - .long 274877907 - .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c new file mode 100644 index 0000000000000..a8cef7e4d137b --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_generic.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/vdso/gettimeofday.c" + +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_getres(clock, ts); +} diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S new file mode 100644 index 0000000000000..a775d7e528728 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include + +#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8) + +/* + * Older glibc version called vdso without allocating a stackframe. This wrapper + * is just used to allocate a stackframe. See + * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e + * for details. + */ +.macro vdso_func func + .globl __kernel_\func + .type __kernel_\func,@function + .align 8 +__kernel_\func: + CFI_STARTPROC + aghi %r15,-WRAPPER_FRAME_SIZE + CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE) + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD + stg %r14,STACK_FRAME_OVERHEAD(%r15) + brasl %r14,__s390_vdso_\func + lg %r14,STACK_FRAME_OVERHEAD(%r15) + aghi %r15,WRAPPER_FRAME_SIZE + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD + CFI_RESTORE 15 + br %r14 + CFI_ENDPROC + .size __kernel_\func,.-__kernel_\func +.endm + +vdso_func gettimeofday +vdso_func clock_getres +vdso_func clock_gettime From 4bf3ec384edf0bf893ec7bd62ccebb635b02efd9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 7 Sep 2020 17:45:37 +0200 Subject: [PATCH 07/85] s390: disable branch profiling for vdso When branch profiling is enabled, if () gets annotated with code to instrument the hit/miss ratio. This doesn't work for VDSO as we can't access kernel code. Add -DDISABLE_BRANCH_PROFILING to fix this. 
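Conceptually, branch profiling wraps every if () condition so that a hit/miss counter in kernel data is updated on each evaluation; the vDSO runs in user space and cannot touch that data, which is why the instrumentation has to be compiled out. A simplified sketch of the idea (not the actual kernel macros):

struct branch_stats {
	unsigned long hit;
	unsigned long miss;	/* lives in kernel data, unreachable from vDSO code */
};

#define profiled_cond(cond, stats)		\
({						\
	int _c = !!(cond);			\
	if (_c)					\
		(stats)->hit++;			\
	else					\
		(stats)->miss++;		\
	_c;					\
})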
Reported-by: Thomas Richter Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index f1c0570780d15..3d3303283181f 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -17,7 +17,7 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64)) KBUILD_AFLAGS += -DBUILD_VDSO -KBUILD_CFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) KBUILD_AFLAGS_64 += -m64 -s From 309f98dbc66cb183a84781e2b19595f044a2d6e4 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 6 Feb 2020 17:52:03 +0100 Subject: [PATCH 08/85] s390/qdio: make qdio_handle_aobs() more robust When processing a PENDING buffer with no attached aob, the current code would get stuck on this buffer (as the 'continue' causes us to not advance the buffer index) and process it repeatedly until the loop terminates eventually. Luckily this should never happen - the HW must not use the PENDING state when no aob was provided. But we can still make this code path less fragile and protect against buggy devices. Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- drivers/s390/cio/qdio_main.c | 42 +++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 4fab8bba2cdd4..0ed8c680cae45 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -531,26 +531,6 @@ static inline int qdio_inbound_q_done(struct qdio_q *q, unsigned int start) return 1; } -static inline void qdio_handle_aobs(struct qdio_q *q, int start, int count) -{ - unsigned char state = 0; - int j, b = start; - - for (j = 0; j < count; ++j) { - get_buf_state(q, b, &state, 0); - if (state == SLSB_P_OUTPUT_PENDING) { - struct qaob *aob = q->u.out.aobs[b]; - if (aob == NULL) - continue; - - q->u.out.sbal_state[b].flags |= - QDIO_OUTBUF_STATE_FLAG_PENDING; - q->u.out.aobs[b] = NULL; - } - b = next_buf(b); - } -} - static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q, int bufnr) { @@ -640,6 +620,19 @@ void qdio_inbound_processing(unsigned long data) __qdio_inbound_processing(q); } +static void qdio_check_pending(struct qdio_q *q, unsigned int index) +{ + unsigned char state; + + if (get_buf_state(q, index, &state, 0) > 0 && + state == SLSB_P_OUTPUT_PENDING && + q->u.out.aobs[index]) { + q->u.out.sbal_state[index].flags |= + QDIO_OUTBUF_STATE_FLAG_PENDING; + q->u.out.aobs[index] = NULL; + } +} + static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start) { unsigned char state = 0; @@ -712,8 +705,13 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q, unsigned int start) if (count) { DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "out moved:%1d", q->nr); - if (q->u.out.use_cq) - qdio_handle_aobs(q, start, count); + + if (q->u.out.use_cq) { + unsigned int i; + + for (i = 0; i < count; i++) + qdio_check_pending(q, QDIO_BUFNR(start + i)); + } } return count; From 4d4a3caaf36246520d61c17a9fd86ce3893f6595 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 19 Feb 2020 11:15:30 +0100 Subject: [PATCH 09/85] s390/qdio: clean up QDR setup __qdio_allocate_fill_qdr() is meant to set up one specific queue descriptor in the QDR. 
But for this simple task, it gets passed a bunch of global structs and offsets - and then navigates through the structs to find its actual operands. Clean up all the complicated pointer chasing & index calculation, and just pass a descriptor and its associated queue struct. While at it also add some virt_to_phys() translations, to clarify that addresses in the QDR are meant to be absolute. Signed-off-by: Julian Wiedmann Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 8 ++++---- drivers/s390/cio/qdio_setup.c | 33 +++++++++++++-------------------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index e69dbf438f999..60012fc11bac1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -26,9 +26,9 @@ /** * struct qdesfmt0 - queue descriptor, format 0 - * @sliba: storage list information block address - * @sla: storage list address - * @slsba: storage list state block address + * @sliba: absolute address of storage list information block + * @sla: absolute address of storage list + * @slsba: absolute address of storage list state block * @akey: access key for SLIB * @bkey: access key for SL * @ckey: access key for SBALs @@ -56,7 +56,7 @@ struct qdesfmt0 { * @oqdcnt: output queue descriptor count * @iqdsz: input queue descriptor size * @oqdsz: output queue descriptor size - * @qiba: queue information block address + * @qiba: absolute address of queue information block * @qkey: queue information block key * @qdf0: queue descriptions */ diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 2c5cc6ec668e9..42e1c09493092 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -403,28 +403,22 @@ void qdio_free_async_data(struct qdio_irq *irq_ptr) } } -static void __qdio_allocate_fill_qdr(struct qdio_irq *irq_ptr, - struct qdio_q **irq_ptr_qs, - int i, int nr) +static void qdio_fill_qdr_desc(struct qdesfmt0 *desc, struct qdio_q *queue) { - irq_ptr->qdr->qdf0[i + nr].sliba = - (unsigned long)irq_ptr_qs[i]->slib; - - irq_ptr->qdr->qdf0[i + nr].sla = - (unsigned long)irq_ptr_qs[i]->sl; - - irq_ptr->qdr->qdf0[i + nr].slsba = - (unsigned long)&irq_ptr_qs[i]->slsb.val[0]; - - irq_ptr->qdr->qdf0[i + nr].akey = PAGE_DEFAULT_KEY >> 4; - irq_ptr->qdr->qdf0[i + nr].bkey = PAGE_DEFAULT_KEY >> 4; - irq_ptr->qdr->qdf0[i + nr].ckey = PAGE_DEFAULT_KEY >> 4; - irq_ptr->qdr->qdf0[i + nr].dkey = PAGE_DEFAULT_KEY >> 4; + desc->sliba = virt_to_phys(queue->slib); + desc->sla = virt_to_phys(queue->sl); + desc->slsba = virt_to_phys(&queue->slsb); + + desc->akey = PAGE_DEFAULT_KEY >> 4; + desc->bkey = PAGE_DEFAULT_KEY >> 4; + desc->ckey = PAGE_DEFAULT_KEY >> 4; + desc->dkey = PAGE_DEFAULT_KEY >> 4; } static void setup_qdr(struct qdio_irq *irq_ptr, struct qdio_initialize *qdio_init) { + struct qdesfmt0 *desc = &irq_ptr->qdr->qdf0[0]; int i; irq_ptr->qdr->qfmt = qdio_init->q_format; @@ -433,15 +427,14 @@ static void setup_qdr(struct qdio_irq *irq_ptr, irq_ptr->qdr->oqdcnt = qdio_init->no_output_qs; irq_ptr->qdr->iqdsz = sizeof(struct qdesfmt0) / 4; /* size in words */ irq_ptr->qdr->oqdsz = sizeof(struct qdesfmt0) / 4; - irq_ptr->qdr->qiba = (unsigned long)&irq_ptr->qib; + irq_ptr->qdr->qiba = virt_to_phys(&irq_ptr->qib); irq_ptr->qdr->qkey = PAGE_DEFAULT_KEY >> 4; for (i = 0; i < qdio_init->no_input_qs; i++) - __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->input_qs, i, 0); + qdio_fill_qdr_desc(desc++, irq_ptr->input_qs[i]); for (i = 0; i < qdio_init->no_output_qs; 
i++) - __qdio_allocate_fill_qdr(irq_ptr, irq_ptr->output_qs, i, - qdio_init->no_input_qs); + qdio_fill_qdr_desc(desc++, irq_ptr->output_qs[i]); } static void setup_qib(struct qdio_irq *irq_ptr, From b02002cc4c0f8a2340d07690f58cae0c04ba2325 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 13 Jul 2020 14:12:49 +0200 Subject: [PATCH 10/85] s390/pci: Implement ioremap_wc/prot() with MIO With our current support for the new MIO PCI instructions, write combining/write back MMIO memory can be obtained via the pci_iomap_wc() and pci_iomap_wc_range() functions. This is achieved by using the write back address for a specific bar as provided in clp_store_query_pci_fn() These functions are however not widely used and instead drivers often rely on ioremap_wc() and ioremap_prot(), which on other platforms enable write combining using a PTE flag set through the pgrprot value. While we do not have a write combining flag in the low order flag bits of the PTE like x86_64 does, with MIO support, there is a write back bit in the physical address (bit 1 on z15) and thus also the PTE. Which bit is used to toggle write back and whether it is available at all, is however not fixed in the architecture. Instead we get this information from the CLP Store Logical Processor Characteristics for PCI command. When the write back bit is not provided we fall back to the existing behavior. Signed-off-by: Niklas Schnelle Reviewed-by: Pierre Morel Reviewed-by: Gerald Schaefer Signed-off-by: Vasily Gorbik --- .../features/vm/ioremap_prot/arch-support.txt | 2 +- arch/s390/include/asm/clp.h | 3 ++ arch/s390/include/asm/io.h | 8 +++++ arch/s390/include/asm/pci.h | 1 + arch/s390/include/asm/pci_clp.h | 19 ++++++++++ arch/s390/include/asm/pgtable.h | 9 ++++- arch/s390/include/asm/setup.h | 3 ++ arch/s390/kernel/setup.c | 6 ++++ arch/s390/mm/pgtable.c | 20 +++++++++++ arch/s390/pci/pci.c | 30 ++++++++++++++-- arch/s390/pci/pci_clp.c | 36 ++++++++++++++++++- 11 files changed, 132 insertions(+), 5 deletions(-) diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt index 1cb7406cd8589..b5fb37c28cc67 100644 --- a/Documentation/features/vm/ioremap_prot/arch-support.txt +++ b/Documentation/features/vm/ioremap_prot/arch-support.txt @@ -24,7 +24,7 @@ | parisc: | TODO | | powerpc: | ok | | riscv: | TODO | - | s390: | TODO | + | s390: | ok | | sh: | ok | | sparc: | TODO | | um: | TODO | diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h index 3925b0f085b7d..10919eeb7533d 100644 --- a/arch/s390/include/asm/clp.h +++ b/arch/s390/include/asm/clp.h @@ -5,6 +5,9 @@ /* CLP common request & response block size */ #define CLP_BLK_SIZE PAGE_SIZE +/* Call Logical Processor - Command Code */ +#define CLP_SLPC 0x0001 + #define CLP_LPS_BASE 0 #define CLP_LPS_PCI 2 diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index da014e4f81132..28664ee0abc1a 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -12,6 +12,7 @@ #include #include +#include #include #define xlate_dev_mem_ptr xlate_dev_mem_ptr @@ -26,7 +27,10 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr); #define IO_SPACE_LIMIT 0 +void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot); void __iomem *ioremap(phys_addr_t addr, size_t size); +void __iomem *ioremap_wc(phys_addr_t addr, size_t size); +void __iomem *ioremap_wt(phys_addr_t addr, size_t size); void iounmap(volatile void __iomem *addr); static inline void __iomem 
*ioport_map(unsigned long port, unsigned int nr) @@ -52,6 +56,10 @@ static inline void ioport_unmap(void __iomem *p) #define pci_iomap_wc pci_iomap_wc #define pci_iomap_wc_range pci_iomap_wc_range +#define ioremap ioremap +#define ioremap_wt ioremap_wt +#define ioremap_wc ioremap_wc + #define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count) #define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count) #define memset_io(dst, val, count) zpci_memset_io(dst, val, count) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 99b92c3e46b09..8015b93015339 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -208,6 +208,7 @@ int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); /* CLP */ +int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); int clp_rescan_pci_devices(void); int clp_rescan_pci_devices_simple(u32 *fid); diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index eb51272dd2ccd..1f4b666e85ee3 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -7,6 +7,7 @@ /* * Call Logical Processor - Command Codes */ +#define CLP_SLPC 0x0001 #define CLP_LIST_PCI 0x0002 #define CLP_QUERY_PCI_FN 0x0003 #define CLP_QUERY_PCI_FNGRP 0x0004 @@ -51,6 +52,19 @@ struct clp_fh_list_entry { extern bool zpci_unique_uid; +struct clp_rsp_slpc_pci { + struct clp_rsp_hdr hdr; + u32 reserved2[4]; + u32 lpif[8]; + u32 reserved3[4]; + u32 vwb : 1; + u32 : 1; + u32 mio_wb : 6; + u32 : 24; + u32 reserved5[3]; + u32 lpic[8]; +} __packed; + /* List PCI functions request */ struct clp_req_list_pci { struct clp_req_hdr hdr; @@ -172,6 +186,11 @@ struct clp_rsp_set_pci { } __packed; /* Combined request/response block structures used by clp insn */ +struct clp_req_rsp_slpc_pci { + struct clp_req_slpc request; + struct clp_rsp_slpc_pci response; +} __packed; + struct clp_req_rsp_list_pci { struct clp_req_list_pci request; struct clp_rsp_list_pci response; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7eb01a5459cdf..e9244b9fb5047 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1186,6 +1186,12 @@ void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr); void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr); void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); +#define pgprot_writecombine pgprot_writecombine +pgprot_t pgprot_writecombine(pgprot_t prot); + +#define pgprot_writethrough pgprot_writethrough +pgprot_t pgprot_writethrough(pgprot_t prot); + /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. 
Thus, the following @@ -1209,7 +1215,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; - pte_val(__pte) = physpage + pgprot_val(pgprot); + + pte_val(__pte) = physpage | pgprot_val(pgprot); if (!MACHINE_HAS_NX) pte_val(__pte) &= ~_PAGE_NOEXEC; return pte_mkyoung(__pte); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 534f212753d65..7b104f156e342 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -94,6 +94,9 @@ extern unsigned long vmalloc_size; extern unsigned long max_physmem_end; extern unsigned long __swsusp_reset_dma; +/* The Write Back bit position in the physaddr is given by the SLPC PCI */ +extern unsigned long mio_wb_bit_mask; + #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) #define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index dfa45027cb473..efd12221ecb4d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -127,6 +127,12 @@ unsigned long MODULES_END; struct lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); +/* + * The Write Back bit position in the physaddr is given by the SLPC PCI. + * Leaving the mask zero always uses write through which is safe + */ +unsigned long mio_wb_bit_mask __ro_after_init; + /* * This is set up by the setup-routine at boot-time * for S390 need to find out, what we have to setup diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0d25f743b270e..18205f851c247 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,6 +24,26 @@ #include #include +pgprot_t pgprot_writecombine(pgprot_t prot) +{ + /* + * mio_wb_bit_mask may be set on a different CPU, but it is only set + * once at init and only read afterwards. + */ + return __pgprot(pgprot_val(prot) | mio_wb_bit_mask); +} +EXPORT_SYMBOL_GPL(pgprot_writecombine); + +pgprot_t pgprot_writethrough(pgprot_t prot) +{ + /* + * mio_wb_bit_mask may be set on a different CPU, but it is only set + * once at init and only read afterwards. 
+ */ + return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask); +} +EXPORT_SYMBOL_GPL(pgprot_writethrough); + static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int nodat) { diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 4b62d6b550246..fdbb99c4569dc 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -226,7 +226,7 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count) zpci_memcpy_toio(to, from, count); } -void __iomem *ioremap(phys_addr_t addr, size_t size) +static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot) { unsigned long offset, vaddr; struct vm_struct *area; @@ -247,14 +247,37 @@ void __iomem *ioremap(phys_addr_t addr, size_t size) return NULL; vaddr = (unsigned long) area->addr; - if (ioremap_page_range(vaddr, vaddr + size, addr, PAGE_KERNEL)) { + if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) { free_vm_area(area); return NULL; } return (void __iomem *) ((unsigned long) area->addr + offset); } + +void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot) +{ + return __ioremap(addr, size, __pgprot(prot)); +} +EXPORT_SYMBOL(ioremap_prot); + +void __iomem *ioremap(phys_addr_t addr, size_t size) +{ + return __ioremap(addr, size, PAGE_KERNEL); +} EXPORT_SYMBOL(ioremap); +void __iomem *ioremap_wc(phys_addr_t addr, size_t size) +{ + return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iomem *ioremap_wt(phys_addr_t addr, size_t size) +{ + return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL)); +} +EXPORT_SYMBOL(ioremap_wt); + void iounmap(volatile void __iomem *addr) { if (static_branch_likely(&have_mio)) @@ -784,6 +807,9 @@ static int zpci_mem_init(void) if (!zpci_iomap_bitmap) goto error_iomap_bitmap; + if (static_branch_likely(&have_mio)) + clp_setup_writeback_mio(); + return 0; error_iomap_bitmap: kfree(zpci_iomap_start); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 7e735f41a0a64..51807945ca008 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -292,6 +292,40 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) return rc; } +int clp_setup_writeback_mio(void) +{ + struct clp_req_rsp_slpc_pci *rrb; + u8 wb_bit_pos; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_SLPC; + rrb->response.hdr.len = sizeof(rrb->response); + + rc = clp_req(rrb, CLP_LPS_PCI); + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + if (rrb->response.vwb) { + wb_bit_pos = rrb->response.mio_wb; + set_bit_inv(wb_bit_pos, &mio_wb_bit_mask); + zpci_dbg(3, "wb bit: %d\n", wb_bit_pos); + } else { + zpci_dbg(3, "wb bit: n.a.\n"); + } + + } else { + zpci_err("SLPC PCI:\n"); + zpci_err_clp(rrb->response.hdr.rsp, rc); + rc = -EIO; + } + clp_free_block(rrb); + return rc; +} + int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) { int rc; @@ -495,7 +529,7 @@ static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb) } } -static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc_pci *lpcb) { unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); From 180a4c42e541cabe478c3f6e6c986a6b61744407 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 24 Aug 2020 17:32:52 +0300 Subject: [PATCH 11/85] s390/qdio: always use dev_name() 
for device name in QIB Passing a custom name from the device driver is nice - but in practice it's only zfcp who has been using this. So we might as well hard-code a naming scheme in the qdio layer, so that qeth also benefits from it. Signed-off-by: Julian Wiedmann Reviewed-by: Steffen Maier Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/qdio.h | 2 -- drivers/s390/cio/qdio_main.c | 1 - drivers/s390/cio/qdio_setup.c | 5 ++++- drivers/s390/scsi/zfcp_qdio.c | 2 -- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 60012fc11bac1..19e84c95d1e7b 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -327,7 +327,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * struct qdio_initialize - qdio initialization data * @q_format: queue format * @qdr_ac: feature flags to set - * @adapter_name: name for the adapter * @qib_param_field_format: format for qib_parm_field * @qib_param_field: pointer to 128 bytes or NULL, if no param field * @qib_rflags: rflags to set @@ -347,7 +346,6 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, struct qdio_initialize { unsigned char q_format; unsigned char qdr_ac; - unsigned char adapter_name[8]; unsigned int qib_param_field_format; unsigned char *qib_param_field; unsigned char qib_rflags; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 0ed8c680cae45..f9a31c7819ae6 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1219,7 +1219,6 @@ static void qdio_trace_init_data(struct qdio_irq *irq, struct qdio_initialize *data) { DBF_DEV_EVENT(DBF_ERR, irq, "qfmt:%1u", data->q_format); - DBF_DEV_HEX(irq, data->adapter_name, 8, DBF_ERR); DBF_DEV_EVENT(DBF_ERR, irq, "qpff%4x", data->qib_param_field_format); DBF_DEV_HEX(irq, &data->qib_param_field, sizeof(void *), DBF_ERR); DBF_DEV_HEX(irq, &data->input_slib_elements, sizeof(void *), DBF_ERR); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 42e1c09493092..a5b2e16b7aa89 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -9,6 +9,8 @@ #include #include #include + +#include #include #include "cio.h" @@ -452,7 +454,8 @@ static void setup_qib(struct qdio_irq *irq_ptr, if (init_data->no_output_qs) irq_ptr->qib.osliba = (unsigned long)(irq_ptr->output_qs[0]->slib); - memcpy(irq_ptr->qib.ebcnam, init_data->adapter_name, 8); + memcpy(irq_ptr->qib.ebcnam, dev_name(&irq_ptr->cdev->dev), 8); + ASCEBC(irq_ptr->qib.ebcnam, 8); } int qdio_setup_irq(struct qdio_irq *irq_ptr, struct qdio_initialize *init_data) diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index e78d65bd46b16..a8a5140740848 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -380,8 +380,6 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio) &qdio->adapter->status); init_data.q_format = QDIO_ZFCP_QFMT; - memcpy(init_data.adapter_name, dev_name(&cdev->dev), 8); - ASCEBC(init_data.adapter_name, 8); init_data.qib_rflags = QIB_RFLAGS_ENABLE_DATA_DIV; if (enable_multibuffer) init_data.qdr_ac |= QDR_AC_MULTI_BUFFER_ENABLE; From 0d574ad33e5b70383b4cee81024fcb8763aac591 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 9 Sep 2020 12:02:46 +0200 Subject: [PATCH 12/85] s390/zcrypt: simplify cca_findcard2 loop code Instead of two times go through the list of available AP devices (which may be up to 256 * 256 entries) this patch reworks the code do only run 
through once. The price is instead of reporting all possible devices to the caller only the first 256 devices are collected. However, having to choose from 256 AP devices is plenty of resources and should fulfill the caller's requirements. On the other side the loop code is much simplier and more easy to maintain. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_ccamisc.c | 121 +++++++++++++-------------- 1 file changed, 58 insertions(+), 63 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 3f5b61351cde5..5fafda6663ed4 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -1685,12 +1685,10 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, int minhwtype, u64 cur_mkvp, u64 old_mkvp, int verify) { struct zcrypt_device_status_ext *device_status; - int i, n, card, dom, curmatch, oldmatch, rc = 0; + u32 *_apqns = NULL, _nr_apqns = 0; + int i, card, dom, curmatch, oldmatch, rc = 0; struct cca_info ci; - *apqns = NULL; - *nr_apqns = 0; - /* fetch status of all crypto cards */ device_status = kmalloc_array(MAX_ZDEV_ENTRIES_EXT, sizeof(struct zcrypt_device_status_ext), @@ -1699,67 +1697,64 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, return -ENOMEM; zcrypt_device_status_mask_ext(device_status); - /* loop two times: first gather eligible apqns, then store them */ - while (1) { - n = 0; - /* walk through all the crypto cards */ - for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) { - card = AP_QID_CARD(device_status[i].qid); - dom = AP_QID_QUEUE(device_status[i].qid); - /* check online state */ - if (!device_status[i].online) - continue; - /* check for cca functions */ - if (!(device_status[i].functions & 0x04)) - continue; - /* check cardnr */ - if (cardnr != 0xFFFF && card != cardnr) - continue; - /* check domain */ - if (domain != 0xFFFF && dom != domain) - continue; - /* get cca info on this apqn */ - if (cca_get_info(card, dom, &ci, verify)) - continue; - /* current master key needs to be valid */ - if (ci.cur_mk_state != '2') - continue; - /* check min hardware type */ - if (minhwtype > 0 && minhwtype > ci.hwtype) + /* allocate 1k space for up to 256 apqns */ + _apqns = kmalloc_array(256, sizeof(u32), GFP_KERNEL); + if (!_apqns) { + kvfree(device_status); + return -ENOMEM; + } + + /* walk through all the crypto apqnss */ + for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) { + card = AP_QID_CARD(device_status[i].qid); + dom = AP_QID_QUEUE(device_status[i].qid); + /* check online state */ + if (!device_status[i].online) + continue; + /* check for cca functions */ + if (!(device_status[i].functions & 0x04)) + continue; + /* check cardnr */ + if (cardnr != 0xFFFF && card != cardnr) + continue; + /* check domain */ + if (domain != 0xFFFF && dom != domain) + continue; + /* get cca info on this apqn */ + if (cca_get_info(card, dom, &ci, verify)) + continue; + /* current master key needs to be valid */ + if (ci.cur_mk_state != '2') + continue; + /* check min hardware type */ + if (minhwtype > 0 && minhwtype > ci.hwtype) + continue; + if (cur_mkvp || old_mkvp) { + /* check mkvps */ + curmatch = oldmatch = 0; + if (cur_mkvp && cur_mkvp == ci.cur_mkvp) + curmatch = 1; + if (old_mkvp && ci.old_mk_state == '2' && + old_mkvp == ci.old_mkvp) + oldmatch = 1; + if ((cur_mkvp || old_mkvp) && + (curmatch + oldmatch < 1)) continue; - if (cur_mkvp || old_mkvp) { - /* check mkvps */ - curmatch = oldmatch = 0; - 
if (cur_mkvp && cur_mkvp == ci.cur_mkvp) - curmatch = 1; - if (old_mkvp && ci.old_mk_state == '2' && - old_mkvp == ci.old_mkvp) - oldmatch = 1; - if ((cur_mkvp || old_mkvp) && - (curmatch + oldmatch < 1)) - continue; - } - /* apqn passed all filtering criterons */ - if (*apqns && n < *nr_apqns) - (*apqns)[n] = (((u16)card) << 16) | ((u16) dom); - n++; - } - /* loop 2nd time: array has been filled */ - if (*apqns) - break; - /* loop 1st time: have # of eligible apqns in n */ - if (!n) { - rc = -ENODEV; /* no eligible apqns found */ - break; - } - *nr_apqns = n; - /* allocate array to store n apqns into */ - *apqns = kmalloc_array(n, sizeof(u32), GFP_KERNEL); - if (!*apqns) { - rc = -ENOMEM; - break; } - verify = 0; + /* apqn passed all filtering criterons, add to the array */ + if (_nr_apqns < 256) + _apqns[_nr_apqns++] = (((u16)card) << 16) | ((u16) dom); + } + + /* nothing found ? */ + if (!_nr_apqns) { + kfree(_apqns); + rc = -ENODEV; + } else { + /* no re-allocation, simple return the _apqns array */ + *apqns = _apqns; + *nr_apqns = _nr_apqns; + rc = 0; } kfree(device_status); From 9d719d39aab41d3b8b6f259574dc29a27f60e66c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 Sep 2020 17:41:27 +0200 Subject: [PATCH 13/85] s390/mm,ptdump: convert to generic page table dumper Make use of generic ptdump infrastructure. Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/Kconfig.debug | 12 -- arch/s390/configs/debug_defconfig | 2 +- arch/s390/configs/defconfig | 2 +- arch/s390/mm/Makefile | 2 +- arch/s390/mm/dump_pagetables.c | 233 ++++++------------------------ 6 files changed, 51 insertions(+), 201 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8a6121f937092..85bf121211d10 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -120,6 +120,7 @@ config S390 select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY + select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select HAVE_ALIGNED_STRUCT_PAGE if SLUB diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 761fe2b0b2f6d..ab48b694ade8e 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -3,17 +3,5 @@ config TRACE_IRQFLAGS_SUPPORT def_bool y -config S390_PTDUMP - bool "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select DEBUG_FS - help - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. 
- If in doubt, say "N" - config EARLY_PRINTK def_bool y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0cf9a82326a85..f79eafb597cb3 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -774,6 +774,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_PAGE_OWNER=y CONFIG_DEBUG_RODATA_TEST=y +CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_SELFTEST=y CONFIG_DEBUG_OBJECTS_FREE=y @@ -819,7 +820,6 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_HIST_TRIGGERS=y -CONFIG_S390_PTDUMP=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 5df9759e8ff67..9593cc8a9efd6 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -758,6 +758,7 @@ CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y +CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y CONFIG_TEST_LOCKUP=m @@ -772,7 +773,6 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_HIST_TRIGGERS=y -CONFIG_S390_PTDUMP=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 3175413186b9d..8ab9daeeace38 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -8,7 +8,7 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index c2ac9b8ae6120..93a29e2f13d42 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include -#include #include #include #include @@ -42,10 +42,11 @@ static struct addr_marker address_markers[] = { }; struct pg_state { + struct ptdump_state ptdump; + struct seq_file *seq; int level; unsigned int current_prot; unsigned long start_address; - unsigned long current_address; const struct addr_marker *marker; }; @@ -63,215 +64,75 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n"); } -static void note_page(struct seq_file *m, struct pg_state *st, - unsigned int new_prot, int level) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { - static const char units[] = "KMGTPE"; int width = sizeof(unsigned long) * 2; + static const char units[] = "KMGTPE"; const char *unit = units; - unsigned int prot, cur; unsigned long delta; + struct pg_state *st; + struct seq_file *m; + unsigned int prot; - /* - * If we have a "break" in the series, we need to flush the state - * that we have now. "break" is either changing perms, levels or - * address space marker. - */ - prot = new_prot; - cur = st->current_prot; - - if (!st->level) { - /* First entry */ - st->current_prot = new_prot; - st->level = level; - st->marker = address_markers; + st = container_of(pt_st, struct pg_state, ptdump); + m = st->seq; + prot = val & (_PAGE_PROTECT | _PAGE_NOEXEC); + if (level == 4 && (val & _PAGE_INVALID)) + prot = _PAGE_INVALID; + /* For pmd_none() & friends val gets passed as zero. 
*/ + if (level != 4 && !val) + prot = _PAGE_INVALID; + /* Final flush from generic code. */ + if (level == -1) + addr = max_addr; + if (st->level == -1) { seq_printf(m, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || level != st->level || - st->current_address >= st->marker[1].start_address) { - /* Print the actual finished series */ + st->start_address = addr; + st->current_prot = prot; + st->level = level; + } else if (prot != st->current_prot || level != st->level || + addr >= st->marker[1].start_address) { seq_printf(m, "0x%0*lx-0x%0*lx ", width, st->start_address, - width, st->current_address); - delta = (st->current_address - st->start_address) >> 10; + width, addr); + delta = (addr - st->start_address) >> 10; while (!(delta & 0x3ff) && unit[1]) { delta >>= 10; unit++; } seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); - while (st->current_address >= st->marker[1].start_address) { + while (addr >= st->marker[1].start_address) { st->marker++; seq_printf(m, "---[ %s ]---\n", st->marker->name); } - st->start_address = st->current_address; - st->current_prot = new_prot; + st->start_address = addr; + st->current_prot = prot; st->level = level; } } -#ifdef CONFIG_KASAN -static void note_kasan_early_shadow_page(struct seq_file *m, - struct pg_state *st) -{ - unsigned int prot; - - prot = pte_val(*kasan_early_shadow_pte) & - (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC); - note_page(m, st, prot, 4); -} -#endif - -/* - * The actual page table walker functions. In order to keep the - * implementation of print_prot() short, we only check and pass - * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region, - * segment or page table entry is invalid or read-only. - * After all it's just a hint that the current level being walked - * contains an invalid or read-only entry. 
- */ -static void walk_pte_level(struct seq_file *m, struct pg_state *st, - pmd_t *pmd, unsigned long addr) -{ - unsigned int prot; - pte_t *pte; - int i; - - for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { - st->current_address = addr; - pte = pte_offset_kernel(pmd, addr); - prot = pte_val(*pte) & - (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC); - note_page(m, st, prot, 4); - addr += PAGE_SIZE; - } -} - -static void walk_pmd_level(struct seq_file *m, struct pg_state *st, - pud_t *pud, unsigned long addr) -{ - unsigned int prot; - pmd_t *pmd; - int i; - -#ifdef CONFIG_KASAN - if ((pud_val(*pud) & PAGE_MASK) == __pa(kasan_early_shadow_pmd)) { - note_kasan_early_shadow_page(m, st); - return; - } -#endif - - pmd = pmd_offset(pud, addr); - for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++, pmd++) { - st->current_address = addr; - if (!pmd_none(*pmd)) { - if (pmd_large(*pmd)) { - prot = pmd_val(*pmd) & - (_SEGMENT_ENTRY_PROTECT | - _SEGMENT_ENTRY_NOEXEC); - note_page(m, st, prot, 3); - } else - walk_pte_level(m, st, pmd, addr); - } else - note_page(m, st, _PAGE_INVALID, 3); - addr += PMD_SIZE; - } -} - -static void walk_pud_level(struct seq_file *m, struct pg_state *st, - p4d_t *p4d, unsigned long addr) -{ - unsigned int prot; - pud_t *pud; - int i; - -#ifdef CONFIG_KASAN - if ((p4d_val(*p4d) & PAGE_MASK) == __pa(kasan_early_shadow_pud)) { - note_kasan_early_shadow_page(m, st); - return; - } -#endif - - pud = pud_offset(p4d, addr); - for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++, pud++) { - st->current_address = addr; - if (!pud_none(*pud)) - if (pud_large(*pud)) { - prot = pud_val(*pud) & - (_REGION_ENTRY_PROTECT | - _REGION_ENTRY_NOEXEC); - note_page(m, st, prot, 2); - } else - walk_pmd_level(m, st, pud, addr); - else - note_page(m, st, _PAGE_INVALID, 2); - addr += PUD_SIZE; - } -} - -static void walk_p4d_level(struct seq_file *m, struct pg_state *st, - pgd_t *pgd, unsigned long addr) -{ - p4d_t *p4d; - int i; - -#ifdef CONFIG_KASAN - if ((pgd_val(*pgd) & PAGE_MASK) == __pa(kasan_early_shadow_p4d)) { - note_kasan_early_shadow_page(m, st); - return; - } -#endif - - p4d = p4d_offset(pgd, addr); - for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++, p4d++) { - st->current_address = addr; - if (!p4d_none(*p4d)) - walk_pud_level(m, st, p4d, addr); - else - note_page(m, st, _PAGE_INVALID, 2); - addr += P4D_SIZE; - } -} - -static void walk_pgd_level(struct seq_file *m) -{ - unsigned long addr = 0; - struct pg_state st; - pgd_t *pgd; - int i; - - memset(&st, 0, sizeof(st)); - for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) { - st.current_address = addr; - pgd = pgd_offset_k(addr); - if (!pgd_none(*pgd)) - walk_p4d_level(m, &st, pgd, addr); - else - note_page(m, &st, _PAGE_INVALID, 1); - addr += PGDIR_SIZE; - cond_resched(); - } - /* Flush out the last page */ - st.current_address = max_addr; - note_page(m, &st, 0, 0); -} - static int ptdump_show(struct seq_file *m, void *v) { - walk_pgd_level(m); + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {.start = 0, .end = max_addr}, + {.start = 0, .end = 0}, + } + }, + .seq = m, + .level = -1, + .current_prot = 0, + .start_address = 0, + .marker = address_markers, + }; + + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); return 0; } - -static int ptdump_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, ptdump_show, NULL); -} - -static const struct file_operations ptdump_fops = { - .open = ptdump_open, - .read = seq_read, - .llseek = seq_lseek, - .release = 
single_release, -}; +DEFINE_SHOW_ATTRIBUTE(ptdump); static int pt_dump_init(void) { From 36c2733c439caa424fe2b7dded870913dcb868ac Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Sep 2020 17:11:36 +0200 Subject: [PATCH 14/85] s390/mm,ptdump: hold memory hotplug lock while walking for kernel page table dump This is the s390 variant of commit bf2b59f60ee1 ("arm64/mm: Hold memory hotplug lock while walking for kernel page table dump"). Right now this doesn't fix any real bug, however as soon as kvm patches get merged which make use of memory remove we might end up dereferencing/accessing freed page tables. Therefore fix this potential bug already now. Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 93a29e2f13d42..3c5e4055a3d23 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -129,7 +129,9 @@ static int ptdump_show(struct seq_file *m, void *v) .marker = address_markers, }; + get_online_mems(); ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + put_online_mems(); return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); From da1694ad9e8d13484c8b4ecaabde0bd7b958442a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Sep 2020 17:20:05 +0200 Subject: [PATCH 15/85] s390/mm,ptdump: hold cpa mutex while walking for kernel page table dump This is currently only preventing that outdated information is provided to user space. A concurrent split of huge/large pages does modify the kernel page tables, however either the huge/large mapping is reported or the split area is being walked. This "fixes" also only a potential future bug, since split pages could also be merged again if page permissions are the same for larger memory areas. 
Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/set_memory.h | 4 ++++ arch/s390/mm/dump_pagetables.c | 3 +++ arch/s390/mm/pageattr.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h index c59a83536c70e..a22a5a81811cf 100644 --- a/arch/s390/include/asm/set_memory.h +++ b/arch/s390/include/asm/set_memory.h @@ -2,6 +2,10 @@ #ifndef _ASMS390_SET_MEMORY_H #define _ASMS390_SET_MEMORY_H +#include + +extern struct mutex cpa_mutex; + #define SET_MEMORY_RO 1UL #define SET_MEMORY_RW 2UL #define SET_MEMORY_NX 4UL diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 3c5e4055a3d23..09c7179cb17da 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -130,7 +131,9 @@ static int ptdump_show(struct seq_file *m, void *v) }; get_online_mems(); + mutex_lock(&cpa_mutex); ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + mutex_unlock(&cpa_mutex); put_online_mems(); return 0; } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index c5c52ec2b46ff..ed8e5b3575d59 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -278,7 +278,7 @@ static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end, return rc; } -static DEFINE_MUTEX(cpa_mutex); +DEFINE_MUTEX(cpa_mutex); static int change_page_attr(unsigned long addr, unsigned long end, unsigned long flags) From abb95b7550f88bfb77081601f80662a259f2d143 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 17 Aug 2020 10:29:23 +0200 Subject: [PATCH 16/85] s390/pci: consolidate SR-IOV specific code Currently we have multiple #ifdef CONFIG_PCI_IOV blocks spread over different compilation units and headers, all dealing with SR-IOV specific behavior. This violates the style guide which discourages conditionally compiled code blocks and hinders maintainability by spreading SR-IOV functionality over many files. Let's move all of this into a conditionally compiled pci_iov.c file and local header and prefix SR-IOV specific functions with zpci_iov_*.
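The pattern applied here is the usual one for optional kernel features, and the new pci_iov.h below follows it: the implementation is compiled only when the config option is set, and a small local header either declares the real functions or provides static inline no-op stubs, so the call sites themselves need no #ifdef. A minimal sketch of that structure, using hypothetical CONFIG_FEATURE_FOO and feature_*() names rather than the real zPCI interfaces:

/* feature.h -- sketch only; the .c file would be built with
 * obj-$(CONFIG_FEATURE_FOO) += feature.o
 */
#ifndef FEATURE_H
#define FEATURE_H

struct device;

#ifdef CONFIG_FEATURE_FOO
void feature_map_resources(struct device *dev);
int feature_link(struct device *parent, struct device *child);
#else /* CONFIG_FEATURE_FOO */
static inline void feature_map_resources(struct device *dev) {}
static inline int feature_link(struct device *parent, struct device *child)
{
	return 0;
}
#endif /* CONFIG_FEATURE_FOO */

#endif /* FEATURE_H */

With such a header a caller simply invokes feature_map_resources(dev) unconditionally; when the option is off the stub compiles away to nothing.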
Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/Makefile | 1 + arch/s390/pci/pci.c | 23 ++-------- arch/s390/pci/pci_bus.c | 66 +--------------------------- arch/s390/pci/pci_bus.h | 12 ----- arch/s390/pci/pci_iov.c | 97 +++++++++++++++++++++++++++++++++++++++++ arch/s390/pci/pci_iov.h | 30 +++++++++++++ 6 files changed, 133 insertions(+), 96 deletions(-) create mode 100644 arch/s390/pci/pci_iov.c create mode 100644 arch/s390/pci/pci_iov.h diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index b4e3c84772a16..bf557a1b789c7 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ pci_bus.o +obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index fdbb99c4569dc..e432318f69372 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -37,6 +37,7 @@ #include #include "pci_bus.h" +#include "pci_iov.h" /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); @@ -413,15 +414,6 @@ static struct pci_ops pci_root_ops = { .write = pci_write, }; -#ifdef CONFIG_PCI_IOV -static struct resource iov_res = { - .name = "PCI IOV res", - .start = 0, - .end = -1, - .flags = IORESOURCE_MEM, -}; -#endif - static void zpci_map_resources(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); @@ -442,16 +434,7 @@ static void zpci_map_resources(struct pci_dev *pdev) pdev->resource[i].end = pdev->resource[i].start + len - 1; } -#ifdef CONFIG_PCI_IOV - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { - int bar = i + PCI_IOV_RESOURCES; - - len = pci_resource_len(pdev, bar); - if (!len) - continue; - pdev->resource[bar].parent = &iov_res; - } -#endif + zpci_iov_map_resources(pdev); } static void zpci_unmap_resources(struct pci_dev *pdev) @@ -703,7 +686,7 @@ void zpci_remove_device(struct zpci_dev *zdev) pdev = pci_get_slot(zbus->bus, zdev->devfn); if (pdev) { if (pdev->is_virtfn) - return zpci_remove_virtfn(pdev, zdev->vfn); + return zpci_iov_remove_virtfn(pdev, zdev->vfn); pci_stop_and_remove_bus_device_locked(pdev); } } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 5967f30141563..0c0db7c3a4042 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -24,6 +24,7 @@ #include #include "pci_bus.h" +#include "pci_iov.h" static LIST_HEAD(zbus_list); static DEFINE_SPINLOCK(zbus_list_lock); @@ -126,69 +127,6 @@ static struct zpci_bus *zpci_bus_alloc(int pchid) return zbus; } -#ifdef CONFIG_PCI_IOV -static int zpci_bus_link_virtfn(struct pci_dev *pdev, - struct pci_dev *virtfn, int vfid) -{ - int rc; - - rc = pci_iov_sysfs_link(pdev, virtfn, vfid); - if (rc) - return rc; - - virtfn->is_virtfn = 1; - virtfn->multifunction = 0; - virtfn->physfn = pci_dev_get(pdev); - - return 0; -} - -static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, - struct pci_dev *virtfn, int vfn) -{ - int i, cand_devfn; - struct zpci_dev *zdev; - struct pci_dev *pdev; - int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ - int rc = 0; - - if (!zbus->multifunction) - return 0; - - /* If the parent PF for the given VF is also configured in the - * instance, it must be on the same zbus. - * We can then identify the parent PF by checking what - * devfn the VF would have if it belonged to that PF using the PF's - * stride and offset. Only if this candidate devfn matches the - * actual devfn will we link both functions. 
- */ - for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) { - zdev = zbus->function[i]; - if (zdev && zdev->is_physfn) { - pdev = pci_get_slot(zbus->bus, zdev->devfn); - if (!pdev) - continue; - cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); - if (cand_devfn == virtfn->devfn) { - rc = zpci_bus_link_virtfn(pdev, virtfn, vfid); - /* balance pci_get_slot() */ - pci_dev_put(pdev); - break; - } - /* balance pci_get_slot() */ - pci_dev_put(pdev); - } - } - return rc; -} -#else -static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, - struct pci_dev *virtfn, int vfn) -{ - return 0; -} -#endif - void pcibios_bus_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); @@ -198,7 +136,7 @@ void pcibios_bus_add_device(struct pci_dev *pdev) * perform PF/VF linking. */ if (zdev->vfn) - zpci_bus_setup_virtfn(zdev->zbus, pdev, zdev->vfn); + zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn); } diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index 4972433df4581..8d19723ed5c03 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -30,15 +30,3 @@ static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus, return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn]; } -#ifdef CONFIG_PCI_IOV -static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) -{ - - pci_lock_rescan_remove(); - /* Linux' vfid's start at 0 vfn at 1 */ - pci_iov_remove_virtfn(pdev->physfn, vfn - 1); - pci_unlock_rescan_remove(); -} -#else /* CONFIG_PCI_IOV */ -static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) {} -#endif /* CONFIG_PCI_IOV */ diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c new file mode 100644 index 0000000000000..35fca14ebb35f --- /dev/null +++ b/arch/s390/pci/pci_iov.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Niklas Schnelle + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include + +static struct resource iov_res = { + .name = "PCI IOV res", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; + +void zpci_iov_map_resources(struct pci_dev *pdev) +{ + resource_size_t len; + int i; + + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + int bar = i + PCI_IOV_RESOURCES; + + len = pci_resource_len(pdev, bar); + if (!len) + continue; + pdev->resource[bar].parent = &iov_res; + } +} + +void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) +{ + pci_lock_rescan_remove(); + /* Linux' vfid's start at 0 vfn at 1 */ + pci_iov_remove_virtfn(pdev->physfn, vfn - 1); + pci_unlock_rescan_remove(); +} + +static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, int vfid) +{ + int rc; + + rc = pci_iov_sysfs_link(pdev, virtfn, vfid); + if (rc) + return rc; + + virtfn->is_virtfn = 1; + virtfn->multifunction = 0; + virtfn->physfn = pci_dev_get(pdev); + + return 0; +} + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + int i, cand_devfn; + struct zpci_dev *zdev; + struct pci_dev *pdev; + int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ + int rc = 0; + + if (!zbus->multifunction) + return 0; + + /* If the parent PF for the given VF is also configured in the + * instance, it must be on the same zbus. + * We can then identify the parent PF by checking what + * devfn the VF would have if it belonged to that PF using the PF's + * stride and offset. Only if this candidate devfn matches the + * actual devfn will we link both functions. 
+ */ + for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) { + zdev = zbus->function[i]; + if (zdev && zdev->is_physfn) { + pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (!pdev) + continue; + cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); + if (cand_devfn == virtfn->devfn) { + rc = zpci_iov_link_virtfn(pdev, virtfn, vfid); + /* balance pci_get_slot() */ + pci_dev_put(pdev); + break; + } + /* balance pci_get_slot() */ + pci_dev_put(pdev); + } + } + return rc; +} diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h new file mode 100644 index 0000000000000..b2c828003bad0 --- /dev/null +++ b/arch/s390/pci/pci_iov.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Niklas Schnelle + * + */ + +#ifndef __S390_PCI_IOV_H +#define __S390_PCI_IOV_H + +#ifdef CONFIG_PCI_IOV +void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn); + +void zpci_iov_map_resources(struct pci_dev *pdev); + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn); + +#else /* CONFIG_PCI_IOV */ +static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {} + +static inline void zpci_iov_map_resources(struct pci_dev *pdev) {} + +static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + return 0; +} +#endif /* CONFIG_PCI_IOV */ +#endif /* __S390_PCI_IOV_h */ From 2bce60b5032fe3ca225a36b80f6df90938f0ead6 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 21 Aug 2020 10:26:40 +0200 Subject: [PATCH 17/85] s390/pci: remove unused function zpci_rescan() the only caller of this was removed as part of the suspend/resume removal so no need to keep this function around. Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 2 -- arch/s390/pci/pci.c | 6 ------ 2 files changed, 8 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 8015b93015339..140cb6bc8e1c6 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -233,12 +233,10 @@ static inline bool zpci_use_mio(struct zpci_dev *zdev) /* Error handling and recovery */ void zpci_event_error(void *); void zpci_event_availability(void *); -void zpci_rescan(void); bool zpci_is_enabled(void); #else /* CONFIG_PCI */ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} -static inline void zpci_rescan(void) {} #endif /* CONFIG_PCI */ #ifdef CONFIG_HOTPLUG_PCI_S390 diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e432318f69372..0ff48c51cd0e3 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -890,9 +890,3 @@ static int __init pci_base_init(void) return rc; } subsys_initcall_sync(pci_base_init); - -void zpci_rescan(void) -{ - if (zpci_is_enabled()) - clp_rescan_pci_devices_simple(NULL); -} From 809fcfaf9238052f03e8002f6dad156a7fb6cd5e Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 21 Aug 2020 11:01:54 +0200 Subject: [PATCH 18/85] s390/pci: remove clp_rescan_pci_devices() there is only one call site of clp_rescan_pci_devices() and all the function does is call zpci_remove_reserved_devices() followed by a duplicating clp_scan_pci_devices(). So inline the single call as a call to zpci_remove_reserved_devices() and clp_scan_pci_devices() and remove the function. 
Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 1 - arch/s390/pci/pci_clp.c | 17 ----------------- arch/s390/pci/pci_event.c | 3 ++- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 140cb6bc8e1c6..430f694163c16 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -210,7 +210,6 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); -int clp_rescan_pci_devices(void); int clp_rescan_pci_devices_simple(u32 *fid); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 51807945ca008..6aabfb131c1e7 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -441,23 +441,6 @@ int clp_scan_pci_devices(void) return rc; } -int clp_rescan_pci_devices(void) -{ - struct clp_req_rsp_list_pci *rrb; - int rc; - - zpci_remove_reserved_devices(); - - rrb = clp_alloc_block(GFP_KERNEL); - if (!rrb) - return -ENOMEM; - - rc = clp_list_pci(rrb, NULL, __clp_add); - - clp_free_block(rrb); - return rc; -} - /* Rescan PCI functions and refresh function handles. If fid is non-NULL only * refresh the handle of the function matching @fid */ diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 9a3a291cad432..e67cc2f6e169e 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -150,7 +150,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ - clp_rescan_pci_devices(); + zpci_remove_reserved_devices(); + clp_scan_pci_devices(); break; case 0x0308: /* Standby -> Reserved */ if (!zdev) From c3b2c9064e76fa3952c99982f11a98e0327f3dbe Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 21 Aug 2020 11:16:48 +0200 Subject: [PATCH 19/85] s390/pci: remove clp_rescan_pci_devices_simple() clp_rescan_pci_devices_simple() is neither simpler than clp_scan_pci_devices() nor does it really scan PCI devices, in particular it will neither add newly discovered devices nor remove those which disappeared. Instead it only refreshes PCI function handles and also has just a single call site in the same translation unit left which in fact only refreshes one specific function handle identified by a FID. Clarify this by renaming the function and its helper to clp_refresh_fh() respectively __clp_refresh_fh() and make it take a fid directly, which saves us dealing with the NULL case which updated all function handles but is not used anymore. Furthermore, since the only call site is in the same translation unit, make it static.
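The shape of the resulting interface is a common simplification: a parameter that used to be a nullable pointer, where NULL meant "apply to everything", becomes a plain value once the NULL case has no callers left. A small self-contained sketch of that change, with made-up entry/refresh names rather than the real CLP data structures:

#include <stddef.h>
#include <stdint.h>

struct entry {
	uint32_t fid;
	uint32_t fh;
};

/* old shape: a NULL fid meant "refresh every handle" */
void refresh_simple(struct entry *tab, size_t n, const uint32_t *fid, uint32_t new_fh)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (!fid || tab[i].fid == *fid)
			tab[i].fh = new_fh;
}

/* new shape: only the single-fid case is left, so take the fid by value */
void refresh_fh(struct entry *tab, size_t n, uint32_t fid, uint32_t new_fh)
{
	size_t i;

	for (i = 0; i < n; i++)
		if (tab[i].fid == fid)
			tab[i].fh = new_fh;
}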
Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pci.h | 1 - arch/s390/pci/pci_clp.c | 44 ++++++++++++++++++------------------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 430f694163c16..178a24e0af5fc 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -210,7 +210,6 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); -int clp_rescan_pci_devices_simple(u32 *fid); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 6aabfb131c1e7..5a34a1359dc5a 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -244,6 +244,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) return rc; } +static int clp_refresh_fh(u32 fid); /* * Enable/Disable a given PCI function and update its function handle if * necessary @@ -286,7 +287,7 @@ static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY && rrb->response.fh == 0) { /* Function is already in desired state - update handle */ - rc = clp_rescan_pci_devices_simple(&fid); + rc = clp_refresh_fh(fid); } clp_free_block(rrb); return rc; @@ -408,24 +409,6 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) clp_add_pci_device(entry->fid, entry->fh, entry->config_state); } -static void __clp_update(struct clp_fh_list_entry *entry, void *data) -{ - struct zpci_dev *zdev; - u32 *fid = data; - - if (!entry->vendor_id) - return; - - if (fid && *fid != entry->fid) - return; - - zdev = get_zdev_by_fid(entry->fid); - if (!zdev) - return; - - zdev->fh = entry->fh; -} - int clp_scan_pci_devices(void) { struct clp_req_rsp_list_pci *rrb; @@ -441,10 +424,25 @@ int clp_scan_pci_devices(void) return rc; } -/* Rescan PCI functions and refresh function handles. If fid is non-NULL only - * refresh the handle of the function matching @fid +static void __clp_refresh_fh(struct clp_fh_list_entry *entry, void *data) +{ + struct zpci_dev *zdev; + u32 fid = *((u32 *)data); + + if (!entry->vendor_id || fid != entry->fid) + return; + + zdev = get_zdev_by_fid(fid); + if (!zdev) + return; + + zdev->fh = entry->fh; +} + +/* + * Refresh the function handle of the function matching @fid */ -int clp_rescan_pci_devices_simple(u32 *fid) +static int clp_refresh_fh(u32 fid) { struct clp_req_rsp_list_pci *rrb; int rc; @@ -453,7 +451,7 @@ int clp_rescan_pci_devices_simple(u32 *fid) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, fid, __clp_update); + rc = clp_list_pci(rrb, &fid, __clp_refresh_fh); clp_free_block(rrb); return rc; From 6c6687a444cfa62548e080a52e6c2d5d41577a73 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Sep 2020 16:48:35 +0200 Subject: [PATCH 20/85] s390/kprobes: make insn pages read-only Make sure that kprobe insn pages are not writable anymore. Tested-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/kprobes.c | 57 ++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index d2a71d872638f..b34fa4eef7425 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -7,6 +7,7 @@ * s390 port, used ppc64 as template. 
Mike Grundy */ +#include #include #include #include @@ -32,17 +33,33 @@ DEFINE_INSN_CACHE_OPS(s390_insn); static int insn_page_in_use; static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE); +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (!page) + return NULL; + __set_memory((unsigned long) page, 1, SET_MEMORY_RO | SET_MEMORY_X); + return page; +} + +void free_insn_page(void *page) +{ + module_memfree(page); +} + static void *alloc_s390_insn_page(void) { if (xchg(&insn_page_in_use, 1) == 1) return NULL; - set_memory_x((unsigned long) &insn_page, 1); + __set_memory((unsigned long) &insn_page, 1, SET_MEMORY_RO | SET_MEMORY_X); return &insn_page; } static void free_s390_insn_page(void *page) { - set_memory_nx((unsigned long) page, 1); + __set_memory((unsigned long) page, 1, SET_MEMORY_RW | SET_MEMORY_NX); xchg(&insn_page_in_use, 0); } @@ -56,25 +73,29 @@ struct kprobe_insn_cache kprobe_s390_insn_slots = { static void copy_instruction(struct kprobe *p) { + kprobe_opcode_t insn[MAX_INSN_SIZE]; s64 disp, new_disp; u64 addr, new_addr; + unsigned int len; - memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8)); - p->opcode = p->ainsn.insn[0]; - if (!probe_is_insn_relative_long(p->ainsn.insn)) - return; - /* - * For pc-relative instructions in RIL-b or RIL-c format patch the - * RI2 displacement field. We have already made sure that the insn - * slot for the patched instruction is within the same 2GB area - * as the original instruction (either kernel image or module area). - * Therefore the new displacement will always fit. - */ - disp = *(s32 *)&p->ainsn.insn[1]; - addr = (u64)(unsigned long)p->addr; - new_addr = (u64)(unsigned long)p->ainsn.insn; - new_disp = ((addr + (disp * 2)) - new_addr) / 2; - *(s32 *)&p->ainsn.insn[1] = new_disp; + len = insn_length(*p->addr >> 8); + memcpy(&insn, p->addr, len); + p->opcode = insn[0]; + if (probe_is_insn_relative_long(&insn[0])) { + /* + * For pc-relative instructions in RIL-b or RIL-c format patch + * the RI2 displacement field. We have already made sure that + * the insn slot for the patched instruction is within the same + * 2GB area as the original instruction (either kernel image or + * module area). Therefore the new displacement will always fit. + */ + disp = *(s32 *)&insn[1]; + addr = (u64)(unsigned long)p->addr; + new_addr = (u64)(unsigned long)p->ainsn.insn; + new_disp = ((addr + (disp * 2)) - new_addr) / 2; + *(s32 *)&insn[1] = new_disp; + } + s390_kernel_write(p->ainsn.insn, &insn, len); } NOKPROBE_SYMBOL(copy_instruction); From 6bf9a639e76e1da8eb1ed29e037e900106e1dff4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 9 Sep 2020 11:14:52 +0200 Subject: [PATCH 21/85] s390/mm,ptdump: make page table dumping seq_file optional s390 version of ae5d1cf358a5 ("arm64: dump: Make the page table dumping seq_file optional"). Tested-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 36 ++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 09c7179cb17da..b7401a2f93f32 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -51,18 +51,34 @@ struct pg_state { const struct addr_marker *marker; }; +#define pt_dump_seq_printf(m, fmt, args...) 
\ +({ \ + struct seq_file *__m = (m); \ + \ + if (__m) \ + seq_printf(__m, fmt, ##args); \ +}) + +#define pt_dump_seq_puts(m, fmt) \ +({ \ + struct seq_file *__m = (m); \ + \ + if (__m) \ + seq_printf(__m, fmt); \ +}) + static void print_prot(struct seq_file *m, unsigned int pr, int level) { static const char * const level_name[] = { "ASCE", "PGD", "PUD", "PMD", "PTE" }; - seq_printf(m, "%s ", level_name[level]); + pt_dump_seq_printf(m, "%s ", level_name[level]); if (pr & _PAGE_INVALID) { - seq_printf(m, "I\n"); + pt_dump_seq_printf(m, "I\n"); return; } - seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW "); - seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n"); + pt_dump_seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW "); + pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n"); } static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) @@ -87,25 +103,25 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, if (level == -1) addr = max_addr; if (st->level == -1) { - seq_printf(m, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); st->start_address = addr; st->current_prot = prot; st->level = level; } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { - seq_printf(m, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, addr); + pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, addr); delta = (addr - st->start_address) >> 10; while (!(delta & 0x3ff) && unit[1]) { delta >>= 10; unit++; } - seq_printf(m, "%9lu%c ", delta, *unit); + pt_dump_seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); while (addr >= st->marker[1].start_address) { st->marker++; - seq_printf(m, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); } st->start_address = addr; st->current_prot = prot; From 08c8e685c7c9223f9c4ad6365e02bebd3f106480 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 9 Sep 2020 17:10:29 +0200 Subject: [PATCH 22/85] s390: add ARCH_HAS_DEBUG_WX support Checks the whole kernel address space for W+X mappings. Note that currently the first lowcore page unfortunately has to be mapped W+X. Therefore this is not reported as an insecure mapping. For the very same reason the wording is also different to other architectures if the test passes: On s390 it is "no unexpected W+X pages found" instead of "no W+X pages found".
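The actual test reduces to a small predicate over the protection bits the dumper already tracks: a range is flagged only if it is mapped, writable (no _PAGE_PROTECT) and executable (no _PAGE_NOEXEC), and is not the first lowcore page. A minimal userspace sketch of that logic; the bit values below are illustrative placeholders, not the real s390 PTE layout:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define _PAGE_NOEXEC	0x1U	/* placeholder value */
#define _PAGE_PROTECT	0x2U	/* placeholder value */
#define _PAGE_INVALID	0x4U	/* placeholder value */

/* @prot: accumulated protection bits, @end: end address of the range */
static bool is_unexpected_wx(unsigned int prot, unsigned long end)
{
	if (prot & _PAGE_INVALID)	/* not mapped at all */
		return false;
	if (prot & _PAGE_PROTECT)	/* read-only, so not W */
		return false;
	if (prot & _PAGE_NOEXEC)	/* not executable, so not X */
		return false;
	if (end == PAGE_SIZE)		/* first lowcore page is knowingly W+X */
		return false;
	return true;
}

int main(void)
{
	printf("%d\n", is_unexpected_wx(0, 8 * PAGE_SIZE));		/* 1: would be reported */
	printf("%d\n", is_unexpected_wx(_PAGE_NOEXEC, 8 * PAGE_SIZE));	/* 0 */
	printf("%d\n", is_unexpected_wx(0, PAGE_SIZE));			/* 0: lowcore exception */
	return 0;
}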
Tested-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + arch/s390/configs/debug_defconfig | 1 + arch/s390/configs/defconfig | 1 + arch/s390/include/asm/ptdump.h | 14 +++++++ arch/s390/mm/Makefile | 2 +- arch/s390/mm/dump_pagetables.c | 64 ++++++++++++++++++++++++++++++- arch/s390/mm/init.c | 2 + 7 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 arch/s390/include/asm/ptdump.h diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 85bf121211d10..2052b39b84597 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -60,6 +60,7 @@ config S390 def_bool y select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VM_PGTABLE + select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f79eafb597cb3..901723e4ed631 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -774,6 +774,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y CONFIG_PAGE_OWNER=y CONFIG_DEBUG_RODATA_TEST=y +CONFIG_DEBUG_WX=y CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_SELFTEST=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 9593cc8a9efd6..87da48c851305 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -758,6 +758,7 @@ CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_WX=y CONFIG_PTDUMP_DEBUGFS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_PANIC_ON_OOPS=y diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h new file mode 100644 index 0000000000000..f960b2896606a --- /dev/null +++ b/arch/s390/include/asm/ptdump.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_S390_PTDUMP_H +#define _ASM_S390_PTDUMP_H + +void ptdump_check_wx(void); + +static inline void debug_checkwx(void) +{ + if (IS_ENABLED(CONFIG_DEBUG_WX)) + ptdump_check_wx(); +} + +#endif /* _ASM_S390_PTDUMP_H */ diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 8ab9daeeace38..cd67e94c16aa2 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -8,7 +8,7 @@ obj-y += page-states.o pageattr.o pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PTDUMP_DEBUGFS) += dump_pagetables.o +obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index b7401a2f93f32..4b27c1a533de8 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,8 @@ struct pg_state { struct seq_file *seq; int level; unsigned int current_prot; + bool check_wx; + unsigned long wx_pages; unsigned long start_address; const struct addr_marker *marker; }; @@ -81,6 +84,26 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n"); } +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ +#ifdef CONFIG_DEBUG_WX + if (!st->check_wx) + return; + if (st->current_prot & _PAGE_INVALID) + return; + if (st->current_prot & _PAGE_PROTECT) + return; + if (st->current_prot & _PAGE_NOEXEC) + return; + /* The first lowcore page is currently still W+X. 
*/ + if (addr == PAGE_SIZE) + return; + WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n", + (void *)st->start_address); + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +#endif /* CONFIG_DEBUG_WX */ +} + static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { int width = sizeof(unsigned long) * 2; @@ -109,6 +132,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, st->level = level; } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { + note_prot_wx(st, addr); pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ", width, st->start_address, width, addr); @@ -129,6 +153,40 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } } +#ifdef CONFIG_DEBUG_WX +void ptdump_check_wx(void) +{ + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {.start = 0, .end = max_addr}, + {.start = 0, .end = 0}, + } + }, + .seq = NULL, + .level = -1, + .current_prot = 0, + .check_wx = true, + .wx_pages = 0, + .start_address = 0, + .marker = (struct addr_marker[]) { + { .start_address = 0, .name = NULL}, + { .start_address = -1, .name = NULL}, + }, + }; + + if (!MACHINE_HAS_NX) + return; + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + if (st.wx_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages); + else + pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n"); +} +#endif /* CONFIG_DEBUG_WX */ + +#ifdef CONFIG_PTDUMP_DEBUGFS static int ptdump_show(struct seq_file *m, void *v) { struct pg_state st = { @@ -142,6 +200,8 @@ static int ptdump_show(struct seq_file *m, void *v) .seq = m, .level = -1, .current_prot = 0, + .check_wx = false, + .wx_pages = 0, .start_address = 0, .marker = address_markers, }; @@ -154,6 +214,7 @@ static int ptdump_show(struct seq_file *m, void *v) return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); +#endif /* CONFIG_PTDUMP_DEBUGFS */ static int pt_dump_init(void) { @@ -167,7 +228,8 @@ static int pt_dump_init(void) address_markers[MODULES_NR].start_address = MODULES_VADDR; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; address_markers[VMALLOC_NR].start_address = VMALLOC_START; - debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); + if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); return 0; } device_initcall(pt_dump_init); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0d282081dc1f8..d3ddb43613611 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ void mark_rodata_ro(void) set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); + debug_checkwx(); } int set_memory_encrypted(unsigned long addr, int numpages) From d411e3c6744925f08a2c8682559ce77090144fa2 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 10 Sep 2020 22:25:13 +0200 Subject: [PATCH 23/85] s390/kasan: make shadow memory noexec ARCH_HAS_DEBUG_WX feature support brought attention to the fact that currently initial kasan shadow memory mapped without noexec flag. So fix that. Temporary initial identity mapping is still created without noexec, but it is replaced by properly set up paging later. 
Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/kasan_init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 99dd1c63a065f..1a27a71433495 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -99,8 +99,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO); if (!has_nx) pgt_prot_zero &= ~_PAGE_NOEXEC; - pgt_prot = pgprot_val(PAGE_KERNEL_EXEC); - sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC); + pgt_prot = pgprot_val(PAGE_KERNEL); + sgt_prot = pgprot_val(SEGMENT_KERNEL); + if (!has_nx || mode == POPULATE_ONE2ONE) { + pgt_prot &= ~_PAGE_NOEXEC; + sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC; + } while (address < end) { pg_dir = pgd_offset_k(address); From e670e64af1d46571be3d9b1177024d9c8520087c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 11 Sep 2020 12:51:59 +0200 Subject: [PATCH 24/85] s390/mm,ptdump: add couple of additional markers Signed-off-by: Vasily Gorbik [hca@linux.ibm.com: add more markers, rename some markers] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/pgtable.h | 1 + arch/s390/kernel/setup.c | 2 ++ arch/s390/mm/dump_pagetables.c | 26 +++++++++++++++++++++----- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index e9244b9fb5047..d87b83a77aff4 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -89,6 +89,7 @@ extern unsigned long VMALLOC_START; extern unsigned long VMALLOC_END; #define VMALLOC_DEFAULT_SIZE ((128UL << 30) - MODULES_LEN) extern struct page *vmemmap; +extern unsigned long vmemmap_size; #define VMEM_MAX_PHYS ((unsigned long) vmemmap) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index efd12221ecb4d..f04252cb6004c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -119,6 +119,7 @@ EXPORT_SYMBOL(VMALLOC_END); struct page *vmemmap; EXPORT_SYMBOL(vmemmap); +unsigned long vmemmap_size; unsigned long MODULES_VADDR; unsigned long MODULES_END; @@ -589,6 +590,7 @@ static void __init setup_memory_end(void) memory_end = min(memory_end, KASAN_SHADOW_START); vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); #endif + vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page); max_pfn = max_low_pfn = PFN_DOWN(memory_end); memblock_remove(memory_end, ULONG_MAX); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 4b27c1a533de8..052223c92fb1d 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -17,29 +17,41 @@ struct addr_marker { }; enum address_markers_idx { - IDENTITY_NR = 0, + IDENTITY_BEFORE_NR = 0, + IDENTITY_BEFORE_END_NR, KERNEL_START_NR, KERNEL_END_NR, + IDENTITY_AFTER_NR, + IDENTITY_AFTER_END_NR, #ifdef CONFIG_KASAN KASAN_SHADOW_START_NR, KASAN_SHADOW_END_NR, #endif VMEMMAP_NR, + VMEMMAP_END_NR, VMALLOC_NR, + VMALLOC_END_NR, MODULES_NR, + MODULES_END_NR, }; static struct addr_marker address_markers[] = { - [IDENTITY_NR] = {0, "Identity Mapping"}, + [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, + [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, + [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping 
Start"}, + [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"}, #ifdef CONFIG_KASAN [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"}, [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"}, #endif - [VMEMMAP_NR] = {0, "vmemmap Area"}, - [VMALLOC_NR] = {0, "vmalloc Area"}, - [MODULES_NR] = {0, "Modules Area"}, + [VMEMMAP_NR] = {0, "vmemmap Area Start"}, + [VMEMMAP_END_NR] = {0, "vmemmap Area End"}, + [VMALLOC_NR] = {0, "vmalloc Area Start"}, + [VMALLOC_END_NR] = {0, "vmalloc Area End"}, + [MODULES_NR] = {0, "Modules Area Start"}, + [MODULES_END_NR] = {0, "Modules Area End"}, { -1, NULL } }; @@ -225,9 +237,13 @@ static int pt_dump_init(void) */ max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); + address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end; address_markers[MODULES_NR].start_address = MODULES_VADDR; + address_markers[MODULES_END_NR].start_address = MODULES_END; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; + address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; + address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); return 0; From 1a80b54d1ce1556bba6a8d8cd9384d6a9dcb641a Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 7 Sep 2020 08:46:59 -0400 Subject: [PATCH 25/85] s390/uv: add destroy page call We don't need to export pages if we destroy the VM configuration afterwards anyway. Instead we can destroy the page which will zero it and then make it accessible to the host. Destroying is about twice as fast as the export. 
Signed-off-by: Janosch Frank Reviewed-by: Claudio Imbrenda Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/kvm/20200907124700.10374-2-frankja@linux.ibm.com/ Signed-off-by: Janosch Frank Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uv.h | 7 +++++++ arch/s390/kernel/uv.c | 20 ++++++++++++++++++++ arch/s390/mm/gmap.c | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index cff4b4c99b755..0325fc0469b7b 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -33,6 +33,7 @@ #define UVC_CMD_DESTROY_SEC_CPU 0x0121 #define UVC_CMD_CONV_TO_SEC_STOR 0x0200 #define UVC_CMD_CONV_FROM_SEC_STOR 0x0201 +#define UVC_CMD_DESTR_SEC_STOR 0x0202 #define UVC_CMD_SET_SEC_CONF_PARAMS 0x0300 #define UVC_CMD_UNPACK_IMG 0x0301 #define UVC_CMD_VERIFY_IMG 0x0302 @@ -344,6 +345,7 @@ static inline int is_prot_virt_host(void) } int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); +int uv_destroy_page(unsigned long paddr); int uv_convert_from_secure(unsigned long paddr); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); @@ -354,6 +356,11 @@ void adjust_to_uv_max(unsigned long *vmax); static inline void setup_uv(void) {} static inline void adjust_to_uv_max(unsigned long *vmax) {} +static inline int uv_destroy_page(unsigned long paddr) +{ + return 0; +} + static inline int uv_convert_from_secure(unsigned long paddr) { return 0; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index c296e5c8dbf97..d3399b8a9b237 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -118,6 +118,26 @@ static int uv_pin_shared(unsigned long paddr) return 0; } +/* + * Requests the Ultravisor to destroy a guest page and make it + * accessible to the host. The destroy clears the page instead of + * exporting. + * + * @paddr: Absolute host address of page to be destroyed + */ +int uv_destroy_page(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_DESTR_SEC_STOR, + .header.len = sizeof(uvcb), + .paddr = paddr + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} + /* * Requests the Ultravisor to encrypt a guest page and make it * accessible to the host for paging (export). diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 373542ca1113e..cfb0017f33a70 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2679,7 +2679,7 @@ static int __s390_reset_acc(pte_t *ptep, unsigned long addr, pte_t pte = READ_ONCE(*ptep); if (pte_present(pte)) - WARN_ON_ONCE(uv_convert_from_secure(pte_val(pte) & PAGE_MASK)); + WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK)); return 0; } From 980d5f9ab36b6cfe473a8371a7e11bd168c9e630 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Wed, 2 Sep 2020 16:52:06 +0200 Subject: [PATCH 26/85] s390/boot: enable .bss section for compressed kernel - Support static uninitialized variables in compressed kernel. 
- Remove chkbss script - Get rid of workarounds for not having .bss section Signed-off-by: Alexander Egorenkov Reviewed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/boot/Makefile | 4 ---- arch/s390/boot/compressed/Makefile | 4 ---- arch/s390/boot/compressed/decompressor.c | 1 - arch/s390/boot/compressed/vmlinux.lds.S | 22 +++++++++++++--------- arch/s390/boot/head.S | 6 ++++++ arch/s390/boot/ipl_parm.c | 4 ++-- arch/s390/boot/startup.c | 3 +++ arch/s390/kernel/setup.c | 2 +- arch/s390/scripts/Makefile.chkbss | 20 -------------------- 9 files changed, 25 insertions(+), 41 deletions(-) delete mode 100644 arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 45b33b83de08c..41a64b8dce252 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -73,7 +73,3 @@ $(obj)/startup.a: $(OBJECTS) FORCE install: sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" - -chkbss := $(obj-y) -chkbss-target := startup.a -include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index fa529c5b44865..b235ed95a3d89 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -62,7 +62,3 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed $(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE $(call if_changed,objcopy) - -chkbss := $(filter-out piggy.o info.o, $(obj-y)) -chkbss-target := vmlinux.bin -include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c index 368fd372c8757..3061b11c4d27f 100644 --- a/arch/s390/boot/compressed/decompressor.c +++ b/arch/s390/boot/compressed/decompressor.c @@ -16,7 +16,6 @@ * gzip declarations */ #define STATIC static -#define STATIC_RW_DATA static __section(.data) #undef memset #undef memcpy diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index 44561b2c37124..9427e2cd0c154 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -58,6 +58,19 @@ SECTIONS BOOT_DATA BOOT_DATA_PRESERVED + /* + * This is the BSS section of the decompressor and not of the decompressed Linux kernel. + * It will consume place in the decompressor's image. + */ + . = ALIGN(8); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + _ebss = .; + } + /* * uncompressed image info used by the decompressor it should match * struct vmlinux_info. It comes from .vmlinux.info section of @@ -81,15 +94,6 @@ SECTIONS FILL(0xff); . = ALIGN(4096); } - . = ALIGN(256); - .bss : { - _bss = . ; - *(.bss) - *(.bss.*) - *(COMMON) - . 
= ALIGN(8); /* For convenience during zeroing */ - _ebss = .; - } _end = .; /* Sections to be discarded */ diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index dae10961d0724..fd78755d996dc 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -313,6 +313,12 @@ ENTRY(startup_kdump) spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) l %r15,.Lstack-.LPG0(%r13) + // Clear decompressor's BSS section + larl %r2,_bss + slgr %r3,%r3 + larl %r4,_ebss + slgr %r4,%r2 + brasl %r14,memset brasl %r14,verify_facilities brasl %r14,startup_kernel diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 8e222a6660250..ae230ebd6420b 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -21,7 +21,7 @@ unsigned long __bootdata(memory_end); int __bootdata(memory_end_set); int __bootdata(noexec_disabled); -int kaslr_enabled __section(.data); +int kaslr_enabled; static inline int __diag308(unsigned long subcode, void *addr) { @@ -209,7 +209,7 @@ static void modify_fac_list(char *str) check_cleared_facilities(); } -static char command_line_buf[COMMAND_LINE_SIZE] __section(.data); +static char command_line_buf[COMMAND_LINE_SIZE]; void parse_boot_command_line(void) { char *param, *val; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 3b3a11f95269a..81835483169b8 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -120,6 +120,9 @@ static void handle_relocs(unsigned long offset) } } +/* + * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. + */ static void clear_bss_section(void) { memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f04252cb6004c..047793902ce67 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -307,7 +307,7 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL_GPL(pm_power_off); -void *restart_stack __section(.data); +void *restart_stack; unsigned long stack_alloc(void) { diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss deleted file mode 100644 index f4f4c2c6dee9e..0000000000000 --- a/arch/s390/scripts/Makefile.chkbss +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 - -chkbss-target ?= built-in.a -$(obj)/$(chkbss-target): chkbss - -chkbss-files := $(addsuffix .chkbss, $(chkbss)) -clean-files += $(chkbss-files) - -PHONY += chkbss -chkbss: $(addprefix $(obj)/, $(chkbss-files)) - -quiet_cmd_chkbss = CHKBSS $< - cmd_chkbss = \ - if ! $(OBJSIZE) --common $< | $(AWK) 'END { if ($$3) exit 1 }'; then \ - echo "error: $< .bss section is not empty" >&2; exit 1; \ - fi; \ - touch $@; - -$(obj)/%.o.chkbss: $(obj)/%.o - $(call cmd,chkbss) From 48111b4838480d1357783f4231c351bb2ba2d27d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 15 Sep 2020 12:52:36 +0200 Subject: [PATCH 27/85] s390/mm,ptdump: add proper ifdefs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ifdefs instead of IS_ENABLED() to avoid compile error for !PTDUMP_DEBUGFS: arch/s390/mm/dump_pagetables.c: In function ‘pt_dump_init’: arch/s390/mm/dump_pagetables.c:248:64: error: ‘ptdump_fops’ undeclared (first use in this function); did you mean ‘pidfd_fops’? 
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); Reported-by: Julian Wiedmann Fixes: 08c8e685c7c9 ("s390: add ARCH_HAS_DEBUG_WX support") Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 052223c92fb1d..0356ac6d7dad2 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -244,8 +244,9 @@ static int pt_dump_init(void) address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; - if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) - debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); +#ifdef CONFIG_PTDUMP_DEBUGFS + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); +#endif /* CONFIG_PTDUMP_DEBUGFS */ return 0; } device_initcall(pt_dump_init); From 4904e1941ee334bf1f90e4017a37f7c8a52b685c Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 15 Sep 2020 10:42:41 +0200 Subject: [PATCH 28/85] s390/pci: add missing pci_iov.h include this fixes a missing prototype compiler warning spotted by the kernel test robot. Fixes: abb95b7550f8 ("s390/pci: consolidate SR-IOV specific code") Reported-by: kernel test robot Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_iov.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index 35fca14ebb35f..ead062bf2b41c 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -13,6 +13,8 @@ #include #include +#include "pci_iov.h" + static struct resource iov_res = { .name = "PCI IOV res", .start = 0, From ee4b2ce6d1e3baa412148a7b0889f6643749c665 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 10 Sep 2020 22:51:17 +0200 Subject: [PATCH 29/85] s390/mm,ptdump: sort markers Kasan configuration options and size of physical memory present could affect kernel memory layout. In particular vmemmap, vmalloc and modules might come before kasan shadow or after it. To make ptdump correctly output markers in the right order markers have to be sorted. To preserve the original order of markers with the same start address avoid using sort() from lib/sort.c (which is not stable sorting algorithm) and sort markers in place. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 0356ac6d7dad2..8f9ff7e7187d7 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -228,6 +228,24 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); #endif /* CONFIG_PTDUMP_DEBUGFS */ +/* + * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple + * insertion sort to preserve the original order of markers with the same + * start address. 
+ */ +static void sort_address_markers(void) +{ + struct addr_marker tmp; + int i, j; + + for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) { + tmp = address_markers[i]; + for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--) + address_markers[j + 1] = address_markers[j]; + address_markers[j + 1] = tmp; + } +} + static int pt_dump_init(void) { /* @@ -244,6 +262,7 @@ static int pt_dump_init(void) address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; + sort_address_markers(); #ifdef CONFIG_PTDUMP_DEBUGFS debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); #endif /* CONFIG_PTDUMP_DEBUGFS */ From 8f78657c291f9e5ec26a2a9187938b374629d176 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 10 Sep 2020 22:54:58 +0200 Subject: [PATCH 30/85] s390/kasan: avoid unnecessary moving of vmemmap Currently vmemmap area is unconditionally moved beyond Kasan shadow memory. When Kasan is not enabled vmemmap area position is calculated in setup_memory_end() and depends on limiting factors like ultravisor secure storage limit. Try to follow the same logic with Kasan enabled as well and avoid unnecessary vmemmap area position changes unless it really intersects with Kasan shadow. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 047793902ce67..d63b08bacdf9d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -586,11 +586,15 @@ static void __init setup_memory_end(void) /* Take care that memory_end is set and <= vmemmap */ memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap); #ifdef CONFIG_KASAN - /* fit in kasan shadow memory region between 1:1 and vmemmap */ memory_end = min(memory_end, KASAN_SHADOW_START); - vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); #endif vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page); +#ifdef CONFIG_KASAN + /* move vmemmap above kasan shadow only if stands in a way */ + if (KASAN_SHADOW_END > (unsigned long)vmemmap && + (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START) + vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END); +#endif max_pfn = max_low_pfn = PFN_DOWN(memory_end); memblock_remove(memory_end, ULONG_MAX); From 1d6671ae46e52e383bc6eea8d33bdd32ae61b323 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 11 Sep 2020 11:38:21 +0200 Subject: [PATCH 31/85] s390/protvirt: parse prot_virt option in the decompressor To make early kernel address space layout definition possible parse prot_virt option in the decompressor and pass it to the uncompressed kernel. This enables kasan to take ultravisor secure storage limit into consideration and pre-define vmalloc position correctly. 
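Condensed sketch of the decompressor side (the full hunk is below); prot_virt_host itself becomes a __bootdata_preserved() variable so the parsed value survives into the uncompressed kernel:

  #if IS_ENABLED(CONFIG_KVM)
          if (!strcmp(param, "prot_virt")) {
                  /* same prot_virt=<bool> syntax as before, parsed with kstrtobool() */
                  rc = kstrtobool(val, &enabled);
                  if (!rc && enabled)
                          prot_virt_host = 1;
          }
  #endif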
Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 8 ++++++++ arch/s390/boot/uv.c | 3 +++ arch/s390/kernel/setup.c | 3 +-- arch/s390/kernel/uv.c | 40 ++++++++++++++++----------------------- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index ae230ebd6420b..92ebc4a58fe2d 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -254,6 +254,14 @@ void parse_boot_command_line(void) if (!strcmp(param, "nokaslr")) kaslr_enabled = 0; + +#if IS_ENABLED(CONFIG_KVM) + if (!strcmp(param, "prot_virt")) { + rc = kstrtobool(val, &enabled); + if (!rc && enabled) + prot_virt_host = 1; + } +#endif } } diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index f887a479cdc7e..a15c033f53ca4 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -7,6 +7,9 @@ #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); #endif +#if IS_ENABLED(CONFIG_KVM) +int __bootdata_preserved(prot_virt_host); +#endif struct uv_info __bootdata_preserved(uv_info); void uv_query_info(void) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d63b08bacdf9d..dd3fa7039cb0e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1137,8 +1137,7 @@ void __init setup_arch(char **cmdline_p) free_mem_detect_info(); remove_oldmem(); - if (is_prot_virt_host()) - setup_uv(); + setup_uv(); setup_memory_end(); setup_memory(); dma_contiguous_reserve(memory_end); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index d3399b8a9b237..4233245737bd9 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -26,33 +26,10 @@ int __bootdata_preserved(prot_virt_guest); struct uv_info __bootdata_preserved(uv_info); #if IS_ENABLED(CONFIG_KVM) -int prot_virt_host; +int __bootdata_preserved(prot_virt_host); EXPORT_SYMBOL(prot_virt_host); EXPORT_SYMBOL(uv_info); -static int __init prot_virt_setup(char *val) -{ - bool enabled; - int rc; - - rc = kstrtobool(val, &enabled); - if (!rc && enabled) - prot_virt_host = 1; - - if (is_prot_virt_guest() && prot_virt_host) { - prot_virt_host = 0; - pr_warn("Protected virtualization not available in protected guests."); - } - - if (prot_virt_host && !test_facility(158)) { - prot_virt_host = 0; - pr_warn("Protected virtualization not supported by the hardware."); - } - - return rc; -} -early_param("prot_virt", prot_virt_setup); - static int __init uv_init(unsigned long stor_base, unsigned long stor_len) { struct uv_cb_init uvcb = { @@ -74,6 +51,21 @@ void __init setup_uv(void) { unsigned long uv_stor_base; + if (!is_prot_virt_host()) + return; + + if (is_prot_virt_guest()) { + prot_virt_host = 0; + pr_warn("Protected virtualization not available in protected guests."); + return; + } + + if (!test_facility(158)) { + prot_virt_host = 0; + pr_warn("Protected virtualization not supported by the hardware."); + return; + } + uv_stor_base = (unsigned long)memblock_alloc_try_nid( uv_info.uv_base_stor_len, SZ_1M, SZ_2G, MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); From c2314cb2dd4140cb14b79a8139be34459777f421 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 11 Sep 2020 11:40:21 +0200 Subject: [PATCH 32/85] s390/protvirt: support ultravisor without secure storage limit Avoid potential crash due to lack of secure storage limit. Check that max_sec_stor_addr is not 0 before adjusting vmalloc position. 
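Condensed sketch of the guard (full hunk below); a max_sec_stor_addr of 0 now simply means that no limit is applied:

  void adjust_to_uv_max(unsigned long *vmax)
  {
          /* max_sec_stor_addr == 0: ultravisor imposes no secure storage limit */
          if (uv_info.max_sec_stor_addr)
                  *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
  }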
Signed-off-by: Vasily Gorbik --- arch/s390/kernel/uv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 4233245737bd9..1a166a1119c0e 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -90,7 +90,8 @@ void __init setup_uv(void) void adjust_to_uv_max(unsigned long *vmax) { - *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); + if (uv_info.max_sec_stor_addr) + *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); } /* From c360c9a238d1754b1ee8f1c3368ef36794e1daab Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 11 Sep 2020 11:44:47 +0200 Subject: [PATCH 33/85] s390/kasan: support protvirt with 4-level paging Currently the kernel crashes in Kasan instrumentation code if CONFIG_KASAN_S390_4_LEVEL_PAGING is used on protected virtualization capable machine where the ultravisor imposes addressing limitations on the host and those limitations are lower then KASAN_SHADOW_OFFSET. The problem is that Kasan has to know in advance where vmalloc/modules areas would be. With protected virtualization enabled vmalloc/modules areas are moved down to the ultravisor secure storage limit while kasan still expects them at the very end of 4-level paging address space. To fix that make Kasan recognize when protected virtualization is enabled and predefine vmalloc/modules areas position which are compliant with ultravisor secure storage limit. Kasan shadow itself stays in place and might reside above that ultravisor secure storage limit. One slight difference compaired to a kernel without Kasan enabled is that vmalloc/modules areas position is not reverted to default if ultravisor initialization fails. It would still be below the ultravisor secure storage limit. 
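Condensed sketch of the detection and the resulting vmalloc/modules placement in kasan_early_init() (the full hunks are in the diff below):

  static bool __init has_uv_sec_stor_limit(void)
  {
          /* keep these conditions in line with setup_uv() */
          if (!is_prot_virt_host())
                  return false;
          if (is_prot_virt_guest())
                  return false;
          if (!test_facility(158))
                  return false;
          return !!uv_info.max_sec_stor_addr;
  }

  /* 4-level paging: cap the end of the address space used for
   * vmalloc/modules at the ultravisor secure storage limit */
  untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE;
  if (has_uv_sec_stor_limit())
          kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr);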
Kernel layout with kasan, 4-level paging and protected virtualization enabled (ultravisor secure storage limit is at 0x0000800000000000): ---[ vmemmap Area Start ]--- 0x0000400000000000-0x0000400080000000 ---[ vmemmap Area End ]--- ---[ vmalloc Area Start ]--- 0x00007fe000000000-0x00007fff80000000 ---[ vmalloc Area End ]--- ---[ Modules Area Start ]--- 0x00007fff80000000-0x0000800000000000 ---[ Modules Area End ]--- ---[ Kasan Shadow Start ]--- 0x0018000000000000-0x001c000000000000 ---[ Kasan Shadow End ]--- 0x001c000000000000-0x0020000000000000 1P PGD I Kernel layout with kasan, 4-level paging and protected virtualization disabled/unsupported: ---[ vmemmap Area Start ]--- 0x0000400000000000-0x0000400060000000 ---[ vmemmap Area End ]--- ---[ Kasan Shadow Start ]--- 0x0018000000000000-0x001c000000000000 ---[ Kasan Shadow End ]--- ---[ vmalloc Area Start ]--- 0x001fffe000000000-0x001fffff80000000 ---[ vmalloc Area End ]--- ---[ Modules Area Start ]--- 0x001fffff80000000-0x0020000000000000 ---[ Modules Area End ]--- Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/kasan.h | 1 + arch/s390/kernel/setup.c | 23 +++++++++------------- arch/s390/kernel/uv.c | 3 +++ arch/s390/mm/kasan_init.c | 36 +++++++++++++++++++++++++++++------ 4 files changed, 43 insertions(+), 20 deletions(-) diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index 89d6886040c89..e9bf486de136b 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -19,6 +19,7 @@ extern void kasan_early_init(void); extern void kasan_copy_shadow(pgd_t *dst); extern void kasan_free_early_identity(void); +extern unsigned long kasan_vmax; #else static inline void kasan_early_init(void) { } static inline void kasan_copy_shadow(pgd_t *dst) { } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index dd3fa7039cb0e..ae2f4d9460486 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -552,22 +552,17 @@ static void __init setup_memory_end(void) unsigned long vmax, tmp; /* Choose kernel address space layout: 3 or 4 levels. */ - if (IS_ENABLED(CONFIG_KASAN)) { - vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) - ? _REGION1_SIZE - : _REGION2_SIZE; - } else { - tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE); - if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) - vmax = _REGION2_SIZE; /* 3-level kernel page table */ - else - vmax = _REGION1_SIZE; /* 4-level kernel page table */ - } - + tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; + tmp = tmp * (sizeof(struct page) + PAGE_SIZE); + if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE) + vmax = _REGION2_SIZE; /* 3-level kernel page table */ + else + vmax = _REGION1_SIZE; /* 4-level kernel page table */ if (is_prot_virt_host()) adjust_to_uv_max(&vmax); - +#ifdef CONFIG_KASAN + vmax = kasan_vmax; +#endif /* module area is at the end of the kernel address space. 
*/ MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 1a166a1119c0e..14bd9d58edc90 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -51,6 +51,9 @@ void __init setup_uv(void) { unsigned long uv_stor_base; + /* + * keep these conditions in line with kasan init code has_uv_sec_stor_limit() + */ if (!is_prot_virt_host()) return; diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 1a27a71433495..5646b39c728a9 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -11,7 +11,9 @@ #include #include #include +#include +unsigned long kasan_vmax; static unsigned long segment_pos __initdata; static unsigned long segment_low __initdata; static unsigned long pgalloc_pos __initdata; @@ -256,14 +258,31 @@ static void __init kasan_early_detect_facilities(void) } } +static bool __init has_uv_sec_stor_limit(void) +{ + /* + * keep these conditions in line with setup_uv() + */ + if (!is_prot_virt_host()) + return false; + + if (is_prot_virt_guest()) + return false; + + if (!test_facility(158)) + return false; + + return !!uv_info.max_sec_stor_addr; +} + void __init kasan_early_init(void) { unsigned long untracked_mem_end; unsigned long shadow_alloc_size; + unsigned long vmax_unlimited; unsigned long initrd_end; unsigned long asce_type; unsigned long memsize; - unsigned long vmax; unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO); pte_t pte_z; pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); @@ -291,7 +310,9 @@ void __init kasan_early_init(void) BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY); - untracked_mem_end = vmax = _REGION1_SIZE; + untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE; + if (has_uv_sec_stor_limit()) + kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr); asce_type = _ASCE_TYPE_REGION2; } else { /* 3 level paging */ @@ -299,7 +320,7 @@ void __init kasan_early_init(void) BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)); crst_table_init((unsigned long *)early_pg_dir, _REGION3_ENTRY_EMPTY); - untracked_mem_end = vmax = _REGION2_SIZE; + untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION2_SIZE; asce_type = _ASCE_TYPE_REGION3; } @@ -369,17 +390,20 @@ void __init kasan_early_init(void) /* populate kasan shadow (for identity mapping and zero page mapping) */ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP); if (IS_ENABLED(CONFIG_MODULES)) - untracked_mem_end = vmax - MODULES_LEN; + untracked_mem_end = kasan_vmax - MODULES_LEN; if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_mem_end = vmax - vmalloc_size - MODULES_LEN; + untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN; /* shallowly populate kasan shadow for vmalloc and modules */ kasan_early_vmemmap_populate(__sha(untracked_mem_end), - __sha(vmax), POPULATE_SHALLOW); + __sha(kasan_vmax), POPULATE_SHALLOW); } /* populate kasan shadow for untracked memory */ kasan_early_vmemmap_populate(__sha(max_physmem_end), __sha(untracked_mem_end), POPULATE_ZERO_SHADOW); + kasan_early_vmemmap_populate(__sha(kasan_vmax), + __sha(vmax_unlimited), + POPULATE_ZERO_SHADOW); /* memory allocated for identity mapping structs will be freed later */ pgalloc_freeable = pgalloc_pos; /* populate identity mapping */ From 52f72feba9dbe4fac848ae0d90bc0b85b17f91bc Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 15 Sep 2020 17:01:58 +0200 Subject: [PATCH 34/85] s390/zcrypt: 
remove set_fs() invocation in zcrypt device driver This patch reworks the zcrypt device driver so that the set_fs() invocation is not needed any more. Instead there is a new flag bool userspace passed through all the functions which tells if the pointer arguments are userspace or kernelspace. Together with the two new inline functions z_copy_from_user() and z_copy_to_user() which either invoke copy_from_user (userspace is true) or memcpy (userspace is false) the zcrypt dd and the AP bus now has no requirement for the set_fs() functionality any more. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Reviewed-by: Christoph Hellwig Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_api.c | 30 +++++------ drivers/s390/crypto/zcrypt_api.h | 26 ++++++++- drivers/s390/crypto/zcrypt_ccamisc.c | 32 +++-------- drivers/s390/crypto/zcrypt_ep11misc.c | 28 ++-------- drivers/s390/crypto/zcrypt_msgtype6.c | 78 +++++++++++++-------------- drivers/s390/crypto/zcrypt_msgtype6.h | 4 +- 6 files changed, 92 insertions(+), 106 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 4dbbfd88262cd..a711728c3857f 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -797,7 +797,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, return rc; } -static long _zcrypt_send_cprb(struct ap_perms *perms, +static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, struct ica_xcRB *xcRB) { struct zcrypt_card *zc, *pref_zc; @@ -813,7 +813,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, xcRB->status = 0; ap_init_message(&ap_msg); - rc = get_cprb_fc(xcRB, &ap_msg, &func_code, &domain); + rc = get_cprb_fc(userspace, xcRB, &ap_msg, &func_code, &domain); if (rc) goto out; @@ -878,7 +878,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, if (*domain == AUTOSEL_DOM) *domain = AP_QID_QUEUE(qid); - rc = pref_zq->ops->send_cprb(pref_zq, xcRB, &ap_msg); + rc = pref_zq->ops->send_cprb(userspace, pref_zq, xcRB, &ap_msg); spin_lock(&zcrypt_list_lock); zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); @@ -893,7 +893,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, long zcrypt_send_cprb(struct ica_xcRB *xcRB) { - return _zcrypt_send_cprb(&ap_perms, xcRB); + return _zcrypt_send_cprb(false, &ap_perms, xcRB); } EXPORT_SYMBOL(zcrypt_send_cprb); @@ -924,7 +924,7 @@ static bool is_desired_ep11_queue(unsigned int dev_qid, return false; } -static long _zcrypt_send_ep11_cprb(struct ap_perms *perms, +static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, struct ep11_urb *xcrb) { struct zcrypt_card *zc, *pref_zc; @@ -956,7 +956,7 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms, } uptr = (struct ep11_target_dev __force __user *) xcrb->targets; - if (copy_from_user(targets, uptr, + if (z_copy_from_user(userspace, targets, uptr, target_num * sizeof(*targets))) { func_code = 0; rc = -EFAULT; @@ -964,7 +964,7 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms, } } - rc = get_ep11cprb_fc(xcrb, &ap_msg, &func_code); + rc = get_ep11cprb_fc(userspace, xcrb, &ap_msg, &func_code); if (rc) goto out_free; @@ -1015,7 +1015,7 @@ static long _zcrypt_send_ep11_cprb(struct ap_perms *perms, } qid = pref_zq->queue->qid; - rc = pref_zq->ops->send_ep11_cprb(pref_zq, xcrb, &ap_msg); + rc = pref_zq->ops->send_ep11_cprb(userspace, pref_zq, xcrb, &ap_msg); spin_lock(&zcrypt_list_lock); zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); @@ -1032,7 +1032,7 @@ static 
long _zcrypt_send_ep11_cprb(struct ap_perms *perms, long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb) { - return _zcrypt_send_ep11_cprb(&ap_perms, xcrb); + return _zcrypt_send_ep11_cprb(false, &ap_perms, xcrb); } EXPORT_SYMBOL(zcrypt_send_ep11_cprb); @@ -1353,12 +1353,12 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg) if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB))) return -EFAULT; do { - rc = _zcrypt_send_cprb(perms, &xcRB); + rc = _zcrypt_send_cprb(true, perms, &xcRB); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = _zcrypt_send_cprb(perms, &xcRB); + rc = _zcrypt_send_cprb(true, perms, &xcRB); } while (rc == -EAGAIN); if (rc) ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n", @@ -1377,12 +1377,12 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg) if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) return -EFAULT; do { - rc = _zcrypt_send_ep11_cprb(perms, &xcrb); + rc = _zcrypt_send_ep11_cprb(true, perms, &xcrb); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = _zcrypt_send_ep11_cprb(perms, &xcrb); + rc = _zcrypt_send_ep11_cprb(true, perms, &xcrb); } while (rc == -EAGAIN); if (rc) ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc); @@ -1655,12 +1655,12 @@ static long trans_xcRB32(struct ap_perms *perms, struct file *filp, xcRB64.priority_window = xcRB32.priority_window; xcRB64.status = xcRB32.status; do { - rc = _zcrypt_send_cprb(perms, &xcRB64); + rc = _zcrypt_send_cprb(true, perms, &xcRB64); } while (rc == -EAGAIN); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = _zcrypt_send_cprb(perms, &xcRB64); + rc = _zcrypt_send_cprb(true, perms, &xcRB64); } while (rc == -EAGAIN); xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length; xcRB32.reply_data_length = xcRB64.reply_data_length; diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 599e68bf53f7a..19ddfc38e029f 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -59,9 +59,9 @@ struct zcrypt_ops { long (*rsa_modexpo)(struct zcrypt_queue *, struct ica_rsa_modexpo *); long (*rsa_modexpo_crt)(struct zcrypt_queue *, struct ica_rsa_modexpo_crt *); - long (*send_cprb)(struct zcrypt_queue *, struct ica_xcRB *, + long (*send_cprb)(bool userspace, struct zcrypt_queue *, struct ica_xcRB *, struct ap_message *); - long (*send_ep11_cprb)(struct zcrypt_queue *, struct ep11_urb *, + long (*send_ep11_cprb)(bool userspace, struct zcrypt_queue *, struct ep11_urb *, struct ap_message *); long (*rng)(struct zcrypt_queue *, char *, struct ap_message *); struct list_head list; /* zcrypt ops list. 
*/ @@ -145,4 +145,26 @@ void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus); int zcrypt_device_status_ext(int card, int queue, struct zcrypt_device_status_ext *devstatus); +static inline unsigned long z_copy_from_user(bool userspace, + void *to, + const void __user *from, + unsigned long n) +{ + if (likely(userspace)) + return copy_from_user(to, from, n); + memcpy(to, (void __force *) from, n); + return 0; +} + +static inline unsigned long z_copy_to_user(bool userspace, + void __user *to, + const void *from, + unsigned long n) +{ + if (likely(userspace)) + return copy_to_user(to, from, n); + memcpy((void __force *) to, from, n); + return 0; +} + #endif /* _ZCRYPT_API_H_ */ diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 5fafda6663ed4..40b59a77ec0cf 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -248,24 +248,6 @@ static inline void prep_xcrb(struct ica_xcRB *pxcrb, pxcrb->reply_control_blk_addr = (void __user *) prepcblk; } -/* - * Helper function which calls zcrypt_send_cprb with - * memory management segment adjusted to kernel space - * so that the copy_from_user called within this - * function do in fact copy from kernel space. - */ -static inline int _zcrypt_send_cprb(struct ica_xcRB *xcrb) -{ - int rc; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - rc = zcrypt_send_cprb(xcrb); - set_fs(old_fs); - - return rc; -} - /* * Generate (random) CCA AES DATA secure key. */ @@ -359,7 +341,7 @@ int cca_genseckey(u16 cardnr, u16 domain, prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ - rc = _zcrypt_send_cprb(&xcrb); + rc = zcrypt_send_cprb(&xcrb); if (rc) { DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, errno %d\n", __func__, (int) cardnr, (int) domain, rc); @@ -497,7 +479,7 @@ int cca_clr2seckey(u16 cardnr, u16 domain, u32 keybitsize, prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ - rc = _zcrypt_send_cprb(&xcrb); + rc = zcrypt_send_cprb(&xcrb); if (rc) { DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n", __func__, (int) cardnr, (int) domain, rc); @@ -624,7 +606,7 @@ int cca_sec2protkey(u16 cardnr, u16 domain, prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ - rc = _zcrypt_send_cprb(&xcrb); + rc = zcrypt_send_cprb(&xcrb); if (rc) { DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n", __func__, (int) cardnr, (int) domain, rc); @@ -850,7 +832,7 @@ int cca_gencipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ - rc = _zcrypt_send_cprb(&xcrb); + rc = zcrypt_send_cprb(&xcrb); if (rc) { DEBUG_ERR( "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n", @@ -1018,7 +1000,7 @@ static int _ip_cprb_helper(u16 cardnr, u16 domain, prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ - rc = _zcrypt_send_cprb(&xcrb); + rc = zcrypt_send_cprb(&xcrb); if (rc) { DEBUG_ERR( "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n", @@ -1235,7 +1217,7 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ - rc = _zcrypt_send_cprb(&xcrb); + rc 
= zcrypt_send_cprb(&xcrb); if (rc) { DEBUG_ERR( "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n", @@ -1366,7 +1348,7 @@ int cca_query_crypto_facility(u16 cardnr, u16 domain, prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ - rc = _zcrypt_send_cprb(&xcrb); + rc = zcrypt_send_cprb(&xcrb); if (rc) { DEBUG_ERR("%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n", __func__, (int) cardnr, (int) domain, rc); diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index 3c3d403abe925..60b6bec21c323 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -169,24 +169,6 @@ int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl, } EXPORT_SYMBOL(ep11_check_aeskeyblob); -/* - * Helper function which calls zcrypt_send_ep11_cprb with - * memory management segment adjusted to kernel space - * so that the copy_from_user called within this - * function do in fact copy from kernel space. - */ -static inline int _zcrypt_send_ep11_cprb(struct ep11_urb *urb) -{ - int rc; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - rc = zcrypt_send_ep11_cprb(urb); - set_fs(old_fs); - - return rc; -} - /* * Allocate and prepare ep11 cprb plus additional payload. */ @@ -399,7 +381,7 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type, req, sizeof(*req) + sizeof(*req_pl), rep, sizeof(*rep) + sizeof(*rep_pl) + buflen); - rc = _zcrypt_send_ep11_cprb(urb); + rc = zcrypt_send_ep11_cprb(urb); if (rc) { DEBUG_ERR( "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n", @@ -637,7 +619,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, req, sizeof(*req) + sizeof(*req_pl), rep, sizeof(*rep) + sizeof(*rep_pl)); - rc = _zcrypt_send_ep11_cprb(urb); + rc = zcrypt_send_ep11_cprb(urb); if (rc) { DEBUG_ERR( "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n", @@ -757,7 +739,7 @@ static int ep11_cryptsingle(u16 card, u16 domain, req, sizeof(*req) + req_pl_size, rep, sizeof(*rep) + rep_pl_size); - rc = _zcrypt_send_ep11_cprb(urb); + rc = zcrypt_send_ep11_cprb(urb); if (rc) { DEBUG_ERR( "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n", @@ -905,7 +887,7 @@ static int ep11_unwrapkey(u16 card, u16 domain, req, sizeof(*req) + req_pl_size, rep, sizeof(*rep) + sizeof(*rep_pl)); - rc = _zcrypt_send_ep11_cprb(urb); + rc = zcrypt_send_ep11_cprb(urb); if (rc) { DEBUG_ERR( "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n", @@ -1033,7 +1015,7 @@ static int ep11_wrapkey(u16 card, u16 domain, req, sizeof(*req) + req_pl_size, rep, sizeof(*rep) + sizeof(*rep_pl)); - rc = _zcrypt_send_ep11_cprb(urb); + rc = zcrypt_send_ep11_cprb(urb); if (rc) { DEBUG_ERR( "%s zcrypt_send_ep11_cprb(card=%d dom=%d) failed, rc=%d\n", diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index d77991c74c252..3db901883a5ca 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -388,7 +388,7 @@ struct type86_fmt2_msg { struct type86_fmt2_ext fmt2; } __packed; -static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, +static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, struct ica_xcRB *xcRB, unsigned int *fcode, unsigned short **dom) @@ -465,8 +465,8 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, msg->hdr.FromCardLen2 = xcRB->reply_data_length; /* prepare CPRB */ - if (copy_from_user(&(msg->cprbx), 
xcRB->request_control_blk_addr, - xcRB->request_control_blk_length)) + if (z_copy_from_user(userspace, &(msg->cprbx), xcRB->request_control_blk_addr, + xcRB->request_control_blk_length)) return -EFAULT; if (msg->cprbx.cprb_len + sizeof(msg->hdr.function_code) > xcRB->request_control_blk_length) @@ -484,16 +484,16 @@ static int XCRB_msg_to_type6CPRB_msgX(struct ap_message *ap_msg, /* copy data block */ if (xcRB->request_data_length && - copy_from_user(req_data, xcRB->request_data_address, - xcRB->request_data_length)) + z_copy_from_user(userspace, req_data, xcRB->request_data_address, + xcRB->request_data_length)) return -EFAULT; return 0; } -static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg, - struct ep11_urb *xcRB, - unsigned int *fcode) +static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap_msg, + struct ep11_urb *xcRB, + unsigned int *fcode) { unsigned int lfmt; static struct type6_hdr static_type6_ep11_hdr = { @@ -543,8 +543,8 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct ap_message *ap_msg, msg->hdr.FromCardLen1 = xcRB->resp_len; /* Import CPRB data from the ioctl input parameter */ - if (copy_from_user(&(msg->cprbx.cprb_len), - (char __force __user *)xcRB->req, xcRB->req_len)) { + if (z_copy_from_user(userspace, &(msg->cprbx.cprb_len), + (char __force __user *)xcRB->req, xcRB->req_len)) { return -EFAULT; } @@ -707,7 +707,7 @@ static int convert_type86_ica(struct zcrypt_queue *zq, * * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error. */ -static int convert_type86_xcrb(struct zcrypt_queue *zq, +static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq, struct ap_message *reply, struct ica_xcRB *xcRB) { @@ -715,15 +715,15 @@ static int convert_type86_xcrb(struct zcrypt_queue *zq, char *data = reply->msg; /* Copy CPRB to user */ - if (copy_to_user(xcRB->reply_control_blk_addr, - data + msg->fmt2.offset1, msg->fmt2.count1)) + if (z_copy_to_user(userspace, xcRB->reply_control_blk_addr, + data + msg->fmt2.offset1, msg->fmt2.count1)) return -EFAULT; xcRB->reply_control_blk_length = msg->fmt2.count1; /* Copy data buffer to user */ if (msg->fmt2.count2) - if (copy_to_user(xcRB->reply_data_addr, - data + msg->fmt2.offset2, msg->fmt2.count2)) + if (z_copy_to_user(userspace, xcRB->reply_data_addr, + data + msg->fmt2.offset2, msg->fmt2.count2)) return -EFAULT; xcRB->reply_data_length = msg->fmt2.count2; return 0; @@ -738,7 +738,7 @@ static int convert_type86_xcrb(struct zcrypt_queue *zq, * * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error. 
*/ -static int convert_type86_ep11_xcrb(struct zcrypt_queue *zq, +static int convert_type86_ep11_xcrb(bool userspace, struct zcrypt_queue *zq, struct ap_message *reply, struct ep11_urb *xcRB) { @@ -749,8 +749,8 @@ static int convert_type86_ep11_xcrb(struct zcrypt_queue *zq, return -EINVAL; /* Copy response CPRB to user */ - if (copy_to_user((char __force __user *)xcRB->resp, - data + msg->fmt2.offset1, msg->fmt2.count1)) + if (z_copy_to_user(userspace, (char __force __user *)xcRB->resp, + data + msg->fmt2.offset1, msg->fmt2.count1)) return -EFAULT; xcRB->resp_len = msg->fmt2.count1; return 0; @@ -814,9 +814,9 @@ static int convert_response_ica(struct zcrypt_queue *zq, } } -static int convert_response_xcrb(struct zcrypt_queue *zq, - struct ap_message *reply, - struct ica_xcRB *xcRB) +static int convert_response_xcrb(bool userspace, struct zcrypt_queue *zq, + struct ap_message *reply, + struct ica_xcRB *xcRB) { struct type86x_reply *msg = reply->msg; @@ -831,7 +831,7 @@ static int convert_response_xcrb(struct zcrypt_queue *zq, return convert_error(zq, reply); } if (msg->cprbx.cprb_ver_id == 0x02) - return convert_type86_xcrb(zq, reply, xcRB); + return convert_type86_xcrb(userspace, zq, reply, xcRB); fallthrough; /* wrong cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ xcRB->status = 0x0008044DL; /* HDD_InvalidParm */ @@ -848,8 +848,8 @@ static int convert_response_xcrb(struct zcrypt_queue *zq, } } -static int convert_response_ep11_xcrb(struct zcrypt_queue *zq, - struct ap_message *reply, struct ep11_urb *xcRB) +static int convert_response_ep11_xcrb(bool userspace, struct zcrypt_queue *zq, + struct ap_message *reply, struct ep11_urb *xcRB) { struct type86_ep11_reply *msg = reply->msg; @@ -861,7 +861,7 @@ static int convert_response_ep11_xcrb(struct zcrypt_queue *zq, if (msg->hdr.reply_code) return convert_error(zq, reply); if (msg->cprbx.cprb_ver_id == 0x04) - return convert_type86_ep11_xcrb(zq, reply, xcRB); + return convert_type86_ep11_xcrb(userspace, zq, reply, xcRB); fallthrough; /* wrong cprb version is an unknown resp */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; @@ -1095,9 +1095,9 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, * by the caller with ap_init_message(). Also the caller has to * make sure ap_release_message() is always called even on failure. 
*/ -unsigned int get_cprb_fc(struct ica_xcRB *xcRB, - struct ap_message *ap_msg, - unsigned int *func_code, unsigned short **dom) +unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *xcRB, + struct ap_message *ap_msg, + unsigned int *func_code, unsigned short **dom) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_XCRB, @@ -1112,7 +1112,7 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB, ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) return -ENOMEM; - return XCRB_msg_to_type6CPRB_msgX(ap_msg, xcRB, func_code, dom); + return XCRB_msg_to_type6CPRB_msgX(userspace, ap_msg, xcRB, func_code, dom); } /** @@ -1122,9 +1122,9 @@ unsigned int get_cprb_fc(struct ica_xcRB *xcRB, * CEXxC device to the request distributor * @xcRB: pointer to the send_cprb request buffer */ -static long zcrypt_msgtype6_send_cprb(struct zcrypt_queue *zq, - struct ica_xcRB *xcRB, - struct ap_message *ap_msg) +static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, + struct ica_xcRB *xcRB, + struct ap_message *ap_msg) { int rc; struct response_type *rtype = (struct response_type *)(ap_msg->private); @@ -1135,7 +1135,7 @@ static long zcrypt_msgtype6_send_cprb(struct zcrypt_queue *zq, if (rc == 0) { rc = ap_msg->rc; if (rc == 0) - rc = convert_response_xcrb(zq, ap_msg, xcRB); + rc = convert_response_xcrb(userspace, zq, ap_msg, xcRB); } else /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); @@ -1150,9 +1150,9 @@ static long zcrypt_msgtype6_send_cprb(struct zcrypt_queue *zq, * by the caller with ap_init_message(). Also the caller has to * make sure ap_release_message() is always called even on failure. */ -unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb, - struct ap_message *ap_msg, - unsigned int *func_code) +unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *xcrb, + struct ap_message *ap_msg, + unsigned int *func_code) { struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_EP11, @@ -1167,7 +1167,7 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb, ap_msg->private = kmemdup(&resp_type, sizeof(resp_type), GFP_KERNEL); if (!ap_msg->private) return -ENOMEM; - return xcrb_msg_to_type6_ep11cprb_msgx(ap_msg, xcrb, func_code); + return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, func_code); } /** @@ -1177,7 +1177,7 @@ unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb, * CEX4P device to the request distributor * @xcRB: pointer to the ep11 user request block */ -static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_queue *zq, +static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *zq, struct ep11_urb *xcrb, struct ap_message *ap_msg) { @@ -1237,7 +1237,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_queue *zq, if (rc == 0) { rc = ap_msg->rc; if (rc == 0) - rc = convert_response_ep11_xcrb(zq, ap_msg, xcrb); + rc = convert_response_ep11_xcrb(userspace, zq, ap_msg, xcrb); } else /* Signal pending. 
*/ ap_cancel_message(zq->queue, ap_msg); diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 0de280a81dd40..0a0bf074206bb 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -96,9 +96,9 @@ struct type86_fmt2_ext { unsigned int offset4; /* 0x00000000 */ } __packed; -unsigned int get_cprb_fc(struct ica_xcRB *, struct ap_message *, +unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *, struct ap_message *, unsigned int *, unsigned short **); -unsigned int get_ep11cprb_fc(struct ep11_urb *, struct ap_message *, +unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *, struct ap_message *, unsigned int *); unsigned int get_rng_fc(struct ap_message *, int *, unsigned int *); From fc3f61e1bcd5f88bfb7241bf509a3f63bab49ea8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 14 Sep 2020 13:15:15 +0200 Subject: [PATCH 35/85] s390/dis: get rid of set_fs() usage Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/dis.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index f304802ecf7be..a7eab7be4db05 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -482,31 +482,37 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) return (int) (ptr - buffer); } +static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len) +{ + if (user_mode(regs)) { + if (copy_from_user(dst, (char __user *)src, len)) + return -EFAULT; + } else { + if (copy_from_kernel_nofault(dst, src, len)) + return -EFAULT; + } + return 0; +} + void show_code(struct pt_regs *regs) { char *mode = user_mode(regs) ? "User" : "Krnl"; unsigned char code[64]; char buffer[128], *ptr; - mm_segment_t old_fs; unsigned long addr; int start, end, opsize, hops, i; /* Get a snapshot of the 64 bytes surrounding the fault address. */ - old_fs = get_fs(); - set_fs(user_mode(regs) ? USER_DS : KERNEL_DS); for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { addr = regs->psw.addr - 34 + start; - if (__copy_from_user(code + start - 2, - (char __user *) addr, 2)) + if (copy_from_regs(regs, code + start - 2, (void *)addr, 2)) break; } for (end = 32; end < 64; end += 2) { addr = regs->psw.addr + end - 32; - if (__copy_from_user(code + end, - (char __user *) addr, 2)) + if (copy_from_regs(regs, code + end, (void *)addr, 2)) break; } - set_fs(old_fs); /* Code snapshot useable ? 
*/ if ((regs->psw.addr & 1) || start >= end) { printk("%s Code: Bad PSW.\n", mode); From 110a6dbb2eca6b10bf60c61a51063d7fe1e55078 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 14 Sep 2020 13:42:25 +0200 Subject: [PATCH 36/85] s390/uaccess: add HAVE_GET_KERNEL_NOFAULT support Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 111 ++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f09444d6aeab3..23c85801cf043 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -278,4 +278,115 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo int copy_to_user_real(void __user *dest, void *src, unsigned long count); void *s390_kernel_write(void *dst, const void *src, size_t size); +#define HAVE_GET_KERNEL_NOFAULT + +int __noreturn __put_kernel_bad(void); + +#define __put_kernel_asm(val, to, insn) \ +({ \ + int __rc; \ + \ + asm volatile( \ + "0: " insn " %2,%1\n" \ + "1: xr %0,%0\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %0,%3\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : "=d" (__rc), "+Q" (*(to)) \ + : "d" (val), "K" (-EFAULT) \ + : "cc"); \ + __rc; \ +}) + +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + u64 __x = (u64)(*((type *)(src))); \ + int __pk_err; \ + \ + switch (sizeof(type)) { \ + case 1: \ + __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \ + break; \ + case 2: \ + __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \ + break; \ + case 4: \ + __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \ + break; \ + case 8: \ + __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \ + break; \ + default: \ + __pk_err = __put_kernel_bad(); \ + break; \ + } \ + if (unlikely(__pk_err)) \ + goto err_label; \ +} while (0) + +int __noreturn __get_kernel_bad(void); + +#define __get_kernel_asm(val, from, insn) \ +({ \ + int __rc; \ + \ + asm volatile( \ + "0: " insn " %1,%2\n" \ + "1: xr %0,%0\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %0,%3\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : "=d" (__rc), "+d" (val) \ + : "Q" (*(from)), "K" (-EFAULT) \ + : "cc"); \ + __rc; \ +}) + +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __gk_err; \ + \ + switch (sizeof(type)) { \ + case 1: { \ + u8 __x = 0; \ + \ + __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \ + *((type *)(dst)) = (type)__x; \ + break; \ + }; \ + case 2: { \ + u16 __x = 0; \ + \ + __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \ + *((type *)(dst)) = (type)__x; \ + break; \ + }; \ + case 4: { \ + u32 __x = 0; \ + \ + __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \ + *((type *)(dst)) = (type)__x; \ + break; \ + }; \ + case 8: { \ + u64 __x = 0; \ + \ + __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \ + *((type *)(dst)) = (type)__x; \ + break; \ + }; \ + default: \ + __gk_err = __get_kernel_bad(); \ + break; \ + } \ + if (unlikely(__gk_err)) \ + goto err_label; \ +} while (0) + #endif /* __S390_UACCESS_H */ From 61f2e77489182b9b0e2fabe875e151fd46d286a1 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 16 Sep 2020 10:50:29 +0800 Subject: [PATCH 37/85] s390/diag: convert to use DEFINE_SEQ_ATTRIBUTE macro Use DEFINE_SEQ_ATTRIBUTE macro to simplify the code. 
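DEFINE_SEQ_ATTRIBUTE(show_diag_stat) generates both the open handler and the file_operations from show_diag_stat_sops, roughly equivalent to the removed boilerplate (sketch; the macro additionally sets .owner and forwards inode->i_private):

  static int show_diag_stat_open(struct inode *inode, struct file *file)
  {
          return seq_open(file, &show_diag_stat_sops);
  }

  static const struct file_operations show_diag_stat_fops = {
          .open    = show_diag_stat_open,
          .read    = seq_read,
          .llseek  = seq_lseek,
          .release = seq_release,
  };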
Signed-off-by: Liu Shixin Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/diag.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index ccba63aaeb470..b8b0cd7b008fd 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -104,18 +104,7 @@ static const struct seq_operations show_diag_stat_sops = { .show = show_diag_stat, }; -static int show_diag_stat_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &show_diag_stat_sops); -} - -static const struct file_operations show_diag_stat_fops = { - .open = show_diag_stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - +DEFINE_SEQ_ATTRIBUTE(show_diag_stat); static int __init show_diag_stat_init(void) { From 48175fed1deaf8f37e918f504a5cff7504c583ad Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Wed, 16 Sep 2020 14:21:30 +0800 Subject: [PATCH 38/85] s390/ap: remove unnecessary spin_lock_init() The spinlock ap_poll_timer_lock is initialized statically. It is unnecessary to initialize by spin_lock_init(). Signed-off-by: Qinglang Miao Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 24a1940b829ee..231a98c9165d5 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1575,7 +1575,6 @@ static int __init ap_module_init(void) */ if (MACHINE_IS_VM) poll_timeout = 1500000; - spin_lock_init(&ap_poll_timer_lock); hrtimer_init(&ap_poll_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ap_poll_timer.function = ap_poll_timeout; From bcf1650c9b826602ad860f4465a3b66be611508a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 16 Sep 2020 12:02:49 +0200 Subject: [PATCH 39/85] s390/boot: avoid unnecessary zeroing of .bss section .bss section is a part of the decompressor's image now, linker fills it with zeros already. No need do it with memset additionally. Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/boot/head.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index fd78755d996dc..dae10961d0724 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -313,12 +313,6 @@ ENTRY(startup_kdump) spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) l %r15,.Lstack-.LPG0(%r13) - // Clear decompressor's BSS section - larl %r2,_bss - slgr %r3,%r3 - larl %r4,_ebss - slgr %r4,%r2 - brasl %r14,memset brasl %r14,verify_facilities brasl %r14,startup_kernel From 14ab622432415326ab832441ec5d19003a3b1176 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 16 Sep 2020 12:06:02 +0200 Subject: [PATCH 40/85] s390/sclp: clean up unneeded .data section usage Since commit 980d5f9ab36b ("s390/boot: enable .bss section for compressed kernel") .bss section usage is no longer restricted. .bss section is a part of the decompressor's image and is zeroed by the linker. For that reason clean up now unneeded .data section usage. 
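Concretely this boils down to dropping the explicit section placement, e.g. (sketch, the real hunks follow):

  -int sclp_init_state __section(.data) = sclp_init_state_uninitialized;
  +int sclp_init_state = sclp_init_state_uninitialized;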
Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- drivers/s390/char/sclp_early_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 7737470f8498f..958621326ece7 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -17,12 +17,12 @@ static struct read_info_sccb __bootdata(sclp_info_sccb); static int __bootdata(sclp_info_sccb_valid); char *sclp_early_sccb = (char *) EARLY_SCCB_OFFSET; -int sclp_init_state __section(.data) = sclp_init_state_uninitialized; +int sclp_init_state = sclp_init_state_uninitialized; /* * Used to keep track of the size of the event masks. Qemu until version 2.11 * only supports 4 and needs a workaround. */ -bool sclp_mask_compat_mode __section(.data); +bool sclp_mask_compat_mode; void sclp_early_wait_irq(void) { From 5596c4c106baf3c915724dc0ae3ed293b4d1af55 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 18 Sep 2020 19:04:36 +0200 Subject: [PATCH 41/85] s390/sclp: remove unused sclp_early_printk_forced This reverts commit 55a5542a5462 ("s390/hibernate: fix error handling when suspend cpu != resume cpu"). It added sclp_early_printk_force() which is no longer used since commit 394216275c7d ("s390: remove broken hibernate / power management support"). No hibernate - no problem. Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/sclp.h | 3 +-- arch/s390/kernel/early_printk.c | 2 +- drivers/s390/char/sclp_early_core.c | 11 +++-------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c563f8368b19b..90f34c7e2752b 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -114,8 +114,7 @@ int sclp_early_get_core_info(struct sclp_core_info *info); void sclp_early_get_ipl_info(struct sclp_ipl_info *info); void sclp_early_detect(void); void sclp_early_printk(const char *s); -void sclp_early_printk_force(const char *s); -void __sclp_early_printk(const char *s, unsigned int len, unsigned int force); +void __sclp_early_printk(const char *s, unsigned int len); int sclp_early_get_memsize(unsigned long *mem); int sclp_early_get_hsa_size(unsigned long *hsa_size); diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c index 6f24d83bc5dc0..d9d53f44008ac 100644 --- a/arch/s390/kernel/early_printk.c +++ b/arch/s390/kernel/early_printk.c @@ -10,7 +10,7 @@ static void sclp_early_write(struct console *con, const char *s, unsigned int len) { - __sclp_early_printk(s, len, 0); + __sclp_early_printk(s, len); } static struct console sclp_early_console = { diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index 958621326ece7..a960afa974bff 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -214,11 +214,11 @@ static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220) * Output one or more lines of text on the SCLP console (VT220 and / * or line-mode). 
*/ -void __sclp_early_printk(const char *str, unsigned int len, unsigned int force) +void __sclp_early_printk(const char *str, unsigned int len) { int have_linemode, have_vt220; - if (!force && sclp_init_state != sclp_init_state_uninitialized) + if (sclp_init_state != sclp_init_state_uninitialized) return; if (sclp_early_setup(0, &have_linemode, &have_vt220) != 0) return; @@ -231,12 +231,7 @@ void __sclp_early_printk(const char *str, unsigned int len, unsigned int force) void sclp_early_printk(const char *str) { - __sclp_early_printk(str, strlen(str), 0); -} - -void sclp_early_printk_force(const char *str) -{ - __sclp_early_printk(str, strlen(str), 1); + __sclp_early_printk(str, strlen(str)); } int __init sclp_early_read_info(void) From 32ca04bba6fd9775a0630534f3db52e27b062e6c Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 4 Sep 2020 16:11:37 +0200 Subject: [PATCH 42/85] s390/zcrypt: Support for CCA APKA master keys Support for CCA APKA (used for CCA ECC keys) master keys. The existing mkvps sysfs attribute for each queue for cards in CCA mode is extended to show the APKA master key register states and verification pattern: Improve the mkvps sysfs attribute to display the APKA master key verification patterns for old, current and new master key registers. The APKA master key is used to encrypt CCA ECC secure keys. The syntax is analog to the existing AES mk verification patterns: APKA NEW: APKA CUR: APKA OLD: with : 'empty' or 'partial' or 'full' : 'valid' or 'invalid' : 'valid' or 'invalid' , , 8 byte hex string with leading 0x MKVP means Master Key Verification Pattern and is a folded hash over the key value. Only the states 'full' and 'valid' result in displaying a useful mkvp, otherwise a mkvp of all bytes zero is shown. If for any reason the FQ fails and the (cached) information is not available, the state '-' will be shown with the mkvp value also '-'. The values shown here are the very same as the cca panel tools displays. The internal function cca_findcard2() also supports to match against the APKA master key verification patterns and the pkey kernel module which uses this function needed compatible rewrite of these invocations. 
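A hypothetical read of the extended mkvps attribute could then look like this; the sysfs path and all verification pattern values below are made up purely for illustration, and the AES lines are unchanged from before:

  $ cat /sys/devices/ap/card04/04.0011/mkvps
  AES NEW: empty 0x0000000000000000
  AES CUR: valid 0x05eb9f0ad6875fde
  AES OLD: invalid 0x0000000000000000
  APKA NEW: empty 0x0000000000000000
  APKA CUR: valid 0x2a1cf40d83bed4c9
  APKA OLD: invalid 0x0000000000000000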
Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/pkey_api.c | 18 +++--- drivers/s390/crypto/zcrypt_ccamisc.c | 85 ++++++++++++++++++---------- drivers/s390/crypto/zcrypt_ccamisc.h | 30 +++++++--- drivers/s390/crypto/zcrypt_cex2c.c | 39 +++++++++++-- drivers/s390/crypto/zcrypt_cex4.c | 39 +++++++++++-- 5 files changed, 154 insertions(+), 57 deletions(-) diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 5896e5282a4e8..e48c13acc5daf 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -661,13 +661,14 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, *ksize = (enum pkey_key_size) t->bitsize; rc = cca_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, - ZCRYPT_CEX3C, t->mkvp, 0, 1); + ZCRYPT_CEX3C, AES_MK_SET, t->mkvp, 0, 1); if (rc == 0 && flags) *flags = PKEY_FLAGS_MATCH_CUR_MKVP; if (rc == -ENODEV) { rc = cca_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, - ZCRYPT_CEX3C, 0, t->mkvp, 1); + ZCRYPT_CEX3C, AES_MK_SET, + 0, t->mkvp, 1); if (rc == 0 && flags) *flags = PKEY_FLAGS_MATCH_ALT_MKVP; } @@ -697,13 +698,14 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, } rc = cca_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, - ZCRYPT_CEX6, t->mkvp0, 0, 1); + ZCRYPT_CEX6, AES_MK_SET, t->mkvp0, 0, 1); if (rc == 0 && flags) *flags = PKEY_FLAGS_MATCH_CUR_MKVP; if (rc == -ENODEV) { rc = cca_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain, - ZCRYPT_CEX6, 0, t->mkvp0, 1); + ZCRYPT_CEX6, AES_MK_SET, + 0, t->mkvp0, 1); if (rc == 0 && flags) *flags = PKEY_FLAGS_MATCH_ALT_MKVP; } @@ -863,7 +865,8 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, return -EINVAL; } rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, - minhwtype, cur_mkvp, old_mkvp, 1); + minhwtype, AES_MK_SET, + cur_mkvp, old_mkvp, 1); if (rc) goto out; } else @@ -900,7 +903,8 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype, if (ktype == PKEY_TYPE_CCA_CIPHER) minhwtype = ZCRYPT_CEX6; rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, - minhwtype, cur_mkvp, old_mkvp, 1); + minhwtype, AES_MK_SET, + cur_mkvp, old_mkvp, 1); if (rc) goto out; } else if (ktype == PKEY_TYPE_EP11) { @@ -1589,7 +1593,7 @@ static ssize_t pkey_ccacipher_aes_attr_read(enum pkey_key_size keybits, /* build a list of apqns able to generate an cipher key */ rc = cca_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF, - ZCRYPT_CEX6, 0, 0, 0); + ZCRYPT_CEX6, 0, 0, 0, 0); if (rc) return rc; diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index 40b59a77ec0cf..e969188a1ec4e 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -1506,21 +1506,38 @@ static int fetch_cca_info(u16 cardnr, u16 domain, struct cca_info *ci) rarray, &rlen, varray, &vlen); if (rc == 0 && rlen >= 10*8 && vlen >= 204) { memcpy(ci->serial, rarray, 8); - ci->new_mk_state = (char) rarray[7*8]; - ci->cur_mk_state = (char) rarray[8*8]; - ci->old_mk_state = (char) rarray[9*8]; - if (ci->old_mk_state == '2') - memcpy(&ci->old_mkvp, varray + 172, 8); - if (ci->cur_mk_state == '2') - memcpy(&ci->cur_mkvp, varray + 184, 8); - if (ci->new_mk_state == '3') - memcpy(&ci->new_mkvp, varray + 196, 8); - found = 1; + ci->new_aes_mk_state = (char) rarray[7*8]; + ci->cur_aes_mk_state = (char) rarray[8*8]; + ci->old_aes_mk_state = (char) rarray[9*8]; + if (ci->old_aes_mk_state == '2') + memcpy(&ci->old_aes_mkvp, varray + 172, 8); + if (ci->cur_aes_mk_state == '2') + memcpy(&ci->cur_aes_mkvp, 
varray + 184, 8); + if (ci->new_aes_mk_state == '3') + memcpy(&ci->new_aes_mkvp, varray + 196, 8); + found++; + } + if (!found) + goto out; + rlen = vlen = PAGE_SIZE/2; + rc = cca_query_crypto_facility(cardnr, domain, "STATICSB", + rarray, &rlen, varray, &vlen); + if (rc == 0 && rlen >= 10*8 && vlen >= 240) { + ci->new_apka_mk_state = (char) rarray[7*8]; + ci->cur_apka_mk_state = (char) rarray[8*8]; + ci->old_apka_mk_state = (char) rarray[9*8]; + if (ci->old_apka_mk_state == '2') + memcpy(&ci->old_apka_mkvp, varray + 208, 8); + if (ci->cur_apka_mk_state == '2') + memcpy(&ci->cur_apka_mkvp, varray + 220, 8); + if (ci->new_apka_mk_state == '3') + memcpy(&ci->new_apka_mkvp, varray + 232, 8); + found++; } +out: free_page((unsigned long) pg); - - return found ? 0 : -ENOENT; + return found == 2 ? 0 : -ENOENT; } /* @@ -1574,16 +1591,16 @@ static int findcard(u64 mkvp, u16 *pcardnr, u16 *pdomain, /* enabled CCA card, check current mkvp from cache */ if (cca_info_cache_fetch(card, dom, &ci) == 0 && ci.hwtype >= minhwtype && - ci.cur_mk_state == '2' && - ci.cur_mkvp == mkvp) { + ci.cur_aes_mk_state == '2' && + ci.cur_aes_mkvp == mkvp) { if (!verify) break; /* verify: refresh card info */ if (fetch_cca_info(card, dom, &ci) == 0) { cca_info_cache_update(card, dom, &ci); if (ci.hwtype >= minhwtype && - ci.cur_mk_state == '2' && - ci.cur_mkvp == mkvp) + ci.cur_aes_mk_state == '2' && + ci.cur_aes_mkvp == mkvp) break; } } @@ -1605,12 +1622,12 @@ static int findcard(u64 mkvp, u16 *pcardnr, u16 *pdomain, if (fetch_cca_info(card, dom, &ci) == 0) { cca_info_cache_update(card, dom, &ci); if (ci.hwtype >= minhwtype && - ci.cur_mk_state == '2' && - ci.cur_mkvp == mkvp) + ci.cur_aes_mk_state == '2' && + ci.cur_aes_mkvp == mkvp) break; if (ci.hwtype >= minhwtype && - ci.old_mk_state == '2' && - ci.old_mkvp == mkvp && + ci.old_aes_mk_state == '2' && + ci.old_aes_mkvp == mkvp && oi < 0) oi = i; } @@ -1664,7 +1681,8 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify) EXPORT_SYMBOL(cca_findcard); int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, - int minhwtype, u64 cur_mkvp, u64 old_mkvp, int verify) + int minhwtype, int mktype, u64 cur_mkvp, u64 old_mkvp, + int verify) { struct zcrypt_device_status_ext *device_status; u32 *_apqns = NULL, _nr_apqns = 0; @@ -1706,7 +1724,9 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, if (cca_get_info(card, dom, &ci, verify)) continue; /* current master key needs to be valid */ - if (ci.cur_mk_state != '2') + if (mktype == AES_MK_SET && ci.cur_aes_mk_state != '2') + continue; + if (mktype == APKA_MK_SET && ci.cur_apka_mk_state != '2') continue; /* check min hardware type */ if (minhwtype > 0 && minhwtype > ci.hwtype) @@ -1714,13 +1734,20 @@ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, if (cur_mkvp || old_mkvp) { /* check mkvps */ curmatch = oldmatch = 0; - if (cur_mkvp && cur_mkvp == ci.cur_mkvp) - curmatch = 1; - if (old_mkvp && ci.old_mk_state == '2' && - old_mkvp == ci.old_mkvp) - oldmatch = 1; - if ((cur_mkvp || old_mkvp) && - (curmatch + oldmatch < 1)) + if (mktype == AES_MK_SET) { + if (cur_mkvp && cur_mkvp == ci.cur_aes_mkvp) + curmatch = 1; + if (old_mkvp && ci.old_aes_mk_state == '2' && + old_mkvp == ci.old_aes_mkvp) + oldmatch = 1; + } else { + if (cur_mkvp && cur_mkvp == ci.cur_apka_mkvp) + curmatch = 1; + if (old_mkvp && ci.old_apka_mk_state == '2' && + old_mkvp == ci.old_apka_mkvp) + oldmatch = 1; + } + if (curmatch + oldmatch < 1) continue; } /* apqn passed all filtering 
criterons, add to the array */ diff --git a/drivers/s390/crypto/zcrypt_ccamisc.h b/drivers/s390/crypto/zcrypt_ccamisc.h index 8b7a641671c90..4d88a1d6af218 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.h +++ b/drivers/s390/crypto/zcrypt_ccamisc.h @@ -186,6 +186,8 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify); * - if verify is enabled and a cur_mkvp and/or old_mkvp * value is given, then refetch the cca_info and make sure the current * cur_mkvp or old_mkvp values of the apqn are used. + * The mktype determines which set of master keys to use: + * 0 = AES_MK_SET - AES MK set, 1 = APKA MK_SET - APKA MK set * The array of apqn entries is allocated with kmalloc and returned in *apqns; * the number of apqns stored into the list is returned in *nr_apqns. One apqn * entry is simple a 32 bit value with 16 bit cardnr and 16 bit domain nr and @@ -194,18 +196,28 @@ int cca_findcard(const u8 *key, u16 *pcardnr, u16 *pdomain, int verify); * -ENODEV is returned. */ int cca_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, - int minhwtype, u64 cur_mkvp, u64 old_mkvp, int verify); + int minhwtype, int mktype, u64 cur_mkvp, u64 old_mkvp, + int verify); + +#define AES_MK_SET 0 +#define APKA_MK_SET 1 /* struct to hold info for each CCA queue */ struct cca_info { - int hwtype; /* one of the defined AP_DEVICE_TYPE_* */ - char new_mk_state; /* '1' empty, '2' partially full, '3' full */ - char cur_mk_state; /* '1' invalid, '2' valid */ - char old_mk_state; /* '1' invalid, '2' valid */ - u64 new_mkvp; /* truncated sha256 hash of new master key */ - u64 cur_mkvp; /* truncated sha256 hash of current master key */ - u64 old_mkvp; /* truncated sha256 hash of old master key */ - char serial[9]; /* serial number string (8 ascii numbers + 0x00) */ + int hwtype; /* one of the defined AP_DEVICE_TYPE_* */ + char new_aes_mk_state; /* '1' empty, '2' partially full, '3' full */ + char cur_aes_mk_state; /* '1' invalid, '2' valid */ + char old_aes_mk_state; /* '1' invalid, '2' valid */ + char new_apka_mk_state; /* '1' empty, '2' partially full, '3' full */ + char cur_apka_mk_state; /* '1' invalid, '2' valid */ + char old_apka_mk_state; /* '1' invalid, '2' valid */ + u64 new_aes_mkvp; /* truncated sha256 of new aes master key */ + u64 cur_aes_mkvp; /* truncated sha256 of current aes master key */ + u64 old_aes_mkvp; /* truncated sha256 of old aes master key */ + u64 new_apka_mkvp; /* truncated sha256 of new apka master key */ + u64 cur_apka_mkvp; /* truncated sha256 of current apka mk */ + u64 old_apka_mkvp; /* truncated sha256 of old apka mk */ + char serial[9]; /* serial number (8 ascii numbers + 0x00) */ }; /* diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index f00127a78bab1..146eb9f246942 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -109,26 +109,53 @@ static ssize_t cca_mkvps_show(struct device *dev, AP_QID_QUEUE(zq->queue->qid), &ci, zq->online); - if (ci.new_mk_state >= '1' && ci.new_mk_state <= '3') + if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3') n = scnprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n", - new_state[ci.new_mk_state - '1'], ci.new_mkvp); + new_state[ci.new_aes_mk_state - '1'], + ci.new_aes_mkvp); else n = scnprintf(buf, PAGE_SIZE, "AES NEW: - -\n"); - if (ci.cur_mk_state >= '1' && ci.cur_mk_state <= '2') + if (ci.cur_aes_mk_state >= '1' && ci.cur_aes_mk_state <= '2') n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: %s 0x%016llx\n", - cao_state[ci.cur_mk_state - '1'], 
ci.cur_mkvp); + cao_state[ci.cur_aes_mk_state - '1'], + ci.cur_aes_mkvp); else n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: - -\n"); - if (ci.old_mk_state >= '1' && ci.old_mk_state <= '2') + if (ci.old_aes_mk_state >= '1' && ci.old_aes_mk_state <= '2') n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: %s 0x%016llx\n", - cao_state[ci.old_mk_state - '1'], ci.old_mkvp); + cao_state[ci.old_aes_mk_state - '1'], + ci.old_aes_mkvp); else n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: - -\n"); + if (ci.new_apka_mk_state >= '1' && ci.new_apka_mk_state <= '3') + n += scnprintf(buf + n, PAGE_SIZE - n, + "APKA NEW: %s 0x%016llx\n", + new_state[ci.new_apka_mk_state - '1'], + ci.new_apka_mkvp); + else + n += scnprintf(buf + n, PAGE_SIZE - n, "APKA NEW: - -\n"); + + if (ci.cur_apka_mk_state >= '1' && ci.cur_apka_mk_state <= '2') + n += scnprintf(buf + n, PAGE_SIZE - n, + "APKA CUR: %s 0x%016llx\n", + cao_state[ci.cur_apka_mk_state - '1'], + ci.cur_apka_mkvp); + else + n += scnprintf(buf + n, PAGE_SIZE - n, "APKA CUR: - -\n"); + + if (ci.old_apka_mk_state >= '1' && ci.old_apka_mk_state <= '2') + n += scnprintf(buf + n, PAGE_SIZE - n, + "APKA OLD: %s 0x%016llx\n", + cao_state[ci.old_apka_mk_state - '1'], + ci.old_apka_mkvp); + else + n += scnprintf(buf + n, PAGE_SIZE - n, "APKA OLD: - -\n"); + return n; } diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index dc20d983e4682..d9ebe3a3c210a 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -121,26 +121,53 @@ static ssize_t cca_mkvps_show(struct device *dev, AP_QID_QUEUE(zq->queue->qid), &ci, zq->online); - if (ci.new_mk_state >= '1' && ci.new_mk_state <= '3') + if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3') n = scnprintf(buf, PAGE_SIZE, "AES NEW: %s 0x%016llx\n", - new_state[ci.new_mk_state - '1'], ci.new_mkvp); + new_state[ci.new_aes_mk_state - '1'], + ci.new_aes_mkvp); else n = scnprintf(buf, PAGE_SIZE, "AES NEW: - -\n"); - if (ci.cur_mk_state >= '1' && ci.cur_mk_state <= '2') + if (ci.cur_aes_mk_state >= '1' && ci.cur_aes_mk_state <= '2') n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: %s 0x%016llx\n", - cao_state[ci.cur_mk_state - '1'], ci.cur_mkvp); + cao_state[ci.cur_aes_mk_state - '1'], + ci.cur_aes_mkvp); else n += scnprintf(buf + n, PAGE_SIZE - n, "AES CUR: - -\n"); - if (ci.old_mk_state >= '1' && ci.old_mk_state <= '2') + if (ci.old_aes_mk_state >= '1' && ci.old_aes_mk_state <= '2') n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: %s 0x%016llx\n", - cao_state[ci.old_mk_state - '1'], ci.old_mkvp); + cao_state[ci.old_aes_mk_state - '1'], + ci.old_aes_mkvp); else n += scnprintf(buf + n, PAGE_SIZE - n, "AES OLD: - -\n"); + if (ci.new_apka_mk_state >= '1' && ci.new_apka_mk_state <= '3') + n += scnprintf(buf + n, PAGE_SIZE - n, + "APKA NEW: %s 0x%016llx\n", + new_state[ci.new_apka_mk_state - '1'], + ci.new_apka_mkvp); + else + n += scnprintf(buf + n, PAGE_SIZE - n, "APKA NEW: - -\n"); + + if (ci.cur_apka_mk_state >= '1' && ci.cur_apka_mk_state <= '2') + n += scnprintf(buf + n, PAGE_SIZE - n, + "APKA CUR: %s 0x%016llx\n", + cao_state[ci.cur_apka_mk_state - '1'], + ci.cur_apka_mkvp); + else + n += scnprintf(buf + n, PAGE_SIZE - n, "APKA CUR: - -\n"); + + if (ci.old_apka_mk_state >= '1' && ci.old_apka_mk_state <= '2') + n += scnprintf(buf + n, PAGE_SIZE - n, + "APKA OLD: %s 0x%016llx\n", + cao_state[ci.old_apka_mk_state - '1'], + ci.old_apka_mkvp); + else + n += scnprintf(buf + n, PAGE_SIZE - n, "APKA OLD: - -\n"); + return n; } From 
fa6999e326fe7851ecbd572b8cb9be8e930ebf41 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 21 Sep 2020 10:45:55 +0200 Subject: [PATCH 43/85] s390/pkey: support CCA and EP11 secure ECC private keys This patch extends the pkey kernel module to support CCA and EP11 secure ECC (private) keys as source for deriving ECC protected (private) keys. There is yet another new ioctl to support this: PKEY_KBLOB2PROTK3 can handle all the old keys plus CCA and EP11 secure ECC keys. For details see ioctl description in pkey.h. The CPACF unit currently only supports a subset of 5 different ECC curves (P-256, P-384, P-521, ED25519, ED448) and so only keys of this curve type can be transformed into protected keys. However, the pkey and the cca/ep11 low level functions do not check this but simple pass-through the key blob to the firmware onto the crypto cards. So most likely the failure will be a response carrying an error code resulting in user space errno value EIO instead of EINVAL. Deriving a protected key from an EP11 ECC secure key requires a CEX7 in EP11 mode. Deriving a protected key from an CCA ECC secure key requires a CEX7 in CCA mode. Together with this new ioctl the ioctls for querying lists of apqns (PKEY_APQNS4K and PKEY_APQNS4KT) have been extended to support EP11 and CCA ECC secure key type and key blobs. Together with this ioctl there comes a new struct ep11kblob_header which is to be prepended onto the EP11 key blob. See details in pkey.h for the fields in there. The older EP11 AES key blob with some info stored in the (unused) session field is also supported with this new ioctl. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Signed-off-by: Vasily Gorbik --- arch/s390/include/uapi/asm/pkey.h | 77 +++++-- drivers/s390/crypto/pkey_api.c | 244 +++++++++++++++++++-- drivers/s390/crypto/zcrypt_ccamisc.c | 193 +++++++++++++++++ drivers/s390/crypto/zcrypt_ccamisc.h | 44 +++- drivers/s390/crypto/zcrypt_ep11misc.c | 292 +++++++++++++++++++++----- drivers/s390/crypto/zcrypt_ep11misc.h | 63 ++++-- 6 files changed, 818 insertions(+), 95 deletions(-) diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index d27d7d3292634..7349e96d28a0d 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -35,12 +35,16 @@ #define PKEY_KEYTYPE_AES_128 1 #define PKEY_KEYTYPE_AES_192 2 #define PKEY_KEYTYPE_AES_256 3 +#define PKEY_KEYTYPE_ECC 4 /* the newer ioctls use a pkey_key_type enum for type information */ enum pkey_key_type { PKEY_TYPE_CCA_DATA = (__u32) 1, PKEY_TYPE_CCA_CIPHER = (__u32) 2, PKEY_TYPE_EP11 = (__u32) 3, + PKEY_TYPE_CCA_ECC = (__u32) 0x1f, + PKEY_TYPE_EP11_AES = (__u32) 6, + PKEY_TYPE_EP11_ECC = (__u32) 7, }; /* the newer ioctls use a pkey_key_size enum for key size information */ @@ -88,6 +92,20 @@ struct pkey_clrkey { __u8 clrkey[MAXCLRKEYSIZE]; /* 16, 24, or 32 byte clear key value */ }; +/* + * EP11 key blobs of type PKEY_TYPE_EP11_AES and PKEY_TYPE_EP11_ECC + * are ep11 blobs prepended by this header: + */ +struct ep11kblob_header { + __u8 type; /* always 0x00 */ + __u8 hver; /* header version, currently needs to be 0x00 */ + __u16 len; /* total length in bytes (including this header) */ + __u8 version; /* PKEY_TYPE_EP11_AES or PKEY_TYPE_EP11_ECC */ + __u8 res0; /* unused */ + __u16 bitlen; /* clear key bit len, 0 for unknown */ + __u8 res1[8]; /* unused */ +} __packed; + /* * Generate CCA AES secure key. 
*/ @@ -304,7 +322,7 @@ struct pkey_verifykey2 { #define PKEY_VERIFYKEY2 _IOWR(PKEY_IOCTL_MAGIC, 0x17, struct pkey_verifykey2) /* - * Transform a key blob (of any type) into a protected key, version 2. + * Transform a key blob into a protected key, version 2. * There needs to be a list of apqns given with at least one entry in there. * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain * is not supported. The implementation walks through the list of apqns and @@ -313,6 +331,8 @@ struct pkey_verifykey2 { * list is tried until success (return 0) or the end of the list is reached * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to * generate a list of apqns based on the key. + * Deriving ECC protected keys from ECC secure keys is not supported with + * this ioctl, use PKEY_KBLOB2PROTK3 for this purpose. */ struct pkey_kblob2pkey2 { __u8 __user *key; /* in: pointer to key blob */ @@ -326,17 +346,17 @@ struct pkey_kblob2pkey2 { /* * Build a list of APQNs based on a key blob given. * Is able to find out which type of secure key is given (CCA AES secure - * key, CCA AES cipher key or EP11 AES key) and tries to find all matching - * crypto cards based on the MKVP and maybe other criterias (like CCA AES - * cipher keys need a CEX5C or higher, EP11 keys with BLOB_PKEY_EXTRACTABLE - * need a CEX7 and EP11 api version 4). The list of APQNs is further filtered - * by the key's mkvp which needs to match to either the current mkvp (CCA and - * EP11) or the alternate mkvp (old mkvp, CCA adapters only) of the apqns. The - * flags argument may be used to limit the matching apqns. If the - * PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of each apqn is - * compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both are given, it - * is assumed to return apqns where either the current or the alternate mkvp - * matches. At least one of the matching flags needs to be given. + * key, CCA AES cipher key, CCA ECC private key, EP11 AES key, EP11 ECC private + * key) and tries to find all matching crypto cards based on the MKVP and maybe + * other criterias (like CCA AES cipher keys need a CEX5C or higher, EP11 keys + * with BLOB_PKEY_EXTRACTABLE need a CEX7 and EP11 api version 4). The list of + * APQNs is further filtered by the key's mkvp which needs to match to either + * the current mkvp (CCA and EP11) or the alternate mkvp (old mkvp, CCA adapters + * only) of the apqns. The flags argument may be used to limit the matching + * apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of + * each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both + * are given, it is assumed to return apqns where either the current or the + * alternate mkvp matches. At least one of the matching flags needs to be given. * The flags argument for EP11 keys has no further action and is currently * ignored (but needs to be given as PKEY_FLAGS_MATCH_CUR_MKVP) as there is only * the wkvp from the key to match against the apqn's wkvp. @@ -365,9 +385,10 @@ struct pkey_apqns4key { * restrict the list by given master key verification patterns. * For different key types there may be different ways to match the * master key verification patterns. For CCA keys (CCA data key and CCA - * cipher key) the first 8 bytes of cur_mkvp refer to the current mkvp value - * of the apqn and the first 8 bytes of the alt_mkvp refer to the old mkvp. 
- * The flags argument controls if the apqns current and/or alternate mkvp + * cipher key) the first 8 bytes of cur_mkvp refer to the current AES mkvp value + * of the apqn and the first 8 bytes of the alt_mkvp refer to the old AES mkvp. + * For CCA ECC keys it is similar but the match is against the APKA current/old + * mkvp. The flags argument controls if the apqns current and/or alternate mkvp * should match. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current * mkvp of each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. * If both are given, it is assumed to return apqns where either the @@ -397,4 +418,30 @@ struct pkey_apqns4keytype { }; #define PKEY_APQNS4KT _IOWR(PKEY_IOCTL_MAGIC, 0x1C, struct pkey_apqns4keytype) +/* + * Transform a key blob into a protected key, version 3. + * The difference to version 2 of this ioctl is that the protected key + * buffer is now explicitly and not within a struct pkey_protkey any more. + * So this ioctl is also able to handle EP11 and CCA ECC secure keys and + * provide ECC protected keys. + * There needs to be a list of apqns given with at least one entry in there. + * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain + * is not supported. The implementation walks through the list of apqns and + * tries to send the request to each apqn without any further checking (like + * card type or online state). If the apqn fails, simple the next one in the + * list is tried until success (return 0) or the end of the list is reached + * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to + * generate a list of apqns based on the key. + */ +struct pkey_kblob2pkey3 { + __u8 __user *key; /* in: pointer to key blob */ + __u32 keylen; /* in: key blob size */ + struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */ + __u32 apqn_entries; /* in: # of apqn target list entries */ + __u32 pkeytype; /* out: prot key type (enum pkey_key_type) */ + __u32 pkeylen; /* in/out: size of pkey buffer/actual len of pkey */ + __u8 __user *pkey; /* in: pkey blob buffer space ptr */ +}; +#define PKEY_KBLOB2PROTK3 _IOWR(PKEY_IOCTL_MAGIC, 0x1D, struct pkey_kblob2pkey3) + #endif /* _UAPI_PKEY_H */ diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index e48c13acc5daf..99cb60ea663d9 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -31,8 +31,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("s390 protected key interface"); -#define KEYBLOBBUFSIZE 8192 /* key buffer size used for internal processing */ -#define MAXAPQNSINLIST 64 /* max 64 apqns within a apqn list */ +#define KEYBLOBBUFSIZE 8192 /* key buffer size used for internal processing */ +#define PROTKEYBLOBBUFSIZE 256 /* protected key buffer size used internal */ +#define MAXAPQNSINLIST 64 /* max 64 apqns within a apqn list */ /* mask of available pckmo subfunctions, fetched once at module init */ static cpacf_mask_t pckmo_functions; @@ -237,8 +238,9 @@ static int pkey_ep11key2pkey(const u8 *key, struct pkey_protkey *pkey) for (rc = -ENODEV, i = 0; i < nr_apqns; i++) { card = apqns[i] >> 16; dom = apqns[i] & 0xFFFF; - rc = ep11_key2protkey(card, dom, key, kb->head.len, - pkey->protkey, &pkey->len, &pkey->type); + pkey->len = sizeof(pkey->protkey); + rc = ep11_kblob2protkey(card, dom, key, kb->head.len, + pkey->protkey, &pkey->len, &pkey->type); if (rc == 0) break; } @@ -449,15 +451,21 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen, break; } case 
TOKVER_EP11_AES: { - if (keylen < MINEP11AESKEYBLOBSIZE) - goto out; /* check ep11 key for exportable as protected key */ - rc = ep11_check_aeskeyblob(debug_info, 3, key, 0, 1); + rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1); if (rc) goto out; rc = pkey_ep11key2pkey(key, protkey); break; } + case TOKVER_EP11_AES_WITH_HEADER: + /* check ep11 key with header for exportable as protected key */ + rc = ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1); + if (rc) + goto out; + rc = pkey_ep11key2pkey(key + sizeof(struct ep11kblob_header), + protkey); + break; default: DEBUG_ERR("%s unknown/unsupported non-CCA token version %d\n", __func__, hdr->version); @@ -719,7 +727,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen, && hdr->version == TOKVER_EP11_AES) { struct ep11keyblob *kb = (struct ep11keyblob *)key; - rc = ep11_check_aeskeyblob(debug_info, 3, key, 0, 1); + rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1); if (rc) goto out; if (ktype) @@ -780,7 +788,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, if (hdr->version == TOKVER_EP11_AES) { if (keylen < sizeof(struct ep11keyblob)) return -EINVAL; - if (ep11_check_aeskeyblob(debug_info, 3, key, 0, 1)) + if (ep11_check_aes_key(debug_info, 3, key, keylen, 1)) return -EINVAL; } else { return pkey_nonccatok2pkey(key, keylen, pkey); @@ -806,9 +814,10 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns, else { /* EP11 AES secure key blob */ struct ep11keyblob *kb = (struct ep11keyblob *) key; - rc = ep11_key2protkey(card, dom, key, kb->head.len, - pkey->protkey, &pkey->len, - &pkey->type); + pkey->len = sizeof(pkey->protkey); + rc = ep11_kblob2protkey(card, dom, key, kb->head.len, + pkey->protkey, &pkey->len, + &pkey->type); } if (rc == 0) break; @@ -827,7 +836,27 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, if (keylen < sizeof(struct keytoken_header) || flags == 0) return -EINVAL; - if (hdr->type == TOKTYPE_NON_CCA && hdr->version == TOKVER_EP11_AES) { + if (hdr->type == TOKTYPE_NON_CCA + && (hdr->version == TOKVER_EP11_AES_WITH_HEADER + || hdr->version == TOKVER_EP11_ECC_WITH_HEADER) + && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) { + int minhwtype = 0, api = 0; + struct ep11keyblob *kb = (struct ep11keyblob *) + (key + sizeof(struct ep11kblob_header)); + + if (flags != PKEY_FLAGS_MATCH_CUR_MKVP) + return -EINVAL; + if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) { + minhwtype = ZCRYPT_CEX7; + api = EP11_API_V; + } + rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, + minhwtype, api, kb->wkvp); + if (rc) + goto out; + } else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES + && is_ep11_keyblob(key)) { int minhwtype = 0, api = 0; struct ep11keyblob *kb = (struct ep11keyblob *) key; @@ -869,6 +898,24 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags, cur_mkvp, old_mkvp, 1); if (rc) goto out; + } else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) { + u64 cur_mkvp = 0, old_mkvp = 0; + struct eccprivkeytoken *t = (struct eccprivkeytoken *)key; + + if (t->secid == 0x20) { + if (flags & PKEY_FLAGS_MATCH_CUR_MKVP) + cur_mkvp = t->mkvp; + if (flags & PKEY_FLAGS_MATCH_ALT_MKVP) + old_mkvp = t->mkvp; + } else { + /* unknown cca internal 2 token type */ + return -EINVAL; + } + rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, + ZCRYPT_CEX7, APKA_MK_SET, + cur_mkvp, old_mkvp, 1); + if (rc) + goto out; } else return -EINVAL; @@ -907,7 +954,22 @@ static int pkey_apqns4keytype(enum 
pkey_key_type ktype, cur_mkvp, old_mkvp, 1); if (rc) goto out; - } else if (ktype == PKEY_TYPE_EP11) { + } else if (ktype == PKEY_TYPE_CCA_ECC) { + u64 cur_mkvp = 0, old_mkvp = 0; + + if (flags & PKEY_FLAGS_MATCH_CUR_MKVP) + cur_mkvp = *((u64 *) cur_mkvp); + if (flags & PKEY_FLAGS_MATCH_ALT_MKVP) + old_mkvp = *((u64 *) alt_mkvp); + rc = cca_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF, + ZCRYPT_CEX7, APKA_MK_SET, + cur_mkvp, old_mkvp, 1); + if (rc) + goto out; + + } else if (ktype == PKEY_TYPE_EP11 || + ktype == PKEY_TYPE_EP11_AES || + ktype == PKEY_TYPE_EP11_ECC) { u8 *wkvp = NULL; if (flags & PKEY_FLAGS_MATCH_CUR_MKVP) @@ -933,6 +995,111 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype, return rc; } +static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns, + const u8 *key, size_t keylen, u32 *protkeytype, + u8 *protkey, u32 *protkeylen) +{ + int i, card, dom, rc; + struct keytoken_header *hdr = (struct keytoken_header *)key; + + /* check for at least one apqn given */ + if (!apqns || !nr_apqns) + return -EINVAL; + + if (keylen < sizeof(struct keytoken_header)) + return -EINVAL; + + if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES_WITH_HEADER + && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) { + /* EP11 AES key blob with header */ + if (ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1)) + return -EINVAL; + } else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_ECC_WITH_HEADER + && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) { + /* EP11 ECC key blob with header */ + if (ep11_check_ecc_key_with_hdr(debug_info, 3, key, keylen, 1)) + return -EINVAL; + } else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES + && is_ep11_keyblob(key)) { + /* EP11 AES key blob with header in session field */ + if (ep11_check_aes_key(debug_info, 3, key, keylen, 1)) + return -EINVAL; + } else if (hdr->type == TOKTYPE_CCA_INTERNAL) { + if (hdr->version == TOKVER_CCA_AES) { + /* CCA AES data key */ + if (keylen != sizeof(struct secaeskeytoken)) + return -EINVAL; + if (cca_check_secaeskeytoken(debug_info, 3, key, 0)) + return -EINVAL; + } else if (hdr->version == TOKVER_CCA_VLSC) { + /* CCA AES cipher key */ + if (keylen < hdr->len || keylen > MAXCCAVLSCTOKENSIZE) + return -EINVAL; + if (cca_check_secaescipherkey(debug_info, 3, key, 0, 1)) + return -EINVAL; + } else { + DEBUG_ERR("%s unknown CCA internal token version %d\n", + __func__, hdr->version); + return -EINVAL; + } + } else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) { + /* CCA ECC (private) key */ + if (keylen < sizeof(struct eccprivkeytoken)) + return -EINVAL; + if (cca_check_sececckeytoken(debug_info, 3, key, keylen, 1)) + return -EINVAL; + } else if (hdr->type == TOKTYPE_NON_CCA) { + struct pkey_protkey pkey; + + rc = pkey_nonccatok2pkey(key, keylen, &pkey); + if (rc) + return rc; + memcpy(protkey, pkey.protkey, pkey.len); + *protkeylen = pkey.len; + *protkeytype = pkey.type; + return 0; + } else { + DEBUG_ERR("%s unknown/unsupported blob type %d\n", + __func__, hdr->type); + return -EINVAL; + } + + /* simple try all apqns from the list */ + for (rc = -ENODEV, i = 0; rc && i < nr_apqns; i++) { + card = apqns[i].card; + dom = apqns[i].domain; + if (hdr->type == TOKTYPE_NON_CCA + && (hdr->version == TOKVER_EP11_AES_WITH_HEADER + || hdr->version == TOKVER_EP11_ECC_WITH_HEADER) + && is_ep11_keyblob(key + sizeof(struct ep11kblob_header))) + rc = ep11_kblob2protkey(card, dom, key, hdr->len, + protkey, protkeylen, protkeytype); + else if 
(hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES + && is_ep11_keyblob(key)) + rc = ep11_kblob2protkey(card, dom, key, hdr->len, + protkey, protkeylen, protkeytype); + else if (hdr->type == TOKTYPE_CCA_INTERNAL && + hdr->version == TOKVER_CCA_AES) + rc = cca_sec2protkey(card, dom, key, protkey, + protkeylen, protkeytype); + else if (hdr->type == TOKTYPE_CCA_INTERNAL && + hdr->version == TOKVER_CCA_VLSC) + rc = cca_cipher2protkey(card, dom, key, protkey, + protkeylen, protkeytype); + else if (hdr->type == TOKTYPE_CCA_INTERNAL_PKA) + rc = cca_ecc2protkey(card, dom, key, protkey, + protkeylen, protkeytype); + else + return -EINVAL; + } + + return rc; +} + /* * File io functions */ @@ -1333,6 +1500,55 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, kfree(apqns); break; } + case PKEY_KBLOB2PROTK3: { + struct pkey_kblob2pkey3 __user *utp = (void __user *) arg; + struct pkey_kblob2pkey3 ktp; + struct pkey_apqn *apqns = NULL; + u32 protkeylen = PROTKEYBLOBBUFSIZE; + u8 *kkey, *protkey; + + if (copy_from_user(&ktp, utp, sizeof(ktp))) + return -EFAULT; + apqns = _copy_apqns_from_user(ktp.apqns, ktp.apqn_entries); + if (IS_ERR(apqns)) + return PTR_ERR(apqns); + kkey = _copy_key_from_user(ktp.key, ktp.keylen); + if (IS_ERR(kkey)) { + kfree(apqns); + return PTR_ERR(kkey); + } + protkey = kmalloc(protkeylen, GFP_KERNEL); + if (!protkey) { + kfree(apqns); + kfree(kkey); + return -ENOMEM; + } + rc = pkey_keyblob2pkey3(apqns, ktp.apqn_entries, kkey, + ktp.keylen, &ktp.pkeytype, + protkey, &protkeylen); + DEBUG_DBG("%s pkey_keyblob2pkey3()=%d\n", __func__, rc); + kfree(apqns); + kfree(kkey); + if (rc) { + kfree(protkey); + break; + } + if (ktp.pkey && ktp.pkeylen) { + if (protkeylen > ktp.pkeylen) { + kfree(protkey); + return -EINVAL; + } + if (copy_to_user(ktp.pkey, protkey, protkeylen)) { + kfree(protkey); + return -EFAULT; + } + } + kfree(protkey); + ktp.pkeylen = protkeylen; + if (copy_to_user(utp, &ktp, sizeof(ktp))) + return -EFAULT; + break; + } default: /* unknown/unsupported ioctl cmd */ return -ENOTTY; diff --git a/drivers/s390/crypto/zcrypt_ccamisc.c b/drivers/s390/crypto/zcrypt_ccamisc.c index e969188a1ec4e..b9d293ae228cf 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.c +++ b/drivers/s390/crypto/zcrypt_ccamisc.c @@ -172,6 +172,49 @@ int cca_check_secaescipherkey(debug_info_t *dbg, int dbflvl, } EXPORT_SYMBOL(cca_check_secaescipherkey); +/* + * Simple check if the token is a valid CCA secure ECC private + * key token. Returns 0 on success or errno value on failure. + */ +int cca_check_sececckeytoken(debug_info_t *dbg, int dbflvl, + const u8 *token, size_t keysize, + int checkcpacfexport) +{ + struct eccprivkeytoken *t = (struct eccprivkeytoken *) token; + +#define DBF(...) 
debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__) + + if (t->type != TOKTYPE_CCA_INTERNAL_PKA) { + if (dbg) + DBF("%s token check failed, type 0x%02x != 0x%02x\n", + __func__, (int) t->type, TOKTYPE_CCA_INTERNAL_PKA); + return -EINVAL; + } + if (t->len > keysize) { + if (dbg) + DBF("%s token check failed, len %d > keysize %zu\n", + __func__, (int) t->len, keysize); + return -EINVAL; + } + if (t->secid != 0x20) { + if (dbg) + DBF("%s token check failed, secid 0x%02x != 0x20\n", + __func__, (int) t->secid); + return -EINVAL; + } + if (checkcpacfexport && !(t->kutc & 0x01)) { + if (dbg) + DBF("%s token check failed, XPRTCPAC bit is 0\n", + __func__); + return -EINVAL; + } + +#undef DBF + + return 0; +} +EXPORT_SYMBOL(cca_check_sececckeytoken); + /* * Allocate consecutive memory for request CPRB, request param * block, reply CPRB and reply param block and fill in values @@ -1297,6 +1340,156 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, } EXPORT_SYMBOL(cca_cipher2protkey); +/* + * Derive protected key from CCA ECC secure private key. + */ +int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key, + u8 *protkey, u32 *protkeylen, u32 *protkeytype) +{ + int rc; + u8 *mem, *ptr; + struct CPRBX *preqcblk, *prepcblk; + struct ica_xcRB xcrb; + struct aureqparm { + u8 subfunc_code[2]; + u16 rule_array_len; + u8 rule_array[8]; + struct { + u16 len; + u16 tk_blob_len; + u16 tk_blob_tag; + u8 tk_blob[66]; + } vud; + struct { + u16 len; + u16 cca_key_token_len; + u16 cca_key_token_flags; + u8 cca_key_token[0]; + } kb; + } __packed * preqparm; + struct aurepparm { + u8 subfunc_code[2]; + u16 rule_array_len; + struct { + u16 len; + u16 sublen; + u16 tag; + struct cpacfkeyblock { + u8 version; /* version of this struct */ + u8 flags[2]; + u8 algo; + u8 form; + u8 pad1[3]; + u16 keylen; + u8 key[0]; /* the key (keylen bytes) */ + u16 keyattrlen; + u8 keyattr[32]; + u8 pad2[1]; + u8 vptype; + u8 vp[32]; /* verification pattern */ + } ckb; + } vud; + struct { + u16 len; + } kb; + } __packed * prepparm; + int keylen = ((struct eccprivkeytoken *)key)->len; + + /* get already prepared memory for 2 cprbs with param block each */ + rc = alloc_and_prep_cprbmem(PARMBSIZE, &mem, &preqcblk, &prepcblk); + if (rc) + return rc; + + /* fill request cprb struct */ + preqcblk->domain = domain; + + /* fill request cprb param block with AU request */ + preqparm = (struct aureqparm __force *) preqcblk->req_parmb; + memcpy(preqparm->subfunc_code, "AU", 2); + preqparm->rule_array_len = + sizeof(preqparm->rule_array_len) + + sizeof(preqparm->rule_array); + memcpy(preqparm->rule_array, "EXPT-SK ", 8); + /* vud, tk blob */ + preqparm->vud.len = sizeof(preqparm->vud); + preqparm->vud.tk_blob_len = sizeof(preqparm->vud.tk_blob) + + 2 * sizeof(uint16_t); + preqparm->vud.tk_blob_tag = 0x00C2; + /* kb, cca token */ + preqparm->kb.len = keylen + 3 * sizeof(uint16_t); + preqparm->kb.cca_key_token_len = keylen + 2 * sizeof(uint16_t); + memcpy(preqparm->kb.cca_key_token, key, keylen); + /* now fill length of param block into cprb */ + preqcblk->req_parml = sizeof(struct aureqparm) + keylen; + + /* fill xcrb struct */ + prep_xcrb(&xcrb, cardnr, preqcblk, prepcblk); + + /* forward xcrb with request CPRB and reply CPRB to zcrypt dd */ + rc = zcrypt_send_cprb(&xcrb); + if (rc) { + DEBUG_ERR( + "%s zcrypt_send_cprb (cardnr=%d domain=%d) failed, rc=%d\n", + __func__, (int) cardnr, (int) domain, rc); + goto out; + } + + /* check response returncode and reasoncode */ + if (prepcblk->ccp_rtcode != 0) { + DEBUG_ERR( + "%s unwrap 
secure key failure, card response %d/%d\n", + __func__, + (int) prepcblk->ccp_rtcode, + (int) prepcblk->ccp_rscode); + rc = -EIO; + goto out; + } + if (prepcblk->ccp_rscode != 0) { + DEBUG_WARN( + "%s unwrap secure key warning, card response %d/%d\n", + __func__, + (int) prepcblk->ccp_rtcode, + (int) prepcblk->ccp_rscode); + } + + /* process response cprb param block */ + ptr = ((u8 *) prepcblk) + sizeof(struct CPRBX); + prepcblk->rpl_parmb = (u8 __user *) ptr; + prepparm = (struct aurepparm *) ptr; + + /* check the returned keyblock */ + if (prepparm->vud.ckb.version != 0x02) { + DEBUG_ERR("%s reply param keyblock version mismatch 0x%02x != 0x02\n", + __func__, (int) prepparm->vud.ckb.version); + rc = -EIO; + goto out; + } + if (prepparm->vud.ckb.algo != 0x81) { + DEBUG_ERR( + "%s reply param keyblock algo mismatch 0x%02x != 0x81\n", + __func__, (int) prepparm->vud.ckb.algo); + rc = -EIO; + goto out; + } + + /* copy the translated protected key */ + if (prepparm->vud.ckb.keylen > *protkeylen) { + DEBUG_ERR("%s prot keylen mismatch %d > buffersize %u\n", + __func__, prepparm->vud.ckb.keylen, *protkeylen); + rc = -EIO; + goto out; + } + memcpy(protkey, prepparm->vud.ckb.key, prepparm->vud.ckb.keylen); + *protkeylen = prepparm->vud.ckb.keylen; + if (protkeytype) + *protkeytype = PKEY_KEYTYPE_ECC; + +out: + free_cprbmem(mem, PARMBSIZE, 0); + return rc; +} +EXPORT_SYMBOL(cca_ecc2protkey); + /* * query cryptographic facility from CCA adapter */ diff --git a/drivers/s390/crypto/zcrypt_ccamisc.h b/drivers/s390/crypto/zcrypt_ccamisc.h index 4d88a1d6af218..e7105443d5cb3 100644 --- a/drivers/s390/crypto/zcrypt_ccamisc.h +++ b/drivers/s390/crypto/zcrypt_ccamisc.h @@ -14,8 +14,9 @@ #include /* Key token types */ -#define TOKTYPE_NON_CCA 0x00 /* Non-CCA key token */ -#define TOKTYPE_CCA_INTERNAL 0x01 /* CCA internal key token */ +#define TOKTYPE_NON_CCA 0x00 /* Non-CCA key token */ +#define TOKTYPE_CCA_INTERNAL 0x01 /* CCA internal sym key token */ +#define TOKTYPE_CCA_INTERNAL_PKA 0x1f /* CCA internal asym key token */ /* For TOKTYPE_NON_CCA: */ #define TOKVER_PROTECTED_KEY 0x01 /* Protected key token */ @@ -93,6 +94,31 @@ struct cipherkeytoken { u8 vdata[]; /* variable part data follows */ } __packed; +/* inside view of an CCA secure ECC private key */ +struct eccprivkeytoken { + u8 type; /* 0x1f for internal asym key token */ + u8 version; /* should be 0x00 */ + u16 len; /* total key token length in bytes */ + u8 res1[4]; + u8 secid; /* 0x20 for ECC priv key section marker */ + u8 secver; /* section version */ + u16 seclen; /* section length */ + u8 wtype; /* wrapping method, 0x00 clear, 0x01 AES */ + u8 htype; /* hash method, 0x02 for SHA-256 */ + u8 res2[2]; + u8 kutc; /* key usage and translation control */ + u8 ctype; /* curve type */ + u8 kfs; /* key format and security */ + u8 ksrc; /* key source */ + u16 pbitlen; /* length of prime p in bits */ + u16 ibmadlen; /* IBM associated data length in bytes */ + u64 mkvp; /* master key verification pattern */ + u8 opk[48]; /* encrypted object protection key data */ + u16 adatalen; /* associated data length in bytes */ + u16 fseclen; /* formated section length in bytes */ + u8 more_data[]; /* more data follows */ +} __packed; + /* Some defines for the CCA AES cipherkeytoken kmf1 field */ #define KMF1_XPRT_SYM 0x8000 #define KMF1_XPRT_UASY 0x4000 @@ -122,6 +148,14 @@ int cca_check_secaescipherkey(debug_info_t *dbg, int dbflvl, const u8 *token, int keybitsize, int checkcpacfexport); +/* + * Simple check if the token is a valid CCA secure ECC private + * 
key token. Returns 0 on success or errno value on failure. + */ +int cca_check_sececckeytoken(debug_info_t *dbg, int dbflvl, + const u8 *token, size_t keysize, + int checkcpacfexport); + /* * Generate (random) CCA AES DATA secure key. */ @@ -158,6 +192,12 @@ int cca_cipher2protkey(u16 cardnr, u16 domain, const u8 *ckey, int cca_clr2cipherkey(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, const u8 *clrkey, u8 *keybuf, size_t *keybufsize); +/* + * Derive proteced key from CCA ECC secure private key. + */ +int cca_ecc2protkey(u16 cardnr, u16 domain, const u8 *key, + u8 *protkey, u32 *protkeylen, u32 *protkeytype); + /* * Query cryptographic facility from CCA adapter */ diff --git a/drivers/s390/crypto/zcrypt_ep11misc.c b/drivers/s390/crypto/zcrypt_ep11misc.c index 60b6bec21c323..9ce5a71da69b8 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.c +++ b/drivers/s390/crypto/zcrypt_ep11misc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "ap_bus.h" #include "zcrypt_api.h" @@ -113,16 +114,156 @@ static void __exit card_cache_free(void) } /* - * Simple check if the key blob is a valid EP11 secure AES key. + * Simple check if the key blob is a valid EP11 AES key blob with header. */ -int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl, - const u8 *key, int keybitsize, - int checkcpacfexport) +int ep11_check_aes_key_with_hdr(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp) +{ + struct ep11kblob_header *hdr = (struct ep11kblob_header *) key; + struct ep11keyblob *kb = (struct ep11keyblob *) (key + sizeof(*hdr)); + +#define DBF(...) debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__) + + if (keylen < sizeof(*hdr) + sizeof(*kb)) { + DBF("%s key check failed, keylen %zu < %zu\n", + __func__, keylen, sizeof(*hdr) + sizeof(*kb)); + return -EINVAL; + } + + if (hdr->type != TOKTYPE_NON_CCA) { + if (dbg) + DBF("%s key check failed, type 0x%02x != 0x%02x\n", + __func__, (int) hdr->type, TOKTYPE_NON_CCA); + return -EINVAL; + } + if (hdr->hver != 0x00) { + if (dbg) + DBF("%s key check failed, header version 0x%02x != 0x00\n", + __func__, (int) hdr->hver); + return -EINVAL; + } + if (hdr->version != TOKVER_EP11_AES_WITH_HEADER) { + if (dbg) + DBF("%s key check failed, version 0x%02x != 0x%02x\n", + __func__, (int) hdr->version, TOKVER_EP11_AES_WITH_HEADER); + return -EINVAL; + } + if (hdr->len > keylen) { + if (dbg) + DBF("%s key check failed, header len %d keylen %zu mismatch\n", + __func__, (int) hdr->len, keylen); + return -EINVAL; + } + if (hdr->len < sizeof(*hdr) + sizeof(*kb)) { + if (dbg) + DBF("%s key check failed, header len %d < %zu\n", + __func__, (int) hdr->len, sizeof(*hdr) + sizeof(*kb)); + return -EINVAL; + } + + if (kb->version != EP11_STRUCT_MAGIC) { + if (dbg) + DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n", + __func__, (int) kb->version, EP11_STRUCT_MAGIC); + return -EINVAL; + } + if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) { + if (dbg) + DBF("%s key check failed, PKEY_EXTRACTABLE is off\n", + __func__); + return -EINVAL; + } + +#undef DBF + + return 0; +} +EXPORT_SYMBOL(ep11_check_aes_key_with_hdr); + +/* + * Simple check if the key blob is a valid EP11 ECC key blob with header. + */ +int ep11_check_ecc_key_with_hdr(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp) +{ + struct ep11kblob_header *hdr = (struct ep11kblob_header *) key; + struct ep11keyblob *kb = (struct ep11keyblob *) (key + sizeof(*hdr)); + +#define DBF(...) 
debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__) + + if (keylen < sizeof(*hdr) + sizeof(*kb)) { + DBF("%s key check failed, keylen %zu < %zu\n", + __func__, keylen, sizeof(*hdr) + sizeof(*kb)); + return -EINVAL; + } + + if (hdr->type != TOKTYPE_NON_CCA) { + if (dbg) + DBF("%s key check failed, type 0x%02x != 0x%02x\n", + __func__, (int) hdr->type, TOKTYPE_NON_CCA); + return -EINVAL; + } + if (hdr->hver != 0x00) { + if (dbg) + DBF("%s key check failed, header version 0x%02x != 0x00\n", + __func__, (int) hdr->hver); + return -EINVAL; + } + if (hdr->version != TOKVER_EP11_ECC_WITH_HEADER) { + if (dbg) + DBF("%s key check failed, version 0x%02x != 0x%02x\n", + __func__, (int) hdr->version, TOKVER_EP11_ECC_WITH_HEADER); + return -EINVAL; + } + if (hdr->len > keylen) { + if (dbg) + DBF("%s key check failed, header len %d keylen %zu mismatch\n", + __func__, (int) hdr->len, keylen); + return -EINVAL; + } + if (hdr->len < sizeof(*hdr) + sizeof(*kb)) { + if (dbg) + DBF("%s key check failed, header len %d < %zu\n", + __func__, (int) hdr->len, sizeof(*hdr) + sizeof(*kb)); + return -EINVAL; + } + + if (kb->version != EP11_STRUCT_MAGIC) { + if (dbg) + DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n", + __func__, (int) kb->version, EP11_STRUCT_MAGIC); + return -EINVAL; + } + if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) { + if (dbg) + DBF("%s key check failed, PKEY_EXTRACTABLE is off\n", + __func__); + return -EINVAL; + } + +#undef DBF + + return 0; +} +EXPORT_SYMBOL(ep11_check_ecc_key_with_hdr); + +/* + * Simple check if the key blob is a valid EP11 AES key blob with + * the header in the session field (old style EP11 AES key). + */ +int ep11_check_aes_key(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp) { struct ep11keyblob *kb = (struct ep11keyblob *) key; #define DBF(...) 
debug_sprintf_event(dbg, dbflvl, ##__VA_ARGS__) + if (keylen < sizeof(*kb)) { + DBF("%s key check failed, keylen %zu < %zu\n", + __func__, keylen, sizeof(*kb)); + return -EINVAL; + } + if (kb->head.type != TOKTYPE_NON_CCA) { if (dbg) DBF("%s key check failed, type 0x%02x != 0x%02x\n", @@ -135,39 +276,37 @@ int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl, __func__, (int) kb->head.version, TOKVER_EP11_AES); return -EINVAL; } - if (kb->version != EP11_STRUCT_MAGIC) { + if (kb->head.len > keylen) { if (dbg) - DBF("%s key check failed, magic 0x%04x != 0x%04x\n", - __func__, (int) kb->version, EP11_STRUCT_MAGIC); + DBF("%s key check failed, header len %d keylen %zu mismatch\n", + __func__, (int) kb->head.len, keylen); return -EINVAL; } - switch (kb->head.keybitlen) { - case 128: - case 192: - case 256: - break; - default: + if (kb->head.len < sizeof(*kb)) { if (dbg) - DBF("%s key check failed, keybitlen %d invalid\n", - __func__, (int) kb->head.keybitlen); + DBF("%s key check failed, header len %d < %zu\n", + __func__, (int) kb->head.len, sizeof(*kb)); return -EINVAL; } - if (keybitsize > 0 && keybitsize != (int) kb->head.keybitlen) { - DBF("%s key check failed, keybitsize %d\n", - __func__, keybitsize); + + if (kb->version != EP11_STRUCT_MAGIC) { + if (dbg) + DBF("%s key check failed, blob magic 0x%04x != 0x%04x\n", + __func__, (int) kb->version, EP11_STRUCT_MAGIC); return -EINVAL; } - if (checkcpacfexport && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) { + if (checkcpacfexp && !(kb->attr & EP11_BLOB_PKEY_EXTRACTABLE)) { if (dbg) - DBF("%s key check failed, PKEY_EXTRACTABLE is 0\n", + DBF("%s key check failed, PKEY_EXTRACTABLE is off\n", __func__); return -EINVAL; } + #undef DBF return 0; } -EXPORT_SYMBOL(ep11_check_aeskeyblob); +EXPORT_SYMBOL(ep11_check_aes_key); /* * Allocate and prepare ep11 cprb plus additional payload. @@ -954,7 +1093,7 @@ static int ep11_wrapkey(u16 card, u16 domain, u8 data_tag; u8 data_lenfmt; u16 data_len; - u8 data[512]; + u8 data[1024]; } __packed * rep_pl; struct ep11_cprb *req = NULL, *rep = NULL; struct ep11_target_dev target; @@ -962,8 +1101,17 @@ static int ep11_wrapkey(u16 card, u16 domain, struct ep11keyblob *kb; size_t req_pl_size; int api, rc = -ENOMEM; + bool has_header = false; u8 *p; + /* maybe the session field holds a header with key info */ + kb = (struct ep11keyblob *) key; + if (kb->head.type == TOKTYPE_NON_CCA && + kb->head.version == TOKVER_EP11_AES) { + has_header = true; + keysize = kb->head.len < keysize ? kb->head.len : keysize; + } + /* request cprb and payload */ req_pl_size = sizeof(struct wk_req_pl) + (iv ? 
16 : 0) + ASN1TAGLEN(keysize) + 4; @@ -989,9 +1137,10 @@ static int ep11_wrapkey(u16 card, u16 domain, /* key blob */ p += asn1tag_write(p, 0x04, key, keysize); /* maybe the key argument needs the head data cleaned out */ - kb = (struct ep11keyblob *)(p - keysize); - if (kb->head.version == TOKVER_EP11_AES) + if (has_header) { + kb = (struct ep11keyblob *)(p - keysize); memset(&kb->head, 0, sizeof(kb->head)); + } /* empty kek tag */ *p++ = 0x04; *p++ = 0; @@ -1114,12 +1263,12 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, } EXPORT_SYMBOL(ep11_clr2keyblob); -int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen, - u8 *protkey, u32 *protkeylen, u32 *protkeytype) +int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen, + u8 *protkey, u32 *protkeylen, u32 *protkeytype) { int rc = -EIO; u8 *wkbuf = NULL; - size_t wkbuflen = 256; + size_t wkbuflen, keylen; struct wk_info { u16 version; u8 res1[16]; @@ -1129,8 +1278,33 @@ int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen, u8 res2[8]; u8 pkey[0]; } __packed * wki; + const u8 *key; + struct ep11kblob_header *hdr; + + /* key with or without header ? */ + hdr = (struct ep11kblob_header *) keyblob; + if (hdr->type == TOKTYPE_NON_CCA + && (hdr->version == TOKVER_EP11_AES_WITH_HEADER + || hdr->version == TOKVER_EP11_ECC_WITH_HEADER) + && is_ep11_keyblob(keyblob + sizeof(struct ep11kblob_header))) { + /* EP11 AES or ECC key with header */ + key = keyblob + sizeof(struct ep11kblob_header); + keylen = hdr->len - sizeof(struct ep11kblob_header); + } else if (hdr->type == TOKTYPE_NON_CCA + && hdr->version == TOKVER_EP11_AES + && is_ep11_keyblob(keyblob)) { + /* EP11 AES key (old style) */ + key = keyblob; + keylen = hdr->len; + } else if (is_ep11_keyblob(keyblob)) { + /* raw EP11 key blob */ + key = keyblob; + keylen = keybloblen; + } else + return -EINVAL; /* alloc temp working buffer */ + wkbuflen = (keylen + AES_BLOCK_SIZE) & (~(AES_BLOCK_SIZE - 1)); wkbuf = kmalloc(wkbuflen, GFP_ATOMIC); if (!wkbuf) return -ENOMEM; @@ -1147,46 +1321,68 @@ int ep11_key2protkey(u16 card, u16 dom, const u8 *key, size_t keylen, wki = (struct wk_info *) wkbuf; /* check struct version and pkey type */ - if (wki->version != 1 || wki->pkeytype != 1) { + if (wki->version != 1 || wki->pkeytype < 1 || wki->pkeytype > 5) { DEBUG_ERR("%s wk info version %d or pkeytype %d mismatch.\n", __func__, (int) wki->version, (int) wki->pkeytype); rc = -EIO; goto out; } - /* copy the tanslated protected key */ - switch (wki->pkeysize) { - case 16+32: - /* AES 128 protected key */ - if (protkeytype) - *protkeytype = PKEY_KEYTYPE_AES_128; - break; - case 24+32: - /* AES 192 protected key */ - if (protkeytype) - *protkeytype = PKEY_KEYTYPE_AES_192; + /* check protected key type field */ + switch (wki->pkeytype) { + case 1: /* AES */ + switch (wki->pkeysize) { + case 16+32: + /* AES 128 protected key */ + if (protkeytype) + *protkeytype = PKEY_KEYTYPE_AES_128; + break; + case 24+32: + /* AES 192 protected key */ + if (protkeytype) + *protkeytype = PKEY_KEYTYPE_AES_192; + break; + case 32+32: + /* AES 256 protected key */ + if (protkeytype) + *protkeytype = PKEY_KEYTYPE_AES_256; + break; + default: + DEBUG_ERR("%s unknown/unsupported AES pkeysize %d\n", + __func__, (int) wki->pkeysize); + rc = -EIO; + goto out; + } break; - case 32+32: - /* AES 256 protected key */ + case 3: /* EC-P */ + case 4: /* EC-ED */ + case 5: /* EC-BP */ if (protkeytype) - *protkeytype = PKEY_KEYTYPE_AES_256; + *protkeytype = 
PKEY_KEYTYPE_ECC; break; + case 2: /* TDES */ default: - DEBUG_ERR("%s unknown/unsupported pkeysize %d\n", - __func__, (int) wki->pkeysize); + DEBUG_ERR("%s unknown/unsupported key type %d\n", + __func__, (int) wki->pkeytype); rc = -EIO; goto out; } + + /* copy the tanslated protected key */ + if (wki->pkeysize > *protkeylen) { + DEBUG_ERR("%s wk info pkeysize %llu > protkeysize %u\n", + __func__, wki->pkeysize, *protkeylen); + rc = -EINVAL; + goto out; + } memcpy(protkey, wki->pkey, wki->pkeysize); - if (protkeylen) - *protkeylen = (u32) wki->pkeysize; - rc = 0; + *protkeylen = wki->pkeysize; out: kfree(wkbuf); return rc; } -EXPORT_SYMBOL(ep11_key2protkey); +EXPORT_SYMBOL(ep11_kblob2protkey); int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, int minhwtype, int minapi, const u8 *wkvp) diff --git a/drivers/s390/crypto/zcrypt_ep11misc.h b/drivers/s390/crypto/zcrypt_ep11misc.h index e3ed5ed1de86b..1e02b197c0035 100644 --- a/drivers/s390/crypto/zcrypt_ep11misc.h +++ b/drivers/s390/crypto/zcrypt_ep11misc.h @@ -12,22 +12,28 @@ #include #include -#define TOKVER_EP11_AES 0x03 /* EP11 AES key blob */ - #define EP11_API_V 4 /* highest known and supported EP11 API version */ - #define EP11_STRUCT_MAGIC 0x1234 -#define EP11_BLOB_PKEY_EXTRACTABLE 0x200000 +#define EP11_BLOB_PKEY_EXTRACTABLE 0x00200000 + +/* + * Internal used values for the version field of the key header. + * Should match to the enum pkey_key_type in pkey.h. + */ +#define TOKVER_EP11_AES 0x03 /* EP11 AES key blob (old style) */ +#define TOKVER_EP11_AES_WITH_HEADER 0x06 /* EP11 AES key blob with header */ +#define TOKVER_EP11_ECC_WITH_HEADER 0x07 /* EP11 ECC key blob with header */ /* inside view of an EP11 secure key blob */ struct ep11keyblob { union { u8 session[32]; + /* only used for PKEY_TYPE_EP11: */ struct { u8 type; /* 0x00 (TOKTYPE_NON_CCA) */ u8 res0; /* unused */ u16 len; /* total length in bytes of this blob */ - u8 version; /* 0x06 (TOKVER_EP11_AES) */ + u8 version; /* 0x03 (TOKVER_EP11_AES) */ u8 res1; /* unused */ u16 keybitlen; /* clear key bit len, 0 for unknown */ } head; @@ -41,16 +47,41 @@ struct ep11keyblob { u8 mac[32]; } __packed; +/* check ep11 key magic to find out if this is an ep11 key blob */ +static inline bool is_ep11_keyblob(const u8 *key) +{ + struct ep11keyblob *kb = (struct ep11keyblob *) key; + + return (kb->version == EP11_STRUCT_MAGIC); +} + +/* + * Simple check if the key blob is a valid EP11 AES key blob with header. + * If checkcpacfexport is enabled, the key is also checked for the + * attributes needed to export this key for CPACF use. + * Returns 0 on success or errno value on failure. + */ +int ep11_check_aes_key_with_hdr(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp); + /* - * Simple check if the key blob is a valid EP11 secure AES key. - * If keybitsize is given, the bitsize of the key is also checked. + * Simple check if the key blob is a valid EP11 ECC key blob with header. * If checkcpacfexport is enabled, the key is also checked for the * attributes needed to export this key for CPACF use. * Returns 0 on success or errno value on failure. */ -int ep11_check_aeskeyblob(debug_info_t *dbg, int dbflvl, - const u8 *key, int keybitsize, - int checkcpacfexport); +int ep11_check_ecc_key_with_hdr(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp); + +/* + * Simple check if the key blob is a valid EP11 AES key blob with + * the header in the session field (old style EP11 AES key). 
+ * If checkcpacfexport is enabled, the key is also checked for the + * attributes needed to export this key for CPACF use. + * Returns 0 on success or errno value on failure. + */ +int ep11_check_aes_key(debug_info_t *dbg, int dbflvl, + const u8 *key, size_t keylen, int checkcpacfexp); /* EP11 card info struct */ struct ep11_card_info { @@ -91,12 +122,6 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags, int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags, const u8 *clrkey, u8 *keybuf, size_t *keybufsize); -/* - * Derive proteced key from EP11 AES secure key blob. - */ -int ep11_key2protkey(u16 cardnr, u16 domain, const u8 *key, size_t keylen, - u8 *protkey, u32 *protkeylen, u32 *protkeytype); - /* * Build a list of ep11 apqns meeting the following constrains: * - apqn is online and is in fact an EP11 apqn @@ -119,6 +144,12 @@ int ep11_key2protkey(u16 cardnr, u16 domain, const u8 *key, size_t keylen, int ep11_findcard2(u32 **apqns, u32 *nr_apqns, u16 cardnr, u16 domain, int minhwtype, int minapi, const u8 *wkvp); +/* + * Derive proteced key from EP11 key blob (AES and ECC keys). + */ +int ep11_kblob2protkey(u16 card, u16 dom, const u8 *key, size_t keylen, + u8 *protkey, u32 *protkeylen, u32 *protkeytype); + void zcrypt_ep11misc_exit(void); #endif /* _ZCRYPT_EP11MISC_H_ */ From b3bd02495cb339124f13135d51940cf48d83e5cb Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 15 Sep 2020 08:53:50 +0200 Subject: [PATCH 44/85] s390/stp: add locking to sysfs functions The sysfs function might race with stp_work_fn. To prevent that, add the required locking. Another issue is that the sysfs functions are checking the stp_online flag, but this flag just holds the user setting whether STP is enabled. Add a flag to clock_sync_flag whether stp_info holds valid data and use that instead. Cc: stable@vger.kernel.org Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/kernel/time.c | 118 +++++++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 33 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index bc806e1547d6a..bee380340bada 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -299,8 +299,9 @@ static DEFINE_PER_CPU(atomic_t, clock_sync_word); static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; -#define CLOCK_SYNC_HAS_STP 0 -#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_STPINFO_VALID 2 /* * The get_clock function for the physical clock. 
It will get the current @@ -535,6 +536,22 @@ void stp_queue_work(void) queue_work(time_sync_wq, &stp_work); } +static int __store_stpinfo(void) +{ + int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + + if (rc) + clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + else + set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + return rc; +} + +static int stpinfo_valid(void) +{ + return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); +} + static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; @@ -557,8 +574,7 @@ static int stp_sync_clock(void *data) if (rc == 0) { sync->clock_delta = clock_delta; clock_sync_global(clock_delta); - rc = chsc_sstpi(stp_page, &stp_info, - sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc == 0 && stp_info.tmd != 2) rc = -EAGAIN; } @@ -604,7 +620,7 @@ static void stp_work_fn(struct work_struct *work) if (rc) goto out_unlock; - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc || stp_info.c == 0) goto out_unlock; @@ -641,10 +657,14 @@ static ssize_t ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(ctn_id); @@ -653,9 +673,13 @@ static ssize_t ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.ctn); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.ctn); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(ctn_type); @@ -664,9 +688,13 @@ static ssize_t dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x2000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x2000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(dst_offset); @@ -675,9 +703,13 @@ static ssize_t leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x8000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x8000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(leap_seconds); @@ -686,9 +718,13 @@ static ssize_t stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(stratum); @@ -697,9 +733,13 @@ static ssize_t time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x0800)) - return 
-ENODATA; - return sprintf(buf, "%i\n", (int) stp_info.tto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x0800)) + ret = sprintf(buf, "%i\n", (int) stp_info.tto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(time_offset); @@ -708,9 +748,13 @@ static ssize_t time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x4000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x4000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(time_zone_offset); @@ -719,9 +763,13 @@ static ssize_t timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tmd); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tmd); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(timing_mode); @@ -730,9 +778,13 @@ static ssize_t timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tst); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tst); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR_RO(timing_state); From bb7d066a091654b6d6c0b6061bda438bf36c6613 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 3 Aug 2020 08:50:38 +0200 Subject: [PATCH 45/85] s390/stp: use __packed Use __packed instead of __attribute__((packed)) Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h index f0ddefb06ec88..572f6f4286e2a 100644 --- a/arch/s390/include/asm/stp.h +++ b/arch/s390/include/asm/stp.h @@ -6,6 +6,8 @@ #ifndef __S390_STP_H #define __S390_STP_H +#include + /* notifier for syncs */ extern struct atomic_notifier_head s390_epoch_delta_notifier; @@ -16,7 +18,7 @@ struct stp_irq_parm { unsigned int lac : 1; /* Link availability change */ unsigned int tcpc : 1; /* Time control parameter change */ unsigned int _pad2 : 15; -} __attribute__ ((packed)); +} __packed; #define STP_OP_SYNC 1 #define STP_OP_CTRL 3 @@ -42,7 +44,7 @@ struct stp_sstpi { unsigned int rsvd5; unsigned int todoff[4]; unsigned int rsvd6[48]; -} __attribute__ ((packed)); +} __packed; /* Functions needed by the machine check handler */ int stp_sync_check(void); From 4ab79ed223d2ff32ffe16cfd841e064199341c34 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 8 Sep 2020 10:14:00 +0200 Subject: [PATCH 46/85] s390/stp: use u32 instead of unsigned int In hardware-dependent headers using u32 is easier to read and less error-prone. 
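As an aside, a hypothetical compile-time check (not part of this patch) could document that the unnamed u32 bit-fields merely reserve space, so the converted structures keep their 32-bit word layout; using static_assert() from <linux/build_bug.h>:

	/* sketch only: the interruption parameter still occupies exactly one 32-bit word */
	static_assert(sizeof(struct stp_irq_parm) == sizeof(u32));
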
Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/stp.h | 50 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h index 572f6f4286e2a..a5c0bc1e5b902 100644 --- a/arch/s390/include/asm/stp.h +++ b/arch/s390/include/asm/stp.h @@ -13,37 +13,37 @@ extern struct atomic_notifier_head s390_epoch_delta_notifier; /* STP interruption parameter */ struct stp_irq_parm { - unsigned int _pad0 : 14; - unsigned int tsc : 1; /* Timing status change */ - unsigned int lac : 1; /* Link availability change */ - unsigned int tcpc : 1; /* Time control parameter change */ - unsigned int _pad2 : 15; + u32 : 14; + u32 tsc : 1; /* Timing status change */ + u32 lac : 1; /* Link availability change */ + u32 tcpc : 1; /* Time control parameter change */ + u32 : 15; } __packed; #define STP_OP_SYNC 1 #define STP_OP_CTRL 3 struct stp_sstpi { - unsigned int rsvd0; - unsigned int rsvd1 : 8; - unsigned int stratum : 8; - unsigned int vbits : 16; - unsigned int leaps : 16; - unsigned int tmd : 4; - unsigned int ctn : 4; - unsigned int rsvd2 : 3; - unsigned int c : 1; - unsigned int tst : 4; - unsigned int tzo : 16; - unsigned int dsto : 16; - unsigned int ctrl : 16; - unsigned int rsvd3 : 16; - unsigned int tto; - unsigned int rsvd4; - unsigned int ctnid[3]; - unsigned int rsvd5; - unsigned int todoff[4]; - unsigned int rsvd6[48]; + u32 : 32; + u32 : 8; + u32 stratum : 8; + u32 vbits : 16; + u32 leaps : 16; + u32 tmd : 4; + u32 ctn : 4; + u32 : 3; + u32 c : 1; + u32 tst : 4; + u32 tzo : 16; + u32 dsto : 16; + u32 ctrl : 16; + u32 : 16; + u32 tto; + u32 : 32; + u32 ctnid[3]; + u32 : 32; + u32 todoff[4]; + u32 rsvd[48]; } __packed; /* Functions needed by the machine check handler */ int stp_sync_check(void); int stp_island_check(void); From b2539aa0d7ff1e42c74a9dd8c73ec1c2771c9e5d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 12 Jun 2020 12:59:19 +0200 Subject: [PATCH 47/85] s390/stp: add support for leap seconds In the current implementation, leap seconds are only synchronized during the bootup process when the STP clock is synced. If the Leap second offset (LSO) changes, the machine must be rebooted, which is not desired. This patch adds the required code to handle Leap second changes during runtime. If the Leap second changes, a Configuration change machine check is triggered. The STP code then schedules a Leap second insertion/deletion with do_adjtimex().
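For illustration only, a minimal sketch of how such a request is handed to the timekeeping core; it mirrors the STA_INS/STA_DEL handling that stp_check_leap() below implements, and the helper name is hypothetical:

	#include <linux/string.h>
	#include <linux/timex.h>

	/* sketch: schedule a leap second insertion (delta == 1) or deletion (delta == -1) */
	static int sketch_schedule_leap(int delta)
	{
		struct __kernel_timex txc;
		int ret;

		memset(&txc, 0, sizeof(txc));
		ret = do_adjtimex(&txc);	/* modes == 0: read current status */
		if (ret < 0)
			return ret;
		txc.modes = ADJ_STATUS;
		txc.status &= ~(STA_INS | STA_DEL);
		txc.status |= (delta > 0) ? STA_INS : STA_DEL;
		return do_adjtimex(&txc);	/* kernel applies it at the next UTC midnight */
	}
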
Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/cio.h | 1 + arch/s390/include/asm/stp.h | 46 +++++++++++++++++- arch/s390/kernel/time.c | 94 +++++++++++++++++++++++++++++++++---- drivers/s390/cio/chsc.c | 21 +++++++++ 4 files changed, 152 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index b5bfb3123cb1f..953a7316b30ab 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -372,6 +372,7 @@ struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages); /* Function from drivers/s390/cio/chsc.c */ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); +int chsc_stzi(void *page, void *result, size_t size); int chsc_sgib(u32 origin); #endif diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h index a5c0bc1e5b902..ba07463897c10 100644 --- a/arch/s390/include/asm/stp.h +++ b/arch/s390/include/asm/stp.h @@ -25,7 +25,9 @@ struct stp_irq_parm { struct stp_sstpi { u32 : 32; - u32 : 8; + u32 tu : 1; + u32 lu : 1; + u32 : 6; u32 stratum : 8; u32 vbits : 16; u32 leaps : 16; @@ -46,6 +48,48 @@ struct stp_sstpi { u32 rsvd[48]; } __packed; +struct stp_tzib { + u32 tzan : 16; + u32 : 16; + u32 tzo : 16; + u32 dsto : 16; + u32 stn; + u32 dstn; + u64 dst_on_alg; + u64 dst_off_alg; +} __packed; + +struct stp_tcpib { + u32 atcode : 4; + u32 ntcode : 4; + u32 d : 1; + u32 : 23; + s32 tto; + struct stp_tzib atzib; + struct stp_tzib ntzib; + s32 adst_offset : 16; + s32 ndst_offset : 16; + u32 rsvd1; + u64 ntzib_update; + u64 ndsto_update; +} __packed; + +struct stp_lsoib { + u32 p : 1; + u32 : 31; + s32 also : 16; + s32 nlso : 16; + u64 nlsout; +} __packed; + +struct stp_stzi { + u32 rsvd0[3]; + u64 data_ts; + u32 rsvd1[22]; + struct stp_tcpib tcpib; + struct stp_lsoib lsoib; +} __packed; + /* Functions needed by the machine check handler */ int stp_sync_check(void); int stp_island_check(void); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index bee380340bada..49648d574b35c 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -598,6 +598,81 @@ static int stp_sync_clock(void *data) return 0; } +static int stp_clear_leap(void) +{ + struct __kernel_timex txc; + int ret; + + memset(&txc, 0, sizeof(txc)); + + ret = do_adjtimex(&txc); + if (ret < 0) + return ret; + + txc.modes = ADJ_STATUS; + txc.status &= ~(STA_INS|STA_DEL); + return do_adjtimex(&txc); +} + +static void stp_check_leap(void) +{ + struct stp_stzi stzi; + struct stp_lsoib *lsoib = &stzi.lsoib; + struct __kernel_timex txc; + int64_t timediff; + int leapdiff, ret; + + if (!stp_info.lu || !check_sync_clock()) { + /* + * Either a scheduled leap second was removed by the operator, + * or STP is out of sync. In both cases, clear the leap second + * kernel flags. 
+ */ + if (stp_clear_leap() < 0) + pr_err("failed to clear leap second flags\n"); + return; + } + + if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) { + pr_err("stzi failed\n"); + return; + } + + timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC; + leapdiff = lsoib->nlso - lsoib->also; + + if (leapdiff != 1 && leapdiff != -1) { + pr_err("Cannot schedule %d leap seconds\n", leapdiff); + return; + } + + if (timediff < 0) { + if (stp_clear_leap() < 0) + pr_err("failed to clear leap second flags\n"); + } else if (timediff < 7200) { + memset(&txc, 0, sizeof(txc)); + ret = do_adjtimex(&txc); + if (ret < 0) + return; + + txc.modes = ADJ_STATUS; + if (leapdiff > 0) + txc.status |= STA_INS; + else + txc.status |= STA_DEL; + ret = do_adjtimex(&txc); + if (ret < 0) + pr_err("failed to set leap second flags\n"); + /* arm Timer to clear leap second flags */ + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC)); + } else { + /* The day the leap second is scheduled for hasn't been reached. Retry + * in one hour. + */ + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC)); + } +} + /* * STP work. Check for the STP state and take over the clock * synchronization if the STP clock source is usable. @@ -616,7 +691,7 @@ static void stp_work_fn(struct work_struct *work) goto out_unlock; } - rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL); if (rc) goto out_unlock; @@ -625,14 +700,13 @@ static void stp_work_fn(struct work_struct *work) goto out_unlock; /* Skip synchronization if the clock is already in sync. */ - if (check_sync_clock()) - goto out_unlock; - - memset(&stp_sync, 0, sizeof(stp_sync)); - cpus_read_lock(); - atomic_set(&stp_sync.cpus, num_online_cpus() - 1); - stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask); - cpus_read_unlock(); + if (!check_sync_clock()) { + memset(&stp_sync, 0, sizeof(stp_sync)); + cpus_read_lock(); + atomic_set(&stp_sync.cpus, num_online_cpus() - 1); + stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask); + cpus_read_unlock(); + } if (!check_sync_clock()) /* @@ -640,6 +714,8 @@ static void stp_work_fn(struct work_struct *work) * Retry after a second. */ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); + else if (stp_info.lu) + stp_check_leap(); out_unlock: mutex_unlock(&stp_work_mutex); diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index c314e9495c1b4..e612e570447a6 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -1260,6 +1260,27 @@ int chsc_sstpi(void *page, void *result, size_t size) return (rr->response.code == 0x0001) ? 0 : -EIO; } +int chsc_stzi(void *page, void *result, size_t size) +{ + struct { + struct chsc_header request; + unsigned int rsvd0[3]; + struct chsc_header response; + char data[]; + } *rr; + int rc; + + memset(page, 0, PAGE_SIZE); + rr = page; + rr->request.length = 0x0010; + rr->request.code = 0x003e; + rc = chsc(rr); + if (rc) + return -EIO; + memcpy(result, &rr->data, size); + return (rr->response.code == 0x0001) ? 
0 : -EIO; +} + int chsc_siosl(struct subchannel_id schid) { struct { From 4fb53dde770cc095a6e279ca9d9c72d49488cdb3 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 17 Jun 2020 10:58:47 +0200 Subject: [PATCH 48/85] s390/stp: add sysfs file to show scheduled leap seconds This patch introduces /sys/devices/system/stp/scheduled_leap_seconds, which will contain either 0,0 if no leap second is scheduled, or the UTC timestamp + leap second offset. Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/kernel/time.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 49648d574b35c..da271f8ec8d66 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -790,6 +790,34 @@ static ssize_t leap_seconds_show(struct device *dev, static DEVICE_ATTR_RO(leap_seconds); +static ssize_t leap_seconds_scheduled_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stp_stzi stzi; + ssize_t ret; + + mutex_lock(&stp_work_mutex); + if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) { + mutex_unlock(&stp_work_mutex); + return -ENODATA; + } + + ret = chsc_stzi(stp_page, &stzi, sizeof(stzi)); + mutex_unlock(&stp_work_mutex); + if (ret < 0) + return ret; + + if (!stzi.lsoib.p) + return sprintf(buf, "0,0\n"); + + return sprintf(buf, "%llu,%d\n", + tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC, + stzi.lsoib.nlso - stzi.lsoib.also); +} + +static DEVICE_ATTR_RO(leap_seconds_scheduled); + static ssize_t stratum_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -906,6 +934,7 @@ static struct device_attribute *stp_attributes[] = { &dev_attr_dst_offset, &dev_attr_leap_seconds, &dev_attr_online, + &dev_attr_leap_seconds_scheduled, &dev_attr_stratum, &dev_attr_time_offset, &dev_attr_time_zone_offset, From ad5ceb33eee128346475f5efa672e6402ae15e51 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 21 Sep 2020 17:23:42 +0200 Subject: [PATCH 49/85] s390/stp: unify stp_work_mutex and clock_sync_mutex No need to have two mutexes, and while at it rename it to stp_mutex. Signed-off-by: Sven Schnelle Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/kernel/time.c | 53 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index da271f8ec8d66..0ac30ee2c6330 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -296,7 +296,7 @@ void __init time_init(void) } static DEFINE_PER_CPU(atomic_t, clock_sync_word); -static DEFINE_MUTEX(clock_sync_mutex); +static DEFINE_MUTEX(stp_mutex); static unsigned long clock_sync_flags; #define CLOCK_SYNC_HAS_STP 0 @@ -445,7 +445,6 @@ static struct stp_sstpi stp_info; static void *stp_page; static void stp_work_fn(struct work_struct *work); -static DEFINE_MUTEX(stp_work_mutex); static DECLARE_WORK(stp_work, stp_work_fn); static struct timer_list stp_timer; @@ -683,7 +682,7 @@ static void stp_work_fn(struct work_struct *work) int rc; /* prevent multiple execution. 
*/ - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); @@ -718,7 +717,7 @@ static void stp_work_fn(struct work_struct *work) stp_check_leap(); out_unlock: - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); } /* @@ -735,11 +734,11 @@ static ssize_t ctn_id_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%016llx\n", *(unsigned long long *) stp_info.ctnid); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -751,10 +750,10 @@ static ssize_t ctn_type_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%i\n", stp_info.ctn); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -766,10 +765,10 @@ static ssize_t dst_offset_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x2000)) ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -781,10 +780,10 @@ static ssize_t leap_seconds_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x8000)) ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -797,14 +796,14 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev, struct stp_stzi stzi; ssize_t ret; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) { - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return -ENODATA; } ret = chsc_stzi(stp_page, &stzi, sizeof(stzi)); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); if (ret < 0) return ret; @@ -824,10 +823,10 @@ static ssize_t stratum_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -839,10 +838,10 @@ static ssize_t time_offset_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x0800)) ret = sprintf(buf, "%i\n", (int) stp_info.tto); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -854,10 +853,10 @@ static ssize_t time_zone_offset_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid() && (stp_info.vbits & 0x4000)) ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -869,10 +868,10 @@ static ssize_t timing_mode_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%i\n", stp_info.tmd); - mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -884,10 +883,10 @@ static ssize_t timing_state_show(struct device *dev, { ssize_t ret = -ENODATA; - mutex_lock(&stp_work_mutex); + mutex_lock(&stp_mutex); if (stpinfo_valid()) ret = sprintf(buf, "%i\n", stp_info.tst); - 
mutex_unlock(&stp_work_mutex); + mutex_unlock(&stp_mutex); return ret; } @@ -911,14 +910,14 @@ static ssize_t online_store(struct device *dev, return -EINVAL; if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return -EOPNOTSUPP; - mutex_lock(&clock_sync_mutex); + mutex_lock(&stp_mutex); stp_online = value; if (stp_online) set_bit(CLOCK_SYNC_STP, &clock_sync_flags); else clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); queue_work(time_sync_wq, &stp_work); - mutex_unlock(&clock_sync_mutex); + mutex_unlock(&stp_mutex); return count; } From 62aacabc1301364446b36a418229c12a8e7162ba Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Mon, 21 Sep 2020 21:11:01 +0800 Subject: [PATCH 50/85] s390/3215: simplify the return expression of tty3215_open() Simplify the return expression. Link: https://lkml.kernel.org/r/20200921131101.93037-1-miaoqinglang@huawei.com Signed-off-by: Qinglang Miao Signed-off-by: Vasily Gorbik --- drivers/s390/char/con3215.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 92757f9bd010f..d8acabbb1ed3b 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -978,7 +978,6 @@ static int tty3215_install(struct tty_driver *driver, struct tty_struct *tty) static int tty3215_open(struct tty_struct *tty, struct file * filp) { struct raw3215_info *raw = tty->driver_data; - int retval; tty_port_tty_set(&raw->port, tty); @@ -986,11 +985,7 @@ static int tty3215_open(struct tty_struct *tty, struct file * filp) /* * Start up 3215 device */ - retval = raw3215_startup(raw); - if (retval) - return retval; - - return 0; + return raw3215_startup(raw); } /* From 07a699bc43d1feb2abe3d47781e2db3d08554bcc Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 23 Sep 2020 09:37:43 +0200 Subject: [PATCH 51/85] s390/kaslr: avoid mixing valid random value and an error code 0 is a valid random value. To avoid mixing it with the error code 0 as a return code, make get_random() take an extra argument to output the random value and return an error code. Reviewed-by: Philipp Rudo Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/boot/kaslr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index d4442163ffa99..c8549a0474e1e 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -42,7 +42,7 @@ static int check_prng(void) return PRNG_MODE_TDES; } -static unsigned long get_random(unsigned long limit) +static int get_random(unsigned long limit, unsigned long *value) { struct prng_parm prng = { /* initial parameter block for tdes mode, copied from libica */ @@ -84,9 +84,10 @@ static unsigned long get_random(unsigned long limit) (u8 *) &random, sizeof(random)); break; default: - random = 0; + return -1; } - return random % limit; + *value = random % limit; + return 0; } unsigned long get_random_base(unsigned long safe_addr) @@ -143,8 +144,7 @@ unsigned long get_random_base(unsigned long safe_addr) return 0; } - base = get_random(block_sum); - if (base == 0) + if (get_random(block_sum, &base)) return 0; if (base < safe_addr) base = safe_addr; From 5c46f2768c4bbf8e0beebea9adac1320015816b7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 18 Sep 2020 16:02:45 +0200 Subject: [PATCH 52/85] s390/kaslr: correct and explain randomization base generation Currently there are several minor problems with the randomization base generation code: 1. It might misbehave in low memory conditions.
In particular there might be enough space for the kernel on [0, block_sum] but after if (base < safe_addr) base = safe_addr; it might not be enough anymore. 2. It does not correctly handle minimal address constraint. In condition if (base < safe_addr) base = safe_addr; a synthetic value is compared with an address. If we have a memory setup with memory holes due to offline memory regions, and safe_addr is close to the end of the first online memory block - we might position the kernel in invalid memory. 3. block_sum calculation logic contains off-by-one error. Let's say we have a memory block in which the kernel fits perfectly (end - start == kernel_size). In this case: if (end - start < kernel_size) continue; block_sum += end - start - kernel_size; block_sum is not increased, while it is a valid kernel position. So, address problems listed and explain algorithm used. Besides that restructuring the code makes it possible to extend kernel positioning algorithm further. Currently we pick position in between single [min, max] range (min = safe_addr, max = memory_limit). In future we can do that for multiple ranges as well (by calling count_valid_kernel_positions for each range). Reviewed-by: Philipp Rudo Reviewed-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/boot/kaslr.c | 130 +++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 38 deletions(-) diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index c8549a0474e1e..d844a5ef9089f 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -90,14 +90,95 @@ static int get_random(unsigned long limit, unsigned long *value) return 0; } +/* + * To randomize kernel base address we have to consider several facts: + * 1. physical online memory might not be continuous and have holes. mem_detect + * info contains list of online memory ranges we should consider. + * 2. we have several memory regions which are occupied and we should not + * overlap and destroy them. Currently safe_addr tells us the border below + * which all those occupied regions are. We are safe to use anything above + * safe_addr. + * 3. the upper limit might apply as well, even if memory above that limit is + * online. Currently those limitations are: + * 3.1. Limit set by "mem=" kernel command line option + * 3.2. memory reserved at the end for kasan initialization. + * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size). + * Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages + * (16 pages when the kernel is built with kasan enabled) + * Assumptions: + * 1. kernel size (including .bss size) and upper memory limit are page aligned. + * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE + * aligned (in practice memory configurations granularity on z/VM and LPAR + * is 1mb). + * + * To guarantee uniform distribution of kernel base address among all suitable + * addresses we generate random value just once. For that we need to build a + * continuous range in which every value would be suitable. We can build this + * range by simply counting all suitable addresses (let's call them positions) + * which would be valid as kernel base address. To count positions we iterate + * over online memory ranges. 
For each range which is big enough for the + * kernel image we count all suitable addresses we can put the kernel image at + * that is + * (end - start - kernel_size) / THREAD_SIZE + 1 + * Two functions count_valid_kernel_positions and position_to_address help + * to count positions in memory range given and then convert position back + * to address. + */ +static unsigned long count_valid_kernel_positions(unsigned long kernel_size, + unsigned long _min, + unsigned long _max) +{ + unsigned long start, end, pos = 0; + int i; + + for_each_mem_detect_block(i, &start, &end) { + if (_min >= end) + continue; + if (start >= _max) + break; + start = max(_min, start); + end = min(_max, end); + if (end - start < kernel_size) + continue; + pos += (end - start - kernel_size) / THREAD_SIZE + 1; + } + + return pos; +} + +static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size, + unsigned long _min, unsigned long _max) +{ + unsigned long start, end; + int i; + + for_each_mem_detect_block(i, &start, &end) { + if (_min >= end) + continue; + if (start >= _max) + break; + start = max(_min, start); + end = min(_max, end); + if (end - start < kernel_size) + continue; + if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos) + return start + (pos - 1) * THREAD_SIZE; + pos -= (end - start - kernel_size) / THREAD_SIZE + 1; + } + + return 0; +} + unsigned long get_random_base(unsigned long safe_addr) { - unsigned long memory_limit = memory_end_set ? memory_end : 0; - unsigned long base, start, end, kernel_size; - unsigned long block_sum, offset; + unsigned long memory_limit = get_mem_detect_end(); + unsigned long base_pos, max_pos, kernel_size; unsigned long kasan_needs; int i; + if (memory_end_set) + memory_limit = min(memory_limit, memory_end); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) { if (safe_addr < INITRD_START + INITRD_SIZE) safe_addr = INITRD_START + INITRD_SIZE; @@ -127,44 +208,17 @@ unsigned long get_random_base(unsigned long safe_addr) } kernel_size = vmlinux.image_size + vmlinux.bss_size; - block_sum = 0; - for_each_mem_detect_block(i, &start, &end) { - if (memory_limit) { - if (start >= memory_limit) - break; - if (end > memory_limit) - end = memory_limit; - } - if (end - start < kernel_size) - continue; - block_sum += end - start - kernel_size; - } - if (!block_sum) { + if (safe_addr + kernel_size > memory_limit) + return 0; + + max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit); + if (!max_pos) { sclp_early_printk("KASLR disabled: not enough memory\n"); return 0; } - if (get_random(block_sum, &base)) + /* we need a value in the range [1, base_pos] inclusive */ + if (get_random(max_pos, &base_pos)) return 0; - if (base < safe_addr) - base = safe_addr; - block_sum = offset = 0; - for_each_mem_detect_block(i, &start, &end) { - if (memory_limit) { - if (start >= memory_limit) - break; - if (end > memory_limit) - end = memory_limit; - } - if (end - start < kernel_size) - continue; - block_sum += end - start - kernel_size; - if (base <= block_sum) { - base = start + base - offset; - base = ALIGN_DOWN(base, THREAD_SIZE); - break; - } - offset = block_sum; - } - return base; + return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit); } From 1c7c83e8d2351ee3d736094115e447a5da8e5369 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Sep 2020 18:29:29 +0200 Subject: [PATCH 53/85] s390: remove unused _swsusp_reset_dma Since commit 394216275c7d ("s390: remove broken hibernate / power management 
support") _swsusp_reset_dma is unused and could be safely removed. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 2 -- arch/s390/boot/text_dma.S | 17 ----------------- arch/s390/include/asm/setup.h | 1 - arch/s390/kernel/setup.c | 1 - 4 files changed, 21 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 81835483169b8..90842936545bc 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -48,8 +48,6 @@ struct diag_ops __bootdata_preserved(diag_dma_ops) = { }; static struct diag210 _diag210_tmp_dma __section(.dma.data); struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma; -void _swsusp_reset_dma(void); -unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma); void error(char *x) { diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S index 9715715c4c28d..f7c77cd518f2b 100644 --- a/arch/s390/boot/text_dma.S +++ b/arch/s390/boot/text_dma.S @@ -96,23 +96,6 @@ ENTRY(_diag0c_dma) BR_EX_DMA_r14 ENDPROC(_diag0c_dma) -/* - * void _swsusp_reset_dma(void) - */ -ENTRY(_swsusp_reset_dma) - larl %r1,restart_entry - larl %r2,.Lrestart_diag308_psw - og %r1,0(%r2) - stg %r1,0(%r0) - lghi %r0,0 - diag %r0,%r0,0x308 -restart_entry: - lhi %r1,1 - sigp %r1,%r0,SIGP_SET_ARCHITECTURE - sam64 - BR_EX_DMA_r14 -ENDPROC(_swsusp_reset_dma) - /* * void _diag308_reset_dma(void) * diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 7b104f156e342..396db16384176 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -92,7 +92,6 @@ extern int memory_end_set; extern unsigned long memory_end; extern unsigned long vmalloc_size; extern unsigned long max_physmem_end; -extern unsigned long __swsusp_reset_dma; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ae2f4d9460486..c1b78aae270b9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -102,7 +102,6 @@ struct mem_detect_info __bootdata(mem_detect); struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table); struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table); -unsigned long __bootdata_preserved(__swsusp_reset_dma); unsigned long __bootdata_preserved(__stext_dma); unsigned long __bootdata_preserved(__etext_dma); unsigned long __bootdata_preserved(__sdma); From 2835c2ea95d50625108e47a459e1a47f6be836ce Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Sep 2020 19:07:04 +0200 Subject: [PATCH 54/85] s390/startup: avoid save_area_sync overflow Currently we overflow save_area_sync and write over save_area_async. Although this is not a real problem make startup_pgm_check_handler consistent with late pgm check handler and store [%r0,%r7] directly into gpregs_save_area. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/head.S | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index dae10961d0724..1a2c2b1ed9649 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -360,22 +360,23 @@ ENTRY(startup_kdump) # the save area and does disabled wait with a faulty address. 
# ENTRY(startup_pgm_check_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - la %r1,4095 - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r1) - mvc __LC_GPREGS_SAVE_AREA-4095(128,%r1),__LC_SAVE_AREA_SYNC - mvc __LC_PSW_SAVE_AREA-4095(16,%r1),__LC_PGM_OLD_PSW + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + la %r8,4095 + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) + stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC + mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW ni __LC_RETURN_PSW,0xfc # remove IO and EX bits ni __LC_RETURN_PSW+1,0xfb # remove MCHK bit oi __LC_RETURN_PSW+1,0x2 # set wait state bit - larl %r2,.Lold_psw_disabled_wait - stg %r2,__LC_PGM_NEW_PSW+8 - l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r2) + larl %r9,.Lold_psw_disabled_wait + stg %r9,__LC_PGM_NEW_PSW+8 + l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r9) brasl %r14,print_pgm_check_info .Lold_psw_disabled_wait: - la %r1,4095 - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + la %r8,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait .Ldump_info_stack: .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD From ad3e6948f90ac3d71f9c03ec92009d99dfb561e9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 28 Sep 2020 07:22:50 +0200 Subject: [PATCH 55/85] s390: remove cad commandline option remove the cad command line option as the instruction was never published and never used by userspace. Signed-off-by: Sven Schnelle Reviewed-by: Vasily Gorbik Acked-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/kernel/early.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 0782772318580..705844f739345 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -274,19 +274,6 @@ static int __init disable_vector_extension(char *str) } early_param("novx", disable_vector_extension); -static int __init cad_setup(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (!rc && enabled && test_facility(128)) - /* Enable problem state CAD. */ - __ctl_set_bit(2, 3); - return rc; -} -early_param("cad", cad_setup); - char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; static void __init setup_boot_command_line(void) { From 54530ce6a184ed8c0accc3c50b659590ec445222 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sat, 26 Sep 2020 00:08:54 +0200 Subject: [PATCH 56/85] s390/cio: remove unused channel_subsystem_reinit Added with commit 77e844b96440 ("s390/hibernate: add early resume function") unused since commit 394216275c7d ("s390: remove broken hibernate / power management support"). 
Reviewed-by: Vineeth Vijayan Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/cio.h | 1 - drivers/s390/cio/css.c | 14 -------------- 2 files changed, 15 deletions(-) diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 953a7316b30ab..5c58756d6476c 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -356,7 +356,6 @@ static inline u8 pathmask_to_pos(u8 mask) return 8 - ffs(mask); } -void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); extern void *cio_dma_zalloc(size_t size); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index aca022239b333..c17d6c99f4044 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1350,20 +1350,6 @@ static int __init channel_subsystem_init_sync(void) } subsys_initcall_sync(channel_subsystem_init_sync); -void channel_subsystem_reinit(void) -{ - struct channel_path *chp; - struct chp_id chpid; - - chsc_enable_facility(CHSC_SDA_OC_MSS); - chp_id_for_each(&chpid) { - chp = chpid_to_chp(chpid); - if (chp) - chp_update_desc(chp); - } - cmf_reactivate(); -} - #ifdef CONFIG_PROC_FS static ssize_t cio_settle_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) From 3731ac579519d6c24ad2c5ac91959586b1919b5c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 27 Sep 2020 21:34:55 +0200 Subject: [PATCH 57/85] s390/vdso: remove orphaned declarations Remove couple of declarations which are unused since commit 4bff8cb54502 ("s390: convert to GENERIC_VDSO"). Acked-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 82f86b3c394b9..29b44a930e71a 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -35,9 +35,7 @@ struct vdso_per_cpu_data { }; extern struct vdso_data *vdso_data; -extern struct vdso_data boot_vdso_data; -void vdso_alloc_boot_cpu(struct lowcore *lowcore); int vdso_alloc_per_cpu(struct lowcore *lowcore); void vdso_free_per_cpu(struct lowcore *lowcore); From 86cde618e718d0d286a565bcabfce5bdfc304685 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Sep 2020 22:42:30 +0200 Subject: [PATCH 58/85] s390/startup: correct "dfltcc" option parsing Currently if just "dfltcc" is passed as a kernel command line option "val" going to be NULL, this leads to reading at address 0 in strcmp(val, "off") Fix that by making sure "val" is not NULL. This does not affect option handling logic. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 92ebc4a58fe2d..1add096e09a62 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -230,7 +230,7 @@ void parse_boot_command_line(void) if (!strcmp(param, "vmalloc") && val) vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); - if (!strcmp(param, "dfltcc")) { + if (!strcmp(param, "dfltcc") && val) { if (!strcmp(val, "off")) zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED; else if (!strcmp(val, "on")) From f980ec9e34f8b0e74ba524cfc1b7bd9ff10ee6b4 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sat, 26 Sep 2020 00:27:25 +0200 Subject: [PATCH 59/85] s390/sclp_sdias: remove unused sclp_sdias_exit sclp_sdias cannot be built as a module, CRASH_DUMP option is a bool not a tristate. 
zcore_exit() has already been removed with commit cbe62fac178c ("s390: char: make zcore explicitly non-modular"). Remove orphaned sclp_sdias_exit for consistency as well. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- drivers/s390/char/sclp.h | 2 -- drivers/s390/char/sclp_sdias.c | 6 ------ 2 files changed, 8 deletions(-) diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 196333013e543..ccc2d759c575b 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -305,9 +305,7 @@ int sclp_deactivate(void); int sclp_reactivate(void); int sclp_sync_request(sclp_cmdw_t command, void *sccb); int sclp_sync_request_timeout(sclp_cmdw_t command, void *sccb, int timeout); - int sclp_sdias_init(void); -void sclp_sdias_exit(void); enum { sclp_init_state_uninitialized, diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 644b61013679a..be8cad61b4cfa 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -275,9 +275,3 @@ int __init sclp_sdias_init(void) TRACE("init done\n"); return 0; } - -void __exit sclp_sdias_exit(void) -{ - debug_unregister(sdias_dbf); - sclp_unregister(&sclp_sdias_register); -} From 3372e88b094e7ed463b92dc28e1121ec44fb6fbe Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sat, 26 Sep 2020 00:38:41 +0200 Subject: [PATCH 60/85] s390/sclp: remove orphaned sclp_set_columns and sclp_set_htab sclp_set_columns and sclp_set_htab are leftovers since commit 095761d28ae4 ("[S390] sclp_tty: remove ioctl interface."), remove them as a dead code. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- drivers/s390/char/sclp_rw.c | 18 ------------------ drivers/s390/char/sclp_rw.h | 2 -- 2 files changed, 20 deletions(-) diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c index 44594a4925538..d6c84e354df5b 100644 --- a/drivers/s390/char/sclp_rw.c +++ b/drivers/s390/char/sclp_rw.c @@ -336,24 +336,6 @@ sclp_chars_in_buffer(struct sclp_buffer *buffer) return count; } -/* - * sets or provides some values that influence the drivers behaviour - */ -void -sclp_set_columns(struct sclp_buffer *buffer, unsigned short columns) -{ - buffer->columns = columns; - if (buffer->current_line != NULL && - buffer->current_length > buffer->columns) - sclp_finalize_mto(buffer); -} - -void -sclp_set_htab(struct sclp_buffer *buffer, unsigned short htab) -{ - buffer->htab = htab; -} - /* * called by sclp_console_init and/or sclp_tty_init */ diff --git a/drivers/s390/char/sclp_rw.h b/drivers/s390/char/sclp_rw.h index a2eb22f67393f..93d706e4935ce 100644 --- a/drivers/s390/char/sclp_rw.h +++ b/drivers/s390/char/sclp_rw.h @@ -86,8 +86,6 @@ void *sclp_unmake_buffer(struct sclp_buffer *); int sclp_buffer_space(struct sclp_buffer *); int sclp_write(struct sclp_buffer *buffer, const unsigned char *, int); int sclp_emit_buffer(struct sclp_buffer *,void (*)(struct sclp_buffer *,int)); -void sclp_set_columns(struct sclp_buffer *, unsigned short); -void sclp_set_htab(struct sclp_buffer *, unsigned short); int sclp_chars_in_buffer(struct sclp_buffer *); #ifdef CONFIG_SCLP_CONSOLE From 3ca8b855b0112906caecab88f04a8786a2d40906 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 11 Aug 2019 20:23:56 +0200 Subject: [PATCH 61/85] s390/startup: add kaslr_offset to pgm check info print startup pgm check handler is active since the very beginning of kernel code execution until uncompressed kernel sets up s390_base_pgm_handler. 
It is useful not just for the decompressor debugging itself, but also for early code of uncompressed kernel, in particular Kasan initialization. But since there is no stack trace or symbolic representation of failing psw address it is impossible to figure out faulty code location without knowing Kaslr kernel base. So, let's add it to the startup pgm check info printed as well. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/pgm_check_info.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index 83b5b7915c32d..d3ab20ec517e4 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "boot.h" @@ -42,6 +43,13 @@ void print_pgm_check_info(void) add_str(p, "\n"); sclp_early_printk(buf); + if (kaslr_enabled) { + p = add_str(buf, "Kernel random base: "); + p = add_val_as_hex(p, __kaslr_offset); + add_str(p, "\n"); + sclp_early_printk(buf); + } + p = add_str(buf, "PSW : "); p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask); p = add_str(p, " "); From 402e9228f7a6a90e4fad44b358350ae358f1bc3c Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 27 Sep 2020 22:07:40 +0200 Subject: [PATCH 62/85] s390: remove orphaned function declarations arch/s390/pci/pci_bus.h: zpci_bus_init - only declaration left after commit 05bc1be6db4b ("s390/pci: create zPCI bus") arch/s390/include/asm/gmap.h: gmap_pte_notify - only declaration left after commit 4be130a08420 ("s390/mm: add shadow gmap support") arch/s390/include/asm/pgalloc.h: rcu_table_freelist_finish - only declaration left after commit 36409f6353fc ("[S390] use generic RCU page-table freeing code") arch/s390/include/asm/tlbflush.h: smp_ptlb_all - only declaration left after commit 5a79859ae0f3 ("s390: remove 31 bit support") arch/s390/include/asm/vtimer.h: init_cpu_vtimer - only declaration left after commit b5f87f15e200 ("s390/idle: consolidate idle functions and definitions") arch/s390/include/asm/pci.h: zpci_debug_info - only declaration left after commit 386aa051fb4b ("s390/pci: remove per device debug attribute") arch/s390/include/asm/vdso.h: vdso_alloc_boot_cpu - only declaration left after commit 4bff8cb54502 ("s390: convert to GENERIC_VDSO") arch/s390/include/asm/smp.h: smp_vcpu_scheduled - only declaration left after commit 67626fadd269 ("s390: enforce CONFIG_SMP") arch/s390/kernel/entry.h: restart_call_handler - only declaration left after commit 8b646bd75908 ("[S390] rework smp code") arch/s390/kernel/entry.h: startup_init_nobss - only declaration left after commit 2e83e0eb85ca ("s390: clean .bss before running uncompressed kernel") arch/s390/kernel/entry.h: s390_early_resume - only declaration left after commit 394216275c7d ("s390: remove broken hibernate / power management support") drivers/s390/char/raw3270.h: raw3270_request_alloc_bootmem - only declaration left after commit 33403dcfcdfd ("[S390] 3270 console: convert from bootmem to slab") drivers/s390/cio/device.h: ccw_device_schedule_sch_unregister - only declaration left after commit 37de53bb5290 ("[S390] cio: introduce ccw device todos") drivers/s390/char/tape.h: tape_hotplug_event - has only declaration since recorded git history. drivers/s390/char/tape.h: tape_oper_handler - has only declaration since recorded git history. drivers/s390/char/tape.h: tape_noper_handler - has only declaration since recorded git history. 
drivers/s390/char/tape_std.h: tape_std_check_locate - only declaration left after commit 161beff8f40d ("s390/tape: remove tape block leftovers") drivers/s390/char/tape_std.h: tape_std_default_handler - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_unexpect_uchk_handler - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_irq - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_has_failed - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_succeded - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_do_retry - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_read_opposite - has only declaration since recorded git history. drivers/s390/char/tape_std.h: tape_std_error_recovery_HWBUG - has only declaration since recorded git history. Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/gmap.h | 2 -- arch/s390/include/asm/pci.h | 1 - arch/s390/include/asm/pgalloc.h | 2 -- arch/s390/include/asm/smp.h | 1 - arch/s390/include/asm/tlbflush.h | 2 -- arch/s390/include/asm/vtimer.h | 2 -- arch/s390/kernel/entry.h | 3 --- arch/s390/pci/pci_bus.h | 1 - drivers/s390/char/raw3270.h | 1 - drivers/s390/char/tape.h | 3 --- drivers/s390/char/tape_std.h | 12 ------------ drivers/s390/cio/device.h | 1 - 12 files changed, 31 deletions(-) diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index a816fb4734b8b..40264f60b0da9 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -140,8 +140,6 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); void gmap_register_pte_notifier(struct gmap_notifier *); void gmap_unregister_pte_notifier(struct gmap_notifier *); -void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *, - unsigned long bits); int gmap_mprotect_notify(struct gmap *, unsigned long start, unsigned long len, int prot); diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 178a24e0af5fc..b5380a251df21 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -279,7 +279,6 @@ int zpci_debug_init(void); void zpci_debug_exit(void); void zpci_debug_init_device(struct zpci_dev *, const char *); void zpci_debug_exit_device(struct zpci_dev *); -void zpci_debug_info(struct zpci_dev *, struct seq_file *); /* Error reporting */ int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 74a352f8c0d1d..d1297d6bbdcf1 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -146,8 +146,6 @@ static inline void pmd_populate(struct mm_struct *mm, #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) -extern void rcu_table_freelist_finish(void); - void vmem_map_init(void); void *vmem_crst_alloc(unsigned long val); pte_t *vmem_pte_alloc(void); diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 7e155fb6c254d..01e3600044812 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -31,7 +31,6 @@ extern void 
smp_emergency_stop(void); extern int smp_find_processor_id(u16 address); extern int smp_store_status(int cpu); extern void smp_save_dump_cpus(void); -extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index acce6a08a1faa..6448bb5be10cc 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -30,8 +30,6 @@ static inline void __tlb_flush_idte(unsigned long asce) : : "a" (opt), "a" (asce) : "cc"); } -void smp_ptlb_all(void); - /* * Flush all TLB entries on all CPUs. */ diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h index 42f707d1c1e8e..e601adaa6320d 100644 --- a/arch/s390/include/asm/vtimer.h +++ b/arch/s390/include/asm/vtimer.h @@ -25,8 +25,6 @@ extern void add_virt_timer_periodic(struct vtimer_list *timer); extern int mod_virt_timer(struct vtimer_list *timer, u64 expires); extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires); extern int del_virt_timer(struct vtimer_list *timer); - -extern void init_cpu_vtimer(void); extern void vtime_init(void); #endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index faca269d5f278..412a54e6aa81a 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -17,7 +17,6 @@ void ext_int_handler(void); void io_int_handler(void); void mcck_int_handler(void); void restart_int_handler(void); -void restart_call_handler(void); asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); @@ -61,12 +60,10 @@ void do_notify_resume(struct pt_regs *regs); void __init init_IRQ(void); void do_IRQ(struct pt_regs *regs, int irq); void do_restart(void); -void __init startup_init_nobss(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); void __init time_init(void); -void s390_early_resume(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); struct s390_mmap_arg_struct; diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index 8d19723ed5c03..f8dfac0b5b713 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -9,7 +9,6 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops); void zpci_bus_device_unregister(struct zpci_dev *zdev); -int zpci_bus_init(void); void zpci_release_device(struct kref *kref); static inline void zpci_zdev_put(struct zpci_dev *zdev) diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 08f36e973b43d..8d979e0ee6054 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -110,7 +110,6 @@ struct raw3270_request { }; struct raw3270_request *raw3270_request_alloc(size_t size); -struct raw3270_request *raw3270_request_alloc_bootmem(size_t size); void raw3270_request_free(struct raw3270_request *); void raw3270_request_reset(struct raw3270_request *); void raw3270_request_set_cmd(struct raw3270_request *, u8 cmd); diff --git a/drivers/s390/char/tape.h b/drivers/s390/char/tape.h index 8bec5f9ea92c9..e2c60475dfa89 100644 --- a/drivers/s390/char/tape.h +++ b/drivers/s390/char/tape.h @@ -238,7 +238,6 @@ extern int tape_do_io(struct tape_device *, struct tape_request *); extern int tape_do_io_async(struct tape_device *, struct tape_request 
*); extern int tape_do_io_interruptible(struct tape_device *, struct tape_request *); extern int tape_cancel_io(struct tape_device *, struct tape_request *); -void tape_hotplug_event(struct tape_device *, int major, int action); static inline int tape_do_io_free(struct tape_device *device, struct tape_request *request) @@ -258,8 +257,6 @@ tape_do_io_async_free(struct tape_device *device, struct tape_request *request) tape_do_io_async(device, request); } -extern int tape_oper_handler(int irq, int status); -extern void tape_noper_handler(int irq, int status); extern int tape_open(struct tape_device *); extern int tape_release(struct tape_device *); extern int tape_mtop(struct tape_device *, int, int); diff --git a/drivers/s390/char/tape_std.h b/drivers/s390/char/tape_std.h index 53ec8e2870d43..dcc63ff587f92 100644 --- a/drivers/s390/char/tape_std.h +++ b/drivers/s390/char/tape_std.h @@ -101,7 +101,6 @@ struct tape_request *tape_std_read_block(struct tape_device *, size_t); void tape_std_read_backward(struct tape_device *device, struct tape_request *request); struct tape_request *tape_std_write_block(struct tape_device *, size_t); -void tape_std_check_locate(struct tape_device *, struct tape_request *); /* Some non-mtop commands. */ int tape_std_assign(struct tape_device *); @@ -131,19 +130,8 @@ int tape_std_mtunload(struct tape_device *, int); int tape_std_mtweof(struct tape_device *, int); /* Event handlers */ -void tape_std_default_handler(struct tape_device *); -void tape_std_unexpect_uchk_handler(struct tape_device *); -void tape_std_irq(struct tape_device *); void tape_std_process_eov(struct tape_device *); -// the error recovery stuff: -void tape_std_error_recovery(struct tape_device *); -void tape_std_error_recovery_has_failed(struct tape_device *,int error_id); -void tape_std_error_recovery_succeded(struct tape_device *); -void tape_std_error_recovery_do_retry(struct tape_device *); -void tape_std_error_recovery_read_opposite(struct tape_device *); -void tape_std_error_recovery_HWBUG(struct tape_device *, int condno); - /* S390 tape types */ enum s390_tape_type { tape_3480, diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index f5c427ec24b12..853b6a8ca095d 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -96,7 +96,6 @@ int ccw_device_online(struct ccw_device *); int ccw_device_offline(struct ccw_device *); void ccw_device_update_sense_data(struct ccw_device *); int ccw_device_test_sense_data(struct ccw_device *); -void ccw_device_schedule_sch_unregister(struct ccw_device *); int ccw_purge_blacklisted(void); void ccw_device_sched_todo(struct ccw_device *cdev, enum cdev_todo todo); struct ccw_device *get_ccwdev_by_dev_id(struct ccw_dev_id *dev_id); From d70e38cb1deef3b2acee4cd36d33fef4c98abf28 Mon Sep 17 00:00:00 2001 From: "Jason J. Herne" Date: Tue, 17 Mar 2020 09:23:41 -0400 Subject: [PATCH 63/85] s390: nvme dump support Add the nvme dump ipl type, associated data, and sysfs entries. This allows booting into a stand alone dump environment that resides on an nvme device. Signed-off-by: Jason J. 
Herne Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/ipl.h | 1 + arch/s390/kernel/ipl.c | 71 ++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 7d5cfdda52771..a72d195bf92d1 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -66,6 +66,7 @@ enum ipl_type { IPL_TYPE_FCP_DUMP = 8, IPL_TYPE_NSS = 16, IPL_TYPE_NVME = 32, + IPL_TYPE_NVME_DUMP = 64, }; struct ipl_info diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 90a2a17239b0e..c5f9d6f88d275 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -40,10 +40,12 @@ #define IPL_FCP_STR "fcp" #define IPL_FCP_DUMP_STR "fcp_dump" #define IPL_NVME_STR "nvme" +#define IPL_NVME_DUMP_STR "nvme_dump" #define IPL_NSS_STR "nss" #define DUMP_CCW_STR "ccw" #define DUMP_FCP_STR "fcp" +#define DUMP_NVME_STR "nvme" #define DUMP_NONE_STR "none" /* @@ -96,6 +98,8 @@ static char *ipl_type_str(enum ipl_type type) return IPL_NSS_STR; case IPL_TYPE_NVME: return IPL_NVME_STR; + case IPL_TYPE_NVME_DUMP: + return IPL_NVME_DUMP_STR; case IPL_TYPE_UNKNOWN: default: return IPL_UNKNOWN_STR; @@ -106,6 +110,7 @@ enum dump_type { DUMP_TYPE_NONE = 1, DUMP_TYPE_CCW = 2, DUMP_TYPE_FCP = 4, + DUMP_TYPE_NVME = 8, }; static char *dump_type_str(enum dump_type type) @@ -117,6 +122,8 @@ static char *dump_type_str(enum dump_type type) return DUMP_CCW_STR; case DUMP_TYPE_FCP: return DUMP_FCP_STR; + case DUMP_TYPE_NVME: + return DUMP_NVME_STR; default: return NULL; } @@ -144,6 +151,7 @@ static struct ipl_parameter_block *reipl_block_actual; static int dump_capabilities = DUMP_TYPE_NONE; static enum dump_type dump_type = DUMP_TYPE_NONE; static struct ipl_parameter_block *dump_block_fcp; +static struct ipl_parameter_block *dump_block_nvme; static struct ipl_parameter_block *dump_block_ccw; static struct sclp_ipl_info sclp_ipl_info; @@ -266,7 +274,10 @@ static __init enum ipl_type get_ipl_type(void) else return IPL_TYPE_FCP; case IPL_PBT_NVME: - return IPL_TYPE_NVME; + if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return IPL_TYPE_NVME_DUMP; + else + return IPL_TYPE_NVME; } return IPL_TYPE_UNKNOWN; } @@ -324,6 +335,7 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, case IPL_TYPE_FCP_DUMP: return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: return sprintf(page, "%08ux\n", ipl_block.nvme.fid); default: return 0; @@ -531,6 +543,7 @@ static int __init ipl_init(void) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); break; case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); break; default: @@ -1109,6 +1122,7 @@ static void __reipl_run(void *unused) diag308(DIAG308_LOAD_CLEAR, NULL); break; case IPL_TYPE_FCP_DUMP: + case IPL_TYPE_NVME_DUMP: break; } disabled_wait(); @@ -1382,6 +1396,29 @@ static struct attribute_group dump_fcp_attr_group = { .attrs = dump_fcp_attrs, }; +/* NVME dump device attributes */ +DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n", + dump_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n", + dump_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n", + dump_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n", + dump_block_nvme->nvme.br_lba); + +static struct attribute *dump_nvme_attrs[] = { + &sys_dump_nvme_fid_attr.attr, + &sys_dump_nvme_nsid_attr.attr, + 
&sys_dump_nvme_bootprog_attr.attr, + &sys_dump_nvme_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group dump_nvme_attr_group = { + .name = IPL_NVME_STR, + .attrs = dump_nvme_attrs, +}; + /* CCW dump device attributes */ DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw); @@ -1423,6 +1460,8 @@ static ssize_t dump_type_store(struct kobject *kobj, rc = dump_set_type(DUMP_TYPE_CCW); else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0) rc = dump_set_type(DUMP_TYPE_FCP); + else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_NVME); return (rc != 0) ? rc : len; } @@ -1450,6 +1489,9 @@ static void __dump_run(void *unused) case DUMP_TYPE_FCP: diag308_dump(dump_block_fcp); break; + case DUMP_TYPE_NVME: + diag308_dump(dump_block_nvme); + break; default: break; } @@ -1506,6 +1548,29 @@ static int __init dump_fcp_init(void) return 0; } +static int __init dump_nvme_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump) + return 0; /* LDIPL DUMP is not installed */ + dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_nvme) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group); + if (rc) { + free_page((unsigned long)dump_block_nvme); + return rc; + } + dump_block_nvme->hdr.len = IPL_BP_NVME_LEN; + dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN; + dump_block_nvme->fcp.pbt = IPL_PBT_NVME; + dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_NVME; + return 0; +} + static int __init dump_init(void) { int rc; @@ -1522,6 +1587,9 @@ static int __init dump_init(void) if (rc) return rc; rc = dump_fcp_init(); + if (rc) + return rc; + rc = dump_nvme_init(); if (rc) return rc; dump_set_type(DUMP_TYPE_NONE); @@ -1956,6 +2024,7 @@ void __init setup_ipl(void) ipl_info.data.fcp.lun = ipl_block.fcp.lun; break; case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: ipl_info.data.nvme.fid = ipl_block.nvme.fid; ipl_info.data.nvme.nsid = ipl_block.nvme.nsid; break; From d9f12e48d08ec08ace574050a838e001e442ee38 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Tue, 29 Sep 2020 20:23:17 +0200 Subject: [PATCH 64/85] s390/ipl: support NVMe IPL kernel parameters Enable extracting of extra kernel command-line parameters from the NVMe IPL block passed by the firmware to the kernel at boot. 
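Since the sanitizing rules are easier to see outside of a diff hunk, here is a minimal standalone model of what ipl_block_get_ascii_scpdata() does with the SCP data once the source buffer (fcp or nvme) has been selected. The function name get_ascii_scpdata() and the plain-buffer interface are simplifications for illustration only; the behaviour (reject non-ASCII data, lowercase the whole string when it contains no lowercase characters) is taken from the code in the diff below, and the scpdata_length() trimming is omitted.

/*
 * Standalone model of the SCP-data sanitizing in the diff below.
 * Simplified: plain buffers instead of struct ipl_parameter_block.
 */
#include <ctype.h>
#include <stdio.h>

static size_t get_ascii_scpdata(char *dest, size_t size,
				const unsigned char *scp_data,
				size_t scp_data_len)
{
	size_t count = scp_data_len < size - 1 ? scp_data_len : size - 1;
	int has_lowercase = 0;
	size_t i;

	for (i = 0; i < count; i++) {
		if (!isascii(scp_data[i])) {	/* reject non-ASCII data */
			dest[0] = '\0';
			return 0;
		}
		if (islower(scp_data[i]))
			has_lowercase = 1;
	}
	/* all-uppercase input is converted to lowercase, otherwise copied as-is */
	for (i = 0; i < count; i++)
		dest[i] = has_lowercase ? scp_data[i] : tolower(scp_data[i]);
	dest[count] = '\0';
	return count;
}

int main(void)
{
	char parm[64];

	get_ascii_scpdata(parm, sizeof(parm),
			  (const unsigned char *)"DASD=1234 RO", 12);
	printf("%s\n", parm);	/* prints "dasd=1234 ro" */
	return 0;
}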
Signed-off-by: Alexander Egorenkov Reviewed-by: Vasily Gorbik Reviewed-by: Philipp Rudo Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 1add096e09a62..f26c34e6f1e68 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -70,30 +70,44 @@ static size_t scpdata_length(const u8 *buf, size_t count) static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size, const struct ipl_parameter_block *ipb) { - size_t count; - size_t i; + const __u8 *scp_data; + __u32 scp_data_len; int has_lowercase; + size_t count = 0; + size_t i; + + switch (ipb->pb0_hdr.pbt) { + case IPL_PBT_FCP: + scp_data_len = ipb->fcp.scp_data_len; + scp_data = ipb->fcp.scp_data; + break; + case IPL_PBT_NVME: + scp_data_len = ipb->nvme.scp_data_len; + scp_data = ipb->nvme.scp_data; + break; + default: + goto out; + } - count = min(size - 1, scpdata_length(ipb->fcp.scp_data, - ipb->fcp.scp_data_len)); + count = min(size - 1, scpdata_length(scp_data, scp_data_len)); if (!count) goto out; has_lowercase = 0; for (i = 0; i < count; i++) { - if (!isascii(ipb->fcp.scp_data[i])) { + if (!isascii(scp_data[i])) { count = 0; goto out; } - if (!has_lowercase && islower(ipb->fcp.scp_data[i])) + if (!has_lowercase && islower(scp_data[i])) has_lowercase = 1; } if (has_lowercase) - memcpy(dest, ipb->fcp.scp_data, count); + memcpy(dest, scp_data, count); else for (i = 0; i < count; i++) - dest[i] = tolower(ipb->fcp.scp_data[i]); + dest[i] = tolower(scp_data[i]); out: dest[count] = '\0'; return count; @@ -115,6 +129,7 @@ static void append_ipl_block_parm(void) parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; case IPL_PBT_FCP: + case IPL_PBT_NVME: rc = ipl_block_get_ascii_scpdata( parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; From bd37b36832f62bf42ab66da8744191d99252a6e3 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Tue, 29 Sep 2020 20:24:55 +0200 Subject: [PATCH 65/85] s390/nvme: support firmware-assisted dump to NVMe disks From the kernel perspective NVMe dump works exactly like zFCP dump. Therefore, adapt all places where code explicitly tests only for IPL of type FCP DUMP. And also set the memory end correctly in this case. 
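As a reading aid for the hunks below: the patch introduces small predicates so that "is this a stand-alone dump boot?" is answered identically for zfcp and nvme, both in the decompressor (is_ipl_block_dump(), operating on the raw IPL parameter block) and in the kernel proper (is_ipl_type_dump(), operating on ipl_info). The sketch below models only the second helper with simplified stand-in types; the enum values match the asm/ipl.h hunk shown earlier in this series.

#include <stdbool.h>
#include <stdio.h>

enum ipl_type { IPL_TYPE_FCP_DUMP = 8, IPL_TYPE_NVME_DUMP = 64 };

static enum ipl_type ipl_type = IPL_TYPE_NVME_DUMP;	/* example boot */

/* single predicate now used by setup_zfcpdump(), zcore_init(),
 * smp_save_dump_cpus(), sclp_sdias_init(), ... */
static bool is_ipl_type_dump(void)
{
	return ipl_type == IPL_TYPE_FCP_DUMP || ipl_type == IPL_TYPE_NVME_DUMP;
}

int main(void)
{
	printf("stand-alone dump boot: %s\n", is_ipl_type_dump() ? "yes" : "no");
	return 0;
}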
Signed-off-by: Alexander Egorenkov Reviewed-by: Vasily Gorbik Reviewed-by: Philipp Rudo Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 15 ++++++++++++--- arch/s390/include/asm/ipl.h | 6 ++++++ arch/s390/kernel/crash_dump.c | 16 ++++++++-------- arch/s390/kernel/setup.c | 4 ++-- arch/s390/kernel/smp.c | 12 ++++++------ drivers/s390/char/sclp_sdias.c | 2 +- drivers/s390/char/zcore.c | 17 ++++++++++++----- 7 files changed, 47 insertions(+), 25 deletions(-) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index f26c34e6f1e68..f94b91d72620e 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -280,14 +280,23 @@ void parse_boot_command_line(void) } } +static inline bool is_ipl_block_dump(void) +{ + if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && + ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return true; + if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME && + ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return true; + return false; +} + void setup_memory_end(void) { #ifdef CONFIG_CRASH_DUMP if (OLDMEM_BASE) { kaslr_enabled = 0; - } else if (ipl_block_valid && - ipl_block.pb0_hdr.pbt == IPL_PBT_FCP && - ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) { + } else if (ipl_block_valid && is_ipl_block_dump()) { kaslr_enabled = 0; if (!sclp_early_get_hsa_size(&memory_end) && memory_end) memory_end_set = 1; diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index a72d195bf92d1..a9e2c7295b351 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -95,6 +95,12 @@ extern struct ipl_info ipl_info; extern void setup_ipl(void); extern void set_os_info_reipl_block(void); +static inline bool is_ipl_type_dump(void) +{ + return (ipl_info.type == IPL_TYPE_FCP_DUMP) || + (ipl_info.type == IPL_TYPE_NVME_DUMP); +} + struct ipl_report { struct ipl_parameter_block *ipib; struct list_head components; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index c42ce348103cc..205b2e2648aae 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -141,7 +141,7 @@ int copy_oldmem_kernel(void *dst, void *src, size_t count) while (count) { from = __pa(src); if (!OLDMEM_BASE && from < sclp.hsa_size) { - /* Copy from zfcpdump HSA area */ + /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - from); rc = memcpy_hsa_kernel(dst, from, len); if (rc) @@ -184,7 +184,7 @@ static int copy_oldmem_user(void __user *dst, void *src, size_t count) while (count) { from = __pa(src); if (!OLDMEM_BASE && from < sclp.hsa_size) { - /* Copy from zfcpdump HSA area */ + /* Copy from zfcp/nvme dump HSA area */ len = min(count, sclp.hsa_size - from); rc = memcpy_hsa_user(dst, from, len); if (rc) @@ -258,7 +258,7 @@ static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, } /* - * Remap "oldmem" for zfcpdump + * Remap "oldmem" for zfcp/nvme dump * * We only map available memory above HSA size. Memory below HSA size * is read on demand using the copy_oldmem_page() function. 
@@ -283,7 +283,7 @@ static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, } /* - * Remap "oldmem" for kdump or zfcpdump + * Remap "oldmem" for kdump or zfcp/nvme dump */ int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) @@ -632,11 +632,11 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) u32 alloc_size; u64 hdr_off; - /* If we are not in kdump or zfcpdump mode return */ - if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) + /* If we are not in kdump or zfcp/nvme dump mode return */ + if (!OLDMEM_BASE && !is_ipl_type_dump()) return 0; - /* If we cannot get HSA size for zfcpdump return error */ - if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size) + /* If we cannot get HSA size for zfcp/nvme dump return error */ + if (is_ipl_type_dump() && !sclp.hsa_size) return -ENODEV; /* For kdump, exclude previous crashkernel memory */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c1b78aae270b9..419a0604959f6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -251,7 +251,7 @@ static void __init conmode_default(void) #ifdef CONFIG_CRASH_DUMP static void __init setup_zfcpdump(void) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (!is_ipl_type_dump()) return; if (OLDMEM_BASE) return; @@ -1175,7 +1175,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE)) nospec_init_branches(); - /* Setup zfcpdump support */ + /* Setup zfcp/nvme dump support */ setup_zfcpdump(); /* Add system specific data to the random pool */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 85700bd85f98d..ebfe86d097f0a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -606,14 +606,14 @@ int smp_store_status(int cpu) /* * Collect CPU state of the previous, crashed system. * There are four cases: - * 1) standard zfcp dump - * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * 1) standard zfcp/nvme dump + * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true * The state for all CPUs except the boot CPU needs to be collected * with sigp stop-and-store-status. The boot CPU state is located in * the absolute lowcore of the memory stored in the HSA. The zcore code * will copy the boot CPU state from the HSA. - * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory) - * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory) + * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true * The state for all CPUs except the boot CPU needs to be collected * with sigp stop-and-store-status. The firmware or the boot-loader * stored the registers of the boot CPU in the absolute lowcore in the @@ -660,7 +660,7 @@ void __init smp_save_dump_cpus(void) unsigned long page; bool is_boot_cpu; - if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP)) + if (!(OLDMEM_BASE || is_ipl_type_dump())) /* No previous system present, normal boot. */ return; /* Allocate a page as dumping area for the store status sigps */ @@ -686,7 +686,7 @@ void __init smp_save_dump_cpus(void) /* Get the vector registers */ smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page); /* - * For a zfcp dump OLDMEM_BASE == NULL and the registers + * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers * of the boot CPU are stored in the HSA. 
To retrieve * these registers an SCLP request is required which is * done by drivers/s390/char/zcore.c:init_cpu_info() diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index be8cad61b4cfa..215d4b4a5ff53 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -257,7 +257,7 @@ static int __init sclp_sdias_init_async(void) int __init sclp_sdias_init(void) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (!is_ipl_type_dump()) return 0; sclp_sdias_sccb = (void *) __get_free_page(GFP_KERNEL | GFP_DMA); BUG_ON(!sclp_sdias_sccb); diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index d29f1b71618e1..1515fdc3c1abd 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-1.0+ /* * zcore module to export memory content and register sets for creating system - * dumps on SCSI disks (zfcpdump). + * dumps on SCSI/NVMe disks (zfcp/nvme dump). * * For more information please refer to Documentation/s390/zfcpdump.rst * @@ -243,7 +243,7 @@ static int __init zcore_init(void) unsigned char arch; int rc; - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (!is_ipl_type_dump()) return -ENODATA; if (OLDMEM_BASE) return -ENODATA; @@ -252,9 +252,16 @@ static int __init zcore_init(void) debug_register_view(zcore_dbf, &debug_sprintf_view); debug_set_level(zcore_dbf, 6); - TRACE("devno: %x\n", ipl_info.data.fcp.dev_id.devno); - TRACE("wwpn: %llx\n", (unsigned long long) ipl_info.data.fcp.wwpn); - TRACE("lun: %llx\n", (unsigned long long) ipl_info.data.fcp.lun); + if (ipl_info.type == IPL_TYPE_FCP_DUMP) { + TRACE("type: fcp\n"); + TRACE("devno: %x\n", ipl_info.data.fcp.dev_id.devno); + TRACE("wwpn: %llx\n", (unsigned long long) ipl_info.data.fcp.wwpn); + TRACE("lun: %llx\n", (unsigned long long) ipl_info.data.fcp.lun); + } else if (ipl_info.type == IPL_TYPE_NVME_DUMP) { + TRACE("type: nvme\n"); + TRACE("fid: %x\n", ipl_info.data.nvme.fid); + TRACE("nsid: %x\n", ipl_info.data.nvme.nsid); + } rc = sclp_sdias_init(); if (rc) From 5627b9224b00334e4c91122ebbb8536a9a575969 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 23 Jun 2020 19:10:08 +0200 Subject: [PATCH 66/85] s390/ipl: add support to control memory clearing for nvme re-IPL Re-IPL for nvme is currently done by using diag 308 with the "Load Clear" subcode, which means that all memory will be cleared. This can increase re-IPL duration considerably on very large machines. For list-directed IPL like nvme or fcp IPL, a "Load Normal" subcode was introduced with z14. The "Load Normal" diag 308 subcode allows to re-IPL without clearing memory. This patch adds a new "clear" sysfs attribute to /sys/firmware/reipl/nvme, which can be set to either "0" or "1" to disable or enable re-IPL with memory clearing. The default value is "0", which disables memory clearing. 
Signed-off-by: Gerald Schaefer Reviewed-by: Vasily Gorbik Tested-by: Alexander Egorenkov Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ipl.c | 48 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index c5f9d6f88d275..98b3aca1de8e1 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -156,6 +156,7 @@ static struct ipl_parameter_block *dump_block_ccw; static struct sclp_ipl_info sclp_ipl_info; +static bool reipl_nvme_clear; static bool reipl_fcp_clear; static bool reipl_ccw_clear; @@ -886,6 +887,24 @@ static struct attribute_group reipl_nvme_attr_group = { .bin_attrs = reipl_nvme_bin_attrs }; +static ssize_t reipl_nvme_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_nvme_clear); +} + +static ssize_t reipl_nvme_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (strtobool(buf, &reipl_nvme_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_nvme_clear_attr = + __ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store); + /* CCW reipl device attributes */ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); @@ -1112,7 +1131,10 @@ static void __reipl_run(void *unused) break; case IPL_TYPE_NVME: diag308(DIAG308_SET, reipl_block_nvme); - diag308(DIAG308_LOAD_CLEAR, NULL); + if (reipl_nvme_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); break; case IPL_TYPE_NSS: diag308(DIAG308_SET, reipl_block_nss); @@ -1233,8 +1255,9 @@ static int __init reipl_fcp_init(void) &sys_reipl_fcp_clear_attr.attr); if (rc) goto out2; - } else + } else { reipl_fcp_clear = true; + } if (ipl_info.type == IPL_TYPE_FCP) { memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block)); @@ -1280,10 +1303,16 @@ static int __init reipl_nvme_init(void) } rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); - if (rc) { - kset_unregister(reipl_nvme_kset); - free_page((unsigned long) reipl_block_nvme); - return rc; + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_nvme_kset->kobj, + &sys_reipl_nvme_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_nvme_clear = true; } if (ipl_info.type == IPL_TYPE_NVME) { @@ -1304,6 +1333,13 @@ static int __init reipl_nvme_init(void) } reipl_capabilities |= IPL_TYPE_NVME; return 0; + +out2: + sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); +out1: + kset_unregister(reipl_nvme_kset); + free_page((unsigned long) reipl_block_nvme); + return rc; } static int __init reipl_type_init(void) From 21a66717079c3d74e8573cd48743728e4a244507 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 24 Sep 2020 01:01:29 +0200 Subject: [PATCH 67/85] s390/kasan: make sure int handler always run with DAT on Since commit 998f5bbe3dbd ("s390/kasan: fix early pgm check handler execution") early pgm check handler is executed with DAT on if Kasan is enabled. Still there is a window between setup_lowcore_dat_off() and setup_lowcore_dat_on() when int handlers could be executed with DAT off under Kasan. If this happens the kernel ends up in pgm check loop due to Kasan shadow memory access attempts. With Kasan enabled paging is initialized much earlier and DAT flag has to be on at all times instrumented code is executed. Make sure int handlers are set up to be called with DAT on right away in this case. 
Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 419a0604959f6..dc4d461095ccd 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -372,8 +372,12 @@ void __init arch_call_rest_init(void) static void __init setup_lowcore_dat_off(void) { + unsigned long int_psw_mask = PSW_KERNEL_BITS; struct lowcore *lc; + if (IS_ENABLED(CONFIG_KASAN)) + int_psw_mask |= PSW_MASK_DAT; + /* * Setup lowcore for boot cpu */ @@ -385,15 +389,15 @@ static void __init setup_lowcore_dat_off(void) lc->restart_psw.mask = PSW_KERNEL_BITS; lc->restart_psw.addr = (unsigned long) restart_int_handler; - lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->external_new_psw.addr = (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->svc_new_psw.addr = (unsigned long) system_call; - lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; lc->nodat_stack = ((unsigned long) &init_thread_union) From 100a980c174bed82e0178766809ac664e59ca037 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 27 Sep 2020 01:34:25 +0200 Subject: [PATCH 68/85] s390: remove orphaned extern variables declarations arch/s390/kernel/entry.h: suspend_zero_pages - only declaration left after commit 394216275c7d ("s390: remove broken hibernate / power management support") arch/s390/include/asm/setup.h: vmhalt_cmd - only declaration left after commit 99ca4e582d4a ("[S390] kernel: Shutdown Actions Interface") arch/s390/include/asm/setup.h: vmpoff_cmd - only declaration left after commit 99ca4e582d4a ("[S390] kernel: Shutdown Actions Interface") Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/setup.h | 3 --- arch/s390/kernel/entry.h | 1 - 2 files changed, 4 deletions(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 396db16384176..bdb242a1544eb 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -121,9 +121,6 @@ extern unsigned int console_mode; extern unsigned int console_devno; extern unsigned int console_irq; -extern char vmhalt_cmd[]; -extern char vmpoff_cmd[]; - #define CONSOLE_IS_UNDEFINED (console_mode == 0) #define CONSOLE_IS_SCLP (console_mode == 1) #define CONSOLE_IS_3215 (console_mode == 2) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 412a54e6aa81a..6475a885cd60f 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -9,7 +9,6 @@ #include extern void *restart_stack; -extern unsigned long suspend_zero_pages; void system_call(void); void pgm_check_handler(void); From 4ec95ed312c4ce877fb04084991754cb8cd33f01 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 1 Oct 2020 02:22:54 +0200 Subject: [PATCH 69/85] s390/startup: correct early pgm check info formatting Early sclp console messages are printed in line mode on z/VM and LPAR, but under kvm newlines matter. 
Add a missing newline between "kernel version" and "Kernel fault". Signed-off-by: Vasily Gorbik --- arch/s390/boot/pgm_check_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index d3ab20ec517e4..a3c9862bcede7 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -33,7 +33,8 @@ void print_pgm_check_info(void) char *p; add_str(buf, "Linux version "); - strlcat(buf, kernel_version, sizeof(buf)); + strlcat(buf, kernel_version, sizeof(buf) - 1); + strlcat(buf, "\n", sizeof(buf)); sclp_early_printk(buf); p = add_str(buf, "Kernel fault: interruption code "); From 91ffc519c1997520ff3435ee227d86cfaa30d037 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 2 Jul 2020 11:10:11 +0200 Subject: [PATCH 70/85] s390/zcrypt: introduce msg tracking in zcrypt functions Introduce a new internal struct zcrypt_track with a retry counter field and a last return code field. Fill and update these fields at certain points during processing of a request/reply. This tracking info is then used to - avoid trying to resend the message forever. Each message is now retried at most TRACK_AGAIN_MAX (currently 10) times before the ioctl returns to userspace with errno EAGAIN. - avoid trying to resend the message on the very same card/domain. If possible (more than one APQN with the same quality), don't use the very same qid as in the previous attempt when rescheduling the request. This is done by adding penalty weight values when the dispatching takes place. There is a penalty TRACK_AGAIN_CARD_WEIGHT_PENALTY for using the same card as in the previous attempt, and another penalty, TRACK_AGAIN_QUEUE_WEIGHT_PENALTY, which is applied when the same qid as in the previous attempt is considered again. Both values make it harder to choose the very same card/domain, but not impossible. For example, when only one APQN is available a resend can only address the very same APQN. There are more ideas for the future to extend the use of this tracking information. For example, the last response code at NQAP and DQAP could be stored there, enabling extended tracing and debugging of requests that fail to be processed properly.
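To make the weighting scheme easier to follow before diving into the diff, here is a small self-contained model of how the penalties steer a retry away from the previously used APQN. struct toy_apqn, struct toy_track and pick() are invented for illustration; only the three TRACK_AGAIN_* constants and the "lower effective weight wins" rule (note that the comparison helpers are inverted by this patch) are taken from the patch itself.

#include <stdio.h>

#define TRACK_AGAIN_MAX			 10
#define TRACK_AGAIN_CARD_WEIGHT_PENALTY	 1000
#define TRACK_AGAIN_QUEUE_WEIGHT_PENALTY 10000

struct toy_apqn {
	int card;
	int queue;
	int weight;	/* speed rating + current load; lower is better */
};

struct toy_track {
	int again_counter;
	int last_card, last_queue;
};

/* pick the APQN with the lowest effective weight, like the rewritten
 * zcrypt_card_compare()/zcrypt_queue_compare() */
static const struct toy_apqn *pick(const struct toy_apqn *apqns, int n,
				   const struct toy_track *tr)
{
	const struct toy_apqn *pref = NULL;
	int pref_wgt = 0;

	for (int i = 0; i < n; i++) {
		int wgt = apqns[i].weight;

		/* penalty if the previous (failed) attempt used this card */
		if (tr->again_counter && apqns[i].card == tr->last_card)
			wgt += TRACK_AGAIN_CARD_WEIGHT_PENALTY;
		/* extra penalty if it used exactly this queue */
		if (tr->again_counter && apqns[i].card == tr->last_card &&
		    apqns[i].queue == tr->last_queue)
			wgt += TRACK_AGAIN_QUEUE_WEIGHT_PENALTY;
		if (!pref || wgt < pref_wgt) {
			pref = &apqns[i];
			pref_wgt = wgt;
		}
	}
	return pref;
}

int main(void)
{
	struct toy_apqn apqns[] = {
		{ .card = 0, .queue = 6, .weight = 20 },
		{ .card = 1, .queue = 6, .weight = 25 },
	};
	struct toy_track tr = { .again_counter = 1, .last_card = 0, .last_queue = 6 };
	const struct toy_apqn *p = pick(apqns, 2, &tr);

	/* card 0 would normally win (weight 20), but after a failed attempt
	 * on (0,6) the penalties push the retry to card 1 */
	printf("retry goes to card %d, queue %d\n", p->card, p->queue);
	return 0;
}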
Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_api.c | 253 ++++++++++++++++++++--------- drivers/s390/crypto/zcrypt_api.h | 14 +- drivers/s390/crypto/zcrypt_cex2a.c | 6 +- drivers/s390/crypto/zcrypt_cex2c.c | 6 +- drivers/s390/crypto/zcrypt_cex4.c | 58 +++---- 5 files changed, 213 insertions(+), 124 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index a711728c3857f..223e1c2332788 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -602,13 +602,13 @@ static inline bool zcrypt_card_compare(struct zcrypt_card *zc, unsigned int pref_weight) { if (!pref_zc) - return false; + return true; weight += atomic_read(&zc->load); pref_weight += atomic_read(&pref_zc->load); if (weight == pref_weight) - return atomic64_read(&zc->card->total_request_count) > + return atomic64_read(&zc->card->total_request_count) < atomic64_read(&pref_zc->card->total_request_count); - return weight > pref_weight; + return weight < pref_weight; } static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq, @@ -617,26 +617,27 @@ static inline bool zcrypt_queue_compare(struct zcrypt_queue *zq, unsigned int pref_weight) { if (!pref_zq) - return false; + return true; weight += atomic_read(&zq->load); pref_weight += atomic_read(&pref_zq->load); if (weight == pref_weight) - return zq->queue->total_request_count > + return zq->queue->total_request_count < pref_zq->queue->total_request_count; - return weight > pref_weight; + return weight < pref_weight; } /* * zcrypt ioctls. */ static long zcrypt_rsa_modexpo(struct ap_perms *perms, + struct zcrypt_track *tr, struct ica_rsa_modexpo *mex) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; - unsigned int weight = 0, pref_weight = 0; + unsigned int wgt = 0, pref_wgt = 0; unsigned int func_code; - int qid = 0, rc = -ENODEV; + int cpen, qpen, qid = 0, rc = -ENODEV; struct module *mod; trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO); @@ -673,8 +674,12 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, if (!zcrypt_check_card(perms, zc->card->id)) continue; /* get weight index of the card device */ - weight = zc->speed_rating[func_code]; - if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) + wgt = zc->speed_rating[func_code]; + /* penalty if this msg was previously sent via this card */ + cpen = (tr && tr->again_counter && tr->last_qid && + AP_QID_CARD(tr->last_qid) == zc->card->id) ? + TRACK_AGAIN_CARD_WEIGHT_PENALTY : 0; + if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { /* check if device is online and eligible */ @@ -684,15 +689,19 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, if (!zcrypt_check_queue(perms, AP_QID_QUEUE(zq->queue->qid))) continue; - if (zcrypt_queue_compare(zq, pref_zq, - weight, pref_weight)) + /* penalty if the msg was previously sent at this qid */ + qpen = (tr && tr->again_counter && tr->last_qid && + tr->last_qid == zq->queue->qid) ? 
+ TRACK_AGAIN_QUEUE_WEIGHT_PENALTY : 0; + if (!zcrypt_queue_compare(zq, pref_zq, + wgt + cpen + qpen, pref_wgt)) continue; pref_zc = zc; pref_zq = zq; - pref_weight = weight; + pref_wgt = wgt + cpen + qpen; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -704,23 +713,28 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, rc = pref_zq->ops->rsa_modexpo(pref_zq, mex); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt); spin_unlock(&zcrypt_list_lock); out: + if (tr) { + tr->last_rc = rc; + tr->last_qid = qid; + } trace_s390_zcrypt_rep(mex, func_code, rc, AP_QID_CARD(qid), AP_QID_QUEUE(qid)); return rc; } static long zcrypt_rsa_crt(struct ap_perms *perms, + struct zcrypt_track *tr, struct ica_rsa_modexpo_crt *crt) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; - unsigned int weight = 0, pref_weight = 0; + unsigned int wgt = 0, pref_wgt = 0; unsigned int func_code; - int qid = 0, rc = -ENODEV; + int cpen, qpen, qid = 0, rc = -ENODEV; struct module *mod; trace_s390_zcrypt_req(crt, TP_ICARSACRT); @@ -757,8 +771,12 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, if (!zcrypt_check_card(perms, zc->card->id)) continue; /* get weight index of the card device */ - weight = zc->speed_rating[func_code]; - if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) + wgt = zc->speed_rating[func_code]; + /* penalty if this msg was previously sent via this card */ + cpen = (tr && tr->again_counter && tr->last_qid && + AP_QID_CARD(tr->last_qid) == zc->card->id) ? + TRACK_AGAIN_CARD_WEIGHT_PENALTY : 0; + if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { /* check if device is online and eligible */ @@ -768,15 +786,19 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, if (!zcrypt_check_queue(perms, AP_QID_QUEUE(zq->queue->qid))) continue; - if (zcrypt_queue_compare(zq, pref_zq, - weight, pref_weight)) + /* penalty if the msg was previously sent at this qid */ + qpen = (tr && tr->again_counter && tr->last_qid && + tr->last_qid == zq->queue->qid) ? 
+ TRACK_AGAIN_QUEUE_WEIGHT_PENALTY : 0; + if (!zcrypt_queue_compare(zq, pref_zq, + wgt + cpen + qpen, pref_wgt)) continue; pref_zc = zc; pref_zq = zq; - pref_weight = weight; + pref_wgt = wgt + cpen + qpen; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -788,25 +810,30 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt); spin_unlock(&zcrypt_list_lock); out: + if (tr) { + tr->last_rc = rc; + tr->last_qid = qid; + } trace_s390_zcrypt_rep(crt, func_code, rc, AP_QID_CARD(qid), AP_QID_QUEUE(qid)); return rc; } static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, + struct zcrypt_track *tr, struct ica_xcRB *xcRB) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; struct ap_message ap_msg; - unsigned int weight = 0, pref_weight = 0; + unsigned int wgt = 0, pref_wgt = 0; unsigned int func_code; unsigned short *domain, tdom; - int qid = 0, rc = -ENODEV; + int cpen, qpen, qid = 0, rc = -ENODEV; struct module *mod; trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB); @@ -843,8 +870,12 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, if (!zcrypt_check_card(perms, zc->card->id)) continue; /* get weight index of the card device */ - weight = speed_idx_cca(func_code) * zc->speed_rating[SECKEY]; - if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) + wgt = speed_idx_cca(func_code) * zc->speed_rating[SECKEY]; + /* penalty if this msg was previously sent via this card */ + cpen = (tr && tr->again_counter && tr->last_qid && + AP_QID_CARD(tr->last_qid) == zc->card->id) ? + TRACK_AGAIN_CARD_WEIGHT_PENALTY : 0; + if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { /* check if device is online and eligible */ @@ -857,15 +888,19 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, if (!zcrypt_check_queue(perms, AP_QID_QUEUE(zq->queue->qid))) continue; - if (zcrypt_queue_compare(zq, pref_zq, - weight, pref_weight)) + /* penalty if the msg was previously sent at this qid */ + qpen = (tr && tr->again_counter && tr->last_qid && + tr->last_qid == zq->queue->qid) ? 
+ TRACK_AGAIN_QUEUE_WEIGHT_PENALTY : 0; + if (!zcrypt_queue_compare(zq, pref_zq, + wgt + cpen + qpen, pref_wgt)) continue; pref_zc = zc; pref_zq = zq; - pref_weight = weight; + pref_wgt = wgt + cpen + qpen; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -881,11 +916,15 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, rc = pref_zq->ops->send_cprb(userspace, pref_zq, xcRB, &ap_msg); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt); spin_unlock(&zcrypt_list_lock); out: ap_release_message(&ap_msg); + if (tr) { + tr->last_rc = rc; + tr->last_qid = qid; + } trace_s390_zcrypt_rep(xcRB, func_code, rc, AP_QID_CARD(qid), AP_QID_QUEUE(qid)); return rc; @@ -893,7 +932,7 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, long zcrypt_send_cprb(struct ica_xcRB *xcRB) { - return _zcrypt_send_cprb(false, &ap_perms, xcRB); + return _zcrypt_send_cprb(false, &ap_perms, NULL, xcRB); } EXPORT_SYMBOL(zcrypt_send_cprb); @@ -925,16 +964,17 @@ static bool is_desired_ep11_queue(unsigned int dev_qid, } static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, + struct zcrypt_track *tr, struct ep11_urb *xcrb) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; struct ep11_target_dev *targets; unsigned short target_num; - unsigned int weight = 0, pref_weight = 0; + unsigned int wgt = 0, pref_wgt = 0; unsigned int func_code; struct ap_message ap_msg; - int qid = 0, rc = -ENODEV; + int cpen, qpen, qid = 0, rc = -ENODEV; struct module *mod; trace_s390_zcrypt_req(xcrb, TP_ZSENDEP11CPRB); @@ -983,8 +1023,12 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, if (!zcrypt_check_card(perms, zc->card->id)) continue; /* get weight index of the card device */ - weight = speed_idx_ep11(func_code) * zc->speed_rating[SECKEY]; - if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) + wgt = speed_idx_ep11(func_code) * zc->speed_rating[SECKEY]; + /* penalty if this msg was previously sent via this card */ + cpen = (tr && tr->again_counter && tr->last_qid && + AP_QID_CARD(tr->last_qid) == zc->card->id) ? + TRACK_AGAIN_CARD_WEIGHT_PENALTY : 0; + if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { /* check if device is online and eligible */ @@ -998,15 +1042,19 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, if (!zcrypt_check_queue(perms, AP_QID_QUEUE(zq->queue->qid))) continue; - if (zcrypt_queue_compare(zq, pref_zq, - weight, pref_weight)) + /* penalty if the msg was previously sent at this qid */ + qpen = (tr && tr->again_counter && tr->last_qid && + tr->last_qid == zq->queue->qid) ? 
+ TRACK_AGAIN_QUEUE_WEIGHT_PENALTY : 0; + if (!zcrypt_queue_compare(zq, pref_zq, + wgt + cpen + qpen, pref_wgt)) continue; pref_zc = zc; pref_zq = zq; - pref_weight = weight; + pref_wgt = wgt + cpen + qpen; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -1018,13 +1066,17 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, rc = pref_zq->ops->send_ep11_cprb(userspace, pref_zq, xcrb, &ap_msg); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt); spin_unlock(&zcrypt_list_lock); out_free: kfree(targets); out: ap_release_message(&ap_msg); + if (tr) { + tr->last_rc = rc; + tr->last_qid = qid; + } trace_s390_zcrypt_rep(xcrb, func_code, rc, AP_QID_CARD(qid), AP_QID_QUEUE(qid)); return rc; @@ -1032,7 +1084,7 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb) { - return _zcrypt_send_ep11_cprb(false, &ap_perms, xcrb); + return _zcrypt_send_ep11_cprb(false, &ap_perms, NULL, xcrb); } EXPORT_SYMBOL(zcrypt_send_ep11_cprb); @@ -1040,7 +1092,7 @@ static long zcrypt_rng(char *buffer) { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; - unsigned int weight = 0, pref_weight = 0; + unsigned int wgt = 0, pref_wgt = 0; unsigned int func_code; struct ap_message ap_msg; unsigned int domain; @@ -1062,22 +1114,21 @@ static long zcrypt_rng(char *buffer) if (!zc->online || !(zc->card->functions & 0x10000000)) continue; /* get weight index of the card device */ - weight = zc->speed_rating[func_code]; - if (zcrypt_card_compare(zc, pref_zc, weight, pref_weight)) + wgt = zc->speed_rating[func_code]; + if (!zcrypt_card_compare(zc, pref_zc, wgt, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { /* check if device is online and eligible */ if (!zq->online || !zq->ops->rng) continue; - if (zcrypt_queue_compare(zq, pref_zq, - weight, pref_weight)) + if (!zcrypt_queue_compare(zq, pref_zq, wgt, pref_wgt)) continue; pref_zc = zc; pref_zq = zq; - pref_weight = weight; + pref_wgt = wgt; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, wgt); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -1089,7 +1140,7 @@ static long zcrypt_rng(char *buffer) rc = pref_zq->ops->rng(pref_zq, buffer, &ap_msg); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt); spin_unlock(&zcrypt_list_lock); out: @@ -1301,19 +1352,25 @@ static int zcrypt_requestq_count(void) static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg) { int rc; + struct zcrypt_track tr; struct ica_rsa_modexpo mex; struct ica_rsa_modexpo __user *umex = (void __user *) arg; + memset(&tr, 0, sizeof(tr)); if (copy_from_user(&mex, umex, sizeof(mex))) return -EFAULT; do { - rc = zcrypt_rsa_modexpo(perms, &mex); - } while (rc == -EAGAIN); + rc = zcrypt_rsa_modexpo(perms, &tr, &mex); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_rsa_modexpo(perms, &mex); - } while (rc == -EAGAIN); + rc = zcrypt_rsa_modexpo(perms, &tr, &mex); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter 
< TRACK_AGAIN_MAX); if (rc) { ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d\n", rc); return rc; @@ -1324,19 +1381,25 @@ static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg) static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg) { int rc; + struct zcrypt_track tr; struct ica_rsa_modexpo_crt crt; struct ica_rsa_modexpo_crt __user *ucrt = (void __user *) arg; + memset(&tr, 0, sizeof(tr)); if (copy_from_user(&crt, ucrt, sizeof(crt))) return -EFAULT; do { - rc = zcrypt_rsa_crt(perms, &crt); - } while (rc == -EAGAIN); + rc = zcrypt_rsa_crt(perms, &tr, &crt); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_rsa_crt(perms, &crt); - } while (rc == -EAGAIN); + rc = zcrypt_rsa_crt(perms, &tr, &crt); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); if (rc) { ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d\n", rc); return rc; @@ -1348,18 +1411,24 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg) { int rc; struct ica_xcRB xcRB; + struct zcrypt_track tr; struct ica_xcRB __user *uxcRB = (void __user *) arg; + memset(&tr, 0, sizeof(tr)); if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB))) return -EFAULT; do { - rc = _zcrypt_send_cprb(true, perms, &xcRB); - } while (rc == -EAGAIN); + rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = _zcrypt_send_cprb(true, perms, &xcRB); - } while (rc == -EAGAIN); + rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); if (rc) ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n", rc, xcRB.status); @@ -1372,18 +1441,24 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg) { int rc; struct ep11_urb xcrb; + struct zcrypt_track tr; struct ep11_urb __user *uxcrb = (void __user *)arg; + memset(&tr, 0, sizeof(tr)); if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) return -EFAULT; do { - rc = _zcrypt_send_ep11_cprb(true, perms, &xcrb); - } while (rc == -EAGAIN); + rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = _zcrypt_send_ep11_cprb(true, perms, &xcrb); - } while (rc == -EAGAIN); + rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); if (rc) ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc); if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb))) @@ -1535,8 +1610,10 @@ static long trans_modexpo32(struct ap_perms *perms, struct file *filp, struct compat_ica_rsa_modexpo __user *umex32 = compat_ptr(arg); struct compat_ica_rsa_modexpo mex32; struct ica_rsa_modexpo mex64; + struct zcrypt_track tr; long rc; + memset(&tr, 0, sizeof(tr)); if (copy_from_user(&mex32, umex32, sizeof(mex32))) return -EFAULT; mex64.inputdata = compat_ptr(mex32.inputdata); @@ -1546,13 +1623,17 @@ static long 
trans_modexpo32(struct ap_perms *perms, struct file *filp, mex64.b_key = compat_ptr(mex32.b_key); mex64.n_modulus = compat_ptr(mex32.n_modulus); do { - rc = zcrypt_rsa_modexpo(perms, &mex64); - } while (rc == -EAGAIN); + rc = zcrypt_rsa_modexpo(perms, &tr, &mex64); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_rsa_modexpo(perms, &mex64); - } while (rc == -EAGAIN); + rc = zcrypt_rsa_modexpo(perms, &tr, &mex64); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); if (rc) return rc; return put_user(mex64.outputdatalength, @@ -1577,8 +1658,10 @@ static long trans_modexpo_crt32(struct ap_perms *perms, struct file *filp, struct compat_ica_rsa_modexpo_crt __user *ucrt32 = compat_ptr(arg); struct compat_ica_rsa_modexpo_crt crt32; struct ica_rsa_modexpo_crt crt64; + struct zcrypt_track tr; long rc; + memset(&tr, 0, sizeof(tr)); if (copy_from_user(&crt32, ucrt32, sizeof(crt32))) return -EFAULT; crt64.inputdata = compat_ptr(crt32.inputdata); @@ -1591,13 +1674,17 @@ static long trans_modexpo_crt32(struct ap_perms *perms, struct file *filp, crt64.nq_prime = compat_ptr(crt32.nq_prime); crt64.u_mult_inv = compat_ptr(crt32.u_mult_inv); do { - rc = zcrypt_rsa_crt(perms, &crt64); - } while (rc == -EAGAIN); + rc = zcrypt_rsa_crt(perms, &tr, &crt64); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = zcrypt_rsa_crt(perms, &crt64); - } while (rc == -EAGAIN); + rc = zcrypt_rsa_crt(perms, &tr, &crt64); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); if (rc) return rc; return put_user(crt64.outputdatalength, @@ -1629,9 +1716,11 @@ static long trans_xcRB32(struct ap_perms *perms, struct file *filp, { struct compat_ica_xcRB __user *uxcRB32 = compat_ptr(arg); struct compat_ica_xcRB xcRB32; + struct zcrypt_track tr; struct ica_xcRB xcRB64; long rc; + memset(&tr, 0, sizeof(tr)); if (copy_from_user(&xcRB32, uxcRB32, sizeof(xcRB32))) return -EFAULT; xcRB64.agent_ID = xcRB32.agent_ID; @@ -1655,13 +1744,17 @@ static long trans_xcRB32(struct ap_perms *perms, struct file *filp, xcRB64.priority_window = xcRB32.priority_window; xcRB64.status = xcRB32.status; do { - rc = _zcrypt_send_cprb(true, perms, &xcRB64); - } while (rc == -EAGAIN); + rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB64); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) do { - rc = _zcrypt_send_cprb(true, perms, &xcRB64); - } while (rc == -EAGAIN); + rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB64); + if (rc == -EAGAIN) + tr.again_counter++; + } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length; xcRB32.reply_data_length = xcRB64.reply_data_length; xcRB32.status = xcRB64.status; diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 19ddfc38e029f..263ed17354314 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -55,6 +55,18 @@ enum crypto_ops { struct zcrypt_queue; +/* 
struct to hold tracking information for a userspace request/response */ +struct zcrypt_track { + int again_counter; /* retry attempts counter */ + int last_qid; /* last qid used */ + int last_rc; /* last return code */ +}; + +/* defines related to message tracking */ +#define TRACK_AGAIN_MAX 10 +#define TRACK_AGAIN_CARD_WEIGHT_PENALTY 1000 +#define TRACK_AGAIN_QUEUE_WEIGHT_PENALTY 10000 + struct zcrypt_ops { long (*rsa_modexpo)(struct zcrypt_queue *, struct ica_rsa_modexpo *); long (*rsa_modexpo_crt)(struct zcrypt_queue *, @@ -82,7 +94,7 @@ struct zcrypt_card { int min_mod_size; /* Min number of bits. */ int max_mod_size; /* Max number of bits. */ int max_exp_bit_length; - int speed_rating[NUM_OPS]; /* Speed idx of crypto ops. */ + const int *speed_rating; /* Speed idx of crypto ops. */ atomic_t load; /* Utilization of the crypto device */ int request_count; /* # current requests. */ diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index b447f3e9e4a2c..226a5612e855a 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -94,8 +94,7 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev) if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) { zc->min_mod_size = CEX2A_MIN_MOD_SIZE; zc->max_mod_size = CEX2A_MAX_MOD_SIZE; - memcpy(zc->speed_rating, CEX2A_SPEED_IDX, - sizeof(CEX2A_SPEED_IDX)); + zc->speed_rating = CEX2A_SPEED_IDX; zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE; zc->type_string = "CEX2A"; zc->user_space_type = ZCRYPT_CEX2A; @@ -108,8 +107,7 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev) zc->max_mod_size = CEX3A_MAX_MOD_SIZE; zc->max_exp_bit_length = CEX3A_MAX_MOD_SIZE; } - memcpy(zc->speed_rating, CEX3A_SPEED_IDX, - sizeof(CEX3A_SPEED_IDX)); + zc->speed_rating = CEX3A_SPEED_IDX; zc->type_string = "CEX3A"; zc->user_space_type = ZCRYPT_CEX3A; } else { diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c index 146eb9f246942..7a8cbdbe44080 100644 --- a/drivers/s390/crypto/zcrypt_cex2c.c +++ b/drivers/s390/crypto/zcrypt_cex2c.c @@ -266,8 +266,7 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev) case AP_DEVICE_TYPE_CEX2C: zc->user_space_type = ZCRYPT_CEX2C; zc->type_string = "CEX2C"; - memcpy(zc->speed_rating, CEX2C_SPEED_IDX, - sizeof(CEX2C_SPEED_IDX)); + zc->speed_rating = CEX2C_SPEED_IDX; zc->min_mod_size = CEX2C_MIN_MOD_SIZE; zc->max_mod_size = CEX2C_MAX_MOD_SIZE; zc->max_exp_bit_length = CEX2C_MAX_MOD_SIZE; @@ -275,8 +274,7 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev) case AP_DEVICE_TYPE_CEX3C: zc->user_space_type = ZCRYPT_CEX3C; zc->type_string = "CEX3C"; - memcpy(zc->speed_rating, CEX3C_SPEED_IDX, - sizeof(CEX3C_SPEED_IDX)); + zc->speed_rating = CEX3C_SPEED_IDX; zc->min_mod_size = CEX3C_MIN_MOD_SIZE; zc->max_mod_size = CEX3C_MAX_MOD_SIZE; zc->max_exp_bit_length = CEX3C_MAX_MOD_SIZE; diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index d9ebe3a3c210a..f5195bca1d856 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -409,31 +409,31 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) * Normalized speed ratings per crypto adapter * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY */ - static const int CEX4A_SPEED_IDX[] = { + static const int CEX4A_SPEED_IDX[NUM_OPS] = { 14, 19, 249, 42, 228, 1458, 0, 0}; - static const int CEX5A_SPEED_IDX[] = { + static const int CEX5A_SPEED_IDX[NUM_OPS] = { 8, 9, 20, 18, 66, 458, 0, 0}; - static 
const int CEX6A_SPEED_IDX[] = { + static const int CEX6A_SPEED_IDX[NUM_OPS] = { 6, 9, 20, 17, 65, 438, 0, 0}; - static const int CEX7A_SPEED_IDX[] = { + static const int CEX7A_SPEED_IDX[NUM_OPS] = { 6, 8, 17, 15, 54, 362, 0, 0}; - static const int CEX4C_SPEED_IDX[] = { + static const int CEX4C_SPEED_IDX[NUM_OPS] = { 59, 69, 308, 83, 278, 2204, 209, 40}; static const int CEX5C_SPEED_IDX[] = { 24, 31, 50, 37, 90, 479, 27, 10}; - static const int CEX6C_SPEED_IDX[] = { + static const int CEX6C_SPEED_IDX[NUM_OPS] = { 16, 20, 32, 27, 77, 455, 24, 9}; - static const int CEX7C_SPEED_IDX[] = { + static const int CEX7C_SPEED_IDX[NUM_OPS] = { 14, 16, 26, 23, 64, 376, 23, 8}; - static const int CEX4P_SPEED_IDX[] = { + static const int CEX4P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 50}; - static const int CEX5P_SPEED_IDX[] = { + static const int CEX5P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 10}; - static const int CEX6P_SPEED_IDX[] = { + static const int CEX6P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 9}; - static const int CEX7P_SPEED_IDX[] = { + static const int CEX7P_SPEED_IDX[NUM_OPS] = { 0, 0, 0, 0, 0, 0, 0, 8}; struct ap_card *ac = to_ap_card(&ap_dev->device); @@ -449,26 +449,22 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) { zc->type_string = "CEX4A"; zc->user_space_type = ZCRYPT_CEX4; - memcpy(zc->speed_rating, CEX4A_SPEED_IDX, - sizeof(CEX4A_SPEED_IDX)); + zc->speed_rating = CEX4A_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) { zc->type_string = "CEX5A"; zc->user_space_type = ZCRYPT_CEX5; - memcpy(zc->speed_rating, CEX5A_SPEED_IDX, - sizeof(CEX5A_SPEED_IDX)); + zc->speed_rating = CEX5A_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) { zc->type_string = "CEX6A"; zc->user_space_type = ZCRYPT_CEX6; - memcpy(zc->speed_rating, CEX6A_SPEED_IDX, - sizeof(CEX6A_SPEED_IDX)); + zc->speed_rating = CEX6A_SPEED_IDX; } else { zc->type_string = "CEX7A"; /* wrong user space type, just for compatibility * with the ZCRYPT_STATUS_MASK ioctl. 
*/ zc->user_space_type = ZCRYPT_CEX6; - memcpy(zc->speed_rating, CEX7A_SPEED_IDX, - sizeof(CEX7A_SPEED_IDX)); + zc->speed_rating = CEX7A_SPEED_IDX; } zc->min_mod_size = CEX4A_MIN_MOD_SIZE; if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) && @@ -488,32 +484,28 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - memcpy(zc->speed_rating, CEX4C_SPEED_IDX, - sizeof(CEX4C_SPEED_IDX)); + zc->speed_rating = CEX4C_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) { zc->type_string = "CEX5C"; /* wrong user space type, must be CEX5 * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - memcpy(zc->speed_rating, CEX5C_SPEED_IDX, - sizeof(CEX5C_SPEED_IDX)); + zc->speed_rating = CEX5C_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) { zc->type_string = "CEX6C"; /* wrong user space type, must be CEX6 * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - memcpy(zc->speed_rating, CEX6C_SPEED_IDX, - sizeof(CEX6C_SPEED_IDX)); + zc->speed_rating = CEX6C_SPEED_IDX; } else { zc->type_string = "CEX7C"; /* wrong user space type, must be CEX7 * just keep it for cca compatibility */ zc->user_space_type = ZCRYPT_CEX3C; - memcpy(zc->speed_rating, CEX7C_SPEED_IDX, - sizeof(CEX7C_SPEED_IDX)); + zc->speed_rating = CEX7C_SPEED_IDX; } zc->min_mod_size = CEX4C_MIN_MOD_SIZE; zc->max_mod_size = CEX4C_MAX_MOD_SIZE; @@ -522,26 +514,22 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev) if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX4) { zc->type_string = "CEX4P"; zc->user_space_type = ZCRYPT_CEX4; - memcpy(zc->speed_rating, CEX4P_SPEED_IDX, - sizeof(CEX4P_SPEED_IDX)); + zc->speed_rating = CEX4P_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX5) { zc->type_string = "CEX5P"; zc->user_space_type = ZCRYPT_CEX5; - memcpy(zc->speed_rating, CEX5P_SPEED_IDX, - sizeof(CEX5P_SPEED_IDX)); + zc->speed_rating = CEX5P_SPEED_IDX; } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX6) { zc->type_string = "CEX6P"; zc->user_space_type = ZCRYPT_CEX6; - memcpy(zc->speed_rating, CEX6P_SPEED_IDX, - sizeof(CEX6P_SPEED_IDX)); + zc->speed_rating = CEX6P_SPEED_IDX; } else { zc->type_string = "CEX7P"; /* wrong user space type, just for compatibility * with the ZCRYPT_STATUS_MASK ioctl. */ zc->user_space_type = ZCRYPT_CEX6; - memcpy(zc->speed_rating, CEX7P_SPEED_IDX, - sizeof(CEX7P_SPEED_IDX)); + zc->speed_rating = CEX7P_SPEED_IDX; } zc->min_mod_size = CEX4C_MIN_MOD_SIZE; zc->max_mod_size = CEX4C_MAX_MOD_SIZE; From 0ae88ccf4c160e02316e054db67156230568cf49 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 23 Sep 2020 09:18:38 +0200 Subject: [PATCH 71/85] s390/zcrypt: New config switch CONFIG_ZCRYPT_DEBUG Introduce a new config switch CONFIG_ZCRYPT_DEBUG which will be used to enable some features for debugging the zcrypt device driver and ap bus system: Another patch will use this for displaying ap card and ap queue state information via sysfs attributes. A further patch will use this to enable special treatment of some fields of a crypto request, making it possible to inject failures and so help with debugging the handling of failures.
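For illustration only (not part of this patch), a kernel configuration that enables the new switch also needs the dependencies stated in the Kconfig entry below, roughly:

	# illustrative .config fragment, assuming a debug build
	CONFIG_DEBUG_KERNEL=y
	CONFIG_ZCRYPT=m
	CONFIG_ZCRYPT_DEBUG=y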
Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/crypto/Kconfig | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index aa3a4ed07a666..895a6d5f8b53f 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -71,10 +71,26 @@ config ZCRYPT help Select this option if you want to enable support for s390 cryptographic adapters like: - + PCI-X Cryptographic Coprocessor (PCIXCC) - + Crypto Express 2,3,4 or 5 Coprocessor (CEXxC) - + Crypto Express 2,3,4 or 5 Accelerator (CEXxA) - + Crypto Express 4 or 5 EP11 Coprocessor (CEXxP) + + Crypto Express 2 up to 7 Coprocessor (CEXxC) + + Crypto Express 2 up to 7 Accelerator (CEXxA) + + Crypto Express 4 up to 7 EP11 Coprocessor (CEXxP) + +config ZCRYPT_DEBUG + bool "Enable debug features for s390 cryptographic adapters" + default n + depends on DEBUG_KERNEL + depends on ZCRYPT + help + Say 'Y' here to enable some additional debug features on the + s390 cryptographic adapters driver. + + There will be some more sysfs attributes displayed for ap cards + and queues and some flags on crypto requests are interpreted as + debugging messages to force error injection. + + Do not enable on production level kernel build. + + If unsure, say N. config ZCRYPT_MULTIDEVNODES bool "Support for multiple zcrypt device nodes" From 0b641cbd24445e56073c69dd046be488dcf1965b Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 2 Jul 2020 11:22:01 +0200 Subject: [PATCH 72/85] s390/ap: split ap queue state machine state from device state The state machine for each ap queue covered a mixture of device states and state machine (firmware queue state) states. This patch splits the device states and the state machine states into two different enums and variables. The major state is the device state with currently these values: AP_DEV_STATE_UNINITIATED - fresh and virgin, not touched AP_DEV_STATE_OPERATING - queue dev is working normal AP_DEV_STATE_SHUTDOWN - remove/unbind/shutdown in progress AP_DEV_STATE_ERROR - device is in error state The state machine is run only when the device state is > UNINITIATED. The state machine represents the states of the firmware queue: AP_SM_STATE_RESET_START - starting point, reset (RAPQ) ap queue AP_SM_STATE_RESET_WAIT - reset triggered, waiting to be finished if irqs enabled, set up irq (AQIC) AP_SM_STATE_SETIRQ_WAIT - enable irq triggered, waiting to be finished, then go to IDLE AP_SM_STATE_IDLE - queue is operational but empty AP_SM_STATE_WORKING - queue is operational, requests are stored and replies may wait for getting fetched AP_SM_STATE_QUEUE_FULL - firmware queue is full, so only replies can get fetched For debugging, each ap queue shows a sysfs attribute 'states' which displays the device and state machine state and is only available when the kernel is built with CONFIG_ZCRYPT_DEBUG enabled.
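To restate the core of the split in a compact form (a condensed restatement of the ap_sm_event() hunk below, not new behaviour): the state machine jump table is only consulted while the device state allows it.

	enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event)
	{
		/* run the firmware queue state machine only for initialized devices */
		if (aq->dev_state > AP_DEV_STATE_UNINITIATED)
			return ap_jumptable[aq->sm_state][event](aq);
		return AP_SM_WAIT_NONE;
	}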
Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 2 +- drivers/s390/crypto/ap_bus.h | 21 +++- drivers/s390/crypto/ap_queue.c | 155 ++++++++++++++++++------- drivers/s390/crypto/zcrypt_msgtype50.c | 8 +- drivers/s390/crypto/zcrypt_msgtype6.c | 26 +++-- 5 files changed, 155 insertions(+), 57 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 231a98c9165d5..1e895fcd25ccc 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1380,7 +1380,7 @@ static void _ap_scan_bus_adapter(int id) if (dev) { if (!broken) { spin_lock_bh(&aq->lock); - broken = aq->sm_state == AP_SM_STATE_BORKED; + broken = aq->dev_state == AP_DEV_STATE_ERROR; spin_unlock_bh(&aq->lock); } if (broken) { diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 1ea046324e8f6..2d4558b5abaf9 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -86,15 +86,12 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) * AP queue state machine states */ enum ap_sm_state { - AP_SM_STATE_RESET_START, + AP_SM_STATE_RESET_START = 0, AP_SM_STATE_RESET_WAIT, AP_SM_STATE_SETIRQ_WAIT, AP_SM_STATE_IDLE, AP_SM_STATE_WORKING, AP_SM_STATE_QUEUE_FULL, - AP_SM_STATE_REMOVE, /* about to be removed from driver */ - AP_SM_STATE_UNBOUND, /* momentary not bound to a driver */ - AP_SM_STATE_BORKED, /* broken */ NR_AP_SM_STATES }; @@ -118,6 +115,17 @@ enum ap_sm_wait { NR_AP_SM_WAIT }; +/* + * AP queue device states + */ +enum ap_dev_state { + AP_DEV_STATE_UNINITIATED = 0, /* fresh and virgin, not touched */ + AP_DEV_STATE_OPERATING, /* queue dev is working normal */ + AP_DEV_STATE_SHUTDOWN, /* remove/unbind/shutdown in progress */ + AP_DEV_STATE_ERROR, /* device is in error state */ + NR_AP_DEV_STATES +}; + struct ap_device; struct ap_message; @@ -169,10 +177,10 @@ struct ap_queue { struct ap_card *card; /* Ptr to assoc. AP card. */ spinlock_t lock; /* Per device lock. */ void *private; /* ap driver private pointer. */ + enum ap_dev_state dev_state; /* queue device state */ ap_qid_t qid; /* AP queue id. */ int interrupt; /* indicate if interrupts are enabled */ int queue_count; /* # messages currently on AP queue. */ - enum ap_sm_state sm_state; /* ap queue state machine state */ int pendingq_count; /* # requests on pendingq list. */ int requestq_count; /* # requests on requestq list. */ u64 total_request_count; /* # requests ever for this AP device.*/ @@ -181,6 +189,7 @@ struct ap_queue { struct list_head pendingq; /* List of message sent to AP queue. */ struct list_head requestq; /* List of message yet to be sent. */ struct ap_message *reply; /* Per device reply message. 
*/ + enum ap_sm_state sm_state; /* ap queue state machine state */ }; #define to_ap_queue(x) container_of((x), struct ap_queue, ap_dev.device) @@ -234,7 +243,7 @@ int ap_recv(ap_qid_t, unsigned long long *, void *, size_t); enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event); enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event); -void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg); +int ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg); void ap_cancel_message(struct ap_queue *aq, struct ap_message *ap_msg); void ap_flush_queue(struct ap_queue *aq); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 688ebebbf98cb..e7ecbcc18db36 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -195,7 +195,7 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) aq->sm_state = AP_SM_STATE_IDLE; return AP_SM_WAIT_NONE; default: - aq->sm_state = AP_SM_STATE_BORKED; + aq->dev_state = AP_DEV_STATE_ERROR; return AP_SM_WAIT_NONE; } } @@ -245,7 +245,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) ap_msg->receive(aq, ap_msg, NULL); return AP_SM_WAIT_AGAIN; default: - aq->sm_state = AP_SM_STATE_BORKED; + aq->dev_state = AP_DEV_STATE_ERROR; return AP_SM_WAIT_NONE; } } @@ -284,7 +284,7 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: default: - aq->sm_state = AP_SM_STATE_BORKED; + aq->dev_state = AP_DEV_STATE_ERROR; return AP_SM_WAIT_NONE; } } @@ -323,7 +323,7 @@ static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq) case AP_RESPONSE_DECONFIGURED: case AP_RESPONSE_CHECKSTOPPED: default: - aq->sm_state = AP_SM_STATE_BORKED; + aq->dev_state = AP_DEV_STATE_ERROR; return AP_SM_WAIT_NONE; } } @@ -360,7 +360,7 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq) case AP_RESPONSE_NO_PENDING_REPLY: return AP_SM_WAIT_TIMEOUT; default: - aq->sm_state = AP_SM_STATE_BORKED; + aq->dev_state = AP_DEV_STATE_ERROR; return AP_SM_WAIT_NONE; } } @@ -393,23 +393,14 @@ static ap_func_t *ap_jumptable[NR_AP_SM_STATES][NR_AP_SM_EVENTS] = { [AP_SM_EVENT_POLL] = ap_sm_read, [AP_SM_EVENT_TIMEOUT] = ap_sm_reset, }, - [AP_SM_STATE_REMOVE] = { - [AP_SM_EVENT_POLL] = ap_sm_nop, - [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, - }, - [AP_SM_STATE_UNBOUND] = { - [AP_SM_EVENT_POLL] = ap_sm_nop, - [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, - }, - [AP_SM_STATE_BORKED] = { - [AP_SM_EVENT_POLL] = ap_sm_nop, - [AP_SM_EVENT_TIMEOUT] = ap_sm_nop, - }, }; enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event) { - return ap_jumptable[aq->sm_state][event](aq); + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) + return ap_jumptable[aq->sm_state][event](aq); + else + return AP_SM_WAIT_NONE; } enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event) @@ -429,12 +420,20 @@ static ssize_t request_count_show(struct device *dev, char *buf) { struct ap_queue *aq = to_ap_queue(dev); + bool valid = false; u64 req_cnt; spin_lock_bh(&aq->lock); - req_cnt = aq->total_request_count; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { + req_cnt = aq->total_request_count; + valid = true; + } spin_unlock_bh(&aq->lock); - return scnprintf(buf, PAGE_SIZE, "%llu\n", req_cnt); + + if (valid) + return scnprintf(buf, PAGE_SIZE, "%llu\n", req_cnt); + else + return scnprintf(buf, PAGE_SIZE, "-\n"); } static ssize_t request_count_store(struct device *dev, @@ -459,7 +458,8 @@ static ssize_t requestq_count_show(struct device 
*dev, unsigned int reqq_cnt = 0; spin_lock_bh(&aq->lock); - reqq_cnt = aq->requestq_count; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) + reqq_cnt = aq->requestq_count; spin_unlock_bh(&aq->lock); return scnprintf(buf, PAGE_SIZE, "%d\n", reqq_cnt); } @@ -473,7 +473,8 @@ static ssize_t pendingq_count_show(struct device *dev, unsigned int penq_cnt = 0; spin_lock_bh(&aq->lock); - penq_cnt = aq->pendingq_count; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) + penq_cnt = aq->pendingq_count; spin_unlock_bh(&aq->lock); return scnprintf(buf, PAGE_SIZE, "%d\n", penq_cnt); } @@ -542,12 +543,79 @@ static ssize_t interrupt_show(struct device *dev, static DEVICE_ATTR_RO(interrupt); +#ifdef CONFIG_ZCRYPT_DEBUG +static ssize_t states_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_queue *aq = to_ap_queue(dev); + int rc = 0; + + spin_lock_bh(&aq->lock); + /* queue device state */ + switch (aq->dev_state) { + case AP_DEV_STATE_UNINITIATED: + rc = scnprintf(buf, PAGE_SIZE, "UNINITIATED\n"); + break; + case AP_DEV_STATE_OPERATING: + rc = scnprintf(buf, PAGE_SIZE, "OPERATING"); + break; + case AP_DEV_STATE_SHUTDOWN: + rc = scnprintf(buf, PAGE_SIZE, "SHUTDOWN"); + break; + case AP_DEV_STATE_ERROR: + rc = scnprintf(buf, PAGE_SIZE, "ERROR"); + break; + default: + rc = scnprintf(buf, PAGE_SIZE, "UNKNOWN"); + } + /* state machine state */ + if (aq->dev_state) { + switch (aq->sm_state) { + case AP_SM_STATE_RESET_START: + rc += scnprintf(buf + rc, PAGE_SIZE - rc, + " [RESET_START]\n"); + break; + case AP_SM_STATE_RESET_WAIT: + rc += scnprintf(buf + rc, PAGE_SIZE - rc, + " [RESET_WAIT]\n"); + break; + case AP_SM_STATE_SETIRQ_WAIT: + rc += scnprintf(buf + rc, PAGE_SIZE - rc, + " [SETIRQ_WAIT]\n"); + break; + case AP_SM_STATE_IDLE: + rc += scnprintf(buf + rc, PAGE_SIZE - rc, + " [IDLE]\n"); + break; + case AP_SM_STATE_WORKING: + rc += scnprintf(buf + rc, PAGE_SIZE - rc, + " [WORKING]\n"); + break; + case AP_SM_STATE_QUEUE_FULL: + rc += scnprintf(buf + rc, PAGE_SIZE - rc, + " [FULL]\n"); + break; + default: + rc += scnprintf(buf + rc, PAGE_SIZE - rc, + " [UNKNOWN]\n"); + } + } + spin_unlock_bh(&aq->lock); + + return rc; +} +static DEVICE_ATTR_RO(states); +#endif + static struct attribute *ap_queue_dev_attrs[] = { &dev_attr_request_count.attr, &dev_attr_requestq_count.attr, &dev_attr_pendingq_count.attr, &dev_attr_reset.attr, &dev_attr_interrupt.attr, +#ifdef CONFIG_ZCRYPT_DEBUG + &dev_attr_states.attr, +#endif NULL }; @@ -587,7 +655,6 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type) aq->ap_dev.device.type = &ap_queue_type; aq->ap_dev.device_type = device_type; aq->qid = qid; - aq->sm_state = AP_SM_STATE_UNBOUND; aq->interrupt = AP_INTR_DISABLED; spin_lock_init(&aq->lock); INIT_LIST_HEAD(&aq->pendingq); @@ -612,22 +679,30 @@ EXPORT_SYMBOL(ap_queue_init_reply); * @aq: The AP device to queue the message to * @ap_msg: The message that is to be added */ -void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg) +int ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg) { - /* For asynchronous message handling a valid receive-callback - * is required. - */ + int rc = 0; + + /* msg needs to have a valid receive-callback */ BUG_ON(!ap_msg->receive); spin_lock_bh(&aq->lock); - /* Queue the message. 
*/ - list_add_tail(&ap_msg->list, &aq->requestq); - aq->requestq_count++; - aq->total_request_count++; - atomic64_inc(&aq->card->total_request_count); + + /* only allow to queue new messages if device state is ok */ + if (aq->dev_state == AP_DEV_STATE_OPERATING) { + list_add_tail(&ap_msg->list, &aq->requestq); + aq->requestq_count++; + aq->total_request_count++; + atomic64_inc(&aq->card->total_request_count); + } else + rc = -ENODEV; + /* Send/receive as many request from the queue as possible. */ ap_wait(ap_sm_event_loop(aq, AP_SM_EVENT_POLL)); + spin_unlock_bh(&aq->lock); + + return rc; } EXPORT_SYMBOL(ap_queue_message); @@ -698,8 +773,8 @@ void ap_queue_prepare_remove(struct ap_queue *aq) spin_lock_bh(&aq->lock); /* flush queue */ __ap_flush_queue(aq); - /* set REMOVE state to prevent new messages are queued in */ - aq->sm_state = AP_SM_STATE_REMOVE; + /* move queue device state to SHUTDOWN in progress */ + aq->dev_state = AP_DEV_STATE_SHUTDOWN; spin_unlock_bh(&aq->lock); del_timer_sync(&aq->timeout); } @@ -707,21 +782,21 @@ void ap_queue_prepare_remove(struct ap_queue *aq) void ap_queue_remove(struct ap_queue *aq) { /* - * all messages have been flushed and the state is - * AP_SM_STATE_REMOVE. Now reset with zero which also - * clears the irq registration and move the state - * to AP_SM_STATE_UNBOUND to signal that this queue - * is not used by any driver currently. + * all messages have been flushed and the device state + * is SHUTDOWN. Now reset with zero which also clears + * the irq registration and move the device state + * to the initial value AP_DEV_STATE_UNINITIATED. */ spin_lock_bh(&aq->lock); ap_zapq(aq->qid); - aq->sm_state = AP_SM_STATE_UNBOUND; + aq->dev_state = AP_DEV_STATE_UNINITIATED; spin_unlock_bh(&aq->lock); } void ap_queue_init_state(struct ap_queue *aq) { spin_lock_bh(&aq->lock); + aq->dev_state = AP_DEV_STATE_OPERATING; aq->sm_state = AP_SM_STATE_RESET_START; ap_wait(ap_sm_event(aq, AP_SM_EVENT_POLL)); spin_unlock_bh(&aq->lock); diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 7aedc338b4459..349306c2f155d 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -471,7 +471,9 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, if (rc) goto out_free; init_completion(&work); - ap_queue_message(zq->queue, &ap_msg); + rc = ap_queue_message(zq->queue, &ap_msg); + if (rc) + goto out_free; rc = wait_for_completion_interruptible(&work); if (rc == 0) { rc = ap_msg.rc; @@ -515,7 +517,9 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, if (rc) goto out_free; init_completion(&work); - ap_queue_message(zq->queue, &ap_msg); + rc = ap_queue_message(zq->queue, &ap_msg); + if (rc) + goto out_free; rc = wait_for_completion_interruptible(&work); if (rc == 0) { rc = ap_msg.rc; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 3db901883a5ca..51b9924753ee4 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1027,7 +1027,9 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, if (rc) goto out_free; init_completion(&resp_type.work); - ap_queue_message(zq->queue, &ap_msg); + rc = ap_queue_message(zq->queue, &ap_msg); + if (rc) + goto out_free; rc = wait_for_completion_interruptible(&resp_type.work); if (rc == 0) { rc = ap_msg.rc; @@ -1071,7 +1073,9 @@ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, if (rc) goto out_free; init_completion(&resp_type.work); - 
ap_queue_message(zq->queue, &ap_msg); + rc = ap_queue_message(zq->queue, &ap_msg); + if (rc) + goto out_free; rc = wait_for_completion_interruptible(&resp_type.work); if (rc == 0) { rc = ap_msg.rc; @@ -1130,7 +1134,9 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, struct response_type *rtype = (struct response_type *)(ap_msg->private); init_completion(&rtype->work); - ap_queue_message(zq->queue, ap_msg); + rc = ap_queue_message(zq->queue, ap_msg); + if (rc) + goto out; rc = wait_for_completion_interruptible(&rtype->work); if (rc == 0) { rc = ap_msg->rc; @@ -1139,7 +1145,7 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, } else /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); - +out: return rc; } @@ -1232,7 +1238,9 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * } init_completion(&rtype->work); - ap_queue_message(zq->queue, ap_msg); + rc = ap_queue_message(zq->queue, ap_msg); + if (rc) + goto out; rc = wait_for_completion_interruptible(&rtype->work); if (rc == 0) { rc = ap_msg->rc; @@ -1241,7 +1249,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * } else /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); - +out: return rc; } @@ -1293,7 +1301,9 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq, msg->cprbx.domain = AP_QID_QUEUE(zq->queue->qid); init_completion(&rtype->work); - ap_queue_message(zq->queue, ap_msg); + rc = ap_queue_message(zq->queue, ap_msg); + if (rc) + goto out; rc = wait_for_completion_interruptible(&rtype->work); if (rc == 0) { rc = ap_msg->rc; @@ -1302,7 +1312,7 @@ static long zcrypt_msgtype6_rng(struct zcrypt_queue *zq, } else /* Signal pending. */ ap_cancel_message(zq->queue, ap_msg); - +out: return rc; } From 2ea2a6099ae3d1708f90f43c81a98cba3d4bb74c Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 2 Jul 2020 15:56:15 +0200 Subject: [PATCH 73/85] s390/ap: add error response code field for ap queue devices On AP instruction failures the last response code is now kept in the struct ap_queue. There is also a new sysfs attribute showing this field (enabled only on debug kernels). Also slight rework of the AP_DBF macros to get some more content into one debug feature message line. 
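As an example of the macro rework (taken from the ap_bus.c conversions below), the debug severity moves from a call argument into the macro name:

	/* before */
	AP_DBF(DBF_INFO, "%s QCI not supported\n", __func__);
	/* after */
	AP_DBF_INFO("%s QCI not supported\n", __func__);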
Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 54 ++++++++++++++-------------- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/ap_debug.h | 8 +++++ drivers/s390/crypto/ap_queue.c | 64 ++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 26 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 1e895fcd25ccc..6d61e89c59845 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -214,7 +214,7 @@ static inline int ap_fetch_qci_info(struct ap_config_info *info) static void __init ap_init_qci_info(void) { if (!ap_qci_available()) { - AP_DBF(DBF_INFO, "%s QCI not supported\n", __func__); + AP_DBF_INFO("%s QCI not supported\n", __func__); return; } @@ -226,18 +226,18 @@ static void __init ap_init_qci_info(void) ap_qci_info = NULL; return; } - AP_DBF(DBF_INFO, "%s successful fetched initial qci info\n", __func__); + AP_DBF_INFO("%s successful fetched initial qci info\n", __func__); if (ap_qci_info->apxa) { if (ap_qci_info->Na) { ap_max_adapter_id = ap_qci_info->Na; - AP_DBF(DBF_INFO, "%s new ap_max_adapter_id is %d\n", - __func__, ap_max_adapter_id); + AP_DBF_INFO("%s new ap_max_adapter_id is %d\n", + __func__, ap_max_adapter_id); } if (ap_qci_info->Nd) { ap_max_domain_id = ap_qci_info->Nd; - AP_DBF(DBF_INFO, "%s new ap_max_domain_id is %d\n", - __func__, ap_max_domain_id); + AP_DBF_INFO("%s new ap_max_domain_id is %d\n", + __func__, ap_max_domain_id); } } } @@ -618,8 +618,8 @@ static int __ap_revise_reserved(struct device *dev, void *dummy) drvres = to_ap_drv(dev->driver)->flags & AP_DRIVER_FLAG_DEFAULT; if (!!devres != !!drvres) { - AP_DBF(DBF_DEBUG, "reprobing queue=%02x.%04x\n", - card, queue); + AP_DBF_DBG("reprobing queue=%02x.%04x\n", + card, queue); rc = device_reprobe(dev); } } @@ -796,7 +796,7 @@ EXPORT_SYMBOL(ap_bus_force_rescan); */ void ap_bus_cfg_chg(void) { - AP_DBF(DBF_INFO, "%s config change, forcing bus rescan\n", __func__); + AP_DBF_DBG("%s config change, forcing bus rescan\n", __func__); ap_bus_force_rescan(); } @@ -947,7 +947,7 @@ static ssize_t ap_domain_store(struct bus_type *bus, ap_domain_index = domain; spin_unlock_bh(&ap_domain_lock); - AP_DBF(DBF_INFO, "stored new default domain=%d\n", domain); + AP_DBF_INFO("stored new default domain=%d\n", domain); return count; } @@ -1208,8 +1208,8 @@ static void ap_select_domain(void) } if (dom <= ap_max_domain_id) { ap_domain_index = dom; - AP_DBF(DBF_DEBUG, "%s new default domain is %d\n", - __func__, ap_domain_index); + AP_DBF_INFO("%s new default domain is %d\n", + __func__, ap_domain_index); } out: spin_unlock_bh(&ap_domain_lock); @@ -1225,8 +1225,11 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func) int comp_type = 0; /* < CEX2A is not supported */ - if (rawtype < AP_DEVICE_TYPE_CEX2A) + if (rawtype < AP_DEVICE_TYPE_CEX2A) { + AP_DBF_WARN("get_comp_type queue=%02x.%04x unsupported type %d\n", + AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype); return 0; + } /* up to CEX7 known and fully supported */ if (rawtype <= AP_DEVICE_TYPE_CEX7) return rawtype; @@ -1248,11 +1251,12 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func) comp_type = apinfo.cat; } if (!comp_type) - AP_DBF(DBF_WARN, "queue=%02x.%04x unable to map type %d\n", - AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype); + AP_DBF_WARN("get_comp_type queue=%02x.%04x unable to map type %d\n", + AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype); else if (comp_type != rawtype) - 
AP_DBF(DBF_INFO, "queue=%02x.%04x map type %d to %d\n", - AP_QID_CARD(qid), AP_QID_QUEUE(qid), rawtype, comp_type); + AP_DBF_INFO("get_comp_type queue=%02x.%04x map type %d to %d\n", + AP_QID_CARD(qid), AP_QID_QUEUE(qid), + rawtype, comp_type); return comp_type; } @@ -1333,11 +1337,11 @@ static void _ap_scan_bus_adapter(int id) broken = true; } else if (ac->raw_hwtype != type) { /* card type has changed */ - AP_DBF(DBF_INFO, "card=%02x type changed.\n", id); + AP_DBF_INFO("card=%02x type changed.\n", id); broken = true; } else if (ac->functions != func) { /* card functions have changed */ - AP_DBF(DBF_INFO, "card=%02x functions changed.\n", id); + AP_DBF_INFO("card=%02x functions changed.\n", id); broken = true; } if (broken) { @@ -1385,9 +1389,8 @@ static void _ap_scan_bus_adapter(int id) } if (broken) { /* Remove broken device */ - AP_DBF(DBF_DEBUG, - "removing broken queue=%02x.%04x\n", - id, dom); + AP_DBF_DBG("removing broken queue=%02x.%04x\n", + id, dom); device_unregister(dev); } put_device(dev); @@ -1448,7 +1451,7 @@ static void ap_scan_bus(struct work_struct *unused) ap_fetch_qci_info(ap_qci_info); ap_select_domain(); - AP_DBF(DBF_DEBUG, "%s running\n", __func__); + AP_DBF_DBG("%s running\n", __func__); /* loop over all possible adapters */ for (id = 0; id < AP_DEVICES; id++) @@ -1463,9 +1466,8 @@ static void ap_scan_bus(struct work_struct *unused) if (dev) put_device(dev); else - AP_DBF(DBF_INFO, - "no queue device with default domain %d available\n", - ap_domain_index); + AP_DBF_INFO("no queue device with default domain %d available\n", + ap_domain_index); } mod_timer(&ap_config_timer, jiffies + ap_config_time * HZ); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 2d4558b5abaf9..0b66e8866a2c9 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -190,6 +190,7 @@ struct ap_queue { struct list_head requestq; /* List of message yet to be sent. */ struct ap_message *reply; /* Per device reply message. */ enum ap_sm_state sm_state; /* ap queue state machine state */ + int last_err_rc; /* last error state response code */ }; #define to_ap_queue(x) container_of((x), struct ap_queue, ap_dev.device) diff --git a/drivers/s390/crypto/ap_debug.h b/drivers/s390/crypto/ap_debug.h index dc675eb5aef6f..34b0350d0b1ae 100644 --- a/drivers/s390/crypto/ap_debug.h +++ b/drivers/s390/crypto/ap_debug.h @@ -20,6 +20,14 @@ #define AP_DBF(...) \ debug_sprintf_event(ap_dbf_info, ##__VA_ARGS__) +#define AP_DBF_ERR(...) \ + debug_sprintf_event(ap_dbf_info, DBF_ERR, ##__VA_ARGS__) +#define AP_DBF_WARN(...) \ + debug_sprintf_event(ap_dbf_info, DBF_WARN, ##__VA_ARGS__) +#define AP_DBF_INFO(...) \ + debug_sprintf_event(ap_dbf_info, DBF_INFO, ##__VA_ARGS__) +#define AP_DBF_DBG(...) 
\ + debug_sprintf_event(ap_dbf_info, DBF_DEBUG, ##__VA_ARGS__) extern debug_info_t *ap_dbf_info; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index e7ecbcc18db36..69ea3d2c20aeb 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -196,6 +196,10 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) return AP_SM_WAIT_NONE; default: aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = status.response_code; + AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; } } @@ -246,6 +250,10 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) return AP_SM_WAIT_AGAIN; default: aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = status.response_code; + AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; } } @@ -285,6 +293,10 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) case AP_RESPONSE_CHECKSTOPPED: default: aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = status.response_code; + AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; } } @@ -324,6 +336,10 @@ static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq) case AP_RESPONSE_CHECKSTOPPED: default: aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = status.response_code; + AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; } } @@ -361,6 +377,10 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq) return AP_SM_WAIT_TIMEOUT; default: aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = status.response_code; + AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + __func__, status.response_code, + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; } } @@ -605,6 +625,49 @@ static ssize_t states_show(struct device *dev, return rc; } static DEVICE_ATTR_RO(states); + +static ssize_t last_err_rc_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_queue *aq = to_ap_queue(dev); + int rc; + + spin_lock_bh(&aq->lock); + rc = aq->last_err_rc; + spin_unlock_bh(&aq->lock); + + switch (rc) { + case AP_RESPONSE_NORMAL: + return scnprintf(buf, PAGE_SIZE, "NORMAL\n"); + case AP_RESPONSE_Q_NOT_AVAIL: + return scnprintf(buf, PAGE_SIZE, "Q_NOT_AVAIL\n"); + case AP_RESPONSE_RESET_IN_PROGRESS: + return scnprintf(buf, PAGE_SIZE, "RESET_IN_PROGRESS\n"); + case AP_RESPONSE_DECONFIGURED: + return scnprintf(buf, PAGE_SIZE, "DECONFIGURED\n"); + case AP_RESPONSE_CHECKSTOPPED: + return scnprintf(buf, PAGE_SIZE, "CHECKSTOPPED\n"); + case AP_RESPONSE_BUSY: + return scnprintf(buf, PAGE_SIZE, "BUSY\n"); + case AP_RESPONSE_INVALID_ADDRESS: + return scnprintf(buf, PAGE_SIZE, "INVALID_ADDRESS\n"); + case AP_RESPONSE_OTHERWISE_CHANGED: + return scnprintf(buf, PAGE_SIZE, "OTHERWISE_CHANGED\n"); + case AP_RESPONSE_Q_FULL: + return scnprintf(buf, PAGE_SIZE, "Q_FULL/NO_PENDING_REPLY\n"); + case AP_RESPONSE_INDEX_TOO_BIG: + return scnprintf(buf, PAGE_SIZE, "INDEX_TOO_BIG\n"); + case AP_RESPONSE_NO_FIRST_PART: + return scnprintf(buf, PAGE_SIZE, "NO_FIRST_PART\n"); + case AP_RESPONSE_MESSAGE_TOO_BIG: + return scnprintf(buf, 
PAGE_SIZE, "MESSAGE_TOO_BIG\n"); + case AP_RESPONSE_REQ_FAC_NOT_INST: + return scnprintf(buf, PAGE_SIZE, "REQ_FAC_NOT_INST\n"); + default: + return scnprintf(buf, PAGE_SIZE, "response code %d\n", rc); + } +} +static DEVICE_ATTR_RO(last_err_rc); #endif static struct attribute *ap_queue_dev_attrs[] = { @@ -615,6 +678,7 @@ static struct attribute *ap_queue_dev_attrs[] = { &dev_attr_interrupt.attr, #ifdef CONFIG_ZCRYPT_DEBUG &dev_attr_states.attr, + &dev_attr_last_err_rc.attr, #endif NULL }; From 4f2fcccdb547b09a4532c705078811e672fb9235 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 2 Jul 2020 16:57:00 +0200 Subject: [PATCH 74/85] s390/ap: add card/queue deconfig state This patch adds a new config state to the ap card and queue devices. This state reflects the response code 0x03 "AP deconfigured" on TQAP invocation and is tracked with every ap bus scan. Together with this new state now a card/queue device which is 'deconfigured' is not disposed any more. However, for backward compatibility the online state now needs to take this state into account. So a card/queue is offline when the device is not configured. Furthermore a device can't get switched from offline to online state when not configured. The config state is shown in sysfs at /sys/devices/ap/cardxx/config for the card and /sys/devices/ap/cardxx/xx.yyyy/config for each queue within each card. It is a read-only attribute reflecting the negation of the 'AP deconfig' state as it is noted in the AP documents. Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.c | 364 +++++++++++++++++++---------- drivers/s390/crypto/ap_bus.h | 2 + drivers/s390/crypto/ap_card.c | 11 + drivers/s390/crypto/ap_queue.c | 15 ++ drivers/s390/crypto/zcrypt_api.c | 46 ++-- drivers/s390/crypto/zcrypt_card.c | 12 +- drivers/s390/crypto/zcrypt_queue.c | 11 +- 7 files changed, 319 insertions(+), 142 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 6d61e89c59845..485cbfcbf06ed 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -307,7 +307,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain); * false otherwise. */ static bool ap_queue_info(ap_qid_t qid, int *q_type, - unsigned int *q_fac, int *q_depth) + unsigned int *q_fac, int *q_depth, bool *q_decfg) { struct ap_queue_status status; unsigned long info = 0; @@ -322,6 +322,9 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type, switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_RESET_IN_PROGRESS: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + case AP_RESPONSE_BUSY: /* * According to the architecture in all these cases the * info should be filled. All bits 0 is not possible as @@ -332,6 +335,7 @@ static bool ap_queue_info(ap_qid_t qid, int *q_type, *q_type = (int)((info >> 24) & 0xff); *q_fac = (unsigned int)(info >> 32); *q_depth = (int)(info & 0xff); + *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED; switch (*q_type) { /* For CEX2 and CEX3 the available functions * are not reflected by the facilities bits. @@ -1290,154 +1294,278 @@ static int __match_queue_device_with_queue_id(struct device *dev, const void *da /* * Helper function for ap_scan_bus(). - * Does the scan bus job for the given adapter id. + * Remove card device and associated queue devices. 
+ */ +static inline void ap_scan_rm_card_dev_and_queue_devs(struct ap_card *ac) +{ + bus_for_each_dev(&ap_bus_type, NULL, + (void *)(long) ac->id, + __ap_queue_devices_with_id_unregister); + device_unregister(&ac->ap_dev.device); +} + +/* + * Helper function for ap_scan_bus(). + * Does the scan bus job for all the domains within + * a valid adapter given by an ap_card ptr. */ -static void _ap_scan_bus_adapter(int id) +static inline void ap_scan_domains(struct ap_card *ac) { - bool broken; + bool decfg; ap_qid_t qid; unsigned int func; - struct ap_card *ac; struct device *dev; struct ap_queue *aq; + int rc, dom, depth, type; + + /* + * Go through the configuration for the domains and compare them + * to the existing queue devices. Also take care of the config + * and error state for the queue devices. + */ + + for (dom = 0; dom <= ap_max_domain_id; dom++) { + qid = AP_MKQID(ac->id, dom); + dev = bus_find_device(&ap_bus_type, NULL, + (void *)(long) qid, + __match_queue_device_with_qid); + aq = dev ? to_ap_queue(dev) : NULL; + if (!ap_test_config_usage_domain(dom)) { + if (dev) { + AP_DBF_INFO("%s(%d,%d) not in config any more, rm queue device\n", + __func__, ac->id, dom); + device_unregister(dev); + put_device(dev); + } + continue; + } + /* domain is valid, get info from this APQN */ + if (!ap_queue_info(qid, &type, &func, &depth, &decfg)) { + if (aq) { + AP_DBF_INFO( + "%s(%d,%d) ap_queue_info() not successful, rm queue device\n", + __func__, ac->id, dom); + device_unregister(dev); + put_device(dev); + } + continue; + } + /* if no queue device exists, create a new one */ + if (!aq) { + aq = ap_queue_create(qid, ac->ap_dev.device_type); + if (!aq) { + AP_DBF_WARN("%s(%d,%d) ap_queue_create() failed\n", + __func__, ac->id, dom); + continue; + } + aq->card = ac; + aq->config = !decfg; + dev = &aq->ap_dev.device; + dev->bus = &ap_bus_type; + dev->parent = &ac->ap_dev.device; + dev_set_name(dev, "%02x.%04x", ac->id, dom); + /* register queue device */ + rc = device_register(dev); + if (rc) { + AP_DBF_WARN("%s(%d,%d) device_register() failed\n", + __func__, ac->id, dom); + goto put_dev_and_continue; + } + if (decfg) + AP_DBF_INFO("%s(%d,%d) new (decfg) queue device created\n", + __func__, ac->id, dom); + else + AP_DBF_INFO("%s(%d,%d) new queue device created\n", + __func__, ac->id, dom); + goto put_dev_and_continue; + } + /* Check config state on the already existing queue device */ + spin_lock_bh(&aq->lock); + if (decfg && aq->config) { + /* config off this queue device */ + aq->config = false; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { + aq->dev_state = AP_DEV_STATE_ERROR; + aq->last_err_rc = AP_RESPONSE_DECONFIGURED; + } + spin_unlock_bh(&aq->lock); + AP_DBF_INFO("%s(%d,%d) queue device config off\n", + __func__, ac->id, dom); + /* 'receive' pending messages with -EAGAIN */ + ap_flush_queue(aq); + goto put_dev_and_continue; + } + if (!decfg && !aq->config) { + /* config on this queue device */ + aq->config = true; + if (aq->dev_state > AP_DEV_STATE_UNINITIATED) { + aq->dev_state = AP_DEV_STATE_OPERATING; + aq->sm_state = AP_SM_STATE_RESET_START; + } + spin_unlock_bh(&aq->lock); + AP_DBF_INFO("%s(%d,%d) queue device config on\n", + __func__, ac->id, dom); + goto put_dev_and_continue; + } + /* handle other error states */ + if (!decfg && aq->dev_state == AP_DEV_STATE_ERROR) { + spin_unlock_bh(&aq->lock); + /* 'receive' pending messages with -EAGAIN */ + ap_flush_queue(aq); + /* re-init (with reset) the queue device */ + ap_queue_init_state(aq); + AP_DBF_INFO("%s(%d,%d) queue device reinit 
enforced\n", + __func__, ac->id, dom); + goto put_dev_and_continue; + } + spin_unlock_bh(&aq->lock); +put_dev_and_continue: + put_device(dev); + } +} + +/* + * Helper function for ap_scan_bus(). + * Does the scan bus job for the given adapter id. + */ +static inline void ap_scan_adapter(int ap) +{ + bool decfg; + ap_qid_t qid; + unsigned int func; + struct device *dev; + struct ap_card *ac; int rc, dom, depth, type, comp_type; - /* check if there is a card device registered with this id */ + /* Is there currently a card device for this adapter ? */ dev = bus_find_device(&ap_bus_type, NULL, - (void *)(long) id, + (void *)(long) ap, __match_card_device_with_id); ac = dev ? to_ap_card(dev) : NULL; - if (!ap_test_config_card_id(id)) { - if (dev) { - /* Card device has been removed from configuration */ - bus_for_each_dev(&ap_bus_type, NULL, - (void *)(long) id, - __ap_queue_devices_with_id_unregister); - device_unregister(dev); + + /* Adapter not in configuration ? */ + if (!ap_test_config_card_id(ap)) { + if (ac) { + AP_DBF_INFO("%s(%d) ap not in config any more, rm card and queue devices\n", + __func__, ap); + ap_scan_rm_card_dev_and_queue_devs(ac); put_device(dev); } return; } /* - * This card id is enabled in the configuration. If we already have - * a card device with this id, check if type and functions are still - * the very same. Also verify that at least one queue is available. + * Adapter ap is valid in the current configuration. So do some checks: + * If no card device exists, build one. If a card device exists, check + * for type and functions changed. For all this we need to find a valid + * APQN first. */ - if (ac) { - /* find the first valid queue */ - for (dom = 0; dom < AP_DOMAINS; dom++) { - qid = AP_MKQID(id, dom); - if (ap_queue_info(qid, &type, &func, &depth)) + + for (dom = 0; dom <= ap_max_domain_id; dom++) + if (ap_test_config_usage_domain(dom)) { + qid = AP_MKQID(ap, dom); + if (ap_queue_info(qid, &type, &func, &depth, &decfg)) break; } - broken = false; - if (dom >= AP_DOMAINS) { - /* no accessible queue on this card */ - broken = true; - } else if (ac->raw_hwtype != type) { - /* card type has changed */ - AP_DBF_INFO("card=%02x type changed.\n", id); - broken = true; - } else if (ac->functions != func) { - /* card functions have changed */ - AP_DBF_INFO("card=%02x functions changed.\n", id); - broken = true; + if (dom > ap_max_domain_id) { + /* Could not find a valid APQN for this adapter */ + if (ac) { + AP_DBF_INFO( + "%s(%d) no type info (no APQN found), rm card and queue devices\n", + __func__, ap); + ap_scan_rm_card_dev_and_queue_devs(ac); + put_device(dev); + } else { + AP_DBF_DBG("%s(%d) no type info (no APQN found), ignored\n", + __func__, ap); } - if (broken) { - /* unregister card device and associated queues */ - bus_for_each_dev(&ap_bus_type, NULL, - (void *)(long) id, - __ap_queue_devices_with_id_unregister); - device_unregister(dev); + return; + } + if (!type) { + /* No apdater type info available, an unusable adapter */ + if (ac) { + AP_DBF_INFO("%s(%d) no valid type (0) info, rm card and queue devices\n", + __func__, ap); + ap_scan_rm_card_dev_and_queue_devs(ac); put_device(dev); - /* go back if there is no valid queue on this card */ - if (dom >= AP_DOMAINS) - return; - ac = NULL; + } else { + AP_DBF_DBG("%s(%d) no valid type (0) info, ignored\n", + __func__, ap); } + return; } - /* - * Go through all possible queue ids. Check and maybe create or release - * queue devices for this card. 
If there exists no card device yet, - * create a card device also. - */ - for (dom = 0; dom < AP_DOMAINS; dom++) { - qid = AP_MKQID(id, dom); - dev = bus_find_device(&ap_bus_type, NULL, - (void *)(long) qid, - __match_queue_device_with_qid); - aq = dev ? to_ap_queue(dev) : NULL; - if (!ap_test_config_usage_domain(dom)) { - if (dev) { - /* Queue device exists but has been - * removed from configuration. - */ - device_unregister(dev); - put_device(dev); - } - continue; - } - /* try to fetch infos about this queue */ - broken = !ap_queue_info(qid, &type, &func, &depth); - if (dev) { - if (!broken) { - spin_lock_bh(&aq->lock); - broken = aq->dev_state == AP_DEV_STATE_ERROR; - spin_unlock_bh(&aq->lock); + if (ac) { + /* Check APQN against existing card device for changes */ + if (ac->raw_hwtype != type) { + AP_DBF_INFO("%s(%d) hwtype %d changed, rm card and queue devices\n", + __func__, ap, type); + ap_scan_rm_card_dev_and_queue_devs(ac); + put_device(dev); + ac = NULL; + } else if (ac->functions != func) { + AP_DBF_INFO("%s(%d) functions 0x%08x changed, rm card and queue devices\n", + __func__, ap, type); + ap_scan_rm_card_dev_and_queue_devs(ac); + put_device(dev); + ac = NULL; + } else { + if (decfg && ac->config) { + ac->config = false; + AP_DBF_INFO("%s(%d) card device config off\n", + __func__, ap); + } - if (broken) { - /* Remove broken device */ - AP_DBF_DBG("removing broken queue=%02x.%04x\n", - id, dom); - device_unregister(dev); + if (!decfg && !ac->config) { + ac->config = true; + AP_DBF_INFO("%s(%d) card device config on\n", + __func__, ap); } - put_device(dev); - continue; } - if (broken) - continue; - /* a new queue device is needed, check out comp type */ + } + + if (!ac) { + /* Build a new card device */ comp_type = ap_get_compatible_type(qid, type, func); - if (!comp_type) - continue; - /* maybe a card device needs to be created first */ + if (!comp_type) { + AP_DBF_WARN("%s(%d) type %d, can't get compatibility type\n", + __func__, ap, type); + return; + } + ac = ap_card_create(ap, depth, type, comp_type, func); if (!ac) { - ac = ap_card_create(id, depth, type, comp_type, func); - if (!ac) - continue; - ac->ap_dev.device.bus = &ap_bus_type; - ac->ap_dev.device.parent = ap_root_device; - dev_set_name(&ac->ap_dev.device, "card%02x", id); - /* Register card device with AP bus */ - rc = device_register(&ac->ap_dev.device); - if (rc) { - put_device(&ac->ap_dev.device); - ac = NULL; - break; - } - /* get it and thus adjust reference counter */ - get_device(&ac->ap_dev.device); + AP_DBF_WARN("%s(%d) ap_card_create() failed\n", + __func__, ap); + return; } - /* now create the new queue device */ - aq = ap_queue_create(qid, comp_type); - if (!aq) - continue; - aq->card = ac; - aq->ap_dev.device.bus = &ap_bus_type; - aq->ap_dev.device.parent = &ac->ap_dev.device; - dev_set_name(&aq->ap_dev.device, "%02x.%04x", id, dom); - /* Register queue device */ - rc = device_register(&aq->ap_dev.device); + ac->config = !decfg; + dev = &ac->ap_dev.device; + dev->bus = &ap_bus_type; + dev->parent = ap_root_device; + dev_set_name(dev, "card%02x", ap); + /* Register the new card device with AP bus */ + rc = device_register(dev); if (rc) { - put_device(&aq->ap_dev.device); - continue; + AP_DBF_WARN("%s(%d) device_register() failed\n", + __func__, ap); + put_device(dev); + return; } - } /* end domain loop */ + /* get it and thus adjust reference counter */ + get_device(dev); + if (decfg) + AP_DBF_INFO("%s(%d) new (decfg) card device type=%d func=0x%08x created\n", + __func__, ap, type, func); + else + 
AP_DBF_INFO("%s(%d) new card device type=%d func=0x%08x created\n", + __func__, ap, type, func); + } + + /* Verify the domains and the queue devices for this card */ + ap_scan_domains(ac); - if (ac) - put_device(&ac->ap_dev.device); + /* release the card device */ + put_device(&ac->ap_dev.device); } /** @@ -1446,7 +1574,7 @@ static void _ap_scan_bus_adapter(int id) */ static void ap_scan_bus(struct work_struct *unused) { - int id; + int ap; ap_fetch_qci_info(ap_qci_info); ap_select_domain(); @@ -1454,8 +1582,8 @@ static void ap_scan_bus(struct work_struct *unused) AP_DBF_DBG("%s running\n", __func__); /* loop over all possible adapters */ - for (id = 0; id < AP_DEVICES; id++) - _ap_scan_bus_adapter(id); + for (ap = 0; ap <= ap_max_adapter_id; ap++) + ap_scan_adapter(ap); /* check if there is at least one queue available with default domain */ if (ap_domain_index >= 0) { diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 0b66e8866a2c9..56c8bed7b6a17 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -166,6 +166,7 @@ struct ap_card { unsigned int functions; /* AP device function bitfield. */ int queue_depth; /* AP queue depth.*/ int id; /* AP card number. */ + bool config; /* configured state */ atomic64_t total_request_count; /* # requests ever for this AP device.*/ }; @@ -178,6 +179,7 @@ struct ap_queue { spinlock_t lock; /* Per device lock. */ void *private; /* ap driver private pointer. */ enum ap_dev_state dev_state; /* queue device state */ + bool config; /* configured state */ ap_qid_t qid; /* AP queue id. */ int interrupt; /* indicate if interrupts are enabled */ int queue_count; /* # messages currently on AP queue. */ diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index 6588713319ba3..bf1e964b00484 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -139,6 +139,16 @@ static ssize_t modalias_show(struct device *dev, static DEVICE_ATTR_RO(modalias); +static ssize_t config_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_card *ac = to_ap_card(dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", ac->config ? 1 : 0); +} + +static DEVICE_ATTR_RO(config); + static struct attribute *ap_card_dev_attrs[] = { &dev_attr_hwtype.attr, &dev_attr_raw_hwtype.attr, @@ -148,6 +158,7 @@ static struct attribute *ap_card_dev_attrs[] = { &dev_attr_requestq_count.attr, &dev_attr_pendingq_count.attr, &dev_attr_modalias.attr, + &dev_attr_config.attr, NULL }; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 69ea3d2c20aeb..5523338ca65d6 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -563,6 +563,20 @@ static ssize_t interrupt_show(struct device *dev, static DEVICE_ATTR_RO(interrupt); +static ssize_t config_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_queue *aq = to_ap_queue(dev); + int rc; + + spin_lock_bh(&aq->lock); + rc = scnprintf(buf, PAGE_SIZE, "%d\n", aq->config ? 
1 : 0); + spin_unlock_bh(&aq->lock); + return rc; +} + +static DEVICE_ATTR_RO(config); + #ifdef CONFIG_ZCRYPT_DEBUG static ssize_t states_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -676,6 +690,7 @@ static struct attribute *ap_queue_dev_attrs[] = { &dev_attr_pendingq_count.attr, &dev_attr_reset.attr, &dev_attr_interrupt.attr, + &dev_attr_config.attr, #ifdef CONFIG_ZCRYPT_DEBUG &dev_attr_states.attr, &dev_attr_last_err_rc.attr, diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 223e1c2332788..ab18e847cee75 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -663,8 +663,9 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, pref_zq = NULL; spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { - /* Check for online accelarator and CCA cards */ - if (!zc->online || !(zc->card->functions & 0x18000000)) + /* Check for useable accelarator or CCA card */ + if (!zc->online || !zc->card->config || + !(zc->card->functions & 0x18000000)) continue; /* Check for size limits */ if (zc->min_mod_size > mex->inputdatalength || @@ -682,8 +683,9 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { - /* check if device is online and eligible */ - if (!zq->online || !zq->ops->rsa_modexpo) + /* check if device is useable and eligible */ + if (!zq->online || !zq->ops->rsa_modexpo || + !zq->queue->config) continue; /* check if device node has admission for this queue */ if (!zcrypt_check_queue(perms, @@ -760,8 +762,9 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, pref_zq = NULL; spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { - /* Check for online accelarator and CCA cards */ - if (!zc->online || !(zc->card->functions & 0x18000000)) + /* Check for useable accelarator or CCA card */ + if (!zc->online || !zc->card->config || + !(zc->card->functions & 0x18000000)) continue; /* Check for size limits */ if (zc->min_mod_size > crt->inputdatalength || @@ -779,8 +782,9 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { - /* check if device is online and eligible */ - if (!zq->online || !zq->ops->rsa_modexpo_crt) + /* check if device is useable and eligible */ + if (!zq->online || !zq->ops->rsa_modexpo_crt || + !zq->queue->config) continue; /* check if device node has admission for this queue */ if (!zcrypt_check_queue(perms, @@ -859,8 +863,9 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, pref_zq = NULL; spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { - /* Check for online CCA cards */ - if (!zc->online || !(zc->card->functions & 0x10000000)) + /* Check for useable CCA card */ + if (!zc->online || !zc->card->config || + !(zc->card->functions & 0x10000000)) continue; /* Check for user selected CCA card */ if (xcRB->user_defined != AUTOSELECT && @@ -878,9 +883,10 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { - /* check if device is online and eligible */ + /* check for device useable and eligible */ if (!zq->online || !zq->ops->send_cprb || + !zq->queue->config || (tdom != AUTOSEL_DOM && tdom != AP_QID_QUEUE(zq->queue->qid))) continue; @@ -1012,8 +1018,9 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, 
pref_zq = NULL; spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { - /* Check for online EP11 cards */ - if (!zc->online || !(zc->card->functions & 0x04000000)) + /* Check for useable EP11 card */ + if (!zc->online || !zc->card->config || + !(zc->card->functions & 0x04000000)) continue; /* Check for user selected EP11 card */ if (targets && @@ -1031,9 +1038,10 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, if (!zcrypt_card_compare(zc, pref_zc, wgt + cpen, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { - /* check if device is online and eligible */ + /* check if device is useable and eligible */ if (!zq->online || !zq->ops->send_ep11_cprb || + !zq->queue->config || (targets && !is_desired_ep11_queue(zq->queue->qid, target_num, targets))) @@ -1110,16 +1118,18 @@ static long zcrypt_rng(char *buffer) pref_zq = NULL; spin_lock(&zcrypt_list_lock); for_each_zcrypt_card(zc) { - /* Check for online CCA cards */ - if (!zc->online || !(zc->card->functions & 0x10000000)) + /* Check for useable CCA card */ + if (!zc->online || !zc->card->config || + !(zc->card->functions & 0x10000000)) continue; /* get weight index of the card device */ wgt = zc->speed_rating[func_code]; if (!zcrypt_card_compare(zc, pref_zc, wgt, pref_wgt)) continue; for_each_zcrypt_queue(zq, zc) { - /* check if device is online and eligible */ - if (!zq->online || !zq->ops->rng) + /* check if device is useable and eligible */ + if (!zq->online || !zq->ops->rng || + !zq->queue->config) continue; if (!zcrypt_queue_compare(zq, pref_zq, wgt, pref_wgt)) continue; diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index c53cab4b0c9e3..e342eb86acd1d 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -50,22 +50,28 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct zcrypt_card *zc = to_ap_card(dev)->private; + struct ap_card *ac = to_ap_card(dev); + struct zcrypt_card *zc = ac->private; + int online = ac->config && zc->online ? 1 : 0; - return scnprintf(buf, PAGE_SIZE, "%d\n", zc->online); + return scnprintf(buf, PAGE_SIZE, "%d\n", online); } static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct zcrypt_card *zc = to_ap_card(dev)->private; + struct ap_card *ac = to_ap_card(dev); + struct zcrypt_card *zc = ac->private; struct zcrypt_queue *zq; int online, id; if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) return -EINVAL; + if (online && !ac->config) + return -ENODEV; + zc->online = online; id = zc->card->id; diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 8bae6ad159a76..3c207066313cf 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -40,22 +40,27 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct zcrypt_queue *zq = to_ap_queue(dev)->private; + struct ap_queue *aq = to_ap_queue(dev); + struct zcrypt_queue *zq = aq->private; + int online = aq->config && zq->online ? 
1 : 0; - return scnprintf(buf, PAGE_SIZE, "%d\n", zq->online); + return scnprintf(buf, PAGE_SIZE, "%d\n", online); } static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct zcrypt_queue *zq = to_ap_queue(dev)->private; + struct ap_queue *aq = to_ap_queue(dev); + struct zcrypt_queue *zq = aq->private; struct zcrypt_card *zc = zq->zcard; int online; if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1) return -EINVAL; + if (online && (!aq->config || !aq->card->config)) + return -ENODEV; if (online && !zc->online) return -EINVAL; zq->online = online; From 0671cc1048744c9a6f1c896baa85966a5abc42a0 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 27 Jul 2020 14:34:57 +0200 Subject: [PATCH 75/85] s390/sclp: Add support for SCLP AP adapter config/deconfig Add support for AP bus adapter config and deconfig to the sclp core code. The code is statically build into the kernel when ZCRYPT is configured either as module or with static support. This is the base functionality for having configure/deconfigure support in the AP bus and card code. Another patch will exploit this soon. Signed-off-by: Harald Freudenberger Suggested-by: Pierre Morel Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/sclp.h | 2 ++ drivers/s390/char/Makefile | 2 ++ drivers/s390/char/sclp.h | 2 +- drivers/s390/char/sclp_ap.c | 63 ++++++++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 drivers/s390/char/sclp_ap.c diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 90f34c7e2752b..a7bdd128d85b3 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -128,6 +128,8 @@ int sclp_chp_deconfigure(struct chp_id chpid); int sclp_chp_read_info(struct sclp_chp_info *info); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); +int sclp_ap_configure(u32 apid); +int sclp_ap_deconfigure(u32 apid); int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid); int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 845e12ac59549..c6fdb81a068a6 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -34,6 +34,8 @@ obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o obj-$(CONFIG_PCI) += sclp_pci.o +obj-$(subst m,y,$(CONFIG_ZCRYPT)) += sclp_ap.o + obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o obj-$(CONFIG_VMCP) += vmcp.o diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index ccc2d759c575b..69d9cde9ff5a8 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -229,7 +229,7 @@ static inline void sclp_fill_core_info(struct sclp_core_info *info, #define SCLP_HAS_CPU_INFO (sclp.facilities & 0x0800000000000000ULL) #define SCLP_HAS_CPU_RECONFIG (sclp.facilities & 0x0400000000000000ULL) #define SCLP_HAS_PCI_RECONFIG (sclp.facilities & 0x0000000040000000ULL) - +#define SCLP_HAS_AP_RECONFIG (sclp.facilities & 0x0000000100000000ULL) struct gds_subvector { u8 length; diff --git a/drivers/s390/char/sclp_ap.c b/drivers/s390/char/sclp_ap.c new file mode 100644 index 0000000000000..0dd1ca7127950 --- /dev/null +++ b/drivers/s390/char/sclp_ap.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * s390 crypto adapter related sclp functions. + * + * Copyright IBM Corp. 
2020 + */ +#define KMSG_COMPONENT "sclp_cmd" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include "sclp.h" + +#define SCLP_CMDW_CONFIGURE_AP 0x001f0001 +#define SCLP_CMDW_DECONFIGURE_AP 0x001e0001 + +struct ap_cfg_sccb { + struct sccb_header header; +} __packed; + +static int do_ap_configure(sclp_cmdw_t cmd, u32 apid) +{ + struct ap_cfg_sccb *sccb; + int rc; + + if (!SCLP_HAS_AP_RECONFIG) + return -EOPNOTSUPP; + + sccb = (struct ap_cfg_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!sccb) + return -ENOMEM; + + sccb->header.length = PAGE_SIZE; + cmd |= (apid & 0xFF) << 8; + rc = sclp_sync_request(cmd, sccb); + if (rc) + goto out; + switch (sccb->header.response_code) { + case 0x0020: case 0x0120: case 0x0440: case 0x0450: + break; + default: + pr_warn("configure AP adapter %u failed: cmd=0x%08x response=0x%04x\n", + apid, cmd, sccb->header.response_code); + rc = -EIO; + break; + } +out: + free_page((unsigned long) sccb); + return rc; +} + +int sclp_ap_configure(u32 apid) +{ + return do_ap_configure(SCLP_CMDW_CONFIGURE_AP, apid); +} +EXPORT_SYMBOL(sclp_ap_configure); + +int sclp_ap_deconfigure(u32 apid) +{ + return do_ap_configure(SCLP_CMDW_DECONFIGURE_AP, apid); +} +EXPORT_SYMBOL(sclp_ap_deconfigure); From 5caa2af97118308c79f29cc9876aec3ed504f9b0 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 27 Jul 2020 14:49:34 +0200 Subject: [PATCH 76/85] s390/ap: Support AP card SCLP config and deconfig operations Support SCLP AP adapter config and deconfig operations: The sysfs deconfig attribute /sys/devices/ap/cardxx/deconfig for each AP card is now read-write. Writing in a '1' triggers a synchronous SCLP request to configure the adapter, writing in a '0' sends a synchronous SCLP deconfigure request. Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_card.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index bf1e964b00484..d98bdd28d23ed 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "ap_bus.h" @@ -147,7 +148,29 @@ static ssize_t config_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%d\n", ac->config ? 1 : 0); } -static DEVICE_ATTR_RO(config); +static ssize_t config_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc = 0, cfg; + struct ap_card *ac = to_ap_card(dev); + + if (sscanf(buf, "%d\n", &cfg) != 1 || cfg < 0 || cfg > 1) + return -EINVAL; + + if (cfg && !ac->config) + rc = sclp_ap_configure(ac->id); + else if (!cfg && ac->config) + rc = sclp_ap_deconfigure(ac->id); + if (rc) + return rc; + + ac->config = cfg ? true : false; + + return count; +} + +static DEVICE_ATTR_RW(config); static struct attribute *ap_card_dev_attrs[] = { &dev_attr_hwtype.attr, From e0332629e33d1926c93348d918aaaf451ef9a16b Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 4 Aug 2020 09:27:47 +0200 Subject: [PATCH 77/85] s390/ap/zcrypt: revisit ap and zcrypt error handling Revisit the ap queue error handling: Based on discussions and evaluatios with the firmware folk here is now a rework of the response code handling for all the AP instructions. The idea is to distinguish between failures because of some kind of invalid request where a retry does not make any sense and a failure where another attempt to send the very same request may succeed. 
The first case is handled by returning EINVAL to the userspace application. The second case results in retries within the zcrypt API controlled by a per message retry counter. Revisit the zcrpyt error handling: Similar here, based on discussions with the firmware people here comes a rework of the handling of all the reply codes. Main point here is that there are only very few cases left, where a zcrypt device queue is switched to offline. It should never be the case that an AP reply message is 'unknown' to the device driver as it indicates a total mismatch between device driver and crypto card firmware. In all other cases, the code distinguishes between failure because of invalid message (see above - EINVAL) or failures of the infrastructure (see above - EAGAIN). Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/ap_queue.c | 8 +-- drivers/s390/crypto/zcrypt_debug.h | 8 +++ drivers/s390/crypto/zcrypt_error.h | 88 +++++++++--------------- drivers/s390/crypto/zcrypt_msgtype50.c | 50 +++++++------- drivers/s390/crypto/zcrypt_msgtype6.c | 92 +++++++++++++------------- 6 files changed, 116 insertions(+), 131 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 56c8bed7b6a17..03fd95b5a0e8e 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -50,6 +50,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_RESPONSE_NO_FIRST_PART 0x13 #define AP_RESPONSE_MESSAGE_TOO_BIG 0x15 #define AP_RESPONSE_REQ_FAC_NOT_INST 0x16 +#define AP_RESPONSE_INVALID_DOMAIN 0x42 /* * Known device types diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 5523338ca65d6..ada37f1c7ac71 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -241,6 +241,9 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) case AP_RESPONSE_RESET_IN_PROGRESS: aq->sm_state = AP_SM_STATE_RESET_WAIT; return AP_SM_WAIT_TIMEOUT; + case AP_RESPONSE_INVALID_DOMAIN: + AP_DBF(DBF_WARN, "AP_RESPONSE_INVALID_DOMAIN on NQAP\n"); + fallthrough; case AP_RESPONSE_MESSAGE_TOO_BIG: case AP_RESPONSE_REQ_FAC_NOT_INST: list_del_init(&ap_msg->list); @@ -286,11 +289,6 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) aq->sm_state = AP_SM_STATE_RESET_WAIT; aq->interrupt = AP_INTR_DISABLED; return AP_SM_WAIT_TIMEOUT; - case AP_RESPONSE_BUSY: - return AP_SM_WAIT_TIMEOUT; - case AP_RESPONSE_Q_NOT_AVAIL: - case AP_RESPONSE_DECONFIGURED: - case AP_RESPONSE_CHECKSTOPPED: default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; diff --git a/drivers/s390/crypto/zcrypt_debug.h b/drivers/s390/crypto/zcrypt_debug.h index 241dbb5f75bf3..3225489a1c411 100644 --- a/drivers/s390/crypto/zcrypt_debug.h +++ b/drivers/s390/crypto/zcrypt_debug.h @@ -21,6 +21,14 @@ #define ZCRYPT_DBF(...) \ debug_sprintf_event(zcrypt_dbf_info, ##__VA_ARGS__) +#define ZCRYPT_DBF_ERR(...) \ + debug_sprintf_event(zcrypt_dbf_info, DBF_ERR, ##__VA_ARGS__) +#define ZCRYPT_DBF_WARN(...) \ + debug_sprintf_event(zcrypt_dbf_info, DBF_WARN, ##__VA_ARGS__) +#define ZCRYPT_DBF_INFO(...) \ + debug_sprintf_event(zcrypt_dbf_info, DBF_INFO, ##__VA_ARGS__) +#define ZCRYPT_DBF_DBG(...) 
\ + debug_sprintf_event(zcrypt_dbf_info, DBF_DEBUG, ##__VA_ARGS__) extern debug_info_t *zcrypt_dbf_info; diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 54a04f8c38ef9..39e626e3a3794 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -52,7 +52,6 @@ struct error_hdr { #define REP82_ERROR_INVALID_COMMAND 0x30 #define REP82_ERROR_MALFORMED_MSG 0x40 #define REP82_ERROR_INVALID_SPECIAL_CMD 0x41 -#define REP82_ERROR_INVALID_DOMAIN_PRECHECK 0x42 #define REP82_ERROR_RESERVED_FIELDO 0x50 /* old value */ #define REP82_ERROR_WORD_ALIGNMENT 0x60 #define REP82_ERROR_MESSAGE_LENGTH 0x80 @@ -67,7 +66,6 @@ struct error_hdr { #define REP82_ERROR_ZERO_BUFFER_LEN 0xB0 #define REP88_ERROR_MODULE_FAILURE 0x10 - #define REP88_ERROR_MESSAGE_TYPE 0x20 #define REP88_ERROR_MESSAGE_MALFORMD 0x22 #define REP88_ERROR_MESSAGE_LENGTH 0x23 @@ -85,78 +83,56 @@ static inline int convert_error(struct zcrypt_queue *zq, int queue = AP_QID_QUEUE(zq->queue->qid); switch (ehdr->reply_code) { - case REP82_ERROR_OPERAND_INVALID: - case REP82_ERROR_OPERAND_SIZE: - case REP82_ERROR_EVEN_MOD_IN_OPND: - case REP88_ERROR_MESSAGE_MALFORMD: - case REP82_ERROR_INVALID_DOMAIN_PRECHECK: - case REP82_ERROR_INVALID_DOMAIN_PENDING: - case REP82_ERROR_INVALID_SPECIAL_CMD: - case REP82_ERROR_FILTERED_BY_HYPERVISOR: - // REP88_ERROR_INVALID_KEY // '82' CEX2A - // REP88_ERROR_OPERAND // '84' CEX2A - // REP88_ERROR_OPERAND_EVEN_MOD // '85' CEX2A - /* Invalid input data. */ + case REP82_ERROR_INVALID_MSG_LEN: /* 0x23 */ + case REP82_ERROR_RESERVD_FIELD: /* 0x24 */ + case REP82_ERROR_FORMAT_FIELD: /* 0x29 */ + case REP82_ERROR_MALFORMED_MSG: /* 0x40 */ + case REP82_ERROR_INVALID_SPECIAL_CMD: /* 0x41 */ + case REP82_ERROR_MESSAGE_LENGTH: /* 0x80 */ + case REP82_ERROR_OPERAND_INVALID: /* 0x82 */ + case REP82_ERROR_OPERAND_SIZE: /* 0x84 */ + case REP82_ERROR_EVEN_MOD_IN_OPND: /* 0x85 */ + case REP82_ERROR_INVALID_DOMAIN_PENDING: /* 0x8A */ + case REP82_ERROR_FILTERED_BY_HYPERVISOR: /* 0x8B */ + case REP82_ERROR_PACKET_TRUNCATED: /* 0xA0 */ + case REP88_ERROR_MESSAGE_MALFORMD: /* 0x22 */ + case REP88_ERROR_KEY_TYPE: /* 0x34 */ + /* RY indicates malformed request */ ZCRYPT_DBF(DBF_WARN, - "device=%02x.%04x reply=0x%02x => rc=EINVAL\n", + "dev=%02x.%04x RY=0x%02x => rc=EINVAL\n", card, queue, ehdr->reply_code); return -EINVAL; - case REP82_ERROR_MESSAGE_TYPE: - // REP88_ERROR_MESSAGE_TYPE // '20' CEX2A + case REP82_ERROR_MACHINE_FAILURE: /* 0x10 */ + case REP82_ERROR_MESSAGE_TYPE: /* 0x20 */ + case REP82_ERROR_TRANSPORT_FAIL: /* 0x90 */ /* - * To sent a message of the wrong type is a bug in the - * device driver. Send error msg, disable the device - * and then repeat the request. + * Msg to wrong type or card/infrastructure failure. + * Trigger rescan of the ap bus, trigger retry request. 
*/ atomic_set(&zcrypt_rescan_req, 1); - zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", - card, queue); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n", - card, queue, ehdr->reply_code); - return -EAGAIN; - case REP82_ERROR_TRANSPORT_FAIL: - /* Card or infrastructure failure, disable card */ - atomic_set(&zcrypt_rescan_req, 1); - zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", - card, queue); /* For type 86 response show the apfs value (failure reason) */ - if (ehdr->type == TYPE86_RSP_CODE) { + if (ehdr->reply_code == REP82_ERROR_TRANSPORT_FAIL && + ehdr->type == TYPE86_RSP_CODE) { struct { struct type86_hdr hdr; struct type86_fmt2_ext fmt2; } __packed * head = reply->msg; unsigned int apfs = *((u32 *)head->fmt2.apfs); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x reply=0x%02x apfs=0x%x => online=0 rc=EAGAIN\n", - card, queue, apfs, ehdr->reply_code); + ZCRYPT_DBF(DBF_WARN, + "dev=%02x.%04x RY=0x%02x apfs=0x%x => bus rescan, rc=EAGAIN\n", + card, queue, ehdr->reply_code, apfs); } else - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n", + ZCRYPT_DBF(DBF_WARN, + "dev=%02x.%04x RY=0x%02x => bus rescan, rc=EAGAIN\n", card, queue, ehdr->reply_code); return -EAGAIN; - case REP82_ERROR_MACHINE_FAILURE: - // REP88_ERROR_MODULE_FAILURE // '10' CEX2A - /* If a card fails disable it and repeat the request. */ - atomic_set(&zcrypt_rescan_req, 1); - zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", - card, queue); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n", - card, queue, ehdr->reply_code); - return -EAGAIN; default: - zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", - card, queue); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x reply=0x%02x => online=0 rc=EAGAIN\n", + /* Assume request is valid and a retry will be worth it */ + ZCRYPT_DBF(DBF_WARN, + "dev=%02x.%04x RY=0x%02x => rc=EAGAIN\n", card, queue, ehdr->reply_code); - return -EAGAIN; /* repeat the request on a different device. */ + return -EAGAIN; } } diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 349306c2f155d..0f7f404ed8583 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -356,15 +356,15 @@ static int convert_type80(struct zcrypt_queue *zq, if (t80h->len < sizeof(*t80h) + outputdatalength) { /* The result is too short, the CEXxA card may not do that.. */ zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", + pr_err("Crypto dev=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n", AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid)); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - t80h->code); - return -EAGAIN; /* repeat the request on a different device. 
*/ + AP_QID_QUEUE(zq->queue->qid), + t80h->code); + ZCRYPT_DBF_ERR("dev=%02x.%04x code=0x%02x => online=0 rc=EAGAIN\n", + AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), + t80h->code); + return -EAGAIN; } if (zq->zcard->user_space_type == ZCRYPT_CEX2A) BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE); @@ -376,10 +376,10 @@ static int convert_type80(struct zcrypt_queue *zq, return 0; } -static int convert_response(struct zcrypt_queue *zq, - struct ap_message *reply, - char __user *outputdata, - unsigned int outputdatalength) +static int convert_response_cex2a(struct zcrypt_queue *zq, + struct ap_message *reply, + char __user *outputdata, + unsigned int outputdatalength) { /* Response type byte is the second byte in the response. */ unsigned char rtype = ((unsigned char *) reply->msg)[1]; @@ -393,15 +393,15 @@ static int convert_response(struct zcrypt_queue *zq, outputdata, outputdatalength); default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", + pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid)); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - (unsigned int) rtype); - return -EAGAIN; /* repeat the request on a different device. */ + AP_QID_QUEUE(zq->queue->qid), + (int) rtype); + ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", + AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), + (int) rtype); + return -EAGAIN; } } @@ -478,8 +478,9 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, if (rc == 0) { rc = ap_msg.rc; if (rc == 0) - rc = convert_response(zq, &ap_msg, mex->outputdata, - mex->outputdatalength); + rc = convert_response_cex2a(zq, &ap_msg, + mex->outputdata, + mex->outputdatalength); } else /* Signal pending. */ ap_cancel_message(zq->queue, &ap_msg); @@ -524,8 +525,9 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, if (rc == 0) { rc = ap_msg.rc; if (rc == 0) - rc = convert_response(zq, &ap_msg, crt->outputdata, - crt->outputdatalength); + rc = convert_response_cex2a(zq, &ap_msg, + crt->outputdata, + crt->outputdatalength); } else /* Signal pending. 
*/ ap_cancel_message(zq->queue, &ap_msg); diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 51b9924753ee4..30eaa7a7aede0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -650,23 +650,22 @@ static int convert_type86_ica(struct zcrypt_queue *zq, (service_rc == 8 && service_rs == 72) || (service_rc == 8 && service_rs == 770) || (service_rc == 12 && service_rs == 769)) { - ZCRYPT_DBF(DBF_DEBUG, - "device=%02x.%04x rc/rs=%d/%d => rc=EINVAL\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - (int) service_rc, (int) service_rs); + ZCRYPT_DBF_WARN("dev=%02x.%04x rc/rs=%d/%d => rc=EINVAL\n", + AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), + (int) service_rc, (int) service_rs); return -EINVAL; } zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", + pr_err("Crypto dev=%02x.%04x rc/rs=%d/%d online=0 rc=EAGAIN\n", AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid)); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x rc/rs=%d/%d => online=0 rc=EAGAIN\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - (int) service_rc, (int) service_rs); - return -EAGAIN; /* repeat the request on a different device. */ + AP_QID_QUEUE(zq->queue->qid), + (int) service_rc, (int) service_rs); + ZCRYPT_DBF_ERR("dev=%02x.%04x rc/rs=%d/%d => online=0 rc=EAGAIN\n", + AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), + (int) service_rc, (int) service_rs); + return -EAGAIN; } data = msg->text; reply_len = msg->length - 2; @@ -800,17 +799,18 @@ static int convert_response_ica(struct zcrypt_queue *zq, return convert_type86_ica(zq, reply, outputdata, outputdatalength); fallthrough; /* wrong cprb version is an unknown response */ - default: /* Unknown response type, this should NEVER EVER happen */ + default: + /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", + pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid)); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - (int) msg->hdr.type); - return -EAGAIN; /* repeat the request on a different device. */ + AP_QID_QUEUE(zq->queue->qid), + (int) msg->hdr.type); + ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", + AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), + (int) msg->hdr.type); + return -EAGAIN; } } @@ -836,15 +836,15 @@ static int convert_response_xcrb(bool userspace, struct zcrypt_queue *zq, default: /* Unknown response type, this should NEVER EVER happen */ xcRB->status = 0x0008044DL; /* HDD_InvalidParm */ zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", + pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid)); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - (int) msg->hdr.type); - return -EAGAIN; /* repeat the request on a different device. 
*/ + AP_QID_QUEUE(zq->queue->qid), + (int) msg->hdr.type); + ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", + AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), + (int) msg->hdr.type); + return -EAGAIN; } } @@ -865,15 +865,15 @@ static int convert_response_ep11_xcrb(bool userspace, struct zcrypt_queue *zq, fallthrough; /* wrong cprb version is an unknown resp */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", + pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid)); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - (int) msg->hdr.type); - return -EAGAIN; /* repeat the request on a different device. */ + AP_QID_QUEUE(zq->queue->qid), + (int) msg->hdr.type); + ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", + AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), + (int) msg->hdr.type); + return -EAGAIN; } } @@ -895,15 +895,15 @@ static int convert_response_rng(struct zcrypt_queue *zq, fallthrough; /* wrong cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ zq->online = 0; - pr_err("Cryptographic device %02x.%04x failed and was set offline\n", + pr_err("Crypto dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid)); - ZCRYPT_DBF(DBF_ERR, - "device=%02x.%04x rtype=0x%02x => online=0 rc=EAGAIN\n", - AP_QID_CARD(zq->queue->qid), - AP_QID_QUEUE(zq->queue->qid), - (int) msg->hdr.type); - return -EAGAIN; /* repeat the request on a different device. */ + AP_QID_QUEUE(zq->queue->qid), + (int) msg->hdr.type); + ZCRYPT_DBF_ERR("dev=%02x.%04x unknown response type 0x%02x => online=0 rc=EAGAIN\n", + AP_QID_CARD(zq->queue->qid), + AP_QID_QUEUE(zq->queue->qid), + (int) msg->hdr.type); + return -EAGAIN; } } From 3730f5300b450bc89306c3ab79c254e6039d9197 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Sat, 26 Sep 2020 22:51:38 +0200 Subject: [PATCH 78/85] s390/zcrypt: move ap_msg param one level up the call chain Move the creating and disposal of the struct ap_message one level up the call chain. The ap message was constructed in the calling functions in msgtype50 and msgtype6 but only for the ica rsa messages. For CCA and EP11 CPRBs the ap message struct is created in the zcrypt api functions. This patch moves the construction of the ap message struct into the functions zcrypt_rsa_modexpo and zcrypt_rsa_crt. So now all the 4 zcrypt api functions zcrypt_rsa_modexpo, zcrypt_rsa_crt, zcrypt_send_cprb and zcrypt_send_ep11_cprb appear and act similar. There are no functional changes coming with this patch. However, the availability of the ap_message struct has advantages which will be needed by a follow up patch. 
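To make the common shape visible, here is a condensed sketch of zcrypt_rsa_modexpo() as it looks after this patch; queue selection, permission checks and the error paths are left out and only the lines relevant for the ap_message handling are shown (this is an illustration of the hunks below, not the complete function):

	struct ap_message ap_msg;
	...
	ap_init_message(&ap_msg);
	...
	/* pick pref_zq, then hand the message down to the msgtype code */
	rc = pref_zq->ops->rsa_modexpo(pref_zq, mex, &ap_msg);
	...
out:
	/* disposal now happens in the api function, on every exit path */
	ap_release_message(&ap_msg);

zcrypt_rsa_crt(), zcrypt_send_cprb() and zcrypt_send_ep11_cprb() follow the very same pattern after this change.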
Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_api.c | 12 ++++- drivers/s390/crypto/zcrypt_api.h | 6 ++- drivers/s390/crypto/zcrypt_msgtype50.c | 74 +++++++++++++------------- drivers/s390/crypto/zcrypt_msgtype6.c | 62 ++++++++++----------- 4 files changed, 82 insertions(+), 72 deletions(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index ab18e847cee75..d642be65a3a08 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -635,6 +635,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; + struct ap_message ap_msg; unsigned int wgt = 0, pref_wgt = 0; unsigned int func_code; int cpen, qpen, qid = 0, rc = -ENODEV; @@ -642,6 +643,8 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO); + ap_init_message(&ap_msg); + if (mex->outputdatalength < mex->inputdatalength) { func_code = 0; rc = -EINVAL; @@ -712,13 +715,14 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, } qid = pref_zq->queue->qid; - rc = pref_zq->ops->rsa_modexpo(pref_zq, mex); + rc = pref_zq->ops->rsa_modexpo(pref_zq, mex, &ap_msg); spin_lock(&zcrypt_list_lock); zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt); spin_unlock(&zcrypt_list_lock); out: + ap_release_message(&ap_msg); if (tr) { tr->last_rc = rc; tr->last_qid = qid; @@ -734,6 +738,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, { struct zcrypt_card *zc, *pref_zc; struct zcrypt_queue *zq, *pref_zq; + struct ap_message ap_msg; unsigned int wgt = 0, pref_wgt = 0; unsigned int func_code; int cpen, qpen, qid = 0, rc = -ENODEV; @@ -741,6 +746,8 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, trace_s390_zcrypt_req(crt, TP_ICARSACRT); + ap_init_message(&ap_msg); + if (crt->outputdatalength < crt->inputdatalength) { func_code = 0; rc = -EINVAL; @@ -811,13 +818,14 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, } qid = pref_zq->queue->qid; - rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt); + rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt, &ap_msg); spin_lock(&zcrypt_list_lock); zcrypt_drop_queue(pref_zc, pref_zq, mod, wgt); spin_unlock(&zcrypt_list_lock); out: + ap_release_message(&ap_msg); if (tr) { tr->last_rc = rc; tr->last_qid = qid; diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 263ed17354314..03804f03b5d05 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -68,9 +68,11 @@ struct zcrypt_track { #define TRACK_AGAIN_QUEUE_WEIGHT_PENALTY 10000 struct zcrypt_ops { - long (*rsa_modexpo)(struct zcrypt_queue *, struct ica_rsa_modexpo *); + long (*rsa_modexpo)(struct zcrypt_queue *, struct ica_rsa_modexpo *, + struct ap_message *); long (*rsa_modexpo_crt)(struct zcrypt_queue *, - struct ica_rsa_modexpo_crt *); + struct ica_rsa_modexpo_crt *, + struct ap_message *); long (*send_cprb)(bool userspace, struct zcrypt_queue *, struct ica_xcRB *, struct ap_message *); long (*send_ep11_cprb)(bool userspace, struct zcrypt_queue *, struct ep11_urb *, diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 0f7f404ed8583..c543b59595099 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -450,42 +450,41 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0); * @mex: pointer to the modexpo request buffer */ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, - struct 
ica_rsa_modexpo *mex) + struct ica_rsa_modexpo *mex, + struct ap_message *ap_msg) { - struct ap_message ap_msg; struct completion work; int rc; - ap_init_message(&ap_msg); if (zq->zcard->user_space_type == ZCRYPT_CEX2A) - ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); + ap_msg->msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); else - ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); - if (!ap_msg.msg) + ap_msg->msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; - ap_msg.receive = zcrypt_cex2a_receive; - ap_msg.psmid = (((unsigned long long) current->pid) << 32) + - atomic_inc_return(&zcrypt_step); - ap_msg.private = &work; - rc = ICAMEX_msg_to_type50MEX_msg(zq, &ap_msg, mex); + ap_msg->receive = zcrypt_cex2a_receive; + ap_msg->psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg->private = &work; + rc = ICAMEX_msg_to_type50MEX_msg(zq, ap_msg, mex); if (rc) - goto out_free; + goto out; init_completion(&work); - rc = ap_queue_message(zq->queue, &ap_msg); + rc = ap_queue_message(zq->queue, ap_msg); if (rc) - goto out_free; + goto out; rc = wait_for_completion_interruptible(&work); if (rc == 0) { - rc = ap_msg.rc; + rc = ap_msg->rc; if (rc == 0) - rc = convert_response_cex2a(zq, &ap_msg, + rc = convert_response_cex2a(zq, ap_msg, mex->outputdata, mex->outputdatalength); } else /* Signal pending. */ - ap_cancel_message(zq->queue, &ap_msg); -out_free: - kfree(ap_msg.msg); + ap_cancel_message(zq->queue, ap_msg); +out: + ap_msg->private = NULL; return rc; } @@ -497,42 +496,41 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq, * @crt: pointer to the modexpoc_crt request buffer */ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq, - struct ica_rsa_modexpo_crt *crt) + struct ica_rsa_modexpo_crt *crt, + struct ap_message *ap_msg) { - struct ap_message ap_msg; struct completion work; int rc; - ap_init_message(&ap_msg); if (zq->zcard->user_space_type == ZCRYPT_CEX2A) - ap_msg.msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); + ap_msg->msg = kmalloc(MSGTYPE50_CRB2_MAX_MSG_SIZE, GFP_KERNEL); else - ap_msg.msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); - if (!ap_msg.msg) + ap_msg->msg = kmalloc(MSGTYPE50_CRB3_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; - ap_msg.receive = zcrypt_cex2a_receive; - ap_msg.psmid = (((unsigned long long) current->pid) << 32) + - atomic_inc_return(&zcrypt_step); - ap_msg.private = &work; - rc = ICACRT_msg_to_type50CRT_msg(zq, &ap_msg, crt); + ap_msg->receive = zcrypt_cex2a_receive; + ap_msg->psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg->private = &work; + rc = ICACRT_msg_to_type50CRT_msg(zq, ap_msg, crt); if (rc) - goto out_free; + goto out; init_completion(&work); - rc = ap_queue_message(zq->queue, &ap_msg); + rc = ap_queue_message(zq->queue, ap_msg); if (rc) - goto out_free; + goto out; rc = wait_for_completion_interruptible(&work); if (rc == 0) { - rc = ap_msg.rc; + rc = ap_msg->rc; if (rc == 0) - rc = convert_response_cex2a(zq, &ap_msg, + rc = convert_response_cex2a(zq, ap_msg, crt->outputdata, crt->outputdatalength); } else /* Signal pending. 
*/ - ap_cancel_message(zq->queue, &ap_msg); -out_free: - kfree(ap_msg.msg); + ap_cancel_message(zq->queue, ap_msg); +out: + ap_msg->private = NULL; return rc; } diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 30eaa7a7aede0..aab7f06920353 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1007,41 +1007,42 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0); * @mex: pointer to the modexpo request buffer */ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, - struct ica_rsa_modexpo *mex) + struct ica_rsa_modexpo *mex, + struct ap_message *ap_msg) { - struct ap_message ap_msg; struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_ICA, }; int rc; - ap_init_message(&ap_msg); - ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL); - if (!ap_msg.msg) + ap_msg->msg = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; - ap_msg.receive = zcrypt_msgtype6_receive; - ap_msg.psmid = (((unsigned long long) current->pid) << 32) + - atomic_inc_return(&zcrypt_step); - ap_msg.private = &resp_type; - rc = ICAMEX_msg_to_type6MEX_msgX(zq, &ap_msg, mex); + ap_msg->receive = zcrypt_msgtype6_receive; + ap_msg->psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg->private = &resp_type; + rc = ICAMEX_msg_to_type6MEX_msgX(zq, ap_msg, mex); if (rc) goto out_free; init_completion(&resp_type.work); - rc = ap_queue_message(zq->queue, &ap_msg); + rc = ap_queue_message(zq->queue, ap_msg); if (rc) goto out_free; rc = wait_for_completion_interruptible(&resp_type.work); if (rc == 0) { - rc = ap_msg.rc; + rc = ap_msg->rc; if (rc == 0) - rc = convert_response_ica(zq, &ap_msg, + rc = convert_response_ica(zq, ap_msg, mex->outputdata, mex->outputdatalength); } else /* Signal pending. 
*/ - ap_cancel_message(zq->queue, &ap_msg); + ap_cancel_message(zq->queue, ap_msg); out_free: - free_page((unsigned long) ap_msg.msg); + free_page((unsigned long) ap_msg->msg); + ap_msg->private = NULL; + ap_msg->msg = NULL; return rc; } @@ -1053,42 +1054,43 @@ static long zcrypt_msgtype6_modexpo(struct zcrypt_queue *zq, * @crt: pointer to the modexpoc_crt request buffer */ static long zcrypt_msgtype6_modexpo_crt(struct zcrypt_queue *zq, - struct ica_rsa_modexpo_crt *crt) + struct ica_rsa_modexpo_crt *crt, + struct ap_message *ap_msg) { - struct ap_message ap_msg; struct response_type resp_type = { .type = CEXXC_RESPONSE_TYPE_ICA, }; int rc; - ap_init_message(&ap_msg); - ap_msg.msg = (void *) get_zeroed_page(GFP_KERNEL); - if (!ap_msg.msg) + ap_msg->msg = (void *) get_zeroed_page(GFP_KERNEL); + if (!ap_msg->msg) return -ENOMEM; - ap_msg.receive = zcrypt_msgtype6_receive; - ap_msg.psmid = (((unsigned long long) current->pid) << 32) + - atomic_inc_return(&zcrypt_step); - ap_msg.private = &resp_type; - rc = ICACRT_msg_to_type6CRT_msgX(zq, &ap_msg, crt); + ap_msg->receive = zcrypt_msgtype6_receive; + ap_msg->psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg->private = &resp_type; + rc = ICACRT_msg_to_type6CRT_msgX(zq, ap_msg, crt); if (rc) goto out_free; init_completion(&resp_type.work); - rc = ap_queue_message(zq->queue, &ap_msg); + rc = ap_queue_message(zq->queue, ap_msg); if (rc) goto out_free; rc = wait_for_completion_interruptible(&resp_type.work); if (rc == 0) { - rc = ap_msg.rc; + rc = ap_msg->rc; if (rc == 0) - rc = convert_response_ica(zq, &ap_msg, + rc = convert_response_ica(zq, ap_msg, crt->outputdata, crt->outputdatalength); } else { /* Signal pending. */ - ap_cancel_message(zq->queue, &ap_msg); + ap_cancel_message(zq->queue, ap_msg); } out_free: - free_page((unsigned long) ap_msg.msg); + free_page((unsigned long) ap_msg->msg); + ap_msg->private = NULL; + ap_msg->msg = NULL; return rc; } From 27c4f6738bdc535e42dfc1295dadc78ab7582939 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 29 Sep 2020 16:07:22 +0200 Subject: [PATCH 79/85] s390/zcrypt: Introduce Failure Injection feature Introduce a way to specify additional debug flags with an crpyto request to be able to trigger certain failures within the zcrypt device drivers and/or ap core code. This failure injection possibility is only enabled with a kernel debug build CONFIG_ZCRYPT_DEBUG) and should never be available on a regular kernel running in production environment. Details: * The ioctl(ICARSAMODEXPO) get's a struct ica_rsa_modexpo. If the leftmost bit of the 32 bit unsigned int inputdatalength field is set, the uppermost 16 bits are separated and used as debug flag value. The process is checked to have the CAP_SYS_ADMIN capability enabled or EPERM is returned. * The ioctl(ICARSACRT) get's a struct ica_rsa_modexpo_crt. If the leftmost bit of the 32 bit unsigned int inputdatalength field is set, the uppermost 16 bits are separated and used als debug flag value. The process is checked to have the CAP_SYS_ADMIN capability enabled or EPERM is returned. * The ioctl(ZSECSENDCPRB) used to send CCA CPRBs get's a struct ica_xcRB. If the leftmost bit of the 32 bit unsigned int status field is set, the uppermost 16 bits of this field are used as debug flag value. The process is checked to have the CAP_SYS_ADMIN capability enabled or EPERM is returned. * The ioctl(ZSENDEP11CPRB) used to send EP11 CPRBs get's a struct ep11_urb. 
If the leftmost bit of the 64 bit unsigned int req_len field is set, the uppermost 16 bits of this field are used as debug flag value. The process is checked to have the CAP_SYS_ADMIN capability enabled or EPERM is returned. So it is possible to send an additional 16 bit value to the zcrypt API to be used to carry a failure injection command which may trigger special behavior within the zcrypt API and layers below. This 16 bit value is for the rest of the test referred as 'fi command' for Failure Injection. The lower 8 bits of the fi command construct a numerical argument in the range of 1-255 and is the 'fi action' to be performed with the request or the resulting reply: * 0x00 (all requests): No failure injection action but flags may be provided which may affect the processing of the request or reply. * 0x01 (only CCA CPRBs): The CPRB's agent_ID field is set to 'FF'. This results in an reply code 0x90 (Transport-Protocol Failure). * 0x02 (only CCA CPRBs): After the APQN to send to has been chosen, the domain field within the CPRB is overwritten with value 99 to enforce an reply with RY 0x8A. * 0x03 (all requests): At NQAP invocation the invalid qid value 0xFF00 is used causing an response code of 0x01 (AP queue not valid). The upper 8 bits of the fi command may carry bit flags which may influence the processing of an request or response: * 0x01: No retry. If this bit is set, the usual loop in the zcrypt API which retries an CPRB up to 10 times when the lower layers return with EAGAIN is abandoned after the first attempt to send the CPRB. * 0x02: Toggle special. Toggles the special bit on this request. This should result in an reply code RY~0x41 and result in an ioctl failure with errno EINVAL. This failure injection possibilities may get some further extensions in the future. As of now this is a starting point for Continuous Test and Integration to trigger some failures and watch for the reaction of the ap bus and zcrypt device driver code. Signed-off-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_bus.h | 29 +++++++- drivers/s390/crypto/ap_queue.c | 10 ++- drivers/s390/crypto/zcrypt_api.c | 91 ++++++++++++++++++++++++++ drivers/s390/crypto/zcrypt_api.h | 3 + drivers/s390/crypto/zcrypt_msgtype50.c | 11 ++++ drivers/s390/crypto/zcrypt_msgtype6.c | 10 +++ 6 files changed, 151 insertions(+), 3 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 03fd95b5a0e8e..5029b80132aae 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -200,12 +200,37 @@ struct ap_queue { typedef enum ap_sm_wait (ap_func_t)(struct ap_queue *queue); +/* failure injection cmd struct */ +struct ap_fi { + union { + u16 cmd; /* fi flags + action */ + struct { + u8 flags; /* fi flags only */ + u8 action; /* fi action only */ + }; + }; +}; + +/* all currently known fi actions */ +enum ap_fi_actions { + AP_FI_ACTION_CCA_AGENT_FF = 0x01, + AP_FI_ACTION_CCA_DOM_INVAL = 0x02, + AP_FI_ACTION_NQAP_QID_INVAL = 0x03, +}; + +/* all currently known fi flags */ +enum ap_fi_flags { + AP_FI_FLAG_NO_RETRY = 0x01, + AP_FI_FLAG_TOGGLE_SPECIAL = 0x02, +}; + struct ap_message { struct list_head list; /* Request queueing. */ unsigned long long psmid; /* Message id. */ void *msg; /* Pointer to message buffer. */ unsigned int len; /* Message length. 
*/ - u32 flags; /* Flags, see AP_MSG_FLAG_xxx */ + u16 flags; /* Flags, see AP_MSG_FLAG_xxx */ + struct ap_fi fi; /* Failure Injection cmd */ int rc; /* Return code for this message */ void *private; /* ap driver private pointer. */ /* receive is called from tasklet context */ @@ -213,7 +238,7 @@ struct ap_message { struct ap_message *); }; -#define AP_MSG_FLAG_SPECIAL (1 << 16) /* flag msg as 'special' with NQAP */ +#define AP_MSG_FLAG_SPECIAL 1 /* flag msg as 'special' with NQAP */ /** * ap_init_message() - Initialize ap_message. diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index ada37f1c7ac71..13d4fe2c6127b 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -214,12 +214,20 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) { struct ap_queue_status status; struct ap_message *ap_msg; + ap_qid_t qid = aq->qid; if (aq->requestq_count <= 0) return AP_SM_WAIT_NONE; /* Start the next request on the queue. */ ap_msg = list_entry(aq->requestq.next, struct ap_message, list); - status = __ap_send(aq->qid, ap_msg->psmid, +#ifdef CONFIG_ZCRYPT_DEBUG + if (ap_msg->fi.action == AP_FI_ACTION_NQAP_QID_INVAL) { + AP_DBF_WARN("%s fi cmd 0x%04x: forcing invalid qid 0xFF00\n", + __func__, ap_msg->fi.cmd); + qid = 0xFF00; + } +#endif + status = __ap_send(qid, ap_msg->psmid, ap_msg->msg, ap_msg->len, ap_msg->flags & AP_MSG_FLAG_SPECIAL); switch (status.response_code) { diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index d642be65a3a08..08bb0f60c4b0f 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -645,6 +646,11 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, ap_init_message(&ap_msg); +#ifdef CONFIG_ZCRYPT_DEBUG + if (tr && tr->fi.cmd) + ap_msg.fi.cmd = tr->fi.cmd; +#endif + if (mex->outputdatalength < mex->inputdatalength) { func_code = 0; rc = -EINVAL; @@ -748,6 +754,11 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, ap_init_message(&ap_msg); +#ifdef CONFIG_ZCRYPT_DEBUG + if (tr && tr->fi.cmd) + ap_msg.fi.cmd = tr->fi.cmd; +#endif + if (crt->outputdatalength < crt->inputdatalength) { func_code = 0; rc = -EINVAL; @@ -852,6 +863,17 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, xcRB->status = 0; ap_init_message(&ap_msg); + +#ifdef CONFIG_ZCRYPT_DEBUG + if (tr && tr->fi.cmd) + ap_msg.fi.cmd = tr->fi.cmd; + if (tr && tr->fi.action == AP_FI_ACTION_CCA_AGENT_FF) { + ZCRYPT_DBF_WARN("%s fi cmd 0x%04x: forcing invalid agent_ID 'FF'\n", + __func__, tr->fi.cmd); + xcRB->agent_ID = 0x4646; + } +#endif + rc = get_cprb_fc(userspace, xcRB, &ap_msg, &func_code, &domain); if (rc) goto out; @@ -927,6 +949,14 @@ static long _zcrypt_send_cprb(bool userspace, struct ap_perms *perms, if (*domain == AUTOSEL_DOM) *domain = AP_QID_QUEUE(qid); +#ifdef CONFIG_ZCRYPT_DEBUG + if (tr && tr->fi.action == AP_FI_ACTION_CCA_DOM_INVAL) { + ZCRYPT_DBF_WARN("%s fi cmd 0x%04x: forcing invalid domain\n", + __func__, tr->fi.cmd); + *domain = 99; + } +#endif + rc = pref_zq->ops->send_cprb(userspace, pref_zq, xcRB, &ap_msg); spin_lock(&zcrypt_list_lock); @@ -995,6 +1025,11 @@ static long _zcrypt_send_ep11_cprb(bool userspace, struct ap_perms *perms, ap_init_message(&ap_msg); +#ifdef CONFIG_ZCRYPT_DEBUG + if (tr && tr->fi.cmd) + ap_msg.fi.cmd = tr->fi.cmd; +#endif + target_num = (unsigned short) xcrb->targets_num; /* empty list indicates autoselect (all available targets) 
*/ @@ -1377,10 +1412,24 @@ static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg) memset(&tr, 0, sizeof(tr)); if (copy_from_user(&mex, umex, sizeof(mex))) return -EFAULT; + +#ifdef CONFIG_ZCRYPT_DEBUG + if (mex.inputdatalength & (1U << 31)) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + tr.fi.cmd = (u16)(mex.inputdatalength >> 16); + } + mex.inputdatalength &= 0x0000FFFF; +#endif + do { rc = zcrypt_rsa_modexpo(perms, &tr, &mex); if (rc == -EAGAIN) tr.again_counter++; +#ifdef CONFIG_ZCRYPT_DEBUG + if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY)) + break; +#endif } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) @@ -1406,10 +1455,24 @@ static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg) memset(&tr, 0, sizeof(tr)); if (copy_from_user(&crt, ucrt, sizeof(crt))) return -EFAULT; + +#ifdef CONFIG_ZCRYPT_DEBUG + if (crt.inputdatalength & (1U << 31)) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + tr.fi.cmd = (u16)(crt.inputdatalength >> 16); + } + crt.inputdatalength &= 0x0000FFFF; +#endif + do { rc = zcrypt_rsa_crt(perms, &tr, &crt); if (rc == -EAGAIN) tr.again_counter++; +#ifdef CONFIG_ZCRYPT_DEBUG + if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY)) + break; +#endif } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) @@ -1435,10 +1498,24 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg) memset(&tr, 0, sizeof(tr)); if (copy_from_user(&xcRB, uxcRB, sizeof(xcRB))) return -EFAULT; + +#ifdef CONFIG_ZCRYPT_DEBUG + if (xcRB.status & (1U << 31)) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + tr.fi.cmd = (u16)(xcRB.status >> 16); + } + xcRB.status &= 0x0000FFFF; +#endif + do { rc = _zcrypt_send_cprb(true, perms, &tr, &xcRB); if (rc == -EAGAIN) tr.again_counter++; +#ifdef CONFIG_ZCRYPT_DEBUG + if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY)) + break; +#endif } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) @@ -1465,10 +1542,24 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg) memset(&tr, 0, sizeof(tr)); if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) return -EFAULT; + +#ifdef CONFIG_ZCRYPT_DEBUG + if (xcrb.req_len & (1ULL << 63)) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + tr.fi.cmd = (u16)(xcrb.req_len >> 48); + } + xcrb.req_len &= 0x0000FFFFFFFFFFFFULL; +#endif + do { rc = _zcrypt_send_ep11_cprb(true, perms, &tr, &xcrb); if (rc == -EAGAIN) tr.again_counter++; +#ifdef CONFIG_ZCRYPT_DEBUG + if (rc == -EAGAIN && (tr.fi.flags & AP_FI_FLAG_NO_RETRY)) + break; +#endif } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); /* on failure: retry once again after a requested rescan */ if ((rc == -ENODEV) && (zcrypt_process_rescan())) diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 03804f03b5d05..51c0b8bdef50e 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -60,6 +60,9 @@ struct zcrypt_track { int again_counter; /* retry attempts counter */ int last_qid; /* last qid used */ int last_rc; /* last return code */ +#ifdef CONFIG_ZCRYPT_DEBUG + struct ap_fi fi; /* failure injection cmd */ +#endif }; /* defines related to message tracking 
*/ diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index c543b59595099..bf14ee445f892 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -246,6 +246,12 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq, copy_from_user(exp, mex->b_key, mod_len) || copy_from_user(inp, mex->inputdata, mod_len)) return -EFAULT; + +#ifdef CONFIG_ZCRYPT_DEBUG + if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL) + ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; +#endif + return 0; } @@ -332,6 +338,11 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq, copy_from_user(inp, crt->inputdata, mod_len)) return -EFAULT; +#ifdef CONFIG_ZCRYPT_DEBUG + if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL) + ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; +#endif + return 0; } diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index aab7f06920353..307f90657d1dd 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -482,6 +482,11 @@ static int XCRB_msg_to_type6CPRB_msgX(bool userspace, struct ap_message *ap_msg, || memcmp(function_code, "AU", 2) == 0) ap_msg->flags |= AP_MSG_FLAG_SPECIAL; +#ifdef CONFIG_ZCRYPT_DEBUG + if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL) + ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; +#endif + /* copy data block */ if (xcRB->request_data_length && z_copy_from_user(userspace, req_data, xcRB->request_data_address, @@ -569,6 +574,11 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap if (msg->cprbx.flags & 0x20) ap_msg->flags |= AP_MSG_FLAG_SPECIAL; +#ifdef CONFIG_ZCRYPT_DEBUG + if (ap_msg->fi.flags & AP_FI_FLAG_TOGGLE_SPECIAL) + ap_msg->flags ^= AP_MSG_FLAG_SPECIAL; +#endif + return 0; } From 4aa32ee3c058847ff935d1a65da309b67b65354a Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 2 Oct 2020 11:16:49 +0200 Subject: [PATCH 80/85] s390/lib: fix kernel doc for memcmp() s/count/n Signed-off-by: Julian Wiedmann Acked-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/lib/string.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 0e30e6e43b0c5..93b3209b94a2f 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -333,7 +333,7 @@ EXPORT_SYMBOL(memchr); * memcmp - Compare two areas of memory * @s1: One area of memory * @s2: Another area of memory - * @count: The size of the area. + * @n: The size of the area. 
*/ #ifdef __HAVE_ARCH_MEMCMP int memcmp(const void *s1, const void *s2, size_t n) From eefc69a09ca5b441ee136f9fb68ab5970cfc2d51 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 2 Oct 2020 11:18:19 +0200 Subject: [PATCH 81/85] s390/sie: fix typo in SIGP code description s/ait address/at address Signed-off-by: Julian Wiedmann Acked-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- arch/s390/include/uapi/asm/sie.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 6ca1e68d71031..ede318653c87d 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -29,7 +29,7 @@ { 0x13, "SIGP conditional emergency signal" }, \ { 0x15, "SIGP sense running" }, \ { 0x16, "SIGP set multithreading"}, \ - { 0x17, "SIGP store additional status ait address"} + { 0x17, "SIGP store additional status at address"} #define icpt_prog_codes \ { 0x0001, "Prog Operation" }, \ From b61e1f3281c5a53f24f47849873463514f58c1b8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Sep 2020 10:26:19 +0200 Subject: [PATCH 82/85] s390/kprobes: move insn_page to text segment Move the in-kernel kprobes insn page to text segment. Rationale: having that page in rw data segment is suboptimal, since as soon as a kprobe is set, this will split the 1:1 kernel mapping for a single page which get new permissions. Note: there is always at least one kprobe present for the kretprobe trampoline; so the mapping will always be split into smaller 4k mappings because of this. Moving the kprobes insn page into text segment makes sure that the page is mapped RO/X in any case, and avoids that the 1:1 mapping is split. The kprobe insn_page is defined as a dummy function which is filled with "br %r14" instructions. 
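With the page now defined as a dummy function in the text segment, the insn page allocator shrinks to a plain handover; a sketch of the two helpers after this patch (taken from the kprobes.c hunk below, comments added for illustration):

	static void *alloc_s390_insn_page(void)
	{
		if (xchg(&insn_page_in_use, 1) == 1)
			return NULL;
		/* the page is in the text segment and already mapped RO/X */
		return &kprobes_insn_page;
	}

	static void free_s390_insn_page(void *page)
	{
		/* no remapping needed anymore, just mark the page as free */
		xchg(&insn_page_in_use, 0);
	}

The __set_memory() calls that used to flip the page permissions are gone, which is exactly what keeps the 1:1 kernel mapping from being split.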
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/Makefile | 1 + arch/s390/kernel/entry.h | 2 ++ arch/s390/kernel/kprobes.c | 6 ++---- arch/s390/kernel/kprobes_insn_page.S | 22 ++++++++++++++++++++++ 4 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 arch/s390/kernel/kprobes_insn_page.S diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index efca70970761d..dd73b7f074237 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,6 +57,7 @@ obj-$(CONFIG_COMPAT) += $(compat-obj-y) obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_KPROBES) += kprobes_insn_page.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 6475a885cd60f..0f7e4e9176e08 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -87,4 +87,6 @@ void set_fs_fixup(void); unsigned long stack_alloc(void); void stack_free(unsigned long stack); +extern char kprobes_insn_page[]; + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index b34fa4eef7425..6574774d404e3 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -22,6 +22,7 @@ #include #include #include +#include "entry.h" DEFINE_PER_CPU(struct kprobe *, current_kprobe); DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -31,7 +32,6 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { }; DEFINE_INSN_CACHE_OPS(s390_insn); static int insn_page_in_use; -static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE); void *alloc_insn_page(void) { @@ -53,13 +53,11 @@ static void *alloc_s390_insn_page(void) { if (xchg(&insn_page_in_use, 1) == 1) return NULL; - __set_memory((unsigned long) &insn_page, 1, SET_MEMORY_RO | SET_MEMORY_X); - return &insn_page; + return &kprobes_insn_page; } static void free_s390_insn_page(void *page) { - __set_memory((unsigned long) page, 1, SET_MEMORY_RW | SET_MEMORY_NX); xchg(&insn_page_in_use, 0); } diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S new file mode 100644 index 0000000000000..f6cb022ef8c80 --- /dev/null +++ b/arch/s390/kernel/kprobes_insn_page.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +/* + * insn_page is a special 4k aligned dummy function for kprobes. + * It will contain all kprobed instructions that are out-of-line executed. + * The page must be within the kernel image to guarantee that the + * out-of-line instructions are within 2GB distance of their original + * location. Using a dummy function ensures that the insn_page is within + * the text section of the kernel and mapped read-only/executable from + * the beginning on, thus avoiding to split large mappings if the page + * would be in the data section instead. 
+ */ + .section .kprobes.text, "ax" + .align 4096 +ENTRY(kprobes_insn_page) + .rept 2048 + .word 0x07fe + .endr +ENDPROC(kprobes_insn_page) + .previous From 4366dd7251259806e57251cb2d699f0863841775 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Thu, 8 Oct 2020 08:43:09 +0200 Subject: [PATCH 83/85] s390/zcrypt: fix wrong format specifications Fixes 5 wrong format specification findings found by the kernel test robot in ap_queue.c: warning: format specifies type 'unsigned char' but the argument has type 'int' [-Wformat] __func__, status.response_code, Signed-off-by: Harald Freudenberger Reported-by: kernel test robot Fixes: 2ea2a6099ae3 ("s390/ap: add error response code field for ap queue devices") Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/ap_queue.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 13d4fe2c6127b..ecefc25eff0c0 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -197,7 +197,7 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq) default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; - AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; @@ -262,7 +262,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq) default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; - AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; @@ -300,7 +300,7 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq) default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; - AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; @@ -343,7 +343,7 @@ static enum ap_sm_wait ap_sm_reset_wait(struct ap_queue *aq) default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; - AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; @@ -384,7 +384,7 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq) default: aq->dev_state = AP_DEV_STATE_ERROR; aq->last_err_rc = status.response_code; - AP_DBF_WARN("%s RC 0x%02hhx on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", + AP_DBF_WARN("%s RC 0x%02x on 0x%02x.%04x -> AP_DEV_STATE_ERROR\n", __func__, status.response_code, AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); return AP_SM_WAIT_NONE; From db5273975622af17e265d5d96b41427cde4c25b0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 8 Oct 2020 16:28:15 +0200 Subject: [PATCH 84/85] s390/uaccess: add default cases for __put_user_fn()/__get_user_fn() Add default cases for __put_user_fn()/__get_user_fn(). This doesn't fix anything since the functions are only called with sane values. However we get rid of smatch warnings: ./arch/s390/include/asm/uaccess.h:143 __get_user_fn() error: uninitialized symbol 'rc'. 
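A condensed view of the __get_user_fn() side (the real 1, 2, 4 and 8 byte accesses via __put_get_user_asm() are folded into a comment here): since __get_user_bad() is declared noreturn, the added default case presumably lets the checker see that rc cannot reach the return statement uninitialized.

	int __get_user_bad(void) __attribute__((noreturn));

	static __always_inline int __get_user_fn(void *x, const void __user *ptr,
						 unsigned long size)
	{
		int rc;

		switch (size) {
		case 1: case 2: case 4: case 8:
			/* rc is set by the __put_get_user_asm() accesses here */
			break;
		default:
			__get_user_bad();	/* never reached with sane sizes */
			break;
		}
		return rc;
	}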
Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 23c85801cf043..bf47d93ff1c69 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -60,6 +60,9 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); #define INLINE_COPY_TO_USER #endif +int __put_user_bad(void) __attribute__((noreturn)); +int __get_user_bad(void) __attribute__((noreturn)); + #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES #define __put_get_user_asm(to, from, size, spec) \ @@ -109,6 +112,9 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon (unsigned long *)x, size, spec); break; + default: + __put_user_bad(); + break; } return rc; } @@ -139,6 +145,9 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign (unsigned long __user *)ptr, size, spec); break; + default: + __get_user_bad(); + break; } return rc; } @@ -190,8 +199,6 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s }) -int __put_user_bad(void) __attribute__((noreturn)); - #define __get_user(x, ptr) \ ({ \ int __gu_err = -EFAULT; \ @@ -238,8 +245,6 @@ int __put_user_bad(void) __attribute__((noreturn)); __get_user(x, ptr); \ }) -int __get_user_bad(void) __attribute__((noreturn)); - unsigned long __must_check raw_copy_in_user(void __user *to, const void __user *from, unsigned long n); From 10e5afb3d260f2d2521889d87ebdefb7fc3d4087 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 8 Oct 2020 16:43:17 +0200 Subject: [PATCH 85/85] s390/uaccess: fix indentation Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index bf47d93ff1c69..c868e7ee49b35 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s default: \ __put_user_bad(); \ break; \ - } \ + } \ __builtin_expect(__pu_err, 0); \ })