From 3ef32d90cdaa0cfa6c4ffd18f8d646a658163e3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:33 +0100 Subject: [PATCH 01/55] x86/vdso: Fix latent bug in vclock_pages calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vclock pages are *after* the non-vclock pages. Currently there are both two vclock and two non-vclock pages so the existing logic works by accident. As soon as the number of pages changes it will break however. This will be the case with the introduction of the generic vDSO data storage. Use a macro to keep the calculation understandable and in sync between the linker script and mapping code. Fixes: e93d2521b27f ("x86/vdso: Split virtual clock pages into dedicated mapping") Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-1-13a4669dfc8c@linutronix.de --- arch/x86/entry/vdso/vdso-layout.lds.S | 2 +- arch/x86/entry/vdso/vma.c | 2 +- arch/x86/include/asm/vdso/vsyscall.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 872947c1004c3..918606ff92a98 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -24,7 +24,7 @@ SECTIONS timens_page = vvar_start + PAGE_SIZE; - vclock_pages = vvar_start + VDSO_NR_VCLOCK_PAGES * PAGE_SIZE; + vclock_pages = VDSO_VCLOCK_PAGES_START(vvar_start); pvclock_page = vclock_pages + VDSO_PAGE_PVCLOCK_OFFSET * PAGE_SIZE; hvclock_page = vclock_pages + VDSO_PAGE_HVCLOCK_OFFSET * PAGE_SIZE; diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 39e6efc1a9cab..aa62949335ece 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -290,7 +290,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) } vma = _install_special_mapping(mm, - addr + (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE, + VDSO_VCLOCK_PAGES_START(addr), VDSO_NR_VCLOCK_PAGES * PAGE_SIZE, VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| VM_PFNMAP, diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h index 37b4a70559a82..88b31d4cdfaf3 100644 --- a/arch/x86/include/asm/vdso/vsyscall.h +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -6,6 +6,7 @@ #define __VVAR_PAGES 4 #define VDSO_NR_VCLOCK_PAGES 2 +#define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE) #define VDSO_PAGE_PVCLOCK_OFFSET 0 #define VDSO_PAGE_HVCLOCK_OFFSET 1 From 30533a55ec8e6cd532989421a2d99ec422396fce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:34 +0100 Subject: [PATCH 02/55] parisc: Remove unused symbol vdso_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The symbol vdso_data is and has been unused. Remove it. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-2-13a4669dfc8c@linutronix.de --- arch/parisc/include/asm/vdso.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h index 2a2dc11b5545f..5f581c1d64606 100644 --- a/arch/parisc/include/asm/vdso.h +++ b/arch/parisc/include/asm/vdso.h @@ -12,8 +12,6 @@ #define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) #define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) -extern struct vdso_data *vdso_data; - #endif /* __ASSEMBLY __ */ /* Default link addresses for the vDSOs */ From 5b47aba858101639a4442c2140b73f64eb796ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:35 +0100 Subject: [PATCH 03/55] vdso: Introduce vdso/align.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vDSO implementation can only include headers from the vdso/ namespace. To enable the usage of the ALIGN() macro from the vDSO, move linux/align.h to vdso/align.h wholly. As the only dependency linux/const.h is only a wrapper around vdso/const.h anyways adapt that dependency. Also provide a compatibility wrapper linux/align.h. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-3-13a4669dfc8c@linutronix.de --- include/linux/align.h | 10 +--------- include/vdso/align.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 include/vdso/align.h diff --git a/include/linux/align.h b/include/linux/align.h index 2b4acec7b95a2..55debf105a5d6 100644 --- a/include/linux/align.h +++ b/include/linux/align.h @@ -2,14 +2,6 @@ #ifndef _LINUX_ALIGN_H #define _LINUX_ALIGN_H -#include - -/* @a is a power of 2 value */ -#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) -#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) -#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) -#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) -#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) +#include #endif /* _LINUX_ALIGN_H */ diff --git a/include/vdso/align.h b/include/vdso/align.h new file mode 100644 index 0000000000000..02dd8626b5c5a --- /dev/null +++ b/include/vdso/align.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_ALIGN_H +#define __VDSO_ALIGN_H + +#include + +/* @a is a power of 2 value */ +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#endif /* __VDSO_ALIGN_H */ From 127b0e05c1669d240426719b3b9db8a8366eed50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:36 +0100 Subject: [PATCH 04/55] vdso: Rename included Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the Makefile is included into other Makefiles it can not be used to define objects to be built from the current source directory. However the generic datastore will introduce such a local source file. Rename the included Makefile so it is clear how it is to be used and to make room for a regular Makefile in lib/vdso/. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-4-13a4669dfc8c@linutronix.de --- arch/arm/vdso/Makefile | 2 +- arch/arm64/kernel/vdso/Makefile | 2 +- arch/arm64/kernel/vdso32/Makefile | 2 +- arch/csky/kernel/vdso/Makefile | 2 +- arch/loongarch/vdso/Makefile | 2 +- arch/mips/vdso/Makefile | 2 +- arch/parisc/kernel/vdso32/Makefile | 2 +- arch/parisc/kernel/vdso64/Makefile | 2 +- arch/powerpc/kernel/vdso/Makefile | 2 +- arch/riscv/kernel/vdso/Makefile | 2 +- arch/s390/kernel/vdso32/Makefile | 2 +- arch/s390/kernel/vdso64/Makefile | 2 +- arch/x86/entry/vdso/Makefile | 2 +- lib/vdso/{Makefile => Makefile.include} | 0 14 files changed, 13 insertions(+), 13 deletions(-) rename lib/vdso/{Makefile => Makefile.include} (100%) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 8a306bbec4a0c..cb044bfd145d1 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include hostprogs := vdsomunge diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 35685c0360441..5e27e46aa4967 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -7,7 +7,7 @@ # # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 25a2cb6317f35..f2dfdc7dc8185 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -3,7 +3,7 @@ # Makefile for vdso32 # -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include # Same as cc-*option, but using CC_COMPAT instead of CC ifeq ($(CONFIG_CC_IS_CLANG), y) diff --git a/arch/csky/kernel/vdso/Makefile b/arch/csky/kernel/vdso/Makefile index 069ef0b17fe52..a3042287a0707 100644 --- a/arch/csky/kernel/vdso/Makefile +++ b/arch/csky/kernel/vdso/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include # Symbols present in the vdso vdso-syms += rt_sigreturn diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index fdde1bcd4e266..1c26147aff701 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -2,7 +2,7 @@ # Objects to go into the VDSO. # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o vgetrandom.o \ vgetrandom-chacha.o sigreturn.o diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index b289b2c1b2946..fb4c493aaffa9 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -2,7 +2,7 @@ # Objects to go into the VDSO. # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso-y := elf.o vgettimeofday.o sigreturn.o diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile index 288f8b85978fb..4ee8d17da229b 100644 --- a/arch/parisc/kernel/vdso32/Makefile +++ b/arch/parisc/kernel/vdso32/Makefile @@ -1,5 +1,5 @@ # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include KCOV_INSTRUMENT := n diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile index bc5d9553f3112..c63f4069170f4 100644 --- a/arch/parisc/kernel/vdso64/Makefile +++ b/arch/parisc/kernel/vdso64/Makefile @@ -1,5 +1,5 @@ # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include KCOV_INSTRUMENT := n diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 0e3ed6fb199ff..e8824f9333261 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -3,7 +3,7 @@ # List of files in the vdso, has to be asm only for now # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9a1b555e87331..ad73607abc280 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -2,7 +2,7 @@ # Copied from arch/tile/kernel/vdso/Makefile # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include # Symbols present in the vdso vdso-syms = rt_sigreturn ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 2c5afb88d2982..1e4ddd1a683ff 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = vdso_user_wrapper-32.o note-32.o # Build rules diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index ad206f2068d8c..d8f0df7428096 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c9216ac4fb1eb..1c0072336e661 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -4,7 +4,7 @@ # # Include the generic Makefile to check the built vDSO: -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include # Files to link into the vDSO: vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vgetrandom.o vgetrandom-chacha.o diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile.include similarity index 100% rename from lib/vdso/Makefile rename to lib/vdso/Makefile.include From df7fcbefa71090a276fb841ffe19b8e5f12b4767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:37 +0100 Subject: [PATCH 05/55] vdso: Add generic time data storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Historically each architecture defined their own way to store the vDSO data page. Add a generic mechanism to provide storage for that page. Furthermore this generic storage will be extended to also provide uniform storage for *non*-time-related data, like the random state or architecture-specific data. These will have their own pages and data structures, so rename 'vdso_data' into 'vdso_time_data' to make that split clear from the name. Also introduce a new consistent naming scheme for the symbols related to the vDSO, which makes it clear if the symbol is accessible from userspace or kernel space and the type of data behind the symbol. The generic fault handler contains an optimization to prefault the vvar page when the timens page is accessed. This was lifted from s390 and x86. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-5-13a4669dfc8c@linutronix.de --- include/asm-generic/vdso/vsyscall.h | 16 +++++ include/linux/time_namespace.h | 1 + include/linux/vdso_datastore.h | 10 +++ include/vdso/datapage.h | 41 ++++++++--- kernel/time/vsyscall.c | 8 +-- lib/Makefile | 2 +- lib/vdso/Kconfig | 5 ++ lib/vdso/Makefile | 3 + lib/vdso/datastore.c | 103 ++++++++++++++++++++++++++++ lib/vdso/gettimeofday.c | 25 +++++-- 10 files changed, 195 insertions(+), 19 deletions(-) create mode 100644 include/linux/vdso_datastore.h create mode 100644 lib/vdso/Makefile create mode 100644 lib/vdso/datastore.c diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index 01dafd604188f..ac5b93b919932 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -4,12 +4,28 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_GENERIC_VDSO_DATA_STORE + +#ifndef __arch_get_vdso_u_time_data +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) +{ + return vdso_u_time_data; +} +#endif + +#else /* !CONFIG_GENERIC_VDSO_DATA_STORE */ + #ifndef __arch_get_k_vdso_data static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) { return NULL; } #endif /* __arch_get_k_vdso_data */ +#define vdso_k_time_data __arch_get_k_vdso_data() + +#define __arch_get_vdso_u_time_data __arch_get_vdso_data + +#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ #ifndef __arch_update_vsyscall static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata) diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 876e31b4461d0..4b81db223f545 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -8,6 +8,7 @@ #include #include #include +#include struct user_namespace; extern struct user_namespace init_user_ns; diff --git a/include/linux/vdso_datastore.h b/include/linux/vdso_datastore.h new file mode 100644 index 0000000000000..a91fa24b06e09 --- /dev/null +++ b/include/linux/vdso_datastore.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VDSO_DATASTORE_H +#define _LINUX_VDSO_DATASTORE_H + +#include + +extern const struct vm_special_mapping vdso_vvar_mapping; +struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr); + +#endif /* _LINUX_VDSO_DATASTORE_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index d967baa0cd0c6..b3d8087488ff3 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -45,11 +45,11 @@ struct arch_vdso_time_data {}; * * There is one vdso_timestamp object in vvar for each vDSO-accelerated * clock_id. For high-resolution clocks, this encodes the time - * corresponding to vdso_data.cycle_last. For coarse clocks this encodes + * corresponding to vdso_time_data.cycle_last. For coarse clocks this encodes * the actual time. * * To be noticed that for highres clocks nsec is left-shifted by - * vdso_data.cs[x].shift. + * vdso_time_data[x].shift. */ struct vdso_timestamp { u64 sec; @@ -57,7 +57,7 @@ struct vdso_timestamp { }; /** - * struct vdso_data - vdso datapage representation + * struct vdso_time_data - vdso datapage representation * @seq: timebase sequence counter * @clock_mode: clock mode * @cycle_last: timebase at clocksource init @@ -74,7 +74,7 @@ struct vdso_timestamp { * @arch_data: architecture specific data (optional, defaults * to an empty struct) * - * vdso_data will be accessed by 64 bit and compat code at the same time + * vdso_time_data will be accessed by 64 bit and compat code at the same time * so we should be careful before modifying this structure. * * The ordering of the struct members is optimized to have fast access to the @@ -92,7 +92,7 @@ struct vdso_timestamp { * For clocks which are not affected by time namespace adjustment the * offset must be zero. */ -struct vdso_data { +struct vdso_time_data { u32 seq; s32 clock_mode; @@ -117,6 +117,8 @@ struct vdso_data { struct arch_vdso_time_data arch_data; }; +#define vdso_data vdso_time_data + /** * struct vdso_rng_data - vdso RNG state information * @generation: counter representing the number of RNG reseeds @@ -136,18 +138,34 @@ struct vdso_rng_data { * With the hidden visibility, the compiler simply generates a PC-relative * relocation, and this is what we need. */ -extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden"))); -extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden"))); +#ifndef CONFIG_GENERIC_VDSO_DATA_STORE +extern struct vdso_time_data _vdso_data[CS_BASES] __attribute__((visibility("hidden"))); +extern struct vdso_time_data _timens_data[CS_BASES] __attribute__((visibility("hidden"))); extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden"))); +#else +extern struct vdso_time_data vdso_u_time_data[CS_BASES] __attribute__((visibility("hidden"))); + +extern struct vdso_time_data *vdso_k_time_data; +#endif /** * union vdso_data_store - Generic vDSO data page */ union vdso_data_store { - struct vdso_data data[CS_BASES]; + struct vdso_time_data data[CS_BASES]; u8 page[1U << CONFIG_PAGE_SHIFT]; }; +#ifdef CONFIG_GENERIC_VDSO_DATA_STORE + +enum vdso_pages { + VDSO_TIME_PAGE_OFFSET, + VDSO_TIMENS_PAGE_OFFSET, + VDSO_NR_PAGES +}; + +#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ + /* * The generic vDSO implementation requires that gettimeofday.h * provides: @@ -164,6 +182,13 @@ union vdso_data_store { #include #endif /* ENABLE_COMPAT_VDSO */ +#else /* !__ASSEMBLY__ */ + +#define VDSO_VVAR_SYMS \ + PROVIDE(vdso_u_data = . - __VDSO_PAGES * PAGE_SIZE); \ + PROVIDE(vdso_u_time_data = vdso_u_data); \ + + #endif /* !__ASSEMBLY__ */ #endif /* __VDSO_DATAPAGE_H */ diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 05d3831431658..09c1e39a6dd8e 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -77,7 +77,7 @@ static inline void update_vdso_data(struct vdso_data *vdata, void update_vsyscall(struct timekeeper *tk) { - struct vdso_data *vdata = __arch_get_k_vdso_data(); + struct vdso_data *vdata = vdso_k_time_data; struct vdso_timestamp *vdso_ts; s32 clock_mode; u64 nsec; @@ -128,7 +128,7 @@ void update_vsyscall(struct timekeeper *tk) void update_vsyscall_tz(void) { - struct vdso_data *vdata = __arch_get_k_vdso_data(); + struct vdso_data *vdata = vdso_k_time_data; vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest; vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime; @@ -150,7 +150,7 @@ void update_vsyscall_tz(void) */ unsigned long vdso_update_begin(void) { - struct vdso_data *vdata = __arch_get_k_vdso_data(); + struct vdso_data *vdata = vdso_k_time_data; unsigned long flags = timekeeper_lock_irqsave(); vdso_write_begin(vdata); @@ -167,7 +167,7 @@ unsigned long vdso_update_begin(void) */ void vdso_update_end(unsigned long flags) { - struct vdso_data *vdata = __arch_get_k_vdso_data(); + struct vdso_data *vdata = vdso_k_time_data; vdso_write_end(vdata); __arch_sync_vdso_data(vdata); diff --git a/lib/Makefile b/lib/Makefile index d5cfc7afbbb82..fcdee83deb5c6 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -132,7 +132,7 @@ endif obj-$(CONFIG_DEBUG_INFO_REDUCED) += debug_info.o CFLAGS_debug_info.o += $(call cc-option, -femit-struct-debug-detailed=any) -obj-y += math/ crypto/ +obj-y += math/ crypto/ vdso/ obj-$(CONFIG_GENERIC_IOMAP) += iomap.o obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index 82fe827af5426..45df764b49ad6 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -43,3 +43,8 @@ config VDSO_GETRANDOM bool help Selected by architectures that support vDSO getrandom(). + +config GENERIC_VDSO_DATA_STORE + bool + help + Selected by architectures that use the generic vDSO data store. diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile new file mode 100644 index 0000000000000..aedd40aaa950c --- /dev/null +++ b/lib/vdso/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_GENERIC_VDSO_DATA_STORE) += datastore.o diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c new file mode 100644 index 0000000000000..d3181768424cc --- /dev/null +++ b/lib/vdso/datastore.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include + +/* + * The vDSO data page. + */ +#ifdef CONFIG_HAVE_GENERIC_VDSO +static union vdso_data_store vdso_time_data_store __page_aligned_data; +struct vdso_time_data *vdso_k_time_data = vdso_time_data_store.data; +static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); +#endif /* CONFIG_HAVE_GENERIC_VDSO */ + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long addr, pfn; + vm_fault_t err; + + switch (vmf->pgoff) { + case VDSO_TIME_PAGE_OFFSET: + if (!IS_ENABLED(CONFIG_HAVE_GENERIC_VDSO)) + return VM_FAULT_SIGBUS; + pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); + if (timens_page) { + /* + * Fault in VVAR page too, since it will be accessed + * to get clock data anyway. + */ + addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE; + err = vmf_insert_pfn(vma, addr, pfn); + if (unlikely(err & VM_FAULT_ERROR)) + return err; + pfn = page_to_pfn(timens_page); + } + break; + case VDSO_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page) + return VM_FAULT_SIGBUS; + pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); + break; + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +const struct vm_special_mapping vdso_vvar_mapping = { + .name = "[vvar]", + .fault = vvar_fault, +}; + +struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr) +{ + return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, + VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP | VM_PFNMAP, + &vdso_vvar_mapping); +} + +#ifdef CONFIG_TIME_NS +/* + * The vvar page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will be re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + + mmap_read_lock(mm); + for_each_vma(vmi, vma) { + if (vma_is_special_mapping(vma, &vdso_vvar_mapping)) + zap_vma_pages(vma); + } + mmap_read_unlock(mm); + + return 0; +} + +struct vdso_time_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_time_data *)vvar_page; +} +#endif diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index c01eaafd80419..20c5b8752fcc8 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -5,6 +5,9 @@ #include #include +/* Bring in default accessors */ +#include + #ifndef vdso_calc_ns #ifdef VDSO_DELTA_NOMASK @@ -69,6 +72,16 @@ static inline bool vdso_cycles_ok(u64 cycles) #endif #ifdef CONFIG_TIME_NS + +#ifdef CONFIG_GENERIC_VDSO_DATA_STORE +static __always_inline +const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd) +{ + return (void *)vd + PAGE_SIZE; +} +#define __arch_get_timens_vdso_data(vd) __arch_get_vdso_u_timens_data(vd) +#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ + static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, struct __kernel_timespec *ts) { @@ -282,7 +295,7 @@ __cvdso_clock_gettime_data(const struct vdso_data *vd, clockid_t clock, static __maybe_unused int __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { - return __cvdso_clock_gettime_data(__arch_get_vdso_data(), clock, ts); + return __cvdso_clock_gettime_data(__arch_get_vdso_u_time_data(), clock, ts); } #ifdef BUILD_VDSO32 @@ -308,7 +321,7 @@ __cvdso_clock_gettime32_data(const struct vdso_data *vd, clockid_t clock, static __maybe_unused int __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) { - return __cvdso_clock_gettime32_data(__arch_get_vdso_data(), clock, res); + return __cvdso_clock_gettime32_data(__arch_get_vdso_u_time_data(), clock, res); } #endif /* BUILD_VDSO32 */ @@ -342,7 +355,7 @@ __cvdso_gettimeofday_data(const struct vdso_data *vd, static __maybe_unused int __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { - return __cvdso_gettimeofday_data(__arch_get_vdso_data(), tv, tz); + return __cvdso_gettimeofday_data(__arch_get_vdso_u_time_data(), tv, tz); } #ifdef VDSO_HAS_TIME @@ -365,7 +378,7 @@ __cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time) static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time) { - return __cvdso_time_data(__arch_get_vdso_data(), time); + return __cvdso_time_data(__arch_get_vdso_u_time_data(), time); } #endif /* VDSO_HAS_TIME */ @@ -425,7 +438,7 @@ int __cvdso_clock_getres_data(const struct vdso_data *vd, clockid_t clock, static __maybe_unused int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) { - return __cvdso_clock_getres_data(__arch_get_vdso_data(), clock, res); + return __cvdso_clock_getres_data(__arch_get_vdso_u_time_data(), clock, res); } #ifdef BUILD_VDSO32 @@ -451,7 +464,7 @@ __cvdso_clock_getres_time32_data(const struct vdso_data *vd, clockid_t clock, static __maybe_unused int __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res) { - return __cvdso_clock_getres_time32_data(__arch_get_vdso_data(), + return __cvdso_clock_getres_time32_data(__arch_get_vdso_u_time_data(), clock, res); } #endif /* BUILD_VDSO32 */ From 51d6ca373f459fa6c91743e14ae69854d844aa38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:38 +0100 Subject: [PATCH 06/55] vdso: Add generic random data storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend the generic vDSO data storage with a page for the random state data. The random state data is stored in a dedicated page, as the existing storage page is only meant for time-related, time-namespace-aware data. This simplifies to access logic to not need to handle time namespaces anymore and also frees up more space in the time-related page. In case further generic vDSO data store is required it can be added to the random state page. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-6-13a4669dfc8c@linutronix.de --- drivers/char/random.c | 6 +++--- include/asm-generic/vdso/vsyscall.h | 12 ++++++++++++ include/vdso/datapage.h | 10 ++++++++++ lib/vdso/datastore.c | 14 ++++++++++++++ lib/vdso/getrandom.c | 8 ++++++-- 5 files changed, 45 insertions(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 2581186fa61b7..92cbd24a36d8c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -278,7 +278,7 @@ static void crng_reseed(struct work_struct *work) WRITE_ONCE(base_crng.generation, next_gen); #ifdef CONFIG_VDSO_GETRANDOM /* base_crng.generation's invalid value is ULONG_MAX, while - * _vdso_rng_data.generation's invalid value is 0, so add one to the + * vdso_k_rng_data->generation's invalid value is 0, so add one to the * former to arrive at the latter. Use smp_store_release so that this * is ordered with the write above to base_crng.generation. Pairs with * the smp_rmb() before the syscall in the vDSO code. @@ -290,7 +290,7 @@ static void crng_reseed(struct work_struct *work) * because the vDSO side only checks whether the value changed, without * actually using or interpreting the value. */ - smp_store_release((unsigned long *)&__arch_get_k_vdso_rng_data()->generation, next_gen + 1); + smp_store_release((unsigned long *)&vdso_k_rng_data->generation, next_gen + 1); #endif if (!static_branch_likely(&crng_is_ready)) crng_init = CRNG_READY; @@ -743,7 +743,7 @@ static void __cold _credit_init_bits(size_t bits) queue_work(system_unbound_wq, &set_ready); atomic_notifier_call_chain(&random_ready_notifier, 0, NULL); #ifdef CONFIG_VDSO_GETRANDOM - WRITE_ONCE(__arch_get_k_vdso_rng_data()->is_ready, true); + WRITE_ONCE(vdso_k_rng_data->is_ready, true); #endif wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index ac5b93b919932..a5f973e4e2723 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -13,6 +13,13 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data( } #endif +#ifndef __arch_get_vdso_u_rng_data +static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(void) +{ + return &vdso_u_rng_data; +} +#endif + #else /* !CONFIG_GENERIC_VDSO_DATA_STORE */ #ifndef __arch_get_k_vdso_data @@ -25,6 +32,11 @@ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) #define __arch_get_vdso_u_time_data __arch_get_vdso_data +#ifndef __arch_get_vdso_u_rng_data +#define __arch_get_vdso_u_rng_data() __arch_get_vdso_rng_data() +#endif +#define vdso_k_rng_data __arch_get_k_vdso_rng_data() + #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ #ifndef __arch_update_vsyscall diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index b3d8087488ff3..7dbc8797e3b89 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -144,8 +144,10 @@ extern struct vdso_time_data _timens_data[CS_BASES] __attribute__((visibility("h extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden"))); #else extern struct vdso_time_data vdso_u_time_data[CS_BASES] __attribute__((visibility("hidden"))); +extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden"))); extern struct vdso_time_data *vdso_k_time_data; +extern struct vdso_rng_data *vdso_k_rng_data; #endif /** @@ -161,6 +163,7 @@ union vdso_data_store { enum vdso_pages { VDSO_TIME_PAGE_OFFSET, VDSO_TIMENS_PAGE_OFFSET, + VDSO_RNG_PAGE_OFFSET, VDSO_NR_PAGES }; @@ -184,9 +187,16 @@ enum vdso_pages { #else /* !__ASSEMBLY__ */ +#ifdef CONFIG_VDSO_GETRANDOM +#define __vdso_u_rng_data PROVIDE(vdso_u_rng_data = vdso_u_data + 2 * PAGE_SIZE); +#else +#define __vdso_u_rng_data +#endif + #define VDSO_VVAR_SYMS \ PROVIDE(vdso_u_data = . - __VDSO_PAGES * PAGE_SIZE); \ PROVIDE(vdso_u_time_data = vdso_u_data); \ + __vdso_u_rng_data \ #endif /* !__ASSEMBLY__ */ diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index d3181768424cc..9260b00dc8520 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -17,6 +17,15 @@ struct vdso_time_data *vdso_k_time_data = vdso_time_data_store.data; static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); #endif /* CONFIG_HAVE_GENERIC_VDSO */ +#ifdef CONFIG_VDSO_GETRANDOM +static union { + struct vdso_rng_data data; + u8 page[PAGE_SIZE]; +} vdso_rng_data_store __page_aligned_data; +struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data; +static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE); +#endif /* CONFIG_VDSO_GETRANDOM */ + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -53,6 +62,11 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); break; + case VDSO_RNG_PAGE_OFFSET: + if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM)) + return VM_FAULT_SIGBUS; + pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data)); + break; default: return VM_FAULT_SIGBUS; } diff --git a/lib/vdso/getrandom.c b/lib/vdso/getrandom.c index 938ca539aaa64..440f8a6203a69 100644 --- a/lib/vdso/getrandom.c +++ b/lib/vdso/getrandom.c @@ -12,6 +12,9 @@ #include #include +/* Bring in default accessors */ +#include + #undef PAGE_SIZE #undef PAGE_MASK #define PAGE_SIZE (1UL << CONFIG_PAGE_SHIFT) @@ -152,7 +155,7 @@ __cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_ /* * Prevent the syscall from being reordered wrt current_generation. Pairs with the - * smp_store_release(&_vdso_rng_data.generation) in random.c. + * smp_store_release(&vdso_k_rng_data->generation) in random.c. */ smp_rmb(); @@ -256,5 +259,6 @@ __cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_ static __always_inline ssize_t __cvdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) { - return __cvdso_getrandom_data(__arch_get_vdso_rng_data(), buffer, len, flags, opaque_state, opaque_len); + return __cvdso_getrandom_data(__arch_get_vdso_u_rng_data(), buffer, len, flags, + opaque_state, opaque_len); } From 365841e1557ace6e8b4d5ba06a6cf53f699bc684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:39 +0100 Subject: [PATCH 07/55] vdso: Add generic architecture-specific data storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some architectures need to expose architecture-specific data to the vDSO. Enable the generic vDSO storage mechanism to both store and map this data. Some architectures require more than a single page, like LoongArch, so prepare for that usecase, too. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-7-13a4669dfc8c@linutronix.de --- arch/Kconfig | 4 ++++ include/vdso/datapage.h | 25 +++++++++++++++++++++++++ lib/vdso/datastore.c | 14 ++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index b8a4ff3655822..9f6eb09ef12d7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1584,6 +1584,10 @@ config HAVE_SPARSE_SYSCALL_NR entries at 4000, 5000 and 6000 locations. This option turns on syscall related optimizations for a given architecture. +config ARCH_HAS_VDSO_ARCH_DATA + depends on GENERIC_VDSO_DATA_STORE + bool + config ARCH_HAS_VDSO_TIME_DATA bool diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 7dbc8797e3b89..46658b39c2825 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -9,11 +9,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -25,6 +27,15 @@ struct arch_vdso_time_data {}; #endif +#if defined(CONFIG_ARCH_HAS_VDSO_ARCH_DATA) +#include +#elif defined(CONFIG_GENERIC_VDSO_DATA_STORE) +struct vdso_arch_data { + /* Needed for the generic code, never actually used at runtime */ + char __unused; +}; +#endif + #define VDSO_BASES (CLOCK_TAI + 1) #define VDSO_HRES (BIT(CLOCK_REALTIME) | \ BIT(CLOCK_MONOTONIC) | \ @@ -145,9 +156,11 @@ extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden"))) #else extern struct vdso_time_data vdso_u_time_data[CS_BASES] __attribute__((visibility("hidden"))); extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden"))); +extern struct vdso_arch_data vdso_u_arch_data __attribute__((visibility("hidden"))); extern struct vdso_time_data *vdso_k_time_data; extern struct vdso_rng_data *vdso_k_rng_data; +extern struct vdso_arch_data *vdso_k_arch_data; #endif /** @@ -160,10 +173,15 @@ union vdso_data_store { #ifdef CONFIG_GENERIC_VDSO_DATA_STORE +#define VDSO_ARCH_DATA_SIZE ALIGN(sizeof(struct vdso_arch_data), PAGE_SIZE) +#define VDSO_ARCH_DATA_PAGES (VDSO_ARCH_DATA_SIZE >> PAGE_SHIFT) + enum vdso_pages { VDSO_TIME_PAGE_OFFSET, VDSO_TIMENS_PAGE_OFFSET, VDSO_RNG_PAGE_OFFSET, + VDSO_ARCH_PAGES_START, + VDSO_ARCH_PAGES_END = VDSO_ARCH_PAGES_START + VDSO_ARCH_DATA_PAGES - 1, VDSO_NR_PAGES }; @@ -193,10 +211,17 @@ enum vdso_pages { #define __vdso_u_rng_data #endif +#ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA +#define __vdso_u_arch_data PROVIDE(vdso_u_arch_data = vdso_u_data + 3 * PAGE_SIZE); +#else +#define __vdso_u_arch_data +#endif + #define VDSO_VVAR_SYMS \ PROVIDE(vdso_u_data = . - __VDSO_PAGES * PAGE_SIZE); \ PROVIDE(vdso_u_time_data = vdso_u_data); \ __vdso_u_rng_data \ + __vdso_u_arch_data \ #endif /* !__ASSEMBLY__ */ diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index 9260b00dc8520..0959d62d78586 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -26,6 +26,14 @@ struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data; static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE); #endif /* CONFIG_VDSO_GETRANDOM */ +#ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA +static union { + struct vdso_arch_data data; + u8 page[VDSO_ARCH_DATA_SIZE]; +} vdso_arch_data_store __page_aligned_data; +struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data; +#endif /* CONFIG_ARCH_HAS_VDSO_ARCH_DATA */ + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -67,6 +75,12 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data)); break; + case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END: + if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA)) + return VM_FAULT_SIGBUS; + pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) + + vmf->pgoff - VDSO_ARCH_PAGES_START; + break; default: return VM_FAULT_SIGBUS; } From 0b3bc3354eb9ad36719a044726092750a2ba01ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:40 +0100 Subject: [PATCH 08/55] arm64: vdso: Switch to generic storage implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic storage implementation provides the same features as the custom one. However it can be shared between architectures, making maintenance easier. This switch also moves the random state data out of the time data page. The currently used hardcoded __VDSO_RND_DATA_OFFSET does not take into account changes to the time data page layout. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-8-13a4669dfc8c@linutronix.de --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/vdso.h | 2 +- .../include/asm/vdso/compat_gettimeofday.h | 36 +++----- arch/arm64/include/asm/vdso/getrandom.h | 12 --- arch/arm64/include/asm/vdso/gettimeofday.h | 16 +--- arch/arm64/include/asm/vdso/vsyscall.h | 25 +----- arch/arm64/kernel/vdso.c | 90 ++----------------- arch/arm64/kernel/vdso/vdso.lds.S | 7 +- arch/arm64/kernel/vdso32/vdso.lds.S | 7 +- 9 files changed, 26 insertions(+), 170 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fcdd0ed3eca89..3770b73f270bb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -162,6 +162,7 @@ config ARM64 select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select HARDIRQS_SW_RESEND select HAS_IOPORT diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 3e3c3fdb18427..61679070f595c 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -5,7 +5,7 @@ #ifndef __ASM_VDSO_H #define __ASM_VDSO_H -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 778c1202bbbf9..957ee12fcc54b 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -104,7 +104,7 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) } static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { u64 res; @@ -131,43 +131,31 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return res; } -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) { - const struct vdso_data *ret; + const struct vdso_time_data *ret; /* - * This simply puts &_vdso_data into ret. The reason why we don't use - * `ret = _vdso_data` is that the compiler tends to optimise this in a - * very suboptimal way: instead of keeping &_vdso_data in a register, - * it goes through a relocation almost every time _vdso_data must be + * This simply puts &_vdso_time_data into ret. The reason why we don't use + * `ret = _vdso_time_data` is that the compiler tends to optimise this in a + * very suboptimal way: instead of keeping &_vdso_time_data in a register, + * it goes through a relocation almost every time _vdso_time_data must be * accessed (even in subfunctions). This is both time and space * consuming: each relocation uses a word in the code section, and it * has to be loaded at runtime. * * This trick hides the assignment from the compiler. Since it cannot * track where the pointer comes from, it will only use one relocation - * where __arch_get_vdso_data() is called, and then keep the result in - * a register. + * where __aarch64_get_vdso_u_time_data() is called, and then keep the + * result in a register. */ - asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data)); + asm volatile("mov %0, %1" : "=r"(ret) : "r"(vdso_u_time_data)); return ret; } +#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - const struct vdso_data *ret; - - /* See __arch_get_vdso_data(). */ - asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data)); - - return ret; -} -#endif - -static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +static inline bool vdso_clocksource_ok(const struct vdso_time_data *vd) { return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; } diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h index 342f807e20444..a2197da1951b0 100644 --- a/arch/arm64/include/asm/vdso/getrandom.h +++ b/arch/arm64/include/asm/vdso/getrandom.h @@ -33,18 +33,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns return ret; } -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - /* - * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace - * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ - * PAGE_OFFSET points to the real VVAR page. - */ - if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) - return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * (1UL << CONFIG_PAGE_SHIFT); - return &_vdso_rng_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 764d13e2916c5..92a2b59a9f3df 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -67,7 +67,7 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) } static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { u64 res; @@ -99,20 +99,6 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return res; } -static __always_inline -const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index eea51946d45a2..3f65cbd00635a 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -2,41 +2,18 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#define __VDSO_RND_DATA_OFFSET 480 - #ifndef __ASSEMBLY__ #include -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - #define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) -extern struct vdso_data *vdso_data; /* * Update the vDSO data page to keep in sync with kernel timekeeping. */ static __always_inline -struct vdso_data *__arm64_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __arm64_get_k_vdso_data - -static __always_inline -struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) -{ - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; -} -#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data - -static __always_inline -void __arm64_update_vsyscall(struct vdso_data *vdata) +void __arm64_update_vsyscall(struct vdso_time_data *vdata) { vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; vdata[CS_RAW].mask = VDSO_PRECISION_MASK; diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index e8ed8e5b713b5..887ac0b05961e 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -57,12 +57,6 @@ static struct vdso_abi_info vdso_info[] __ro_after_init = { #endif /* CONFIG_COMPAT_VDSO */ }; -/* - * The vDSO data page. - */ -static union vdso_data_store vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = vdso_data_store.data; - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { @@ -104,78 +98,6 @@ static int __init __vdso_init(enum vdso_abi abi) return 0; } -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -static const struct vm_special_mapping vvar_map; - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &vvar_map)) - zap_vma_pages(vma); - } - - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = sym_to_pfn(vdso_data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = sym_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - -static const struct vm_special_mapping vvar_map = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static int __setup_additional_pages(enum vdso_abi abi, struct mm_struct *mm, struct linux_binprm *bprm, @@ -185,11 +107,11 @@ static int __setup_additional_pages(enum vdso_abi abi, unsigned long gp_flags = 0; void *ret; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ - vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; + vdso_mapping_len = vdso_text_len + VDSO_NR_PAGES * PAGE_SIZE; vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { @@ -197,16 +119,14 @@ static int __setup_additional_pages(enum vdso_abi abi, goto up_fail; } - ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_PFNMAP, - &vvar_map); + ret = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(ret)) goto up_fail; if (system_supports_bti_kernel()) gp_flags = VM_ARM64_BTI; - vdso_base += VVAR_NR_PAGES * PAGE_SIZE; + vdso_base += VDSO_NR_PAGES * PAGE_SIZE; mm->context.vdso = (void *)vdso_base; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC|gp_flags| diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 4ec32e86a8da2..2e20d0593f182 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -20,11 +20,8 @@ OUTPUT_ARCH(aarch64) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S index 732702a187e9e..e02b27487ce80 100644 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -12,16 +12,15 @@ #include #include #include +#include OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") OUTPUT_ARCH(arm) SECTIONS { - PROVIDE_HIDDEN(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE_HIDDEN(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text From 46fe55b204bfb007e0f0a5409dcda063ad98b089 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:41 +0100 Subject: [PATCH 09/55] riscv: vdso: Switch to generic storage implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic storage implementation provides the same features as the custom one. However it can be shared between architectures, making maintenance easier. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-9-13a4669dfc8c@linutronix.de --- arch/riscv/Kconfig | 3 +- arch/riscv/include/asm/vdso.h | 2 +- .../asm/vdso/{time_data.h => arch_data.h} | 8 +- arch/riscv/include/asm/vdso/gettimeofday.h | 14 +-- arch/riscv/include/asm/vdso/vsyscall.h | 9 -- arch/riscv/kernel/sys_hwprobe.c | 3 +- arch/riscv/kernel/vdso.c | 90 +------------------ arch/riscv/kernel/vdso/hwprobe.c | 6 +- arch/riscv/kernel/vdso/vdso.lds.S | 7 +- 9 files changed, 18 insertions(+), 124 deletions(-) rename arch/riscv/include/asm/vdso/{time_data.h => arch_data.h} (71%) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e3..aa8ea53186c04 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -53,7 +53,7 @@ config RISCV select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN - select ARCH_HAS_VDSO_TIME_DATA + select ARCH_HAS_VDSO_ARCH_DATA if GENERIC_VDSO_DATA_STORE select ARCH_KEEP_MEMBLOCK if ACPI select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if 64BIT && MMU select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX @@ -116,6 +116,7 @@ config RISCV select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL if MMU && 64BIT + select GENERIC_VDSO_DATA_STORE if MMU select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAS_IOPORT if MMU diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index f891478829a52..c130d8100232c 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -14,7 +14,7 @@ */ #ifdef CONFIG_MMU -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #ifndef __ASSEMBLY__ #include diff --git a/arch/riscv/include/asm/vdso/time_data.h b/arch/riscv/include/asm/vdso/arch_data.h similarity index 71% rename from arch/riscv/include/asm/vdso/time_data.h rename to arch/riscv/include/asm/vdso/arch_data.h index dfa65228999be..da57a3786f7a5 100644 --- a/arch/riscv/include/asm/vdso/time_data.h +++ b/arch/riscv/include/asm/vdso/arch_data.h @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __RISCV_ASM_VDSO_TIME_DATA_H -#define __RISCV_ASM_VDSO_TIME_DATA_H +#ifndef __RISCV_ASM_VDSO_ARCH_DATA_H +#define __RISCV_ASM_VDSO_ARCH_DATA_H #include #include #include -struct arch_vdso_time_data { +struct vdso_arch_data { /* Stash static answers to the hwprobe queries when all CPUs are selected. */ __u64 all_cpu_hwprobe_values[RISCV_HWPROBE_MAX_KEY + 1]; @@ -14,4 +14,4 @@ struct arch_vdso_time_data { __u8 homogeneous_cpus; }; -#endif /* __RISCV_ASM_VDSO_TIME_DATA_H */ +#endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */ diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h index ba3283cf7acca..29164f84f93ce 100644 --- a/arch/riscv/include/asm/vdso/gettimeofday.h +++ b/arch/riscv/include/asm/vdso/gettimeofday.h @@ -69,7 +69,7 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) #endif /* CONFIG_GENERIC_TIME_VSYSCALL */ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { /* * The purpose of csr_read(CSR_TIME) is to trap the system into @@ -79,18 +79,6 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return csr_read(CSR_TIME); } -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/riscv/include/asm/vdso/vsyscall.h b/arch/riscv/include/asm/vdso/vsyscall.h index e8a9c4b53c0c9..1140b54b4bc82 100644 --- a/arch/riscv/include/asm/vdso/vsyscall.h +++ b/arch/riscv/include/asm/vdso/vsyscall.h @@ -6,15 +6,6 @@ #include -extern struct vdso_data *vdso_data; - -static __always_inline struct vdso_data *__riscv_get_k_vdso_data(void) -{ - return vdso_data; -} - -#define __arch_get_k_vdso_data __riscv_get_k_vdso_data - /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index bcd3b816306c2..04a4e54955128 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -450,8 +450,7 @@ static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, static int __init init_hwprobe_vdso_data(void) { - struct vdso_data *vd = __arch_get_k_vdso_data(); - struct arch_vdso_time_data *avd = &vd->arch_data; + struct vdso_arch_data *avd = vdso_k_arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 3ca3ae4277e18..cc2895d1fbc2f 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -13,20 +13,11 @@ #include #include #include -#include +#include #include #include -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - -#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) - -static union vdso_data_store vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = vdso_data_store.data; +#define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) struct __vdso_info { const char *name; @@ -79,78 +70,6 @@ static void __init __vdso_init(struct __vdso_info *vdso_info) vdso_info->cm->pages = vdso_pagelist; } -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -static const struct vm_special_mapping rv_vvar_map; - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &rv_vvar_map)) - zap_vma_pages(vma); - } - - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = sym_to_pfn(vdso_data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = sym_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - -static const struct vm_special_mapping rv_vvar_map = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping rv_vdso_map __ro_after_init = { .name = "[vdso]", .mremap = vdso_mremap, @@ -196,7 +115,7 @@ static int __setup_additional_pages(struct mm_struct *mm, unsigned long vdso_base, vdso_text_len, vdso_mapping_len; void *ret; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ @@ -208,8 +127,7 @@ static int __setup_additional_pages(struct mm_struct *mm, goto up_fail; } - ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, - (VM_READ | VM_MAYREAD | VM_PFNMAP), &rv_vvar_map); + ret = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(ret)) goto up_fail; diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index a158c029344f6..2ddeba6c68dda 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -16,8 +16,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, size_t cpusetsize, unsigned long *cpus, unsigned int flags) { - const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_time_data *avd = &vd->arch_data; + const struct vdso_arch_data *avd = &vdso_u_arch_data; bool all_cpus = !cpusetsize && !cpus; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; @@ -51,8 +50,7 @@ static int riscv_vdso_get_cpus(struct riscv_hwprobe *pairs, size_t pair_count, size_t cpusetsize, unsigned long *cpus, unsigned int flags) { - const struct vdso_data *vd = __arch_get_vdso_data(); - const struct arch_vdso_time_data *avd = &vd->arch_data; + const struct vdso_arch_data *avd = &vdso_u_arch_data; struct riscv_hwprobe *p = pairs; struct riscv_hwprobe *end = pairs + pair_count; unsigned char *c = (unsigned char *)cpus; diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index cbe2a179331d2..8e86965a8aae4 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -4,15 +4,14 @@ */ #include #include +#include OUTPUT_ARCH(riscv) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text From d2862bb9d9ca58f41319a55db0ca275774ae0c6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:42 +0100 Subject: [PATCH 10/55] LoongArch: vDSO: Switch to generic storage implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic storage implementation provides the same features as the custom one. However it can be shared between architectures, making maintenance easier. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-10-13a4669dfc8c@linutronix.de --- arch/loongarch/Kconfig | 2 + arch/loongarch/include/asm/vdso.h | 1 - arch/loongarch/include/asm/vdso/arch_data.h | 25 +++++ arch/loongarch/include/asm/vdso/getrandom.h | 5 - .../loongarch/include/asm/vdso/gettimeofday.h | 14 +-- arch/loongarch/include/asm/vdso/vdso.h | 38 +------- arch/loongarch/include/asm/vdso/vsyscall.h | 17 ---- arch/loongarch/kernel/asm-offsets.c | 2 +- arch/loongarch/kernel/vdso.c | 92 +------------------ arch/loongarch/vdso/vdso.lds.S | 8 +- arch/loongarch/vdso/vgetcpu.c | 12 +-- 11 files changed, 39 insertions(+), 177 deletions(-) create mode 100644 arch/loongarch/include/asm/vdso/arch_data.h diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 2b8bd27a852fe..d7ddf2a43e63b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -30,6 +30,7 @@ config LOONGARCH select ARCH_HAS_SET_MEMORY select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_VDSO_ARCH_DATA select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION @@ -106,6 +107,7 @@ config LOONGARCH select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select GPIOLIB select HAS_IOPORT diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h index d3ba35eb23e77..f72ec79e2dde5 100644 --- a/arch/loongarch/include/asm/vdso.h +++ b/arch/loongarch/include/asm/vdso.h @@ -31,7 +31,6 @@ struct loongarch_vdso_info { unsigned long size; unsigned long offset_sigreturn; struct vm_special_mapping code_mapping; - struct vm_special_mapping data_mapping; }; extern struct loongarch_vdso_info vdso_info; diff --git a/arch/loongarch/include/asm/vdso/arch_data.h b/arch/loongarch/include/asm/vdso/arch_data.h new file mode 100644 index 0000000000000..322d0a5f1c844 --- /dev/null +++ b/arch/loongarch/include/asm/vdso/arch_data.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Author: Huacai Chen + * Copyright (C) 2020-2022 Loongson Technology Corporation Limited + */ + +#ifndef _VDSO_ARCH_DATA_H +#define _VDSO_ARCH_DATA_H + +#ifndef __ASSEMBLY__ + +#include +#include + +struct vdso_pcpu_data { + u32 node; +} ____cacheline_aligned_in_smp; + +struct vdso_arch_data { + struct vdso_pcpu_data pdata[NR_CPUS]; +}; + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/loongarch/include/asm/vdso/getrandom.h b/arch/loongarch/include/asm/vdso/getrandom.h index e80f3c4ac7481..48c43f55b039b 100644 --- a/arch/loongarch/include/asm/vdso/getrandom.h +++ b/arch/loongarch/include/asm/vdso/getrandom.h @@ -28,11 +28,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns return ret; } -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - return &_loongarch_data.rng_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/loongarch/include/asm/vdso/gettimeofday.h b/arch/loongarch/include/asm/vdso/gettimeofday.h index 7eb3f041af764..88cfcf1331163 100644 --- a/arch/loongarch/include/asm/vdso/gettimeofday.h +++ b/arch/loongarch/include/asm/vdso/gettimeofday.h @@ -72,7 +72,7 @@ static __always_inline int clock_getres_fallback( } static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { uint64_t count; @@ -89,18 +89,6 @@ static inline bool loongarch_vdso_hres_capable(void) } #define __arch_vdso_hres_capable loongarch_vdso_hres_capable -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h index 1c183a9b2115a..50c65fb29dafb 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -12,43 +12,9 @@ #include #include #include +#include -struct vdso_pcpu_data { - u32 node; -} ____cacheline_aligned_in_smp; - -struct loongarch_vdso_data { - struct vdso_pcpu_data pdata[NR_CPUS]; - struct vdso_rng_data rng_data; -}; - -/* - * The layout of vvar: - * - * high - * +---------------------+--------------------------+ - * | loongarch vdso data | LOONGARCH_VDSO_DATA_SIZE | - * +---------------------+--------------------------+ - * | time-ns vdso data | PAGE_SIZE | - * +---------------------+--------------------------+ - * | generic vdso data | PAGE_SIZE | - * +---------------------+--------------------------+ - * low - */ -#define LOONGARCH_VDSO_DATA_SIZE PAGE_ALIGN(sizeof(struct loongarch_vdso_data)) -#define LOONGARCH_VDSO_DATA_PAGES (LOONGARCH_VDSO_DATA_SIZE >> PAGE_SHIFT) - -enum vvar_pages { - VVAR_GENERIC_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_LOONGARCH_PAGES_START, - VVAR_LOONGARCH_PAGES_END = VVAR_LOONGARCH_PAGES_START + LOONGARCH_VDSO_DATA_PAGES - 1, - VVAR_NR_PAGES, -}; - -#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) - -extern struct loongarch_vdso_data _loongarch_data __attribute__((visibility("hidden"))); +#define VVAR_SIZE (VDSO_NR_PAGES << PAGE_SHIFT) #endif /* __ASSEMBLY__ */ diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h index 8987e951d0a93..1140b54b4bc82 100644 --- a/arch/loongarch/include/asm/vdso/vsyscall.h +++ b/arch/loongarch/include/asm/vdso/vsyscall.h @@ -6,23 +6,6 @@ #include -extern struct vdso_data *vdso_data; -extern struct vdso_rng_data *vdso_rng_data; - -static __always_inline -struct vdso_data *__loongarch_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __loongarch_get_k_vdso_data - -static __always_inline -struct vdso_rng_data *__loongarch_get_k_vdso_rng_data(void) -{ - return vdso_rng_data; -} -#define __arch_get_k_vdso_rng_data __loongarch_get_k_vdso_rng_data - /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c index 8be1c38ad8eb2..6f1524bba957e 100644 --- a/arch/loongarch/kernel/asm-offsets.c +++ b/arch/loongarch/kernel/asm-offsets.c @@ -315,6 +315,6 @@ static void __used output_vdso_defines(void) { COMMENT("LoongArch vDSO offsets."); - DEFINE(__VVAR_PAGES, VVAR_NR_PAGES); + DEFINE(__VDSO_PAGES, VDSO_NR_PAGES); BLANK(); } diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index 05e5fbac102a9..10cf1608c7b32 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -25,18 +25,6 @@ extern char vdso_start[], vdso_end[]; -/* Kernel-provided data used by the VDSO. */ -static union vdso_data_store generic_vdso_data __page_aligned_data; - -static union { - u8 page[LOONGARCH_VDSO_DATA_SIZE]; - struct loongarch_vdso_data vdata; -} loongarch_vdso_data __page_aligned_data; - -struct vdso_data *vdso_data = generic_vdso_data.data; -struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; -struct vdso_rng_data *vdso_rng_data = &loongarch_vdso_data.vdata.rng_data; - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { current->mm->context.vdso = (void *)(new_vma->vm_start); @@ -44,53 +32,12 @@ static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struc return 0; } -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - unsigned long pfn; - struct page *timens_page = find_timens_vvar_page(vma); - - switch (vmf->pgoff) { - case VVAR_GENERIC_PAGE_OFFSET: - if (!timens_page) - pfn = sym_to_pfn(vdso_data); - else - pfn = page_to_pfn(timens_page); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace specific - * VVAR is mapped with the VVAR_GENERIC_PAGE_OFFSET and the real - * VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - else - pfn = sym_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - case VVAR_LOONGARCH_PAGES_START ... VVAR_LOONGARCH_PAGES_END: - pfn = sym_to_pfn(&loongarch_vdso_data) + vmf->pgoff - VVAR_LOONGARCH_PAGES_START; - break; - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - struct loongarch_vdso_info vdso_info = { .vdso = vdso_start, .code_mapping = { .name = "[vdso]", .mremap = vdso_mremap, }, - .data_mapping = { - .name = "[vvar]", - .fault = vvar_fault, - }, .offset_sigreturn = vdso_offset_sigreturn, }; @@ -101,7 +48,7 @@ static int __init init_vdso(void) BUG_ON(!PAGE_ALIGNED(vdso_info.vdso)); for_each_possible_cpu(cpu) - vdso_pdata[cpu].node = cpu_to_node(cpu); + vdso_k_arch_data->pdata[cpu].node = cpu_to_node(cpu); vdso_info.size = PAGE_ALIGN(vdso_end - vdso_start); vdso_info.code_mapping.pages = @@ -115,37 +62,6 @@ static int __init init_vdso(void) } subsys_initcall(init_vdso); -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The vvar mapping contains data for a specific time namespace, so when a - * task changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &vdso_info.data_mapping)) - zap_vma_pages(vma); - } - mmap_read_unlock(mm); - - return 0; -} -#endif - static unsigned long vdso_base(void) { unsigned long base = STACK_TOP; @@ -181,9 +97,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto out; } - vma = _install_special_mapping(mm, data_addr, VVAR_SIZE, - VM_READ | VM_MAYREAD | VM_PFNMAP, - &info->data_mapping); + vma = vdso_install_vvar_mapping(mm, data_addr); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S index 160cfaef2de45..8ff9864999475 100644 --- a/arch/loongarch/vdso/vdso.lds.S +++ b/arch/loongarch/vdso/vdso.lds.S @@ -5,6 +5,7 @@ */ #include #include +#include OUTPUT_FORMAT("elf64-loongarch", "elf64-loongarch", "elf64-loongarch") @@ -12,11 +13,8 @@ OUTPUT_ARCH(loongarch) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif - PROVIDE(_loongarch_data = _vdso_data + 2 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c index 0db51258b2a7c..5301cd9d0f839 100644 --- a/arch/loongarch/vdso/vgetcpu.c +++ b/arch/loongarch/vdso/vgetcpu.c @@ -19,27 +19,19 @@ static __always_inline int read_cpu_id(void) return cpu_id; } -static __always_inline const struct vdso_pcpu_data *get_pcpu_data(void) -{ - return _loongarch_data.pdata; -} - extern int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused); int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) { int cpu_id; - const struct vdso_pcpu_data *data; cpu_id = read_cpu_id(); if (cpu) *cpu = cpu_id; - if (node) { - data = get_pcpu_data(); - *node = data[cpu_id].node; - } + if (node) + *node = vdso_u_arch_data.pdata[cpu_id].node; return 0; } From 31e9fa2ba9ad64ef09eb4a8326e5cc8af3f6fa1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:43 +0100 Subject: [PATCH 11/55] arm: vdso: Switch to generic storage implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic storage implementation provides the same features as the custom one. However it can be shared between architectures, making maintenance easier. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-11-13a4669dfc8c@linutronix.de --- arch/arm/include/asm/vdso.h | 2 ++ arch/arm/include/asm/vdso/gettimeofday.h | 7 +---- arch/arm/include/asm/vdso/vsyscall.h | 12 ++------- arch/arm/kernel/asm-offsets.c | 4 --- arch/arm/kernel/vdso.c | 34 +++++------------------- arch/arm/mm/Kconfig | 1 + arch/arm/vdso/vdso.lds.S | 4 +-- 7 files changed, 15 insertions(+), 49 deletions(-) diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h index 5b85889f82eeb..88364a6727ffc 100644 --- a/arch/arm/include/asm/vdso.h +++ b/arch/arm/include/asm/vdso.h @@ -4,6 +4,8 @@ #ifdef __KERNEL__ +#define __VDSO_PAGES 4 + #ifndef __ASSEMBLY__ struct mm_struct; diff --git a/arch/arm/include/asm/vdso/gettimeofday.h b/arch/arm/include/asm/vdso/gettimeofday.h index 592d3d015ca72..1e9f81639c88c 100644 --- a/arch/arm/include/asm/vdso/gettimeofday.h +++ b/arch/arm/include/asm/vdso/gettimeofday.h @@ -112,7 +112,7 @@ static inline bool arm_vdso_hres_capable(void) #define __arch_vdso_hres_capable arm_vdso_hres_capable static __always_inline u64 __arch_get_hw_counter(int clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { #ifdef CONFIG_ARM_ARCH_TIMER u64 cycle_now; @@ -135,11 +135,6 @@ static __always_inline u64 __arch_get_hw_counter(int clock_mode, #endif } -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h index 705414710dcdb..4e7226ad02ec4 100644 --- a/arch/arm/include/asm/vdso/vsyscall.h +++ b/arch/arm/include/asm/vdso/vsyscall.h @@ -7,22 +7,14 @@ #include #include -extern struct vdso_data *vdso_data; extern bool cntvct_ok; static __always_inline -struct vdso_data *__arm_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __arm_get_k_vdso_data - -static __always_inline -void __arm_sync_vdso_data(struct vdso_data *vdata) +void __arch_sync_vdso_time_data(struct vdso_time_data *vdata) { flush_dcache_page(virt_to_page(vdata)); } -#define __arch_sync_vdso_data __arm_sync_vdso_data +#define __arch_sync_vdso_time_data __arch_sync_vdso_time_data /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 4853875740d0f..123f4a8ef4466 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -153,10 +153,6 @@ int main(void) DEFINE(CACHE_WRITEBACK_ORDER, __CACHE_WRITEBACK_ORDER); DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE); BLANK(); -#ifdef CONFIG_VDSO - DEFINE(VDSO_DATA_SIZE, sizeof(union vdso_data_store)); -#endif - BLANK(); #ifdef CONFIG_ARM_MPU DEFINE(MPU_RNG_INFO_RNGS, offsetof(struct mpu_rgn_info, rgns)); DEFINE(MPU_RNG_INFO_USED, offsetof(struct mpu_rgn_info, used)); diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 29dd2f3c62fec..325448ffbba0c 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -33,15 +34,6 @@ extern char vdso_start[], vdso_end[]; /* Total number of pages needed for the data and text portions of the VDSO. */ unsigned int vdso_total_pages __ro_after_init; -static union vdso_data_store vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = vdso_data_store.data; - -static struct page *vdso_data_page __ro_after_init; -static const struct vm_special_mapping vdso_data_mapping = { - .name = "[vvar]", - .pages = &vdso_data_page, -}; - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { @@ -192,9 +184,6 @@ static int __init vdso_init(void) if (vdso_text_pagelist == NULL) return -ENOMEM; - /* Grab the VDSO data page. */ - vdso_data_page = virt_to_page(vdso_data); - /* Grab the VDSO text pages. */ for (i = 0; i < text_pages; i++) { struct page *page; @@ -205,7 +194,7 @@ static int __init vdso_init(void) vdso_text_mapping.pages = vdso_text_pagelist; - vdso_total_pages = 1; /* for the data/vvar page */ + vdso_total_pages = VDSO_NR_PAGES; /* for the data/vvar pages */ vdso_total_pages += text_pages; cntvct_ok = cntvct_functional(); @@ -216,16 +205,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -static int install_vvar(struct mm_struct *mm, unsigned long addr) -{ - struct vm_area_struct *vma; - - vma = _install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ | VM_MAYREAD, - &vdso_data_mapping); - - return PTR_ERR_OR_ZERO(vma); -} +static_assert(__VDSO_PAGES == VDSO_NR_PAGES); /* assumes mmap_lock is write-locked */ void arm_install_vdso(struct mm_struct *mm, unsigned long addr) @@ -238,12 +218,12 @@ void arm_install_vdso(struct mm_struct *mm, unsigned long addr) if (vdso_text_pagelist == NULL) return; - if (install_vvar(mm, addr)) + if (IS_ERR(vdso_install_vvar_mapping(mm, addr))) return; - /* Account for vvar page. */ - addr += PAGE_SIZE; - len = (vdso_total_pages - 1) << PAGE_SHIFT; + /* Account for vvar pages. */ + addr += VDSO_NR_PAGES * PAGE_SIZE; + len = (vdso_total_pages - VDSO_NR_PAGES) << PAGE_SHIFT; vma = _install_special_mapping(mm, addr, len, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 2b6f50dd54784..5c1023a6d78c1 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -928,6 +928,7 @@ config VDSO select GENERIC_TIME_VSYSCALL select GENERIC_VDSO_32 select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_DATA_STORE help Place in the process address space an ELF shared object providing fast implementations of gettimeofday and diff --git a/arch/arm/vdso/vdso.lds.S b/arch/arm/vdso/vdso.lds.S index 9bfa0f52923c8..7c08371f44002 100644 --- a/arch/arm/vdso/vdso.lds.S +++ b/arch/arm/vdso/vdso.lds.S @@ -11,16 +11,16 @@ */ #include -#include #include #include +#include OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") OUTPUT_ARCH(arm) SECTIONS { - PROVIDE(_vdso_data = . - VDSO_DATA_SIZE); + VDSO_VVAR_SYMS . = SIZEOF_HEADERS; From 9bf39a65b20cd03b10c618b1b1c4703538a5b564 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:44 +0100 Subject: [PATCH 12/55] s390/vdso: Switch to generic storage implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic storage implementation provides the same features as the custom one. However it can be shared between architectures, making maintenance easier. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Heiko Carstens Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-12-13a4669dfc8c@linutronix.de --- arch/s390/Kconfig | 1 + arch/s390/include/asm/vdso.h | 4 +- arch/s390/include/asm/vdso/getrandom.h | 12 --- arch/s390/include/asm/vdso/gettimeofday.h | 15 +--- arch/s390/include/asm/vdso/vsyscall.h | 20 ----- arch/s390/kernel/time.c | 6 +- arch/s390/kernel/vdso.c | 97 ++--------------------- arch/s390/kernel/vdso32/vdso32.lds.S | 7 +- arch/s390/kernel/vdso64/vdso64.lds.S | 8 +- 9 files changed, 17 insertions(+), 153 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9c9ec08d78c71..7ed6f229250cf 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -166,6 +166,7 @@ config S390 select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select GENERIC_IOREMAP if PCI select HAVE_ALIGNED_STRUCT_PAGE diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 92c73e4d97a93..420a073fdde5d 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -6,13 +6,11 @@ #ifndef __ASSEMBLY__ -extern struct vdso_data *vdso_data; - int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #define VDSO_VERSION_STRING LINUX_2.6.29 diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h index 36355af7160be..f8713ce39bb2f 100644 --- a/arch/s390/include/asm/vdso/getrandom.h +++ b/arch/s390/include/asm/vdso/getrandom.h @@ -23,18 +23,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags); } -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - /* - * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace - * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ - * PAGE_OFFSET points to the real VVAR page. - */ - if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) - return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - return &_vdso_rng_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index 7937765ccfa5b..fb4564308e9d7 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -14,12 +14,7 @@ #include -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) +static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd) { u64 adj, now; @@ -49,12 +44,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return syscall2(__NR_clock_getres, (long)clkid, (long)ts); } -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif - #endif diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h index 3eb576ecd3bd9..d346ebe513015 100644 --- a/arch/s390/include/asm/vdso/vsyscall.h +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -2,32 +2,12 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#define __VDSO_RND_DATA_OFFSET 768 - #ifndef __ASSEMBLY__ #include #include #include -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES -}; - -static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __s390_get_k_vdso_data - -static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void) -{ - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; -} -#define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data - /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e9f47c3a61978..41ca3586b19f6 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -84,7 +84,7 @@ void __init time_early_init(void) /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data[cs].arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -390,8 +390,8 @@ static void clock_sync_global(long delta) tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; + vdso_k_time_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data[cs].arch_data.tod_steering_delta = tod_steering_delta; } /* Update LPAR offset. */ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde012..70c8f9ad13cde 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -16,8 +16,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -26,85 +26,6 @@ extern char vdso64_start[], vdso64_end[]; extern char vdso32_start[], vdso32_end[]; -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso64_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; @@ -157,14 +73,11 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| @@ -220,7 +133,7 @@ unsigned long vdso_text_size(void) unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index c916c4f73f766..9630d58c20806 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -6,16 +6,15 @@ #include #include +#include OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390:31-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index ec42b7d9cb530..e4f6551ae8988 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -7,17 +7,15 @@ #include #include #include +#include OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text From 69896119dc9d6cdb7db3914a88c4a7b31e342a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:45 +0100 Subject: [PATCH 13/55] MIPS: vdso: Switch to generic storage implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic storage implementation provides the same features as the custom one. However it can be shared between architectures, making maintenance easier. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-13-13a4669dfc8c@linutronix.de --- arch/mips/Kconfig | 1 + arch/mips/include/asm/vdso/gettimeofday.h | 9 +++-- arch/mips/include/asm/vdso/vdso.h | 19 ++++----- arch/mips/include/asm/vdso/vsyscall.h | 14 +------ arch/mips/kernel/vdso.c | 47 +++++++++-------------- arch/mips/vdso/vdso.lds.S | 5 ++- 6 files changed, 38 insertions(+), 57 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1924f2d839323..b51168e319ea3 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -51,6 +51,7 @@ config MIPS select GENERIC_SMP_IDLE_THREAD select GENERIC_IDLE_POLL_SETUP select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT select HAS_IOPORT if !NO_IOPORT_MAP || ISA select HAVE_ARCH_COMPILER_H diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index 44a45f3fa4b01..fd32baa30e172 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -167,7 +167,7 @@ static __always_inline u64 read_r4k_count(void) #ifdef CONFIG_CLKSRC_MIPS_GIC -static __always_inline u64 read_gic_count(const struct vdso_data *data) +static __always_inline u64 read_gic_count(const struct vdso_time_data *data) { void __iomem *gic = get_gic(data); u32 hi, hi2, lo; @@ -184,7 +184,7 @@ static __always_inline u64 read_gic_count(const struct vdso_data *data) #endif static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { #ifdef CONFIG_CSRC_R4K if (clock_mode == VDSO_CLOCKMODE_R4K) @@ -209,10 +209,11 @@ static inline bool mips_vdso_hres_capable(void) } #define __arch_vdso_hres_capable mips_vdso_hres_capable -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) { - return get_vdso_data(); + return get_vdso_time_data(); } +#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data #endif /* !__ASSEMBLY__ */ diff --git a/arch/mips/include/asm/vdso/vdso.h b/arch/mips/include/asm/vdso/vdso.h index 6cd88191fefa9..acd0efcd3d93e 100644 --- a/arch/mips/include/asm/vdso/vdso.h +++ b/arch/mips/include/asm/vdso/vdso.h @@ -5,16 +5,18 @@ */ #include +#include + +#define __VDSO_PAGES 4 #ifndef __ASSEMBLY__ #include -#include #include -static inline unsigned long get_vdso_base(void) +static inline const struct vdso_time_data *get_vdso_time_data(void) { - unsigned long addr; + const struct vdso_time_data *addr; /* * We can't use cpu_has_mips_r6 since it needs the cpu_data[] @@ -27,7 +29,7 @@ static inline unsigned long get_vdso_base(void) * We can't use addiupc because there is no label-label * support for the addiupc reloc */ - __asm__("lapc %0, _start \n" + __asm__("lapc %0, vdso_u_time_data \n" : "=r" (addr) : :); #else /* @@ -46,7 +48,7 @@ static inline unsigned long get_vdso_base(void) " .set noreorder \n" " bal 1f \n" " nop \n" - " .word _start - . \n" + " .word vdso_u_time_data - . \n" "1: lw %0, 0($31) \n" " " STR(PTR_ADDU) " %0, $31, %0 \n" " .set pop \n" @@ -58,14 +60,9 @@ static inline unsigned long get_vdso_base(void) return addr; } -static inline const struct vdso_data *get_vdso_data(void) -{ - return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE); -} - #ifdef CONFIG_CLKSRC_MIPS_GIC -static inline void __iomem *get_gic(const struct vdso_data *data) +static inline void __iomem *get_gic(const struct vdso_time_data *data) { return (void __iomem *)((unsigned long)data & PAGE_MASK) - PAGE_SIZE; } diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h index a4582870aaea4..2b1debb62dee0 100644 --- a/arch/mips/include/asm/vdso/vsyscall.h +++ b/arch/mips/include/asm/vdso/vsyscall.h @@ -2,22 +2,12 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H +#include + #ifndef __ASSEMBLY__ #include -extern struct vdso_data *vdso_data; - -/* - * Update the vDSO data page to keep in sync with kernel timekeeping. - */ -static __always_inline -struct vdso_data *__mips_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __mips_get_k_vdso_data - /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 75c9d3618f588..de096777172f0 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -23,20 +24,7 @@ #include #include -/* Kernel-provided data used by the VDSO. */ -static union vdso_data_store mips_vdso_data __page_aligned_data; -struct vdso_data *vdso_data = mips_vdso_data.data; - -/* - * Mapping for the VDSO data/GIC pages. The real pages are mapped manually, as - * what we map and where within the area they are mapped is determined at - * runtime. - */ -static struct page *no_pages[] = { NULL }; -static struct vm_special_mapping vdso_vvar_mapping = { - .name = "[vvar]", - .pages = no_pages, -}; +static_assert(VDSO_NR_PAGES == __VDSO_PAGES); static void __init init_vdso_image(struct mips_vdso_image *image) { @@ -90,7 +78,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mips_vdso_image *image = current->thread.abi->vdso; struct mm_struct *mm = current->mm; - unsigned long gic_size, vvar_size, size, base, data_addr, vdso_addr, gic_pfn, gic_base; + unsigned long gic_size, size, base, data_addr, vdso_addr, gic_pfn, gic_base; struct vm_area_struct *vma; int ret; @@ -119,8 +107,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) * the counter registers at the start. */ gic_size = mips_gic_present() ? PAGE_SIZE : 0; - vvar_size = gic_size + PAGE_SIZE; - size = vvar_size + image->size; + size = gic_size + VDSO_NR_PAGES * PAGE_SIZE + image->size; /* * Find a region that's large enough for us to perform the @@ -143,15 +130,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) */ if (cpu_has_dc_aliases) { base = __ALIGN_MASK(base, shm_align_mask); - base += ((unsigned long)vdso_data - gic_size) & shm_align_mask; + base += ((unsigned long)vdso_k_time_data - gic_size) & shm_align_mask; } data_addr = base + gic_size; - vdso_addr = data_addr + PAGE_SIZE; + vdso_addr = data_addr + VDSO_NR_PAGES * PAGE_SIZE; - vma = _install_special_mapping(mm, base, vvar_size, - VM_READ | VM_MAYREAD, - &vdso_vvar_mapping); + vma = vdso_install_vvar_mapping(mm, data_addr); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out; @@ -161,6 +146,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (gic_size) { gic_base = (unsigned long)mips_gic_base + MIPS_GIC_USER_OFS; gic_pfn = PFN_DOWN(__pa(gic_base)); + static const struct vm_special_mapping gic_mapping = { + .name = "[gic]", + .pages = (struct page **) { NULL }, + }; + + vma = _install_special_mapping(mm, base, gic_size, VM_READ | VM_MAYREAD, + &gic_mapping); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out; + } ret = io_remap_pfn_range(vma, base, gic_pfn, gic_size, pgprot_noncached(vma->vm_page_prot)); @@ -168,13 +164,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto out; } - /* Map data page. */ - ret = remap_pfn_range(vma, data_addr, - virt_to_phys(vdso_data) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot); - if (ret) - goto out; - /* Map VDSO image. */ vma = _install_special_mapping(mm, vdso_addr, image->size, VM_READ | VM_EXEC | diff --git a/arch/mips/vdso/vdso.lds.S b/arch/mips/vdso/vdso.lds.S index 836465e3bcb8a..c8bbe56d89cb0 100644 --- a/arch/mips/vdso/vdso.lds.S +++ b/arch/mips/vdso/vdso.lds.S @@ -5,6 +5,8 @@ */ #include +#include +#include #if _MIPS_SIM == _MIPS_SIM_ABI64 OUTPUT_FORMAT("elf64-tradlittlemips", "elf64-tradbigmips", "elf64-tradlittlemips") @@ -18,7 +20,8 @@ OUTPUT_ARCH(mips) SECTIONS { - PROVIDE(_start = .); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; /* From 223970df2bff4caa308246404383baee2b79dba2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:46 +0100 Subject: [PATCH 14/55] powerpc/vdso: Switch to generic storage implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic storage implementation provides the same features as the custom one. However it can be shared between architectures, making maintenance easier. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Christophe Leroy Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-14-13a4669dfc8c@linutronix.de --- arch/powerpc/Kconfig | 2 + arch/powerpc/include/asm/vdso.h | 1 + arch/powerpc/include/asm/vdso/arch_data.h | 37 ++++++ arch/powerpc/include/asm/vdso/getrandom.h | 11 +- arch/powerpc/include/asm/vdso/gettimeofday.h | 29 ++--- arch/powerpc/include/asm/vdso/vsyscall.h | 13 --- arch/powerpc/include/asm/vdso_datapage.h | 44 +------ arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/time.c | 2 +- arch/powerpc/kernel/vdso.c | 115 ++----------------- arch/powerpc/kernel/vdso/cacheflush.S | 2 +- arch/powerpc/kernel/vdso/datapage.S | 4 +- arch/powerpc/kernel/vdso/gettimeofday.S | 4 +- arch/powerpc/kernel/vdso/vdso32.lds.S | 4 +- arch/powerpc/kernel/vdso/vdso64.lds.S | 4 +- arch/powerpc/kernel/vdso/vgettimeofday.c | 14 +-- 16 files changed, 89 insertions(+), 198 deletions(-) create mode 100644 arch/powerpc/include/asm/vdso/arch_data.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 424f188e62d98..8a2a6e403eb1e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -159,6 +159,7 @@ config PPC select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UBSAN + select ARCH_HAS_VDSO_ARCH_DATA select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAVE_EXTRA_ELF_NOTES if SPU_BASE select ARCH_KEEP_MEMBLOCK @@ -209,6 +210,7 @@ config PPC select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select HAS_IOPORT if PCI select HAVE_ARCH_AUDITSYSCALL diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 8d972bc98b55f..1ca23fbfe087a 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_VDSO_H #define VDSO_VERSION_STRING LINUX_2.6.15 +#define __VDSO_PAGES 4 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/vdso/arch_data.h b/arch/powerpc/include/asm/vdso/arch_data.h new file mode 100644 index 0000000000000..c240a6b875181 --- /dev/null +++ b/arch/powerpc/include/asm/vdso/arch_data.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002 Peter Bergner , IBM + * Copyright (C) 2005 Benjamin Herrenschmidy , + * IBM Corp. + */ +#ifndef _ASM_POWERPC_VDSO_ARCH_DATA_H +#define _ASM_POWERPC_VDSO_ARCH_DATA_H + +#include +#include + +#define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) + +#ifdef CONFIG_PPC64 + +struct vdso_arch_data { + __u64 tb_ticks_per_sec; /* Timebase tics / sec */ + __u32 dcache_block_size; /* L1 d-cache block size */ + __u32 icache_block_size; /* L1 i-cache block size */ + __u32 dcache_log_block_size; /* L1 d-cache log block size */ + __u32 icache_log_block_size; /* L1 i-cache log block size */ + __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ + __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */ +}; + +#else /* CONFIG_PPC64 */ + +struct vdso_arch_data { + __u64 tb_ticks_per_sec; /* Timebase tics / sec */ + __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ + __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ +}; + +#endif /* CONFIG_PPC64 */ + +#endif /* _ASM_POWERPC_VDSO_ARCH_DATA_H */ diff --git a/arch/powerpc/include/asm/vdso/getrandom.h b/arch/powerpc/include/asm/vdso/getrandom.h index 80ce0709725eb..067a5396aac6e 100644 --- a/arch/powerpc/include/asm/vdso/getrandom.h +++ b/arch/powerpc/include/asm/vdso/getrandom.h @@ -43,20 +43,21 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig (unsigned long)len, (unsigned long)flags); } -static __always_inline struct vdso_rng_data *__arch_get_vdso_rng_data(void) +static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(void) { - struct vdso_arch_data *data; + struct vdso_rng_data *data; asm ( " bcl 20, 31, .+4 ;" "0: mflr %0 ;" - " addis %0, %0, (_vdso_datapage - 0b)@ha ;" - " addi %0, %0, (_vdso_datapage - 0b)@l ;" + " addis %0, %0, (vdso_u_rng_data - 0b)@ha ;" + " addi %0, %0, (vdso_u_rng_data - 0b)@l ;" : "=r" (data) : : "lr" ); - return &data->rng_data; + return data; } +#define __arch_get_vdso_u_rng_data __arch_get_vdso_u_rng_data ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index c6390890a60c2..dc955f2e0cc51 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -94,22 +94,12 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) #endif static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return get_tb(); } -const struct vdso_data *__arch_get_vdso_data(void); - -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return (void *)vd + (1U << CONFIG_PAGE_SHIFT); -} -#endif - -static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +static inline bool vdso_clocksource_ok(const struct vdso_time_data *vd) { return true; } @@ -135,21 +125,22 @@ static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift) #ifdef __powerpc64__ int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd); + const struct vdso_time_data *vd); int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd); + const struct vdso_time_data *vd); #else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, - const struct vdso_data *vd); + const struct vdso_time_data *vd); int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd); + const struct vdso_time_data *vd); int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, - const struct vdso_data *vd); + const struct vdso_time_data *vd); #endif int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd); + const struct vdso_time_data *vd); __kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, - const struct vdso_data *vd); + const struct vdso_time_data *vd); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h index 48560a1195595..c2c9ae1b22e71 100644 --- a/arch/powerpc/include/asm/vdso/vsyscall.h +++ b/arch/powerpc/include/asm/vdso/vsyscall.h @@ -6,19 +6,6 @@ #include -static __always_inline -struct vdso_data *__arch_get_k_vdso_data(void) -{ - return vdso_data->data; -} -#define __arch_get_k_vdso_data __arch_get_k_vdso_data - -static __always_inline -struct vdso_rng_data *__arch_get_k_vdso_rng_data(void) -{ - return &vdso_data->rng_data; -} - /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index a202f5b634795..95d45a50355d2 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -11,56 +11,18 @@ #ifndef __ASSEMBLY__ -#include -#include #include -#define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32) - -#ifdef CONFIG_PPC64 - -struct vdso_arch_data { - __u64 tb_ticks_per_sec; /* Timebase tics / sec */ - __u32 dcache_block_size; /* L1 d-cache block size */ - __u32 icache_block_size; /* L1 i-cache block size */ - __u32 dcache_log_block_size; /* L1 d-cache log block size */ - __u32 icache_log_block_size; /* L1 i-cache log block size */ - __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ - __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */ - - struct vdso_rng_data rng_data; - - struct vdso_data data[CS_BASES] __aligned(1 << CONFIG_PAGE_SHIFT); -}; - -#else /* CONFIG_PPC64 */ - -struct vdso_arch_data { - __u64 tb_ticks_per_sec; /* Timebase tics / sec */ - __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ - __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ - struct vdso_rng_data rng_data; - - struct vdso_data data[CS_BASES] __aligned(1 << CONFIG_PAGE_SHIFT); -}; - -#endif /* CONFIG_PPC64 */ - -extern struct vdso_arch_data *vdso_data; - #else /* __ASSEMBLY__ */ -.macro get_datapage ptr offset=0 +.macro get_datapage ptr symbol bcl 20, 31, .+4 999: mflr \ptr - addis \ptr, \ptr, (_vdso_datapage - 999b + \offset)@ha - addi \ptr, \ptr, (_vdso_datapage - 999b + \offset)@l + addis \ptr, \ptr, (\symbol - 999b)@ha + addi \ptr, \ptr, (\symbol - 999b)@l .endm -#include -#include - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7a390bd4f4af3..b3048f6d3822c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -334,7 +334,6 @@ int main(void) #endif /* ! CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 0727332ad86fb..15784c5c95c77 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -950,7 +950,7 @@ void __init time_init(void) sys_tz.tz_dsttime = 0; } - vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + vdso_k_arch_data->tb_ticks_per_sec = tb_ticks_per_sec; #ifdef CONFIG_PPC64_PROC_SYSTEMCFG systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; #endif diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 43379365ce1b3..219d67bcf747e 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -32,6 +32,8 @@ #include #include +static_assert(__VDSO_PAGES == VDSO_NR_PAGES); + /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) @@ -40,24 +42,6 @@ extern char vdso64_start, vdso64_end; long sys_ni_syscall(void); -/* - * The vdso data page (aka. systemcfg for old ppc64 fans) is here. - * Once the early boot kernel code no longer needs to muck around - * with it, it will become dynamically allocated - */ -static union { - struct vdso_arch_data data; - u8 page[2 * PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_arch_data *vdso_data = &vdso_data_store.data; - -enum vvar_pages { - VVAR_BASE_PAGE_OFFSET, - VVAR_TIME_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, unsigned long text_size) { @@ -96,14 +80,6 @@ static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struc mm->context.vdso = NULL; } -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf); - -static struct vm_special_mapping vvar_spec __ro_after_init = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso32_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso32_mremap, @@ -116,73 +92,6 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = { .close = vdso_close, }; -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return vvar_page; -} - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &vvar_spec)) - zap_vma_pages(vma); - } - mmap_read_unlock(mm); - - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_BASE_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - break; - case VVAR_TIME_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = virt_to_pfn(vdso_data->data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data->data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -191,7 +100,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int { unsigned long vdso_size, vdso_base, mappings_size; struct vm_special_mapping *vdso_spec; - unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE; + unsigned long vvar_size = VDSO_NR_PAGES * PAGE_SIZE; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -217,9 +126,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int /* Add required alignment. */ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); - vma = _install_special_mapping(mm, vdso_base, vvar_size, - VM_READ | VM_MAYREAD | VM_IO | - VM_DONTDUMP | VM_PFNMAP, &vvar_spec); + vma = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -299,10 +206,10 @@ static void __init vdso_setup_syscall_map(void) for (i = 0; i < NR_syscalls; i++) { if (sys_call_table[i] != (void *)&sys_ni_syscall) - vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); + vdso_k_arch_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && compat_sys_call_table[i] != (void *)&sys_ni_syscall) - vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); + vdso_k_arch_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } @@ -352,10 +259,10 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) static int __init vdso_init(void) { #ifdef CONFIG_PPC64 - vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; - vdso_data->icache_block_size = ppc64_caches.l1i.block_size; - vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; - vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; + vdso_k_arch_data->dcache_block_size = ppc64_caches.l1d.block_size; + vdso_k_arch_data->icache_block_size = ppc64_caches.l1i.block_size; + vdso_k_arch_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; + vdso_k_arch_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; #endif /* CONFIG_PPC64 */ vdso_setup_syscall_map(); diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index 0085ae464dac9..488d3ade11e64 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -30,7 +30,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10 + get_datapage r10 vdso_u_arch_data mtlr r12 .cfi_restore lr #endif diff --git a/arch/powerpc/kernel/vdso/datapage.S b/arch/powerpc/kernel/vdso/datapage.S index db8e167f01667..d23b2e8e2a34c 100644 --- a/arch/powerpc/kernel/vdso/datapage.S +++ b/arch/powerpc/kernel/vdso/datapage.S @@ -28,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. r4,r3 - get_datapage r3 + get_datapage r3 vdso_u_arch_data mtlr r12 #ifdef __powerpc64__ addi r3,r3,CFG_SYSCALL_MAP64 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3 + get_datapage r3 vdso_u_arch_data #ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) #endif diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 5333848322ca6..79c9672124447 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -33,9 +33,9 @@ .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif .ifeq \call_time - get_datapage r5 VDSO_DATA_OFFSET + get_datapage r5 vdso_u_time_data .else - get_datapage r4 VDSO_DATA_OFFSET + get_datapage r4 vdso_u_time_data .endif bl CFUNC(DOTSYM(\funct)) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index 1a1b0b6d681a9..72a1012b8a205 100644 --- a/arch/powerpc/kernel/vdso/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -16,7 +17,8 @@ OUTPUT_ARCH(powerpc:common) SECTIONS { - PROVIDE(_vdso_datapage = . - 3 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index e21b5506cad62..32102a05eaa7e 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -16,7 +17,8 @@ OUTPUT_ARCH(powerpc:common64) SECTIONS { - PROVIDE(_vdso_datapage = . - 3 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vdso/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c index 55a287c9a7366..6f5167d81af5f 100644 --- a/arch/powerpc/kernel/vdso/vgettimeofday.c +++ b/arch/powerpc/kernel/vdso/vgettimeofday.c @@ -7,43 +7,43 @@ #ifdef __powerpc64__ int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime_data(vd, clock, ts); } int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_getres_data(vd, clock_id, res); } #else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime32_data(vd, clock, ts); } int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime_data(vd, clock, ts); } int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_getres_time32_data(vd, clock_id, res); } #endif int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_gettimeofday_data(vd, tv, tz); } -__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_time_data *vd) { return __cvdso_time_data(vd, time); } From dafde29605ebf214747e7602f4f22d3f617373a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:47 +0100 Subject: [PATCH 15/55] x86/vdso: Switch to generic storage implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic storage implementation provides the same features as the custom one. However it can be shared between architectures, making maintenance easier. This switch also moves the random state data out of the time data page. The currently used hardcoded __VDSO_RND_DATA_OFFSET does not take into account changes to the time data page layout. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-15-13a4669dfc8c@linutronix.de --- arch/x86/Kconfig | 1 + arch/x86/entry/vdso/vdso-layout.lds.S | 14 +-- arch/x86/entry/vdso/vma.c | 123 ++--------------------- arch/x86/include/asm/vdso/getrandom.h | 10 -- arch/x86/include/asm/vdso/gettimeofday.h | 25 +---- arch/x86/include/asm/vdso/vsyscall.h | 24 +---- 6 files changed, 19 insertions(+), 178 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 87198d957e2f1..e66ffd941d17d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -178,6 +178,7 @@ config X86 select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL select GENERIC_GETTIMEOFDAY + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select GENERIC_VDSO_OVERFLOW_PROTECT select GUP_GET_PXX_LOW_HIGH if X86_PAE diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 918606ff92a98..e5cecdb0fedf0 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include /* * Linker script for vDSO. This is an ELF shared object prelinked to @@ -17,17 +18,16 @@ SECTIONS * segment. */ - vvar_start = . - __VVAR_PAGES * PAGE_SIZE; - vvar_page = vvar_start; + VDSO_VVAR_SYMS - vdso_rng_data = vvar_page + __VDSO_RND_DATA_OFFSET; - - timens_page = vvar_start + PAGE_SIZE; - - vclock_pages = VDSO_VCLOCK_PAGES_START(vvar_start); + vclock_pages = VDSO_VCLOCK_PAGES_START(vdso_u_data); pvclock_page = vclock_pages + VDSO_PAGE_PVCLOCK_OFFSET * PAGE_SIZE; hvclock_page = vclock_pages + VDSO_PAGE_HVCLOCK_OFFSET * PAGE_SIZE; + /* For compatibility with vdso2c */ + vvar_page = vdso_u_data; + vvar_start = vdso_u_data; + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index aa62949335ece..7245e95c573e1 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -27,13 +27,7 @@ #include #include -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)vvar_page; -} - -static union vdso_data_store vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = vdso_data_store.data; +static_assert(VDSO_NR_PAGES + VDSO_NR_VCLOCK_PAGES == __VDSO_PAGES); unsigned int vclocks_used __read_mostly; @@ -54,7 +48,6 @@ int __init init_vdso_image(const struct vdso_image *image) return 0; } -static const struct vm_special_mapping vvar_mapping; struct linux_binprm; static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, @@ -98,99 +91,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -#ifdef CONFIG_TIME_NS -/* - * The vvar page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &vvar_mapping)) - zap_vma_pages(vma); - } - mmap_read_unlock(mm); - - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - const struct vdso_image *image = vma->vm_mm->context.vdso_image; - unsigned long pfn; - long sym_offset; - - if (!image) - return VM_FAULT_SIGBUS; - - sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) + - image->sym_vvar_start; - - /* - * Sanity check: a symbol offset of zero means that the page - * does not exist for this vdso image, not that the page is at - * offset zero relative to the text mapping. This should be - * impossible here, because sym_offset should only be zero for - * the page past the end of the vvar mapping. - */ - if (sym_offset == 0) - return VM_FAULT_SIGBUS; - - if (sym_offset == image->sym_vvar_page) { - struct page *timens_page = find_timens_vvar_page(vma); - - pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT; - - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the sym_vvar_page offset and - * the real VVAR page is mapped with the sym_timens_page - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (timens_page) { - unsigned long addr; - vm_fault_t err; - - /* - * Optimization: inside time namespace pre-fault - * VVAR page too. As on timens page there are only - * offsets for clocks on VVAR, it'll be faulted - * shortly by VDSO code. - */ - addr = vmf->address + (image->sym_timens_page - sym_offset); - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - - pfn = page_to_pfn(timens_page); - } - - return vmf_insert_pfn(vma, vmf->address, pfn); - - } else if (sym_offset == image->sym_timens_page) { - struct page *timens_page = find_timens_vvar_page(vma); - - if (!timens_page) - return VM_FAULT_SIGBUS; - - pfn = __pa_symbol(vdso_data) >> PAGE_SHIFT; - return vmf_insert_pfn(vma, vmf->address, pfn); - } - - return VM_FAULT_SIGBUS; -} - static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { @@ -212,7 +112,6 @@ static vm_fault_t vvar_vclock_fault(const struct vm_special_mapping *sm, case VDSO_PAGE_HVCLOCK_OFFSET: { unsigned long pfn = hv_get_tsc_pfn(); - if (pfn && vclock_was_used(VDSO_CLOCKMODE_HVCLOCK)) return vmf_insert_pfn(vma, vmf->address, pfn); break; @@ -228,10 +127,6 @@ static const struct vm_special_mapping vdso_mapping = { .fault = vdso_fault, .mremap = vdso_mremap, }; -static const struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; static const struct vm_special_mapping vvar_vclock_mapping = { .name = "[vvar_vclock]", .fault = vvar_vclock_fault, @@ -253,13 +148,13 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) return -EINTR; addr = get_unmapped_area(NULL, addr, - image->size - image->sym_vvar_start, 0, 0); + image->size + __VDSO_PAGES * PAGE_SIZE, 0, 0); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; } - text_start = addr - image->sym_vvar_start; + text_start = addr + __VDSO_PAGES * PAGE_SIZE; /* * MAYWRITE to allow gdb to COW and set breakpoints @@ -276,13 +171,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) goto up_fail; } - vma = _install_special_mapping(mm, - addr, - (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); - + vma = vdso_install_vvar_mapping(mm, addr); if (IS_ERR(vma)) { ret = PTR_ERR(vma); do_munmap(mm, text_start, image->size, NULL); @@ -327,7 +216,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr) */ for_each_vma(vmi, vma) { if (vma_is_special_mapping(vma, &vdso_mapping) || - vma_is_special_mapping(vma, &vvar_mapping) || + vma_is_special_mapping(vma, &vdso_vvar_mapping) || vma_is_special_mapping(vma, &vvar_vclock_mapping)) { mmap_write_unlock(mm); return -EEXIST; diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h index 2bf9c0e970c3e..af85539f6557b 100644 --- a/arch/x86/include/asm/vdso/getrandom.h +++ b/arch/x86/include/asm/vdso/getrandom.h @@ -27,16 +27,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig return ret; } -extern struct vdso_rng_data vdso_rng_data - __attribute__((visibility("hidden"))); - -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - if (IS_ENABLED(CONFIG_TIME_NS) && __arch_get_vdso_data()->clock_mode == VDSO_CLOCKMODE_TIMENS) - return (void *)&vdso_rng_data + ((void *)&timens_page - (void *)__arch_get_vdso_data()); - return &vdso_rng_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 375a34b0f3657..edec796832e08 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -19,12 +19,6 @@ #include #include -extern struct vdso_data vvar_page - __attribute__((visibility("hidden"))); - -extern struct vdso_data timens_page - __attribute__((visibility("hidden"))); - #define VDSO_HAS_TIME 1 #define VDSO_HAS_CLOCK_GETRES 1 @@ -59,14 +53,6 @@ extern struct ms_hyperv_tsc_page hvclock_page __attribute__((visibility("hidden"))); #endif -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return &timens_page; -} -#endif - #ifndef BUILD_VDSO32 static __always_inline @@ -250,7 +236,7 @@ static u64 vread_hvclock(void) #endif static inline u64 __arch_get_hw_counter(s32 clock_mode, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { if (likely(clock_mode == VDSO_CLOCKMODE_TSC)) return (u64)rdtsc_ordered() & S64_MAX; @@ -275,12 +261,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode, return U64_MAX; } -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return &vvar_page; -} - -static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd) +static inline bool arch_vdso_clocksource_ok(const struct vdso_time_data *vd) { return true; } @@ -319,7 +300,7 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * declares everything with the MSB/Sign-bit set as invalid. Therefore the * effective mask is S64_MAX. */ -static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) +static __always_inline u64 vdso_calc_ns(const struct vdso_time_data *vd, u64 cycles, u64 base) { u64 delta = cycles - vd->cycle_last; diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h index 88b31d4cdfaf3..ebbf63420af03 100644 --- a/arch/x86/include/asm/vdso/vsyscall.h +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -2,11 +2,10 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#define __VDSO_RND_DATA_OFFSET 640 -#define __VVAR_PAGES 4 +#define __VDSO_PAGES 6 #define VDSO_NR_VCLOCK_PAGES 2 -#define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VVAR_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE) +#define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VDSO_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE) #define VDSO_PAGE_PVCLOCK_OFFSET 0 #define VDSO_PAGE_HVCLOCK_OFFSET 1 @@ -15,25 +14,6 @@ #include #include -extern struct vdso_data *vdso_data; - -/* - * Update the vDSO data page to keep in sync with kernel timekeeping. - */ -static __always_inline -struct vdso_data *__x86_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __x86_get_k_vdso_data - -static __always_inline -struct vdso_rng_data *__x86_get_k_vdso_rng_data(void) -{ - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; -} -#define __arch_get_k_vdso_rng_data __x86_get_k_vdso_rng_data - /* The asm-generic header needs to be included after the definitions above */ #include From 9729dceab17bba8a122e26875f05d291b969ce7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:48 +0100 Subject: [PATCH 16/55] x86/vdso/vdso2c: Remove page handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The values are not used anymore. Also the sanity checks performed by vdso2c can never trigger as they only validate invariants already enforced by the linker script. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-16-13a4669dfc8c@linutronix.de --- arch/x86/entry/vdso/vdso-layout.lds.S | 4 ---- arch/x86/entry/vdso/vdso2c.c | 21 --------------------- arch/x86/entry/vdso/vdso2c.h | 20 -------------------- arch/x86/include/asm/vdso.h | 6 ------ 4 files changed, 51 deletions(-) diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index e5cecdb0fedf0..ec1ac191a0578 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -24,10 +24,6 @@ SECTIONS pvclock_page = vclock_pages + VDSO_PAGE_PVCLOCK_OFFSET * PAGE_SIZE; hvclock_page = vclock_pages + VDSO_PAGE_HVCLOCK_OFFSET * PAGE_SIZE; - /* For compatibility with vdso2c */ - vvar_page = vdso_u_data; - vvar_start = vdso_u_data; - . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 90d15f2a72055..f84e8f8fa5fe6 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -69,33 +69,12 @@ const char *outfilename; -/* Symbols that we need in vdso2c. */ -enum { - sym_vvar_start, - sym_vvar_page, - sym_pvclock_page, - sym_hvclock_page, - sym_timens_page, -}; - -const int special_pages[] = { - sym_vvar_page, - sym_pvclock_page, - sym_hvclock_page, - sym_timens_page, -}; - struct vdso_sym { const char *name; bool export; }; struct vdso_sym required_syms[] = { - [sym_vvar_start] = {"vvar_start", true}, - [sym_vvar_page] = {"vvar_page", true}, - [sym_pvclock_page] = {"pvclock_page", true}, - [sym_hvclock_page] = {"hvclock_page", true}, - [sym_timens_page] = {"timens_page", true}, {"VDSO32_NOTE_MASK", true}, {"__kernel_vsyscall", true}, {"__kernel_sigreturn", true}, diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 67b3e37576a64..78ed1c1f28b92 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -150,26 +150,6 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, } } - /* Validate mapping addresses. */ - for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { - INT_BITS symval = syms[special_pages[i]]; - - if (!symval) - continue; /* The mapping isn't used; ignore it. */ - - if (symval % 4096) - fail("%s must be a multiple of 4096\n", - required_syms[i].name); - if (symval + 4096 < syms[sym_vvar_start]) - fail("%s underruns vvar_start\n", - required_syms[i].name); - if (symval + 4096 > 0) - fail("%s is on the wrong side of the vdso text\n", - required_syms[i].name); - } - if (syms[sym_vvar_start] % 4096) - fail("vvar_begin must be a multiple of 4096\n"); - if (!image_name) { fwrite(stripped_addr, stripped_len, 1, outfile); return; diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index d7f6592b74a94..80be0da733df4 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -18,12 +18,6 @@ struct vdso_image { unsigned long extable_base, extable_len; const void *extable; - long sym_vvar_start; /* Negative offset to the vvar area */ - - long sym_vvar_page; - long sym_pvclock_page; - long sym_hvclock_page; - long sym_timens_page; long sym_VDSO32_NOTE_MASK; long sym___kernel_sigreturn; long sym___kernel_rt_sigreturn; From 998a8a2608199fc07c3a49200c73708e9df01e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:49 +0100 Subject: [PATCH 17/55] vdso: Remove remnants of architecture-specific random state storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All users of the random vDSO are using the generic storage implementation. Remove the now unnecessary compatibility accessor functions and symbols. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-17-13a4669dfc8c@linutronix.de --- include/asm-generic/vdso/vsyscall.h | 5 ----- include/vdso/datapage.h | 1 - 2 files changed, 6 deletions(-) diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index a5f973e4e2723..13e2ac3736ee9 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -32,11 +32,6 @@ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) #define __arch_get_vdso_u_time_data __arch_get_vdso_data -#ifndef __arch_get_vdso_u_rng_data -#define __arch_get_vdso_u_rng_data() __arch_get_vdso_rng_data() -#endif -#define vdso_k_rng_data __arch_get_k_vdso_rng_data() - #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ #ifndef __arch_update_vsyscall diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 46658b39c2825..497907c3aa11f 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -152,7 +152,6 @@ struct vdso_rng_data { #ifndef CONFIG_GENERIC_VDSO_DATA_STORE extern struct vdso_time_data _vdso_data[CS_BASES] __attribute__((visibility("hidden"))); extern struct vdso_time_data _timens_data[CS_BASES] __attribute__((visibility("hidden"))); -extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden"))); #else extern struct vdso_time_data vdso_u_time_data[CS_BASES] __attribute__((visibility("hidden"))); extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden"))); From ac1a42f4e4e296b5ba5fdb39444f65d6e5196240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 4 Feb 2025 13:05:50 +0100 Subject: [PATCH 18/55] vdso: Remove remnants of architecture-specific time storage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All users of the time releated parts of the vDSO are now using the generic storage implementation. Remove the therefore unnecessary compatibility accessor functions and symbols. Co-developed-by: Nam Cao Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250204-vdso-store-rng-v3-18-13a4669dfc8c@linutronix.de --- include/asm-generic/vdso/vsyscall.h | 20 +++-------- include/linux/time_namespace.h | 3 -- include/vdso/datapage.h | 19 +---------- include/vdso/helpers.h | 8 ++--- kernel/time/namespace.c | 12 +++---- kernel/time/vsyscall.c | 19 +++++------ lib/vdso/datastore.c | 10 +++--- lib/vdso/gettimeofday.c | 51 ++++++++++++++--------------- 8 files changed, 53 insertions(+), 89 deletions(-) diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index 13e2ac3736ee9..1fb3000f50364 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -20,31 +20,19 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(vo } #endif -#else /* !CONFIG_GENERIC_VDSO_DATA_STORE */ - -#ifndef __arch_get_k_vdso_data -static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) -{ - return NULL; -} -#endif /* __arch_get_k_vdso_data */ -#define vdso_k_time_data __arch_get_k_vdso_data() - -#define __arch_get_vdso_u_time_data __arch_get_vdso_data - #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ #ifndef __arch_update_vsyscall -static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata) +static __always_inline void __arch_update_vsyscall(struct vdso_time_data *vdata) { } #endif /* __arch_update_vsyscall */ -#ifndef __arch_sync_vdso_data -static __always_inline void __arch_sync_vdso_data(struct vdso_data *vdata) +#ifndef __arch_sync_vdso_time_data +static __always_inline void __arch_sync_vdso_time_data(struct vdso_time_data *vdata) { } -#endif /* __arch_sync_vdso_data */ +#endif /* __arch_sync_vdso_time_data */ #endif /* !__ASSEMBLY__ */ diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 4b81db223f545..0b8b32bf06551 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -8,7 +8,6 @@ #include #include #include -#include struct user_namespace; extern struct user_namespace init_user_ns; @@ -166,6 +165,4 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #endif -struct vdso_data *arch_get_vdso_data(void *vvar_page); - #endif /* _LINUX_TIMENS_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 497907c3aa11f..ed4fb4c06e3ee 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -128,8 +128,6 @@ struct vdso_time_data { struct arch_vdso_time_data arch_data; }; -#define vdso_data vdso_time_data - /** * struct vdso_rng_data - vdso RNG state information * @generation: counter representing the number of RNG reseeds @@ -149,10 +147,7 @@ struct vdso_rng_data { * With the hidden visibility, the compiler simply generates a PC-relative * relocation, and this is what we need. */ -#ifndef CONFIG_GENERIC_VDSO_DATA_STORE -extern struct vdso_time_data _vdso_data[CS_BASES] __attribute__((visibility("hidden"))); -extern struct vdso_time_data _timens_data[CS_BASES] __attribute__((visibility("hidden"))); -#else +#ifdef CONFIG_GENERIC_VDSO_DATA_STORE extern struct vdso_time_data vdso_u_time_data[CS_BASES] __attribute__((visibility("hidden"))); extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden"))); extern struct vdso_arch_data vdso_u_arch_data __attribute__((visibility("hidden"))); @@ -160,17 +155,6 @@ extern struct vdso_arch_data vdso_u_arch_data __attribute__((visibility("hidden" extern struct vdso_time_data *vdso_k_time_data; extern struct vdso_rng_data *vdso_k_rng_data; extern struct vdso_arch_data *vdso_k_arch_data; -#endif - -/** - * union vdso_data_store - Generic vDSO data page - */ -union vdso_data_store { - struct vdso_time_data data[CS_BASES]; - u8 page[1U << CONFIG_PAGE_SHIFT]; -}; - -#ifdef CONFIG_GENERIC_VDSO_DATA_STORE #define VDSO_ARCH_DATA_SIZE ALIGN(sizeof(struct vdso_arch_data), PAGE_SIZE) #define VDSO_ARCH_DATA_PAGES (VDSO_ARCH_DATA_SIZE >> PAGE_SHIFT) @@ -189,7 +173,6 @@ enum vdso_pages { /* * The generic vDSO implementation requires that gettimeofday.h * provides: - * - __arch_get_vdso_data(): to get the vdso datapage. * - __arch_get_hw_counter(): to get the hw counter based on the * clock_mode. * - gettimeofday_fallback(): fallback for gettimeofday. diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 3ddb03bb05cbe..41c3087070c7a 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -7,7 +7,7 @@ #include #include -static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) +static __always_inline u32 vdso_read_begin(const struct vdso_time_data *vd) { u32 seq; @@ -18,7 +18,7 @@ static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) return seq; } -static __always_inline u32 vdso_read_retry(const struct vdso_data *vd, +static __always_inline u32 vdso_read_retry(const struct vdso_time_data *vd, u32 start) { u32 seq; @@ -28,7 +28,7 @@ static __always_inline u32 vdso_read_retry(const struct vdso_data *vd, return seq != start; } -static __always_inline void vdso_write_begin(struct vdso_data *vd) +static __always_inline void vdso_write_begin(struct vdso_time_data *vd) { /* * WRITE_ONCE() is required otherwise the compiler can validly tear @@ -40,7 +40,7 @@ static __always_inline void vdso_write_begin(struct vdso_data *vd) smp_wmb(); } -static __always_inline void vdso_write_end(struct vdso_data *vd) +static __always_inline void vdso_write_end(struct vdso_time_data *vd) { smp_wmb(); /* diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 0775b9ec952af..12f55aa539adb 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -165,18 +165,18 @@ static struct timens_offset offset_from_ts(struct timespec64 off) * HVCLOCK * VVAR * - * The check for vdso_data->clock_mode is in the unlikely path of + * The check for vdso_time_data->clock_mode is in the unlikely path of * the seq begin magic. So for the non-timens case most of the time * 'seq' is even, so the branch is not taken. * * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check - * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the + * for vdso_time_data->clock_mode is a non-issue. The task is spin waiting for the * update to finish and for 'seq' to become even anyway. * - * Timens page has vdso_data->clock_mode set to VDSO_CLOCKMODE_TIMENS which + * Timens page has vdso_time_data->clock_mode set to VDSO_CLOCKMODE_TIMENS which * enforces the time namespace handling path. */ -static void timens_setup_vdso_data(struct vdso_data *vdata, +static void timens_setup_vdso_data(struct vdso_time_data *vdata, struct time_namespace *ns) { struct timens_offset *offset = vdata->offset; @@ -219,7 +219,7 @@ static DEFINE_MUTEX(offset_lock); static void timens_set_vvar_page(struct task_struct *task, struct time_namespace *ns) { - struct vdso_data *vdata; + struct vdso_time_data *vdata; unsigned int i; if (ns == &init_time_ns) @@ -235,7 +235,7 @@ static void timens_set_vvar_page(struct task_struct *task, goto out; ns->frozen_offsets = true; - vdata = arch_get_vdso_data(page_address(ns->vvar_page)); + vdata = page_address(ns->vvar_page); for (i = 0; i < CS_BASES; i++) timens_setup_vdso_data(&vdata[i], ns); diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 09c1e39a6dd8e..418192296ef7d 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -15,8 +15,7 @@ #include "timekeeping_internal.h" -static inline void update_vdso_data(struct vdso_data *vdata, - struct timekeeper *tk) +static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct timekeeper *tk) { struct vdso_timestamp *vdso_ts; u64 nsec, sec; @@ -77,7 +76,7 @@ static inline void update_vdso_data(struct vdso_data *vdata, void update_vsyscall(struct timekeeper *tk) { - struct vdso_data *vdata = vdso_k_time_data; + struct vdso_time_data *vdata = vdso_k_time_data; struct vdso_timestamp *vdso_ts; s32 clock_mode; u64 nsec; @@ -117,23 +116,23 @@ void update_vsyscall(struct timekeeper *tk) * update of the high resolution parts. */ if (clock_mode != VDSO_CLOCKMODE_NONE) - update_vdso_data(vdata, tk); + update_vdso_time_data(vdata, tk); __arch_update_vsyscall(vdata); vdso_write_end(vdata); - __arch_sync_vdso_data(vdata); + __arch_sync_vdso_time_data(vdata); } void update_vsyscall_tz(void) { - struct vdso_data *vdata = vdso_k_time_data; + struct vdso_time_data *vdata = vdso_k_time_data; vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest; vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime; - __arch_sync_vdso_data(vdata); + __arch_sync_vdso_time_data(vdata); } /** @@ -150,7 +149,7 @@ void update_vsyscall_tz(void) */ unsigned long vdso_update_begin(void) { - struct vdso_data *vdata = vdso_k_time_data; + struct vdso_time_data *vdata = vdso_k_time_data; unsigned long flags = timekeeper_lock_irqsave(); vdso_write_begin(vdata); @@ -167,9 +166,9 @@ unsigned long vdso_update_begin(void) */ void vdso_update_end(unsigned long flags) { - struct vdso_data *vdata = vdso_k_time_data; + struct vdso_time_data *vdata = vdso_k_time_data; vdso_write_end(vdata); - __arch_sync_vdso_data(vdata); + __arch_sync_vdso_time_data(vdata); timekeeper_unlock_irqrestore(flags); } diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index 0959d62d78586..e227fbbcb7969 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -12,7 +12,10 @@ * The vDSO data page. */ #ifdef CONFIG_HAVE_GENERIC_VDSO -static union vdso_data_store vdso_time_data_store __page_aligned_data; +static union { + struct vdso_time_data data[CS_BASES]; + u8 page[PAGE_SIZE]; +} vdso_time_data_store __page_aligned_data; struct vdso_time_data *vdso_k_time_data = vdso_time_data_store.data; static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); #endif /* CONFIG_HAVE_GENERIC_VDSO */ @@ -123,9 +126,4 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) return 0; } - -struct vdso_time_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_time_data *)vvar_page; -} #endif diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 20c5b8752fcc8..299f027116ee0 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -17,12 +17,12 @@ #endif #ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT -static __always_inline bool vdso_delta_ok(const struct vdso_data *vd, u64 delta) +static __always_inline bool vdso_delta_ok(const struct vdso_time_data *vd, u64 delta) { return delta < vd->max_cycles; } #else -static __always_inline bool vdso_delta_ok(const struct vdso_data *vd, u64 delta) +static __always_inline bool vdso_delta_ok(const struct vdso_time_data *vd, u64 delta) { return true; } @@ -39,7 +39,7 @@ static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift) * Default implementation which works for all sane clocksources. That * obviously excludes x86/TSC. */ -static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base) +static __always_inline u64 vdso_calc_ns(const struct vdso_time_data *vd, u64 cycles, u64 base) { u64 delta = (cycles - vd->cycle_last) & VDSO_DELTA_MASK(vd); @@ -58,7 +58,7 @@ static inline bool __arch_vdso_hres_capable(void) #endif #ifndef vdso_clocksource_ok -static inline bool vdso_clocksource_ok(const struct vdso_data *vd) +static inline bool vdso_clocksource_ok(const struct vdso_time_data *vd) { return vd->clock_mode != VDSO_CLOCKMODE_NONE; } @@ -79,21 +79,20 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim { return (void *)vd + PAGE_SIZE; } -#define __arch_get_timens_vdso_data(vd) __arch_get_vdso_u_timens_data(vd) #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ -static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, +static __always_inline int do_hres_timens(const struct vdso_time_data *vdns, clockid_t clk, struct __kernel_timespec *ts) { const struct timens_offset *offs = &vdns->offset[clk]; const struct vdso_timestamp *vdso_ts; - const struct vdso_data *vd; + const struct vdso_time_data *vd; u64 cycles, ns; u32 seq; s64 sec; vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE); - vd = __arch_get_timens_vdso_data(vd); + vd = __arch_get_vdso_u_timens_data(vd); if (clk != CLOCK_MONOTONIC_RAW) vd = &vd[CS_HRES_COARSE]; else @@ -128,19 +127,19 @@ static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_ } #else static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) +const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd) { return NULL; } -static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk, +static __always_inline int do_hres_timens(const struct vdso_time_data *vdns, clockid_t clk, struct __kernel_timespec *ts) { return -EINVAL; } #endif -static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, +static __always_inline int do_hres(const struct vdso_time_data *vd, clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; @@ -192,10 +191,10 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk, } #ifdef CONFIG_TIME_NS -static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, +static __always_inline int do_coarse_timens(const struct vdso_time_data *vdns, clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_data *vd = __arch_get_timens_vdso_data(vdns); + const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; const struct timens_offset *offs = &vdns->offset[clk]; u64 nsec; @@ -221,14 +220,14 @@ static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clocki return 0; } #else -static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk, +static __always_inline int do_coarse_timens(const struct vdso_time_data *vdns, clockid_t clk, struct __kernel_timespec *ts) { return -1; } #endif -static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, +static __always_inline int do_coarse(const struct vdso_time_data *vd, clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; @@ -255,7 +254,7 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk, } static __always_inline int -__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock, +__cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { u32 msk; @@ -282,7 +281,7 @@ __cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock, } static __maybe_unused int -__cvdso_clock_gettime_data(const struct vdso_data *vd, clockid_t clock, +__cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { int ret = __cvdso_clock_gettime_common(vd, clock, ts); @@ -300,7 +299,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) #ifdef BUILD_VDSO32 static __maybe_unused int -__cvdso_clock_gettime32_data(const struct vdso_data *vd, clockid_t clock, +__cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock, struct old_timespec32 *res) { struct __kernel_timespec ts; @@ -326,7 +325,7 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) #endif /* BUILD_VDSO32 */ static __maybe_unused int -__cvdso_gettimeofday_data(const struct vdso_data *vd, +__cvdso_gettimeofday_data(const struct vdso_time_data *vd, struct __kernel_old_timeval *tv, struct timezone *tz) { @@ -343,7 +342,7 @@ __cvdso_gettimeofday_data(const struct vdso_data *vd, if (unlikely(tz != NULL)) { if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(vd); + vd = __arch_get_vdso_u_timens_data(vd); tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime; @@ -360,13 +359,13 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) #ifdef VDSO_HAS_TIME static __maybe_unused __kernel_old_time_t -__cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time) +__cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time) { __kernel_old_time_t t; if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(vd); + vd = __arch_get_vdso_u_timens_data(vd); t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); @@ -384,7 +383,7 @@ static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time #ifdef VDSO_HAS_CLOCK_GETRES static __maybe_unused -int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock, +int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { u32 msk; @@ -396,7 +395,7 @@ int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock, if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - vd = __arch_get_timens_vdso_data(vd); + vd = __arch_get_vdso_u_timens_data(vd); /* * Convert the clockid to a bitmask and use it to check which @@ -425,7 +424,7 @@ int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock, } static __maybe_unused -int __cvdso_clock_getres_data(const struct vdso_data *vd, clockid_t clock, +int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { int ret = __cvdso_clock_getres_common(vd, clock, res); @@ -443,7 +442,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) #ifdef BUILD_VDSO32 static __maybe_unused int -__cvdso_clock_getres_time32_data(const struct vdso_data *vd, clockid_t clock, +__cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t clock, struct old_timespec32 *res) { struct __kernel_timespec ts; From ba2e35644d09af730880bb404bb6dd84685dcd7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:40 +0100 Subject: [PATCH 19/55] MAINTAINERS: Add vDSO selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These currently have no maintainer besides the default kselftest ones. Add the general vDSO maintainers, too. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-1-28e14e031ed8@linutronix.de --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 25c86f47353de..74c6399529c7f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9777,6 +9777,7 @@ F: include/asm-generic/vdso/vsyscall.h F: include/vdso/ F: kernel/time/vsyscall.c F: lib/vdso/ +F: tools/testing/selftests/vDSO/ GENWQE (IBM Generic Workqueue Card) M: Frank Haverkamp From c4131140961b93883e69a01b09f8ed4bcebefaf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:41 +0100 Subject: [PATCH 20/55] elf, uapi: Add definition for STN_UNDEF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The definition is used by tools/testing/selftests/vDSO/parse_vdso.c. To be able to build the vDSO selftests without a libc dependency, add the definition to the kernels own UAPI headers. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.symtab.html Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-2-28e14e031ed8@linutronix.de --- include/uapi/linux/elf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b44069d29cecc..448695c736404 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -125,6 +125,8 @@ typedef __s64 Elf64_Sxword; #define STB_GLOBAL 1 #define STB_WEAK 2 +#define STN_UNDEF 0 + #define STT_NOTYPE 0 #define STT_OBJECT 1 #define STT_FUNC 2 From 50881d1469cf35eec690d863276cf257c391eec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:42 +0100 Subject: [PATCH 21/55] elf, uapi: Add definition for DT_GNU_HASH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The definition is used by tools/testing/selftests/vDSO/parse_vdso.c. To be able to build the vDSO selftests without a libc dependency, add the define to the kernels own UAPI headers. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/libc-ddefs.html Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-3-28e14e031ed8@linutronix.de --- include/uapi/linux/elf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 448695c736404..c5383cc7bb13c 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -107,6 +107,7 @@ typedef __s64 Elf64_Sxword; #define DT_VALRNGLO 0x6ffffd00 #define DT_VALRNGHI 0x6ffffdff #define DT_ADDRRNGLO 0x6ffffe00 +#define DT_GNU_HASH 0x6ffffef5 #define DT_ADDRRNGHI 0x6ffffeff #define DT_VERSYM 0x6ffffff0 #define DT_RELACOUNT 0x6ffffff9 From 049d19bb3807e10902d41125c9d511025389718f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:43 +0100 Subject: [PATCH 22/55] elf, uapi: Add definitions for VER_FLG_BASE and VER_FLG_WEAK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The definitions are used by tools/testing/selftests/vDSO/parse_vdso.c. To be able to build the vDSO selftests without a libc dependency, add the definitions to the kernels own UAPI headers. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://docs.oracle.com/cd/E19683-01/816-1386/chapter6-80869/index.html Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-4-28e14e031ed8@linutronix.de --- include/uapi/linux/elf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c5383cc7bb13c..d040f12ff1c0a 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -136,6 +136,9 @@ typedef __s64 Elf64_Sxword; #define STT_COMMON 5 #define STT_TLS 6 +#define VER_FLG_BASE 0x1 +#define VER_FLG_WEAK 0x2 + #define ELF_ST_BIND(x) ((x) >> 4) #define ELF_ST_TYPE(x) ((x) & 0xf) #define ELF32_ST_BIND(x) ELF_ST_BIND(x) From 2c86f604f85def1be0b7889c18e055ed63591018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:44 +0100 Subject: [PATCH 23/55] elf, uapi: Add type ElfXX_Versym MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The type is used by tools/testing/selftests/vDSO/parse_vdso.c. To be able to build the vDSO selftests without a libc dependency, add the type to the kernels own UAPI headers. As documented by elf(5). Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-5-28e14e031ed8@linutronix.de --- include/uapi/linux/elf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index d040f12ff1c0a..8846fe03ca5b8 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -11,6 +11,7 @@ typedef __u16 Elf32_Half; typedef __u32 Elf32_Off; typedef __s32 Elf32_Sword; typedef __u32 Elf32_Word; +typedef __u16 Elf32_Versym; /* 64-bit ELF base types. */ typedef __u64 Elf64_Addr; @@ -21,6 +22,7 @@ typedef __s32 Elf64_Sword; typedef __u32 Elf64_Word; typedef __u64 Elf64_Xword; typedef __s64 Elf64_Sxword; +typedef __u16 Elf64_Versym; /* These constants are for the segment types stored in the image headers */ #define PT_NULL 0 From e0d15896f5dc90f4bd36ab79c958f6eff3f7f403 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:45 +0100 Subject: [PATCH 24/55] elf, uapi: Add types ElfXX_Verdef and ElfXX_Veraux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The types are used by tools/testing/selftests/vDSO/parse_vdso.c. To be able to build the vDSO selftests without a libc dependency, add the types to the kernels own UAPI headers. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/symversion.html#VERDEFEXTS Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-6-28e14e031ed8@linutronix.de --- include/uapi/linux/elf.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index 8846fe03ca5b8..49f9f90458d8c 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -491,4 +491,34 @@ typedef struct elf64_note { /* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ #define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +typedef struct { + Elf32_Half vd_version; + Elf32_Half vd_flags; + Elf32_Half vd_ndx; + Elf32_Half vd_cnt; + Elf32_Word vd_hash; + Elf32_Word vd_aux; + Elf32_Word vd_next; +} Elf32_Verdef; + +typedef struct { + Elf64_Half vd_version; + Elf64_Half vd_flags; + Elf64_Half vd_ndx; + Elf64_Half vd_cnt; + Elf64_Word vd_hash; + Elf64_Word vd_aux; + Elf64_Word vd_next; +} Elf64_Verdef; + +typedef struct { + Elf32_Word vda_name; + Elf32_Word vda_next; +} Elf32_Verdaux; + +typedef struct { + Elf64_Word vda_name; + Elf64_Word vda_next; +} Elf64_Verdaux; + #endif /* _UAPI_LINUX_ELF_H */ From 626fd35278295fbb21f2da331cd8028e9738d965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:46 +0100 Subject: [PATCH 25/55] tools/include: Add uapi/linux/elf.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be used by the vDSO selftests. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-7-28e14e031ed8@linutronix.de --- tools/include/uapi/linux/elf.h | 524 +++++++++++++++++++++++++++++++++ 1 file changed, 524 insertions(+) create mode 100644 tools/include/uapi/linux/elf.h diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h new file mode 100644 index 0000000000000..5834b83d7f9a4 --- /dev/null +++ b/tools/include/uapi/linux/elf.h @@ -0,0 +1,524 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_ELF_H +#define _LINUX_ELF_H + +#include +#include + +/* 32-bit ELF base types. */ +typedef __u32 Elf32_Addr; +typedef __u16 Elf32_Half; +typedef __u32 Elf32_Off; +typedef __s32 Elf32_Sword; +typedef __u32 Elf32_Word; +typedef __u16 Elf32_Versym; + +/* 64-bit ELF base types. */ +typedef __u64 Elf64_Addr; +typedef __u16 Elf64_Half; +typedef __s16 Elf64_SHalf; +typedef __u64 Elf64_Off; +typedef __s32 Elf64_Sword; +typedef __u32 Elf64_Word; +typedef __u64 Elf64_Xword; +typedef __s64 Elf64_Sxword; +typedef __u16 Elf64_Versym; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_TLS 7 /* Thread local storage segment */ +#define PT_LOOS 0x60000000 /* OS-specific */ +#define PT_HIOS 0x6fffffff /* OS-specific */ +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff +#define PT_GNU_EH_FRAME (PT_LOOS + 0x474e550) +#define PT_GNU_STACK (PT_LOOS + 0x474e551) +#define PT_GNU_RELRO (PT_LOOS + 0x474e552) +#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) + + +/* ARM MTE memory tag segment type */ +#define PT_AARCH64_MEMTAG_MTE (PT_LOPROC + 0x2) + +/* + * Extended Numbering + * + * If the real number of program header table entries is larger than + * or equal to PN_XNUM(0xffff), it is set to sh_info field of the + * section header at index 0, and PN_XNUM is set to e_phnum + * field. Otherwise, the section header at index 0 is zero + * initialized, if it exists. + * + * Specifications are available in: + * + * - Oracle: Linker and Libraries. + * Part No: 817–1984–19, August 2011. + * https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf + * + * - System V ABI AMD64 Architecture Processor Supplement + * Draft Version 0.99.4, + * January 13, 2010. + * http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf + */ +#define PN_XNUM 0xffff + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* This is the info that is needed to parse the dynamic section of the file */ +#define DT_NULL 0 +#define DT_NEEDED 1 +#define DT_PLTRELSZ 2 +#define DT_PLTGOT 3 +#define DT_HASH 4 +#define DT_STRTAB 5 +#define DT_SYMTAB 6 +#define DT_RELA 7 +#define DT_RELASZ 8 +#define DT_RELAENT 9 +#define DT_STRSZ 10 +#define DT_SYMENT 11 +#define DT_INIT 12 +#define DT_FINI 13 +#define DT_SONAME 14 +#define DT_RPATH 15 +#define DT_SYMBOLIC 16 +#define DT_REL 17 +#define DT_RELSZ 18 +#define DT_RELENT 19 +#define DT_PLTREL 20 +#define DT_DEBUG 21 +#define DT_TEXTREL 22 +#define DT_JMPREL 23 +#define DT_ENCODING 32 +#define OLD_DT_LOOS 0x60000000 +#define DT_LOOS 0x6000000d +#define DT_HIOS 0x6ffff000 +#define DT_VALRNGLO 0x6ffffd00 +#define DT_VALRNGHI 0x6ffffdff +#define DT_ADDRRNGLO 0x6ffffe00 +#define DT_GNU_HASH 0x6ffffef5 +#define DT_ADDRRNGHI 0x6ffffeff +#define DT_VERSYM 0x6ffffff0 +#define DT_RELACOUNT 0x6ffffff9 +#define DT_RELCOUNT 0x6ffffffa +#define DT_FLAGS_1 0x6ffffffb +#define DT_VERDEF 0x6ffffffc +#define DT_VERDEFNUM 0x6ffffffd +#define DT_VERNEED 0x6ffffffe +#define DT_VERNEEDNUM 0x6fffffff +#define OLD_DT_HIOS 0x6fffffff +#define DT_LOPROC 0x70000000 +#define DT_HIPROC 0x7fffffff + +/* This info is needed when parsing the symbol table */ +#define STB_LOCAL 0 +#define STB_GLOBAL 1 +#define STB_WEAK 2 + +#define STN_UNDEF 0 + +#define STT_NOTYPE 0 +#define STT_OBJECT 1 +#define STT_FUNC 2 +#define STT_SECTION 3 +#define STT_FILE 4 +#define STT_COMMON 5 +#define STT_TLS 6 + +#define VER_FLG_BASE 0x1 +#define VER_FLG_WEAK 0x2 + +#define ELF_ST_BIND(x) ((x) >> 4) +#define ELF_ST_TYPE(x) ((x) & 0xf) +#define ELF32_ST_BIND(x) ELF_ST_BIND(x) +#define ELF32_ST_TYPE(x) ELF_ST_TYPE(x) +#define ELF64_ST_BIND(x) ELF_ST_BIND(x) +#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) + +typedef struct { + Elf32_Sword d_tag; + union { + Elf32_Sword d_val; + Elf32_Addr d_ptr; + } d_un; +} Elf32_Dyn; + +typedef struct { + Elf64_Sxword d_tag; /* entry tag value */ + union { + Elf64_Xword d_val; + Elf64_Addr d_ptr; + } d_un; +} Elf64_Dyn; + +/* The following are used with relocations */ +#define ELF32_R_SYM(x) ((x) >> 8) +#define ELF32_R_TYPE(x) ((x) & 0xff) + +#define ELF64_R_SYM(i) ((i) >> 32) +#define ELF64_R_TYPE(i) ((i) & 0xffffffff) + +typedef struct elf32_rel { + Elf32_Addr r_offset; + Elf32_Word r_info; +} Elf32_Rel; + +typedef struct elf64_rel { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ +} Elf64_Rel; + +typedef struct elf32_rela { + Elf32_Addr r_offset; + Elf32_Word r_info; + Elf32_Sword r_addend; +} Elf32_Rela; + +typedef struct elf64_rela { + Elf64_Addr r_offset; /* Location at which to apply the action */ + Elf64_Xword r_info; /* index and type of relocation */ + Elf64_Sxword r_addend; /* Constant addend used to compute value */ +} Elf64_Rela; + +typedef struct elf32_sym { + Elf32_Word st_name; + Elf32_Addr st_value; + Elf32_Word st_size; + unsigned char st_info; + unsigned char st_other; + Elf32_Half st_shndx; +} Elf32_Sym; + +typedef struct elf64_sym { + Elf64_Word st_name; /* Symbol name, index in string tbl */ + unsigned char st_info; /* Type and binding attributes */ + unsigned char st_other; /* No defined meaning, 0 */ + Elf64_Half st_shndx; /* Associated section index */ + Elf64_Addr st_value; /* Value of the symbol */ + Elf64_Xword st_size; /* Associated symbol size */ +} Elf64_Sym; + + +#define EI_NIDENT 16 + +typedef struct elf32_hdr { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +/* sh_type */ +#define SHT_NULL 0 +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_RELA 4 +#define SHT_HASH 5 +#define SHT_DYNAMIC 6 +#define SHT_NOTE 7 +#define SHT_NOBITS 8 +#define SHT_REL 9 +#define SHT_SHLIB 10 +#define SHT_DYNSYM 11 +#define SHT_NUM 12 +#define SHT_LOPROC 0x70000000 +#define SHT_HIPROC 0x7fffffff +#define SHT_LOUSER 0x80000000 +#define SHT_HIUSER 0xffffffff + +/* sh_flags */ +#define SHF_WRITE 0x1 +#define SHF_ALLOC 0x2 +#define SHF_EXECINSTR 0x4 +#define SHF_RELA_LIVEPATCH 0x00100000 +#define SHF_RO_AFTER_INIT 0x00200000 +#define SHF_MASKPROC 0xf0000000 + +/* special section indexes */ +#define SHN_UNDEF 0 +#define SHN_LORESERVE 0xff00 +#define SHN_LOPROC 0xff00 +#define SHN_HIPROC 0xff1f +#define SHN_LIVEPATCH 0xff20 +#define SHN_ABS 0xfff1 +#define SHN_COMMON 0xfff2 +#define SHN_HIRESERVE 0xffff + +typedef struct elf32_shdr { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +} Elf32_Shdr; + +typedef struct elf64_shdr { + Elf64_Word sh_name; /* Section name, index in string tbl */ + Elf64_Word sh_type; /* Type of section */ + Elf64_Xword sh_flags; /* Miscellaneous section attributes */ + Elf64_Addr sh_addr; /* Section virtual addr at execution */ + Elf64_Off sh_offset; /* Section file offset */ + Elf64_Xword sh_size; /* Size of section in bytes */ + Elf64_Word sh_link; /* Index of another section */ + Elf64_Word sh_info; /* Additional section information */ + Elf64_Xword sh_addralign; /* Section alignment */ + Elf64_Xword sh_entsize; /* Entry size if section holds table */ +} Elf64_Shdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +#define ELFOSABI_NONE 0 +#define ELFOSABI_LINUX 3 + +#ifndef ELF_OSABI +#define ELF_OSABI ELFOSABI_NONE +#endif + +/* + * Notes used in ET_CORE. Architectures export some of the arch register sets + * using the corresponding note types via the PTRACE_GETREGSET and + * PTRACE_SETREGSET requests. + * The note name for these types is "LINUX", except NT_PRFPREG that is named + * "CORE". + */ +#define NT_PRSTATUS 1 +#define NT_PRFPREG 2 +#define NT_PRPSINFO 3 +#define NT_TASKSTRUCT 4 +#define NT_AUXV 6 +/* + * Note to userspace developers: size of NT_SIGINFO note may increase + * in the future to accomodate more fields, don't assume it is fixed! + */ +#define NT_SIGINFO 0x53494749 +#define NT_FILE 0x46494c45 +#define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ +#define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ +#define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ +#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ +#define NT_PPC_TAR 0x103 /* Target Address Register */ +#define NT_PPC_PPR 0x104 /* Program Priority Register */ +#define NT_PPC_DSCR 0x105 /* Data Stream Control Register */ +#define NT_PPC_EBB 0x106 /* Event Based Branch Registers */ +#define NT_PPC_PMU 0x107 /* Performance Monitor Registers */ +#define NT_PPC_TM_CGPR 0x108 /* TM checkpointed GPR Registers */ +#define NT_PPC_TM_CFPR 0x109 /* TM checkpointed FPR Registers */ +#define NT_PPC_TM_CVMX 0x10a /* TM checkpointed VMX Registers */ +#define NT_PPC_TM_CVSX 0x10b /* TM checkpointed VSX Registers */ +#define NT_PPC_TM_SPR 0x10c /* TM Special Purpose Registers */ +#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */ +#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */ +#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */ +#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */ +#define NT_PPC_DEXCR 0x111 /* PowerPC DEXCR registers */ +#define NT_PPC_HASHKEYR 0x112 /* PowerPC HASHKEYR register */ +#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ +#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ +#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ +/* Old binutils treats 0x203 as a CET state */ +#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ +#define NT_X86_XSAVE_LAYOUT 0x205 /* XSAVE layout description */ +#define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ +#define NT_S390_TIMER 0x301 /* s390 timer register */ +#define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ +#define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */ +#define NT_S390_CTRS 0x304 /* s390 control registers */ +#define NT_S390_PREFIX 0x305 /* s390 prefix register */ +#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ +#define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ +#define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ +#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ +#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ +#define NT_S390_GS_BC 0x30c /* s390 guarded storage broadcast control block */ +#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ +#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ +#define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ +#define NT_ARM_TLS 0x401 /* ARM TLS register */ +#define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ +#define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ +#define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension registers */ +#define NT_ARM_PAC_MASK 0x406 /* ARM pointer authentication code masks */ +#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ +#define NT_ARM_PACG_KEYS 0x408 /* ARM pointer authentication generic key */ +#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */ +#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */ +#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */ +#define NT_ARM_ZA 0x40c /* ARM SME ZA registers */ +#define NT_ARM_ZT 0x40d /* ARM SME ZT registers */ +#define NT_ARM_FPMR 0x40e /* ARM floating point mode register */ +#define NT_ARM_POE 0x40f /* ARM POE registers */ +#define NT_ARM_GCS 0x410 /* ARM GCS state */ +#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */ +#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */ +#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */ +#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */ +#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */ +#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ +#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ +#define NT_RISCV_TAGGED_ADDR_CTRL 0x902 /* RISC-V tagged address control (prctl()) */ +#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */ +#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */ +#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */ +#define NT_LOONGARCH_LASX 0xa03 /* LoongArch Loongson Advanced SIMD Extension registers */ +#define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary Translation registers */ +#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */ +#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */ + +/* Note types with note name "GNU" */ +#define NT_GNU_PROPERTY_TYPE_0 5 + +/* Note header in a PT_NOTE section */ +typedef struct elf32_note { + Elf32_Word n_namesz; /* Name size */ + Elf32_Word n_descsz; /* Content size */ + Elf32_Word n_type; /* Content type */ +} Elf32_Nhdr; + +/* Note header in a PT_NOTE section */ +typedef struct elf64_note { + Elf64_Word n_namesz; /* Name size */ + Elf64_Word n_descsz; /* Content size */ + Elf64_Word n_type; /* Content type */ +} Elf64_Nhdr; + +/* .note.gnu.property types for EM_AARCH64: */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) + +typedef struct { + Elf32_Half vd_version; + Elf32_Half vd_flags; + Elf32_Half vd_ndx; + Elf32_Half vd_cnt; + Elf32_Word vd_hash; + Elf32_Word vd_aux; + Elf32_Word vd_next; +} Elf32_Verdef; + +typedef struct { + Elf64_Half vd_version; + Elf64_Half vd_flags; + Elf64_Half vd_ndx; + Elf64_Half vd_cnt; + Elf64_Word vd_hash; + Elf64_Word vd_aux; + Elf64_Word vd_next; +} Elf64_Verdef; + +typedef struct { + Elf32_Word vda_name; + Elf32_Word vda_next; +} Elf32_Verdaux; + +typedef struct { + Elf64_Word vda_name; + Elf64_Word vda_next; +} Elf64_Verdaux; + +#endif /* _LINUX_ELF_H */ From 1a59f5d31569c742da3dafbbb5620901de11a245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:47 +0100 Subject: [PATCH 26/55] selftests: Add headers target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some selftests need access to a full UAPI headers tree, for example when building with nolibc which heavily relies on UAPI headers. A reference to such a tree is available in the KHDR_INCLUDES variable, but there is currently no way to populate such a tree automatically. Provide a target that the tests can depend on to get access to usable UAPI headers. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-8-28e14e031ed8@linutronix.de --- tools/testing/selftests/lib.mk | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index d6edcfcb5be83..5303900339292 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -228,4 +228,7 @@ $(OUTPUT)/%:%.S $(LINK.S) $^ $(LDLIBS) -o $@ endif -.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir +headers: + $(Q)$(MAKE) -C $(top_srcdir) headers + +.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers From 5caaa0aa7c61513a2f606fd6d00fc29ae475ce9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:48 +0100 Subject: [PATCH 27/55] tools/nolibc: add limits.h shim header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit limits.h is a widely used standard header. Missing it from nolibc requires adoption effort to port applications. Add a shim header which includes the global nolibc.h header. It makes all nolibc symbols available. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Tested-by: Willy Tarreau Reviewed-by: Vincenzo Frascino Acked-by: Willy Tarreau Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-9-28e14e031ed8@linutronix.de --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/limits.h | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 tools/include/nolibc/limits.h diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index a1f55fb24bb38..c1299a0531457 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -30,6 +30,7 @@ all_files := \ crt.h \ ctype.h \ errno.h \ + limits.h \ nolibc.h \ signal.h \ stackprotector.h \ diff --git a/tools/include/nolibc/limits.h b/tools/include/nolibc/limits.h new file mode 100644 index 0000000000000..306d4141f4d24 --- /dev/null +++ b/tools/include/nolibc/limits.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim limits.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh + */ + +#include "nolibc.h" From 05c204acf5131740f5f4e193c9749b92da4bcb16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:49 +0100 Subject: [PATCH 28/55] selftests: vDSO: vdso_standalone_test_x86: Use vdso_init_form_sysinfo_ehdr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vdso_standalone_test_x86 is the only user of vdso_init_from_auxv(). Instead of combining the parsing the aux vector with the parsing of the vDSO, split them apart into getauxval() and the regular vdso_init_from_sysinfo_ehdr(). The implementation of getauxval() is taken from tools/include/nolibc/stdlib.h. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-10-28e14e031ed8@linutronix.de --- .../selftests/vDSO/vdso_standalone_test_x86.c | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 644915862af88..500608f89c66b 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "parse_vdso.h" @@ -84,6 +85,30 @@ void to_base10(char *lastdig, time_t n) } } +unsigned long getauxval(const unsigned long *auxv, unsigned long type) +{ + unsigned long ret; + + if (!auxv) + return 0; + + while (1) { + if (!auxv[0] && !auxv[1]) { + ret = 0; + break; + } + + if (auxv[0] == type) { + ret = auxv[1]; + break; + } + + auxv += 2; + } + + return ret; +} + void c_main(void **stack) { /* Parse the stack */ @@ -96,7 +121,7 @@ void c_main(void **stack) stack++; /* Now we're pointing at auxv. Initialize the vDSO parser. */ - vdso_init_from_auxv((void *)stack); + vdso_init_from_sysinfo_ehdr(getauxval((unsigned long *)stack, AT_SYSINFO_EHDR)); /* Find gettimeofday. */ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); From 09dcec64707df85f5def668d4724fce832114d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:50 +0100 Subject: [PATCH 29/55] selftests: vDSO: parse_vdso: Drop vdso_init_from_auxv() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no users left. This also removes the usage of ElfXX_auxv_t, which is not formally standardized. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-11-28e14e031ed8@linutronix.de --- tools/testing/selftests/vDSO/parse_vdso.c | 14 -------------- tools/testing/selftests/vDSO/parse_vdso.h | 1 - 2 files changed, 15 deletions(-) diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 2fe5e983cb22f..3638fe605e80c 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -297,17 +297,3 @@ void *vdso_sym(const char *version, const char *name) return 0; } - -void vdso_init_from_auxv(void *auxv) -{ - ELF(auxv_t) *elf_auxv = auxv; - for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) - { - if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { - vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val); - return; - } - } - - vdso_info.valid = false; -} diff --git a/tools/testing/selftests/vDSO/parse_vdso.h b/tools/testing/selftests/vDSO/parse_vdso.h index de0453067d7cd..09d068ed11f97 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.h +++ b/tools/testing/selftests/vDSO/parse_vdso.h @@ -26,6 +26,5 @@ */ void *vdso_sym(const char *version, const char *name); void vdso_init_from_sysinfo_ehdr(uintptr_t base); -void vdso_init_from_auxv(void *auxv); #endif From c9fbaa879508ea36f5be0b3f1125b8f9f11b4945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:51 +0100 Subject: [PATCH 30/55] selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To allow the usage of parse_vdso.c together with a limited libc like nolibc, use the kernels own elf.h and auxvec.h headers. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-12-28e14e031ed8@linutronix.de --- tools/testing/selftests/vDSO/Makefile | 3 +++ tools/testing/selftests/vDSO/parse_vdso.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 1cf14a8da4380..bc8ca186fb877 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -19,6 +19,9 @@ LDLIBS += -lgcc_s endif include ../lib.mk + +CFLAGS += $(TOOLS_INCLUDES) + $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 3638fe605e80c..200c534cc70e2 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -19,7 +19,8 @@ #include #include #include -#include +#include +#include #include "parse_vdso.h" From 032e871686483ec1bac27ce73e7094692fc76268 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:52 +0100 Subject: [PATCH 31/55] selftests: vDSO: parse_vdso: Test __SIZEOF_LONG__ instead of ULONG_MAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to limits.h(2) ULONG_MAX is only guaranteed to expand to an expression, not a symbolic constant which can be evaluated by the preprocessor. Specifically the definition of ULONG_MAX from nolibc can not be evaluated by the preprocessor. To provide compatibility with nolibc, check with __SIZEOF_LONG__ instead, with is provided directly by the preprocessor and therefore always a symbolic constant. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-13-28e14e031ed8@linutronix.de --- tools/testing/selftests/vDSO/parse_vdso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 200c534cc70e2..902b8f9984a1f 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -26,7 +26,7 @@ /* And here's the code. */ #ifndef ELF_BITS -# if ULONG_MAX > 0xffffffffUL +# if __SIZEOF_LONG__ >= 8 # define ELF_BITS 64 # else # define ELF_BITS 32 From 97a88141241fcf3d1e54a120afefea04fbd1a484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:53 +0100 Subject: [PATCH 32/55] selftests: vDSO: vdso_test_gettimeofday: Clean up includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some unnecessary headers are included, remove them. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-14-28e14e031ed8@linutronix.de --- tools/testing/selftests/vDSO/vdso_test_gettimeofday.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c index e31b18ffae338..636a56ccf8e4e 100644 --- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c +++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c @@ -10,8 +10,6 @@ * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. */ -#include -#include #include #include #include From 4f65df6a58b3de5132f978305c5f807ec3803943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:54 +0100 Subject: [PATCH 33/55] selftests: vDSO: vdso_test_gettimeofday: Make compatible with nolibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit nolibc does not provide sys/time.h and sys/auxv.h, instead their definitions are available unconditionally. Guard the includes so they are not attempted on nolibc. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-15-28e14e031ed8@linutronix.de --- tools/testing/selftests/vDSO/vdso_test_gettimeofday.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c index 636a56ccf8e4e..9ce795b806f09 100644 --- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c +++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c @@ -11,8 +11,10 @@ */ #include +#ifndef NOLIBC #include #include +#endif #include "../kselftest.h" #include "parse_vdso.h" From 8770a9183fe18442c7cfbb56bb7e006462775a91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 26 Feb 2025 12:44:55 +0100 Subject: [PATCH 34/55] selftests: vDSO: vdso_standalone_test_x86: Switch to nolibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vdso_standalone_test_x86 provides its own ASM syscall wrappers and _start() implementation. The in-tree nolibc library already provides this functionality for multiple architectures. By making use of nolibc, the standalone testcase can be built from the exact same codebase as the non-standalone version. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Acked-by: Shuah Khan Link: https://lore.kernel.org/all/20250226-parse_vdso-nolibc-v2-16-28e14e031ed8@linutronix.de --- tools/testing/selftests/vDSO/Makefile | 8 +- .../selftests/vDSO/vdso_standalone_test_x86.c | 175 ++++-------------- 2 files changed, 39 insertions(+), 144 deletions(-) diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index bc8ca186fb877..12a0614b9fd49 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -22,13 +22,17 @@ include ../lib.mk CFLAGS += $(TOOLS_INCLUDES) +CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tables \ + -fno-stack-protector -include $(top_srcdir)/tools/include/nolibc/nolibc.h \ + -I$(top_srcdir)/tools/include/nolibc/ $(KHDR_INCLUDES) + $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c -$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c -$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector +$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers +$(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS) $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c $(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 500608f89c66b..9ce795b806f09 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -1,167 +1,58 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vdso_test.c: Sample code to test parse_vdso.c on x86 - * Copyright (c) 2011-2014 Andy Lutomirski + * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and + * vDSO gettimeofday() + * Copyright (c) 2014 Andy Lutomirski * - * You can amuse yourself by compiling with: - * gcc -std=gnu99 -nostdlib - * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s - * vdso_standalone_test_x86.c parse_vdso.c - * to generate a small binary. On x86_64, you can omit -lgcc_s - * if you want the binary to be completely standalone. + * Compile with: + * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c + * + * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. */ -#include +#include +#ifndef NOLIBC +#include #include -#include -#include -#include - -#include "parse_vdso.h" - -/* We need some libc functions... */ -int strcmp(const char *a, const char *b) -{ - /* This implementation is buggy: it never returns -1. */ - while (*a || *b) { - if (*a != *b) - return 1; - if (*a == 0 || *b == 0) - return 1; - a++; - b++; - } - - return 0; -} - -/* - * The clang build needs this, although gcc does not. - * Stolen from lib/string.c. - */ -void *memcpy(void *dest, const void *src, size_t count) -{ - char *tmp = dest; - const char *s = src; - - while (count--) - *tmp++ = *s++; - return dest; -} - -/* ...and two syscalls. This is x86-specific. */ -static inline long x86_syscall3(long nr, long a0, long a1, long a2) -{ - long ret; -#ifdef __x86_64__ - asm volatile ("syscall" : "=a" (ret) : "a" (nr), - "D" (a0), "S" (a1), "d" (a2) : - "cc", "memory", "rcx", - "r8", "r9", "r10", "r11" ); -#else - asm volatile ("int $0x80" : "=a" (ret) : "a" (nr), - "b" (a0), "c" (a1), "d" (a2) : - "cc", "memory" ); #endif - return ret; -} - -static inline long linux_write(int fd, const void *data, size_t len) -{ - return x86_syscall3(__NR_write, fd, (long)data, (long)len); -} -static inline void linux_exit(int code) -{ - x86_syscall3(__NR_exit, code, 0, 0); -} - -void to_base10(char *lastdig, time_t n) -{ - while (n) { - *lastdig = (n % 10) + '0'; - n /= 10; - lastdig--; - } -} +#include "../kselftest.h" +#include "parse_vdso.h" +#include "vdso_config.h" +#include "vdso_call.h" -unsigned long getauxval(const unsigned long *auxv, unsigned long type) +int main(int argc, char **argv) { - unsigned long ret; - - if (!auxv) - return 0; - - while (1) { - if (!auxv[0] && !auxv[1]) { - ret = 0; - break; - } + const char *version = versions[VDSO_VERSION]; + const char **name = (const char **)&names[VDSO_NAMES]; - if (auxv[0] == type) { - ret = auxv[1]; - break; - } - - auxv += 2; + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + if (!sysinfo_ehdr) { + printf("AT_SYSINFO_EHDR is not present!\n"); + return KSFT_SKIP; } - return ret; -} - -void c_main(void **stack) -{ - /* Parse the stack */ - long argc = (long)*stack; - stack += argc + 2; - - /* Now we're pointing at the environment. Skip it. */ - while(*stack) - stack++; - stack++; - - /* Now we're pointing at auxv. Initialize the vDSO parser. */ - vdso_init_from_sysinfo_ehdr(getauxval((unsigned long *)stack, AT_SYSINFO_EHDR)); + vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); /* Find gettimeofday. */ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); - gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); + gtod_t gtod = (gtod_t)vdso_sym(version, name[0]); - if (!gtod) - linux_exit(1); + if (!gtod) { + printf("Could not find %s\n", name[0]); + return KSFT_SKIP; + } struct timeval tv; - long ret = gtod(&tv, 0); + long ret = VDSO_CALL(gtod, 2, &tv, 0); if (ret == 0) { - char buf[] = "The time is .000000\n"; - to_base10(buf + 31, tv.tv_sec); - to_base10(buf + 38, tv.tv_usec); - linux_write(1, buf, sizeof(buf) - 1); + printf("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); } else { - linux_exit(ret); + printf("%s failed\n", name[0]); + return KSFT_FAIL; } - linux_exit(0); + return 0; } - -/* - * This is the real entry point. It passes the initial stack into - * the C entry point. - */ -asm ( - ".text\n" - ".global _start\n" - ".type _start,@function\n" - "_start:\n\t" -#ifdef __x86_64__ - "mov %rsp,%rdi\n\t" - "and $-16,%rsp\n\t" - "sub $8,%rsp\n\t" - "jmp c_main" -#else - "push %esp\n\t" - "call c_main\n\t" - "int $3" -#endif - ); From 0704bf439655c1f82ff7e623b1124a3c1cb8c907 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 3 Mar 2025 12:11:03 +0100 Subject: [PATCH 35/55] vdso: Introduce vdso/cache.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vDSO implementation can only include headers from the vdso/ namespace. To enable the usage of ____cacheline_aligned from the vDSO, move it and its dependencies into a new header vdso/cache.h. Keep compatibility by including vdso/cache.h from linux/cache.h. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-1-c1b5c69a166f@linutronix.de --- include/linux/cache.h | 9 +-------- include/vdso/cache.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 include/vdso/cache.h diff --git a/include/linux/cache.h b/include/linux/cache.h index ca2a05682a54b..e69768f50d532 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -3,16 +3,13 @@ #define __LINUX_CACHE_H #include +#include #include #ifndef L1_CACHE_ALIGN #define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES) #endif -#ifndef SMP_CACHE_BYTES -#define SMP_CACHE_BYTES L1_CACHE_BYTES -#endif - /** * SMP_CACHE_ALIGN - align a value to the L2 cacheline size * @x: value to align @@ -63,10 +60,6 @@ #define __ro_after_init __section(".data..ro_after_init") #endif -#ifndef ____cacheline_aligned -#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) -#endif - #ifndef ____cacheline_aligned_in_smp #ifdef CONFIG_SMP #define ____cacheline_aligned_in_smp ____cacheline_aligned diff --git a/include/vdso/cache.h b/include/vdso/cache.h new file mode 100644 index 0000000000000..f89d48304bf8f --- /dev/null +++ b/include/vdso/cache.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_CACHE_H +#define __VDSO_CACHE_H + +#include + +#ifndef SMP_CACHE_BYTES +#define SMP_CACHE_BYTES L1_CACHE_BYTES +#endif + +#ifndef ____cacheline_aligned +#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#endif + +#endif /* __VDSO_ALIGN_H */ From b69b47a6b5f67ac1074e0a6baac7f07bdc3dceed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 3 Mar 2025 12:11:04 +0100 Subject: [PATCH 36/55] arm64: Make asm/cache.h compatible with vDSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit asm/cache.h can be used during the vDSO build through vdso/cache.h. Not all definitions in it are compatible with the vDSO, especially the compat vDSO. Hide the more complex definitions from the vDSO build. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-2-c1b5c69a166f@linutronix.de --- arch/arm64/include/asm/cache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 06a4670bdb0b9..99cd6546e72e3 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -35,7 +35,7 @@ #define ARCH_DMA_MINALIGN (128) #define ARCH_KMALLOC_MINALIGN (8) -#ifndef __ASSEMBLY__ +#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) #include #include @@ -118,6 +118,6 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void) return ctr; } -#endif /* __ASSEMBLY__ */ +#endif /* !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) */ #endif From fa8152ca221110e14e43040bd442c8140bc2e03c Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:05 +0100 Subject: [PATCH 37/55] vdso: Make vdso_time_data cacheline aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vdso_time_data is not cacheline aligned at the moment. When instantiating an array, the start of the second array member is not cache line aligned. This increases the number of the required cache lines which needs to be read when handling e.g. CLOCK_MONOTONIC_RAW, because the data spawns an extra cache line if the previous data does not end at a cache line boundary. Therefore make struct vdso_time_data cacheline aligned. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-3-c1b5c69a166f@linutronix.de --- include/vdso/datapage.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index ed4fb4c06e3ee..dfd98f969f151 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -126,7 +127,7 @@ struct vdso_time_data { u32 __unused; struct arch_vdso_time_data arch_data; -}; +} ____cacheline_aligned; /** * struct vdso_rng_data - vdso RNG state information From a05f14de04e989f0bc32cea128243090e5e5d54b Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:06 +0100 Subject: [PATCH 38/55] vdso/datapage: Define vdso_clock to prepare for multiple PTP clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple PTP clocks, which are independent of timekeeping, are required for systems, which utilize PTP for synchronizing e.g. automation systems independent of clock TAI. PTP clocks are slow to access, but applications require fast access to the relevant time similar to the regular timekeeping relevant clocks. To prepare for that the VDSO data representation must be reworked. For transition to the new structure of the vdso, add a define which maps vdso_clock to vdso_data. This will be removed when all users are updated step by step. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-4-c1b5c69a166f@linutronix.de --- include/vdso/datapage.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index dfd98f969f151..1df22e8bb9b31 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -129,6 +129,8 @@ struct vdso_time_data { struct arch_vdso_time_data arch_data; } ____cacheline_aligned; +#define vdso_clock vdso_time_data + /** * struct vdso_rng_data - vdso RNG state information * @generation: counter representing the number of RNG reseeds From e15bf9e34b5716e8c4a50b32a98752ff1f53d647 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:07 +0100 Subject: [PATCH 39/55] vdso/helpers: Prepare introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be an array of VDSO clocks. For now, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. Prepare all functions which need the pointer to the vdso_clock array to work well after the structures get reworked. Replace the struct vdso_time_data pointer with a struct vdso_clock pointer where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-5-c1b5c69a166f@linutronix.de --- include/vdso/helpers.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 41c3087070c7a..28f0707a46c62 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -7,49 +7,53 @@ #include #include -static __always_inline u32 vdso_read_begin(const struct vdso_time_data *vd) +static __always_inline u32 vdso_read_begin(const struct vdso_clock *vc) { u32 seq; - while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) + while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) cpu_relax(); smp_rmb(); return seq; } -static __always_inline u32 vdso_read_retry(const struct vdso_time_data *vd, +static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc, u32 start) { u32 seq; smp_rmb(); - seq = READ_ONCE(vd->seq); + seq = READ_ONCE(vc->seq); return seq != start; } static __always_inline void vdso_write_begin(struct vdso_time_data *vd) { + struct vdso_clock *vc = vd; + /* * WRITE_ONCE() is required otherwise the compiler can validly tear * updates to vd[x].seq and it is possible that the value seen by the * reader is inconsistent. */ - WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); + WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1); + WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1); smp_wmb(); } static __always_inline void vdso_write_end(struct vdso_time_data *vd) { + struct vdso_clock *vc = vd; + smp_wmb(); /* * WRITE_ONCE() is required otherwise the compiler can validly tear * updates to vd[x].seq and it is possible that the value seen by the * reader is inconsistent. */ - WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); + WRITE_ONCE(vc[CS_HRES_COARSE].seq, vc[CS_HRES_COARSE].seq + 1); + WRITE_ONCE(vc[CS_RAW].seq, vc[CS_RAW].seq + 1); } #endif /* !__ASSEMBLY__ */ From cddb82d1c4de5649b6841a1b94a610f934c5dd5f Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:08 +0100 Subject: [PATCH 40/55] vdso/gettimeofday: Prepare introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. Prepare all functions which need the pointer to the vdso_clock array to work correctly after introducing the new struct. Where applicable, replace the struct vdso_time_data pointer by a struct vdso_clock pointer. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-6-c1b5c69a166f@linutronix.de --- lib/vdso/gettimeofday.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 299f027116ee0..59369a4e9f25f 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -257,6 +257,7 @@ static __always_inline int __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { + const struct vdso_clock *vc = vd; u32 msk; /* Check for negative values or invalid clocks */ @@ -269,15 +270,15 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, */ msk = 1U << clock; if (likely(msk & VDSO_HRES)) - vd = &vd[CS_HRES_COARSE]; + vc = &vc[CS_HRES_COARSE]; else if (msk & VDSO_COARSE) - return do_coarse(&vd[CS_HRES_COARSE], clock, ts); + return do_coarse(&vc[CS_HRES_COARSE], clock, ts); else if (msk & VDSO_RAW) - vd = &vd[CS_RAW]; + vc = &vc[CS_RAW]; else return -1; - return do_hres(vd, clock, ts); + return do_hres(vc, clock, ts); } static __maybe_unused int @@ -328,11 +329,12 @@ static __maybe_unused int __cvdso_gettimeofday_data(const struct vdso_time_data *vd, struct __kernel_old_timeval *tv, struct timezone *tz) { + const struct vdso_clock *vc = vd; if (likely(tv != NULL)) { struct __kernel_timespec ts; - if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) + if (do_hres(&vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) return gettimeofday_fallback(tv, tz); tv->tv_sec = ts.tv_sec; @@ -341,7 +343,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd, if (unlikely(tz != NULL)) { if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + vc->clock_mode == VDSO_CLOCKMODE_TIMENS) vd = __arch_get_vdso_u_timens_data(vd); tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; @@ -361,13 +363,16 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) static __maybe_unused __kernel_old_time_t __cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time) { + const struct vdso_clock *vc = vd; __kernel_old_time_t t; if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + vc->clock_mode == VDSO_CLOCKMODE_TIMENS) { vd = __arch_get_vdso_u_timens_data(vd); + vc = vd; + } - t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); + t = READ_ONCE(vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); if (time) *time = t; @@ -386,6 +391,7 @@ static __maybe_unused int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { + const struct vdso_clock *vc = vd; u32 msk; u64 ns; @@ -394,7 +400,7 @@ int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock return -1; if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + vc->clock_mode == VDSO_CLOCKMODE_TIMENS) vd = __arch_get_vdso_u_timens_data(vd); /* From 64c3613ce31a1a58e43c2d86eafbb03364986450 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:09 +0100 Subject: [PATCH 41/55] vdso/gettimeofday: Prepare do_hres() for introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. Prepare for the rework of these structures by adding a struct vdso_clock pointer argument to do_hres(), and replace the struct vdso_time_data pointer with the new pointer argument where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-7-c1b5c69a166f@linutronix.de --- lib/vdso/gettimeofday.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 59369a4e9f25f..15611ab650232 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -139,10 +139,11 @@ static __always_inline int do_hres_timens(const struct vdso_time_data *vdns, clo } #endif -static __always_inline int do_hres(const struct vdso_time_data *vd, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline +int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, + clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; u64 cycles, sec, ns; u32 seq; @@ -154,31 +155,31 @@ static __always_inline int do_hres(const struct vdso_time_data *vd, clockid_t cl /* * Open coded function vdso_read_begin() to handle * VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a - * special VVAR page installed which has vd->seq set to 1 and - * vd->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time + * special VVAR page installed which has vc->seq set to 1 and + * vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time * namespace affected tasks this does not affect performance - * because if vd->seq is odd, i.e. a concurrent update is in - * progress the extra check for vd->clock_mode is just a few - * extra instructions while spin waiting for vd->seq to become + * because if vc->seq is odd, i.e. a concurrent update is in + * progress the extra check for vc->clock_mode is just a few + * extra instructions while spin waiting for vc->seq to become * even again. */ - while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) { + while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) { if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VDSO_CLOCKMODE_TIMENS) + vc->clock_mode == VDSO_CLOCKMODE_TIMENS) return do_hres_timens(vd, clk, ts); cpu_relax(); } smp_rmb(); - if (unlikely(!vdso_clocksource_ok(vd))) + if (unlikely(!vdso_clocksource_ok(vc))) return -1; - cycles = __arch_get_hw_counter(vd->clock_mode, vd); + cycles = __arch_get_hw_counter(vc->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) return -1; - ns = vdso_calc_ns(vd, cycles, vdso_ts->nsec); + ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); sec = vdso_ts->sec; - } while (unlikely(vdso_read_retry(vd, seq))); + } while (unlikely(vdso_read_retry(vc, seq))); /* * Do this outside the loop: a race inside the loop could result @@ -278,7 +279,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, else return -1; - return do_hres(vc, clock, ts); + return do_hres(vd, vc, clock, ts); } static __maybe_unused int @@ -334,7 +335,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd, if (likely(tv != NULL)) { struct __kernel_timespec ts; - if (do_hres(&vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) + if (do_hres(vd, &vc[CS_HRES_COARSE], CLOCK_REALTIME, &ts)) return gettimeofday_fallback(tv, tz); tv->tv_sec = ts.tv_sec; From 83a2a6b8cfc574dae4e3c65018392dcedc59079a Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:10 +0100 Subject: [PATCH 42/55] vdso/gettimeofday: Prepare do_hres_timens() for introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. Prepare for the rework of these structures by adding a struct vdso_clock pointer argument to do_hres_timens(), and replace the struct vdso_time_data pointer with the new pointer argument where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-8-c1b5c69a166f@linutronix.de --- lib/vdso/gettimeofday.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 15611ab650232..e8d4b02bcb616 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -81,36 +81,36 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim } #endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ -static __always_inline int do_hres_timens(const struct vdso_time_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline +int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { - const struct timens_offset *offs = &vdns->offset[clk]; + const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); + const struct timens_offset *offs = &vcns->offset[clk]; const struct vdso_timestamp *vdso_ts; - const struct vdso_time_data *vd; + const struct vdso_clock *vc = vd; u64 cycles, ns; u32 seq; s64 sec; - vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE); - vd = __arch_get_vdso_u_timens_data(vd); if (clk != CLOCK_MONOTONIC_RAW) - vd = &vd[CS_HRES_COARSE]; + vc = &vc[CS_HRES_COARSE]; else - vd = &vd[CS_RAW]; - vdso_ts = &vd->basetime[clk]; + vc = &vc[CS_RAW]; + vdso_ts = &vc->basetime[clk]; do { - seq = vdso_read_begin(vd); + seq = vdso_read_begin(vc); - if (unlikely(!vdso_clocksource_ok(vd))) + if (unlikely(!vdso_clocksource_ok(vc))) return -1; - cycles = __arch_get_hw_counter(vd->clock_mode, vd); + cycles = __arch_get_hw_counter(vc->clock_mode, vd); if (unlikely(!vdso_cycles_ok(cycles))) return -1; - ns = vdso_calc_ns(vd, cycles, vdso_ts->nsec); + ns = vdso_calc_ns(vc, cycles, vdso_ts->nsec); sec = vdso_ts->sec; - } while (unlikely(vdso_read_retry(vd, seq))); + } while (unlikely(vdso_read_retry(vc, seq))); /* Add the namespace offset */ sec += offs->sec; @@ -132,8 +132,9 @@ const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_tim return NULL; } -static __always_inline int do_hres_timens(const struct vdso_time_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline +int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { return -EINVAL; } @@ -166,7 +167,7 @@ int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) { if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) - return do_hres_timens(vd, clk, ts); + return do_hres_timens(vd, vc, clk, ts); cpu_relax(); } smp_rmb(); From 70067ae181f302990002aa9f54e2b8503cde7160 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:11 +0100 Subject: [PATCH 43/55] vdso/gettimeofday: Prepare do_coarse() for introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. Prepare for the rework of these structures by adding a struct vdso_clock pointer argument to do_coarse(), and replace the struct vdso_time_data pointer with the new pointer argument where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-9-c1b5c69a166f@linutronix.de --- lib/vdso/gettimeofday.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index e8d4b02bcb616..36ef7de097e61 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -229,10 +229,11 @@ static __always_inline int do_coarse_timens(const struct vdso_time_data *vdns, c } #endif -static __always_inline int do_coarse(const struct vdso_time_data *vd, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline +int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, + clockid_t clk, struct __kernel_timespec *ts) { - const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; + const struct vdso_timestamp *vdso_ts = &vc->basetime[clk]; u32 seq; do { @@ -240,17 +241,17 @@ static __always_inline int do_coarse(const struct vdso_time_data *vd, clockid_t * Open coded function vdso_read_begin() to handle * VDSO_CLOCK_TIMENS. See comment in do_hres(). */ - while ((seq = READ_ONCE(vd->seq)) & 1) { + while ((seq = READ_ONCE(vc->seq)) & 1) { if (IS_ENABLED(CONFIG_TIME_NS) && - vd->clock_mode == VDSO_CLOCKMODE_TIMENS) - return do_coarse_timens(vd, clk, ts); + vc->clock_mode == VDSO_CLOCKMODE_TIMENS) + return do_coarse_timens(vc, clk, ts); cpu_relax(); } smp_rmb(); ts->tv_sec = vdso_ts->sec; ts->tv_nsec = vdso_ts->nsec; - } while (unlikely(vdso_read_retry(vd, seq))); + } while (unlikely(vdso_read_retry(vc, seq))); return 0; } @@ -274,7 +275,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, if (likely(msk & VDSO_HRES)) vc = &vc[CS_HRES_COARSE]; else if (msk & VDSO_COARSE) - return do_coarse(&vc[CS_HRES_COARSE], clock, ts); + return do_coarse(vd, &vc[CS_HRES_COARSE], clock, ts); else if (msk & VDSO_RAW) vc = &vc[CS_RAW]; else From 8c3f5cb3d33bee748b0408418d0d2a627586a2b5 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:12 +0100 Subject: [PATCH 44/55] vdso/gettimeofday: Prepare do_coarse_timens() for introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. Prepare for the rework of these structures by adding a struct vdso_clock pointer argument to do_coarse_time_ns(), and replace the struct vdso_time_data pointer with the new pointer argument where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-10-c1b5c69a166f@linutronix.de --- lib/vdso/gettimeofday.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 36ef7de097e61..03fa0393645ac 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -193,21 +193,25 @@ int do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, } #ifdef CONFIG_TIME_NS -static __always_inline int do_coarse_timens(const struct vdso_time_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline +int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); - const struct vdso_timestamp *vdso_ts = &vd->basetime[clk]; - const struct timens_offset *offs = &vdns->offset[clk]; + const struct timens_offset *offs = &vcns->offset[clk]; + const struct vdso_timestamp *vdso_ts; + const struct vdso_clock *vc = vd; u64 nsec; s64 sec; s32 seq; + vdso_ts = &vc->basetime[clk]; + do { - seq = vdso_read_begin(vd); + seq = vdso_read_begin(vc); sec = vdso_ts->sec; nsec = vdso_ts->nsec; - } while (unlikely(vdso_read_retry(vd, seq))); + } while (unlikely(vdso_read_retry(vc, seq))); /* Add the namespace offset */ sec += offs->sec; @@ -222,8 +226,9 @@ static __always_inline int do_coarse_timens(const struct vdso_time_data *vdns, c return 0; } #else -static __always_inline int do_coarse_timens(const struct vdso_time_data *vdns, clockid_t clk, - struct __kernel_timespec *ts) +static __always_inline +int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, + clockid_t clk, struct __kernel_timespec *ts) { return -1; } @@ -244,7 +249,7 @@ int do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, while ((seq = READ_ONCE(vc->seq)) & 1) { if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) - return do_coarse_timens(vc, clk, ts); + return do_coarse_timens(vd, vc, clk, ts); cpu_relax(); } smp_rmb(); From 80801972a11b4c8610cd403e1f235e98cd799350 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:13 +0100 Subject: [PATCH 45/55] vdso/gettimeofday: Prepare helper functions for introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. To prepare for the rework of the data structures, replace the struct vdso_time_data pointer argument of the helper functions with struct vdso_clock pointer where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-11-c1b5c69a166f@linutronix.de --- lib/vdso/gettimeofday.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 03fa0393645ac..c6ff693455865 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -17,12 +17,12 @@ #endif #ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT -static __always_inline bool vdso_delta_ok(const struct vdso_time_data *vd, u64 delta) +static __always_inline bool vdso_delta_ok(const struct vdso_clock *vc, u64 delta) { - return delta < vd->max_cycles; + return delta < vc->max_cycles; } #else -static __always_inline bool vdso_delta_ok(const struct vdso_time_data *vd, u64 delta) +static __always_inline bool vdso_delta_ok(const struct vdso_clock *vc, u64 delta) { return true; } @@ -39,14 +39,14 @@ static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift) * Default implementation which works for all sane clocksources. That * obviously excludes x86/TSC. */ -static __always_inline u64 vdso_calc_ns(const struct vdso_time_data *vd, u64 cycles, u64 base) +static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base) { - u64 delta = (cycles - vd->cycle_last) & VDSO_DELTA_MASK(vd); + u64 delta = (cycles - vc->cycle_last) & VDSO_DELTA_MASK(vc); - if (likely(vdso_delta_ok(vd, delta))) - return vdso_shift_ns((delta * vd->mult) + base, vd->shift); + if (likely(vdso_delta_ok(vc, delta))) + return vdso_shift_ns((delta * vc->mult) + base, vc->shift); - return mul_u64_u32_add_u64_shr(delta, vd->mult, base, vd->shift); + return mul_u64_u32_add_u64_shr(delta, vc->mult, base, vc->shift); } #endif /* vdso_calc_ns */ @@ -58,9 +58,9 @@ static inline bool __arch_vdso_hres_capable(void) #endif #ifndef vdso_clocksource_ok -static inline bool vdso_clocksource_ok(const struct vdso_time_data *vd) +static inline bool vdso_clocksource_ok(const struct vdso_clock *vc) { - return vd->clock_mode != VDSO_CLOCKMODE_NONE; + return vc->clock_mode != VDSO_CLOCKMODE_NONE; } #endif From b5afbc106d7cbfa1ffbe7c8da2693e5effbba95e Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:14 +0100 Subject: [PATCH 46/55] vdso/vsyscall: Prepare introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. To prepare for the rework of the data structures, replace the struct vdso_time_data pointer with a struct vdso_clock pointer where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-12-c1b5c69a166f@linutronix.de --- kernel/time/vsyscall.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 418192296ef7d..dd85b41a70bee 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -18,25 +18,26 @@ static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct timekeeper *tk) { struct vdso_timestamp *vdso_ts; + struct vdso_clock *vc = vdata; u64 nsec, sec; - vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; + vc[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; #ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT - vdata[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles; + vc[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles; #endif - vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask; - vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult; - vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift; - vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last; + vc[CS_HRES_COARSE].mask = tk->tkr_mono.mask; + vc[CS_HRES_COARSE].mult = tk->tkr_mono.mult; + vc[CS_HRES_COARSE].shift = tk->tkr_mono.shift; + vc[CS_RAW].cycle_last = tk->tkr_raw.cycle_last; #ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT - vdata[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles; + vc[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles; #endif - vdata[CS_RAW].mask = tk->tkr_raw.mask; - vdata[CS_RAW].mult = tk->tkr_raw.mult; - vdata[CS_RAW].shift = tk->tkr_raw.shift; + vc[CS_RAW].mask = tk->tkr_raw.mask; + vc[CS_RAW].mult = tk->tkr_raw.mult; + vc[CS_RAW].shift = tk->tkr_raw.shift; /* CLOCK_MONOTONIC */ - vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; + vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; nsec = tk->tkr_mono.xtime_nsec; @@ -54,7 +55,7 @@ static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct ti nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift; /* CLOCK_BOOTTIME */ - vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME]; + vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME]; vdso_ts->sec = sec; while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { @@ -64,12 +65,12 @@ static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct ti vdso_ts->nsec = nsec; /* CLOCK_MONOTONIC_RAW */ - vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; + vdso_ts = &vc[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; vdso_ts->sec = tk->raw_sec; vdso_ts->nsec = tk->tkr_raw.xtime_nsec; /* CLOCK_TAI */ - vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; + vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_TAI]; vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; vdso_ts->nsec = tk->tkr_mono.xtime_nsec; } @@ -78,6 +79,7 @@ void update_vsyscall(struct timekeeper *tk) { struct vdso_time_data *vdata = vdso_k_time_data; struct vdso_timestamp *vdso_ts; + struct vdso_clock *vc = vdata; s32 clock_mode; u64 nsec; @@ -85,21 +87,21 @@ void update_vsyscall(struct timekeeper *tk) vdso_write_begin(vdata); clock_mode = tk->tkr_mono.clock->vdso_clock_mode; - vdata[CS_HRES_COARSE].clock_mode = clock_mode; - vdata[CS_RAW].clock_mode = clock_mode; + vc[CS_HRES_COARSE].clock_mode = clock_mode; + vc[CS_RAW].clock_mode = clock_mode; /* CLOCK_REALTIME also required for time() */ - vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; + vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; vdso_ts->sec = tk->xtime_sec; vdso_ts->nsec = tk->tkr_mono.xtime_nsec; /* CLOCK_REALTIME_COARSE */ - vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; + vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; vdso_ts->sec = tk->xtime_sec; vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; /* CLOCK_MONOTONIC_COARSE */ - vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE]; + vdso_ts = &vc[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE]; vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; nsec = nsec + tk->wall_to_monotonic.tv_nsec; From 0235220807033c7a1dc2a96cc23b6eae0dd11c81 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:15 +0100 Subject: [PATCH 47/55] vdso/namespace: Rename timens_setup_vdso_data() to reflect new vdso_clock struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. For time namespaces, vdso_time_data needs to be set up. But only the clock related part of the vdso_data thats requires this setup. To reflect the future struct vdso_clock, rename timens_setup_vdso_data() to timns_setup_vdso_clock_data(). No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-13-c1b5c69a166f@linutronix.de --- kernel/time/namespace.c | 6 +++--- lib/vdso/datastore.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 12f55aa539adb..f02430a73be8f 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -176,8 +176,8 @@ static struct timens_offset offset_from_ts(struct timespec64 off) * Timens page has vdso_time_data->clock_mode set to VDSO_CLOCKMODE_TIMENS which * enforces the time namespace handling path. */ -static void timens_setup_vdso_data(struct vdso_time_data *vdata, - struct time_namespace *ns) +static void timens_setup_vdso_clock_data(struct vdso_time_data *vdata, + struct time_namespace *ns) { struct timens_offset *offset = vdata->offset; struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); @@ -238,7 +238,7 @@ static void timens_set_vvar_page(struct task_struct *task, vdata = page_address(ns->vvar_page); for (i = 0; i < CS_BASES; i++) - timens_setup_vdso_data(&vdata[i], ns); + timens_setup_vdso_clock_data(&vdata[i], ns); out: mutex_unlock(&offset_lock); diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index e227fbbcb7969..4e350f56ace33 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -109,7 +109,7 @@ struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned * non-root time namespace. Whenever a task changes its namespace, the VVAR * page tables are cleared and then they will be re-faulted with a * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. + * See also the comment near timens_setup_vdso_clock_data() for details. */ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { From 5911e16cad61f1735c2f8c847dc43f03f8eaccd2 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:16 +0100 Subject: [PATCH 48/55] time/namespace: Prepare introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. To prepare for the rework of the data structures, replace the struct vdso_time_data pointer with a struct vdso_clock pointer where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-14-c1b5c69a166f@linutronix.de --- kernel/time/namespace.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index f02430a73be8f..09bc4fb39f24c 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -165,26 +165,26 @@ static struct timens_offset offset_from_ts(struct timespec64 off) * HVCLOCK * VVAR * - * The check for vdso_time_data->clock_mode is in the unlikely path of + * The check for vdso_clock->clock_mode is in the unlikely path of * the seq begin magic. So for the non-timens case most of the time * 'seq' is even, so the branch is not taken. * * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check - * for vdso_time_data->clock_mode is a non-issue. The task is spin waiting for the + * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the * update to finish and for 'seq' to become even anyway. * - * Timens page has vdso_time_data->clock_mode set to VDSO_CLOCKMODE_TIMENS which + * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which * enforces the time namespace handling path. */ -static void timens_setup_vdso_clock_data(struct vdso_time_data *vdata, +static void timens_setup_vdso_clock_data(struct vdso_clock *vc, struct time_namespace *ns) { - struct timens_offset *offset = vdata->offset; + struct timens_offset *offset = vc->offset; struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); - vdata->seq = 1; - vdata->clock_mode = VDSO_CLOCKMODE_TIMENS; + vc->seq = 1; + vc->clock_mode = VDSO_CLOCKMODE_TIMENS; offset[CLOCK_MONOTONIC] = monotonic; offset[CLOCK_MONOTONIC_RAW] = monotonic; offset[CLOCK_MONOTONIC_COARSE] = monotonic; @@ -220,6 +220,7 @@ static void timens_set_vvar_page(struct task_struct *task, struct time_namespace *ns) { struct vdso_time_data *vdata; + struct vdso_clock *vc; unsigned int i; if (ns == &init_time_ns) @@ -236,9 +237,10 @@ static void timens_set_vvar_page(struct task_struct *task, ns->frozen_offsets = true; vdata = page_address(ns->vvar_page); + vc = vdata; for (i = 0; i < CS_BASES; i++) - timens_setup_vdso_clock_data(&vdata[i], ns); + timens_setup_vdso_clock_data(&vc[i], ns); out: mutex_unlock(&offset_lock); From bf0eff816e467f8decb9b3ac0218cc26060e359f Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:17 +0100 Subject: [PATCH 49/55] x86/vdso: Prepare introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. To prepare for the rework of the data structures, replace the struct vdso_time_data pointer with a struct vdso_clock pointer where applicable. No functional change. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-15-c1b5c69a166f@linutronix.de --- arch/x86/include/asm/vdso/gettimeofday.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index edec796832e08..9e52cc46e1da9 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -261,7 +261,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode, return U64_MAX; } -static inline bool arch_vdso_clocksource_ok(const struct vdso_time_data *vd) +static inline bool arch_vdso_clocksource_ok(const struct vdso_clock *vc) { return true; } @@ -300,34 +300,34 @@ static inline bool arch_vdso_cycles_ok(u64 cycles) * declares everything with the MSB/Sign-bit set as invalid. Therefore the * effective mask is S64_MAX. */ -static __always_inline u64 vdso_calc_ns(const struct vdso_time_data *vd, u64 cycles, u64 base) +static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base) { - u64 delta = cycles - vd->cycle_last; + u64 delta = cycles - vc->cycle_last; /* * Negative motion and deltas which can cause multiplication * overflow require special treatment. This check covers both as - * negative motion is guaranteed to be greater than @vd::max_cycles + * negative motion is guaranteed to be greater than @vc::max_cycles * due to unsigned comparison. * * Due to the MSB/Sign-bit being used as invalid marker (see * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that * case is also unlikely and will also take the unlikely path here. */ - if (unlikely(delta > vd->max_cycles)) { + if (unlikely(delta > vc->max_cycles)) { /* * Due to the above mentioned TSC wobbles, filter out * negative motion. Per the above masking, the effective * sign bit is now bit 62. */ if (delta & (1ULL << 62)) - return base >> vd->shift; + return base >> vc->shift; /* Handle multiplication overflow gracefully */ - return mul_u64_u32_add_u64_shr(delta & S64_MAX, vd->mult, base, vd->shift); + return mul_u64_u32_add_u64_shr(delta & S64_MAX, vc->mult, base, vc->shift); } - return ((delta * vd->mult) + base) >> vd->shift; + return ((delta * vc->mult) + base) >> vc->shift; } #define vdso_calc_ns vdso_calc_ns From 5340f3cb20989ec9562f7fcd65005e25b2365e6a Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 3 Mar 2025 12:11:18 +0100 Subject: [PATCH 50/55] arm64/vdso: Prepare introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. To prepare for the rework of the data structures, replace the struct vdso_time_data pointer with a struct vdso_clock pointer where applicable. No functional change. Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-16-c1b5c69a166f@linutronix.de --- arch/arm64/include/asm/vdso/compat_gettimeofday.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 957ee12fcc54b..2c6b90d26bc8f 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -155,9 +155,9 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data( } #define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data -static inline bool vdso_clocksource_ok(const struct vdso_time_data *vd) +static inline bool vdso_clocksource_ok(const struct vdso_clock *vc) { - return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; + return vc->clock_mode == VDSO_CLOCKMODE_ARCHTIMER; } #define vdso_clocksource_ok vdso_clocksource_ok From ed0c10f34ffd94d6e0d37b830c232f7ca38ea174 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 3 Mar 2025 12:11:19 +0100 Subject: [PATCH 51/55] powerpc/vdso: Prepare introduction of struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be array of VDSO clocks. At the moment, vdso_clock is simply a define which maps vdso_clock to vdso_time_data. To prepare for the rework of the data structures, replace the struct vdso_time_data pointer with a struct vdso_clock pointer where applicable. No functional change. Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-17-c1b5c69a166f@linutronix.de --- arch/powerpc/include/asm/vdso/gettimeofday.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index dc955f2e0cc51..99c9d6f43fde2 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -99,7 +99,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, return get_tb(); } -static inline bool vdso_clocksource_ok(const struct vdso_time_data *vd) +static inline bool vdso_clocksource_ok(const struct vdso_clock *vc) { return true; } From 97a5a90ca234eaa3de8a6aa44d43de2827393019 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:20 +0100 Subject: [PATCH 52/55] vdso: Move architecture related data before basetime data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture related vdso data is required in the fast path when reading CLOCK_MONOTONIC or CLOCK_REALTIME. At the moment, this information is located at the end of the vdso_time_data structure, which is a suboptimal cache layout. Move the architecture specific VDSO data right before the basetime information, which is always required. This change does not have an impact on architectures with CONFIG_ARCH_HAS_VDSO_DATA=n. Architectures, which have it enabled, gain a better cache layout. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-18-c1b5c69a166f@linutronix.de --- include/vdso/datapage.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index 1df22e8bb9b31..bcd19c223783b 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -70,6 +70,8 @@ struct vdso_timestamp { /** * struct vdso_time_data - vdso datapage representation + * @arch_data: architecture specific data (optional, defaults + * to an empty struct) * @seq: timebase sequence counter * @clock_mode: clock mode * @cycle_last: timebase at clocksource init @@ -83,8 +85,6 @@ struct vdso_timestamp { * @tz_dsttime: type of DST correction * @hrtimer_res: hrtimer resolution * @__unused: unused - * @arch_data: architecture specific data (optional, defaults - * to an empty struct) * * vdso_time_data will be accessed by 64 bit and compat code at the same time * so we should be careful before modifying this structure. @@ -105,6 +105,8 @@ struct vdso_timestamp { * offset must be zero. */ struct vdso_time_data { + struct arch_vdso_time_data arch_data; + u32 seq; s32 clock_mode; @@ -125,8 +127,6 @@ struct vdso_time_data { s32 tz_dsttime; u32 hrtimer_res; u32 __unused; - - struct arch_vdso_time_data arch_data; } ____cacheline_aligned; #define vdso_clock vdso_time_data From 886653e36639177dd3ec2e7a4f0dc843d7def3f4 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 3 Mar 2025 12:11:21 +0100 Subject: [PATCH 53/55] vdso: Rework struct vdso_time_data and introduce struct vdso_clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support multiple PTP clocks, the VDSO data structure needs to be reworked. All clock specific data will end up in struct vdso_clock and in struct vdso_time_data there will be an array of VDSO clocks. Now that all preparatory changes are in place: Split the clock related struct members into a separate struct vdso_clock. Make sure all users are aware, that vdso_time_data is no longer initialized as an array and vdso_clock is now the array inside vdso_data. Remove the vdso_clock define, which mapped it to vdso_time_data for the transition. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Nam Cao Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250303-vdso-clock-v1-19-c1b5c69a166f@linutronix.de --- .../include/asm/vdso/compat_gettimeofday.h | 2 +- arch/arm64/include/asm/vdso/vsyscall.h | 4 +- arch/s390/kernel/time.c | 11 +--- include/asm-generic/vdso/vsyscall.h | 2 +- include/vdso/datapage.h | 55 +++++++++++-------- include/vdso/helpers.h | 4 +- kernel/time/namespace.c | 2 +- kernel/time/vsyscall.c | 11 ++-- lib/vdso/datastore.c | 4 +- lib/vdso/gettimeofday.c | 16 +++--- 10 files changed, 57 insertions(+), 54 deletions(-) diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index 2c6b90d26bc8f..d60ea7a72a9cb 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -149,7 +149,7 @@ static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data( * where __aarch64_get_vdso_u_time_data() is called, and then keep the * result in a register. */ - asm volatile("mov %0, %1" : "=r"(ret) : "r"(vdso_u_time_data)); + asm volatile("mov %0, %1" : "=r"(ret) : "r"(&vdso_u_time_data)); return ret; } diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 3f65cbd00635a..de58951b8df6a 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -15,8 +15,8 @@ static __always_inline void __arm64_update_vsyscall(struct vdso_time_data *vdata) { - vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; - vdata[CS_RAW].mask = VDSO_PRECISION_MASK; + vdata->clock_data[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; + vdata->clock_data[CS_RAW].mask = VDSO_PRECISION_MASK; } #define __arch_update_vsyscall __arm64_update_vsyscall diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 41ca3586b19f6..699a18f1c54eb 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -79,12 +79,10 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_k_time_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -373,7 +371,6 @@ static void clock_sync_global(long delta) { unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; @@ -389,10 +386,8 @@ static void clock_sync_global(long delta) panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_k_time_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_k_time_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta; /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index 1fb3000f50364..b550afa15ecd1 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -9,7 +9,7 @@ #ifndef __arch_get_vdso_u_time_data static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void) { - return vdso_u_time_data; + return &vdso_u_time_data; } #endif diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index bcd19c223783b..1864e76e8f691 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -69,9 +69,7 @@ struct vdso_timestamp { }; /** - * struct vdso_time_data - vdso datapage representation - * @arch_data: architecture specific data (optional, defaults - * to an empty struct) + * struct vdso_clock - vdso per clocksource datapage representation * @seq: timebase sequence counter * @clock_mode: clock mode * @cycle_last: timebase at clocksource init @@ -81,17 +79,9 @@ struct vdso_timestamp { * @shift: clocksource shift * @basetime[clock_id]: basetime per clock_id * @offset[clock_id]: time namespace offset per clock_id - * @tz_minuteswest: minutes west of Greenwich - * @tz_dsttime: type of DST correction - * @hrtimer_res: hrtimer resolution - * @__unused: unused * - * vdso_time_data will be accessed by 64 bit and compat code at the same time - * so we should be careful before modifying this structure. - * - * The ordering of the struct members is optimized to have fast access to the - * often required struct members which are related to CLOCK_REALTIME and - * CLOCK_MONOTONIC. This information is stored in the first cache lines. + * See also struct vdso_time_data for basic access and ordering information as + * struct vdso_clock is used there. * * @basetime is used to store the base time for the system wide time getter * VVAR page. @@ -104,9 +94,7 @@ struct vdso_timestamp { * For clocks which are not affected by time namespace adjustment the * offset must be zero. */ -struct vdso_time_data { - struct arch_vdso_time_data arch_data; - +struct vdso_clock { u32 seq; s32 clock_mode; @@ -122,14 +110,35 @@ struct vdso_time_data { struct vdso_timestamp basetime[VDSO_BASES]; struct timens_offset offset[VDSO_BASES]; }; +}; - s32 tz_minuteswest; - s32 tz_dsttime; - u32 hrtimer_res; - u32 __unused; -} ____cacheline_aligned; +/** + * struct vdso_time_data - vdso datapage representation + * @arch_data: architecture specific data (optional, defaults + * to an empty struct) + * @clock_data: clocksource related data (array) + * @tz_minuteswest: minutes west of Greenwich + * @tz_dsttime: type of DST correction + * @hrtimer_res: hrtimer resolution + * @__unused: unused + * + * vdso_time_data will be accessed by 64 bit and compat code at the same time + * so we should be careful before modifying this structure. + * + * The ordering of the struct members is optimized to have fast acces to the + * often required struct members which are related to CLOCK_REALTIME and + * CLOCK_MONOTONIC. This information is stored in the first cache lines. + */ +struct vdso_time_data { + struct arch_vdso_time_data arch_data; -#define vdso_clock vdso_time_data + struct vdso_clock clock_data[CS_BASES]; + + s32 tz_minuteswest; + s32 tz_dsttime; + u32 hrtimer_res; + u32 __unused; +} ____cacheline_aligned; /** * struct vdso_rng_data - vdso RNG state information @@ -151,7 +160,7 @@ struct vdso_rng_data { * relocation, and this is what we need. */ #ifdef CONFIG_GENERIC_VDSO_DATA_STORE -extern struct vdso_time_data vdso_u_time_data[CS_BASES] __attribute__((visibility("hidden"))); +extern struct vdso_time_data vdso_u_time_data __attribute__((visibility("hidden"))); extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden"))); extern struct vdso_arch_data vdso_u_arch_data __attribute__((visibility("hidden"))); diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 28f0707a46c62..0a98fed550ba6 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -30,7 +30,7 @@ static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc, static __always_inline void vdso_write_begin(struct vdso_time_data *vd) { - struct vdso_clock *vc = vd; + struct vdso_clock *vc = vd->clock_data; /* * WRITE_ONCE() is required otherwise the compiler can validly tear @@ -44,7 +44,7 @@ static __always_inline void vdso_write_begin(struct vdso_time_data *vd) static __always_inline void vdso_write_end(struct vdso_time_data *vd) { - struct vdso_clock *vc = vd; + struct vdso_clock *vc = vd->clock_data; smp_wmb(); /* diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 09bc4fb39f24c..e3642278df433 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -237,7 +237,7 @@ static void timens_set_vvar_page(struct task_struct *task, ns->frozen_offsets = true; vdata = page_address(ns->vvar_page); - vc = vdata; + vc = vdata->clock_data; for (i = 0; i < CS_BASES; i++) timens_setup_vdso_clock_data(&vc[i], ns); diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index dd85b41a70bee..01c2ab1e89719 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -17,8 +17,8 @@ static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct timekeeper *tk) { + struct vdso_clock *vc = vdata->clock_data; struct vdso_timestamp *vdso_ts; - struct vdso_clock *vc = vdata; u64 nsec, sec; vc[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; @@ -78,8 +78,8 @@ static inline void update_vdso_time_data(struct vdso_time_data *vdata, struct ti void update_vsyscall(struct timekeeper *tk) { struct vdso_time_data *vdata = vdso_k_time_data; + struct vdso_clock *vc = vdata->clock_data; struct vdso_timestamp *vdso_ts; - struct vdso_clock *vc = vdata; s32 clock_mode; u64 nsec; @@ -109,9 +109,8 @@ void update_vsyscall(struct timekeeper *tk) /* * Read without the seqlock held by clock_getres(). - * Note: No need to have a second copy. */ - WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); + WRITE_ONCE(vdata->hrtimer_res, hrtimer_resolution); /* * If the current clocksource is not VDSO capable, then spare the @@ -131,8 +130,8 @@ void update_vsyscall_tz(void) { struct vdso_time_data *vdata = vdso_k_time_data; - vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest; - vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime; + vdata->tz_minuteswest = sys_tz.tz_minuteswest; + vdata->tz_dsttime = sys_tz.tz_dsttime; __arch_sync_vdso_time_data(vdata); } diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index 4e350f56ace33..c715e217ec657 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -13,10 +13,10 @@ */ #ifdef CONFIG_HAVE_GENERIC_VDSO static union { - struct vdso_time_data data[CS_BASES]; + struct vdso_time_data data; u8 page[PAGE_SIZE]; } vdso_time_data_store __page_aligned_data; -struct vdso_time_data *vdso_k_time_data = vdso_time_data_store.data; +struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data; static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); #endif /* CONFIG_HAVE_GENERIC_VDSO */ diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index c6ff693455865..93ef801a97ef2 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -87,8 +87,8 @@ int do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *v { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; + const struct vdso_clock *vc = vd->clock_data; const struct vdso_timestamp *vdso_ts; - const struct vdso_clock *vc = vd; u64 cycles, ns; u32 seq; s64 sec; @@ -199,8 +199,8 @@ int do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock { const struct vdso_time_data *vd = __arch_get_vdso_u_timens_data(vdns); const struct timens_offset *offs = &vcns->offset[clk]; + const struct vdso_clock *vc = vd->clock_data; const struct vdso_timestamp *vdso_ts; - const struct vdso_clock *vc = vd; u64 nsec; s64 sec; s32 seq; @@ -265,7 +265,7 @@ static __always_inline int __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { - const struct vdso_clock *vc = vd; + const struct vdso_clock *vc = vd->clock_data; u32 msk; /* Check for negative values or invalid clocks */ @@ -337,7 +337,7 @@ static __maybe_unused int __cvdso_gettimeofday_data(const struct vdso_time_data *vd, struct __kernel_old_timeval *tv, struct timezone *tz) { - const struct vdso_clock *vc = vd; + const struct vdso_clock *vc = vd->clock_data; if (likely(tv != NULL)) { struct __kernel_timespec ts; @@ -371,13 +371,13 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) static __maybe_unused __kernel_old_time_t __cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time) { - const struct vdso_clock *vc = vd; + const struct vdso_clock *vc = vd->clock_data; __kernel_old_time_t t; if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) { vd = __arch_get_vdso_u_timens_data(vd); - vc = vd; + vc = vd->clock_data; } t = READ_ONCE(vc[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec); @@ -399,7 +399,7 @@ static __maybe_unused int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { - const struct vdso_clock *vc = vd; + const struct vdso_clock *vc = vd->clock_data; u32 msk; u64 ns; @@ -420,7 +420,7 @@ int __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t clock /* * Preserves the behaviour of posix_get_hrtimer_res(). */ - ns = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res); + ns = READ_ONCE(vd->hrtimer_res); } else if (msk & VDSO_COARSE) { /* * Preserves the behaviour of posix_get_coarse_res(). From c080f2b8a2e44077aed15f1b4f0fbc0f4a912a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 6 Mar 2025 15:07:20 +0100 Subject: [PATCH 54/55] x86/vdso: Always reject undefined references during linking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using a custom script to detect and fail on undefined references, use --no-undefined for all VDSO linker invocations. Drop the now unused checkundef.sh script. Signed-off-by: Thomas Weißschuh Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250306-vdso-checkundef-v2-1-a26cc315fd73@linutronix.de --- arch/x86/entry/vdso/Makefile | 7 +++---- arch/x86/entry/vdso/checkundef.sh | 10 ---------- 2 files changed, 3 insertions(+), 14 deletions(-) delete mode 100755 arch/x86/entry/vdso/checkundef.sh diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 1c0072336e661..a6430c5636b8d 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -32,7 +32,7 @@ targets += $(foreach x, 64 x32 32, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg CPPFLAGS_vdso.lds += -P -C -VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ +VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 \ -z max-page-size=4096 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE @@ -151,10 +151,9 @@ $(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE quiet_cmd_vdso = VDSO $@ cmd_vdso = $(LD) -o $@ \ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ - -T $(filter %.lds,$^) $(filter %.o,$^) && \ - sh $(src)/checkundef.sh '$(NM)' '$@' + -T $(filter %.lds,$^) $(filter %.o,$^) -VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \ +VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 --no-undefined \ $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack quiet_cmd_vdso_and_check = VDSO $@ diff --git a/arch/x86/entry/vdso/checkundef.sh b/arch/x86/entry/vdso/checkundef.sh deleted file mode 100755 index 7ee90a9b549da..0000000000000 --- a/arch/x86/entry/vdso/checkundef.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -nm="$1" -file="$2" -$nm "$file" | grep '^ *U' > /dev/null 2>&1 -if [ $? -eq 1 ]; then - exit 0 -else - echo "$file: undefined symbols found" >&2 - exit 1 -fi From 652262975db421767ada3f05b926854bbb357759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 6 Mar 2025 15:07:21 +0100 Subject: [PATCH 55/55] sparc/vdso: Always reject undefined references during linking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using a custom script to detect and fail on undefined references, use --no-undefined for all VDSO linker invocations. Drop the now unused checkundef.sh script. Signed-off-by: Thomas Weißschuh Signed-off-by: Ingo Molnar Cc: David S. Miller Cc: Andreas Larsson Link: https://lore.kernel.org/r/20250306-vdso-checkundef-v2-2-a26cc315fd73@linutronix.de --- arch/sparc/vdso/Makefile | 7 +++---- arch/sparc/vdso/checkundef.sh | 10 ---------- 2 files changed, 3 insertions(+), 14 deletions(-) delete mode 100644 arch/sparc/vdso/checkundef.sh diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 50ec2978cda53..fdc4a8f5a49c5 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -22,7 +22,7 @@ targets += $(foreach x, 32 64, vdso-image-$(x).c vdso$(x).so vdso$(x).so.dbg) CPPFLAGS_vdso.lds += -P -C -VDSO_LDFLAGS_vdso.lds = -m elf64_sparc -soname linux-vdso.so.1 --no-undefined \ +VDSO_LDFLAGS_vdso.lds = -m elf64_sparc -soname linux-vdso.so.1 \ -z max-page-size=8192 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE @@ -101,7 +101,6 @@ $(obj)/vdso32.so.dbg: FORCE \ quiet_cmd_vdso = VDSO $@ cmd_vdso = $(LD) -nostdlib -o $@ \ $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ - -T $(filter %.lds,$^) $(filter %.o,$^) && \ - sh $(src)/checkundef.sh '$(OBJDUMP)' '$@' + -T $(filter %.lds,$^) $(filter %.o,$^) -VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic +VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 -Bsymbolic --no-undefined diff --git a/arch/sparc/vdso/checkundef.sh b/arch/sparc/vdso/checkundef.sh deleted file mode 100644 index 2d85876ffc325..0000000000000 --- a/arch/sparc/vdso/checkundef.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -objdump="$1" -file="$2" -$objdump -t "$file" | grep '*UUND*' | grep -v '#scratch' > /dev/null 2>&1 -if [ $? -eq 1 ]; then - exit 0 -else - echo "$file: undefined symbols found" >&2 - exit 1 -fi