From 1d4beeb4edc79b32cfedd07dab9cf59b6df2ee71 Mon Sep 17 00:00:00 2001 From: Pengfei Xu Date: Wed, 11 Jan 2023 16:15:31 +0800 Subject: [PATCH 01/77] selftests/filesystems: grant executable permission to run_fat_tests.sh When tools/testing/selftests/kselftest_install.sh is used to generate kselftest-list.txt under tools/testing/selftests/kselftest_install, and tools/testing/selftests/kselftest_install/run_kselftest.sh is then used to run all the kselftests in kselftest-list.txt, the run is blocked by the "filesystems/fat: run_fat_tests.sh" case with "Warning: file run_fat_tests.sh is not executable". Grant executable permission to run_fat_tests.sh to fix this issue. Link: https://lkml.kernel.org/r/dfdbba6df8a1ab34bb1e81cd8bd7ca3f9ed5c369.1673424747.git.pengfei.xu@intel.com Fixes: dd7c9be330d8 ("selftests/filesystems: add a vfat RENAME_EXCHANGE test") Signed-off-by: Pengfei Xu Reviewed-by: Javier Martinez Canillas Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/filesystems/fat/run_fat_tests.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/filesystems/fat/run_fat_tests.sh diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh old mode 100644 new mode 100755
From c5136d61377e07e3692df481a221e02283b52ba2 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Tue, 13 Dec 2022 15:45:22 +0800 Subject: [PATCH 02/77] arch/alpha/kernel/smp.c: remove unnecessary (void*) conversions A void * pointer does not need to be cast when it is assigned to a pointer of another type. Link: https://lkml.kernel.org/r/20221213074522.3738-1-zeming@nfschina.com Signed-off-by: Li zeming Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Richard Henderson Signed-off-by: Andrew Morton --- arch/alpha/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index f4e20f75438f8..0ede4b044e869 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -628,7 +628,7 @@ flush_tlb_all(void) static void ipi_flush_tlb_mm(void *x) { - struct mm_struct *mm = (struct mm_struct *) x; + struct mm_struct *mm = x; if (mm == current->active_mm && !asn_locked()) flush_tlb_current(mm); else @@ -670,7 +670,7 @@ struct flush_tlb_page_struct { static void ipi_flush_tlb_page(void *x) { - struct flush_tlb_page_struct *data = (struct flush_tlb_page_struct *)x; + struct flush_tlb_page_struct *data = x; struct mm_struct * mm = data->mm; if (mm == current->active_mm && !asn_locked())
From ef890d1548c48af18684ef991f697ab05d7092ed Mon Sep 17 00:00:00 2001 From: Li zeming Date: Tue, 13 Dec 2022 15:36:33 +0800 Subject: [PATCH 03/77] arch/alpha/kernel/process.c: remove unnecessary (void*) conversions generic_ptr is a void * pointer and does not require a cast. Link: https://lkml.kernel.org/r/20221213073633.3586-1-zeming@nfschina.com Signed-off-by: Li zeming Cc: "Eric W. Biederman" Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Richard Henderson Signed-off-by: Andrew Morton --- arch/alpha/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 65fdae9e48f38..5bc8be1eba11c 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -74,7 +74,7 @@ struct halt_info { static void common_shutdown_1(void *generic_ptr) { - struct halt_info *how = (struct halt_info *)generic_ptr; + struct halt_info *how = generic_ptr; struct percpu_struct *cpup; unsigned long *pflags, flags; int cpuid = smp_processor_id();
Biederman" Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Richard Henderson Signed-off-by: Andrew Morton --- arch/alpha/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 65fdae9e48f38..5bc8be1eba11c 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -74,7 +74,7 @@ struct halt_info { static void common_shutdown_1(void *generic_ptr) { - struct halt_info *how = (struct halt_info *)generic_ptr; + struct halt_info *how = generic_ptr; struct percpu_struct *cpup; unsigned long *pflags, flags; int cpuid = smp_processor_id(); From ef64c5ca27d8ddbef506fee325debd45f5aa4731 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 12 Dec 2022 11:46:44 +0900 Subject: [PATCH 04/77] error-injection: remove EI_ETYPE_NONE Patch series "error-injection: Clarify the requirements of error injectable functions". Patches for clarifying the requirement of error injectable functions and to remove the confusing EI_ETYPE_NONE. This patch (of 2): Since the EI_ETYPE_NONE is confusing type, replace it with appropriate errno. The EI_ETYPE_NONE has been introduced for a dummy (error) value, but it can mislead people that they can use ALLOW_ERROR_INJECTION(func, NONE). So remove it from the EI_ETYPE and use appropriate errno instead. Link: https://lkml.kernel.org/r/167081319306.387937.10079195394503045678.stgit@devnote3 Link: https://lkml.kernel.org/r/167081320421.387937.4259807348852421112.stgit@devnote3 Fixes: 663faf9f7bee ("error-injection: Add injectable error types") Signed-off-by: Masami Hiramatsu (Google) Cc: Alexei Starovoitov Cc: Borislav Petkov (AMD) Cc: Chris Mason Cc: Christoph Hellwig Cc: Florent Revest Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Kees Cook Cc: KP Singh Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/asm-generic/error-injection.h | 1 - include/linux/error-injection.h | 2 +- lib/error-inject.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index fbca56bd9cbc8..c0b9d3217ed95 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -4,7 +4,6 @@ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) enum { - EI_ETYPE_NONE, /* Dummy value for undefined case */ EI_ETYPE_NULL, /* Return NULL if failure */ EI_ETYPE_ERRNO, /* Return -ERRNO if failure */ EI_ETYPE_ERRNO_NULL, /* Return -ERRNO or NULL if failure */ diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h index 635a95caf29f3..268fecfc1e82d 100644 --- a/include/linux/error-injection.h +++ b/include/linux/error-injection.h @@ -19,7 +19,7 @@ static inline bool within_error_injection_list(unsigned long addr) static inline int get_injectable_error_type(unsigned long addr) { - return EI_ETYPE_NONE; + return -EOPNOTSUPP; } #endif diff --git a/lib/error-inject.c b/lib/error-inject.c index 1afca1b1cdead..32c14770508ec 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -40,7 +40,7 @@ bool within_error_injection_list(unsigned long addr) int get_injectable_error_type(unsigned long addr) { struct ei_entry *ent; - int ei_type = EI_ETYPE_NONE; + int ei_type = -EINVAL; mutex_lock(&ei_mutex); list_for_each_entry(ent, &error_injection_list, list) { From ca0529aea9ebcdd60b67494b25e97a09d3047642 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 15 Dec 2022 16:33:32 -0800 Subject: 
From ca0529aea9ebcdd60b67494b25e97a09d3047642 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 15 Dec 2022 16:33:32 -0800 Subject: [PATCH 05/77] error-injection-remove-ei_etype_none-fix include/linux/error-injection.h needs errno.h lib/error-inject.c Cc: Masami Hiramatsu (Google) Reported-by: Stephen Rothwell Signed-off-by: Andrew Morton --- include/linux/error-injection.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h index 268fecfc1e82d..20e738f4eae8a 100644 --- a/include/linux/error-injection.h +++ b/include/linux/error-injection.h @@ -3,6 +3,7 @@ #define _LINUX_ERROR_INJECTION_H #include +#include <linux/errno.h> #include #ifdef CONFIG_FUNCTION_ERROR_INJECTION
From a071ba814a55431e312160ab2ef6fae3a4e37fe0 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 12 Dec 2022 11:46:54 +0900 Subject: [PATCH 06/77] docs: fault-injection: add requirements of error injectable functions Add a section about the requirements of error injectable functions and the types of errors. Since this section must be read before using the ALLOW_ERROR_INJECTION() macro, that section is also referenced from the macro's comment. Link: https://lkml.kernel.org/r/167081321427.387937.15475445689482551048.stgit@devnote3 Link: https://lore.kernel.org/all/20221211115218.2e6e289bb85f8cf53c11aa97@kernel.org/T/#u Signed-off-by: Masami Hiramatsu (Google) Cc: Alexei Starovoitov Cc: Borislav Petkov (AMD) Cc: Chris Mason Cc: Christoph Hellwig Cc: Florent Revest Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Kees Cook Cc: KP Singh Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- .../fault-injection/fault-injection.rst | 65 +++++++++++++++++++ include/asm-generic/error-injection.h | 6 +- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst index 5f6454b9dbd4d..08e420e109739 100644 --- a/Documentation/fault-injection/fault-injection.rst +++ b/Documentation/fault-injection/fault-injection.rst @@ -231,6 +231,71 @@ proc entries This feature is intended for systematic testing of faults in a single system call. See an example below. + +Error Injectable Functions +-------------------------- + +This part is for kernel developers considering adding a function to the +ALLOW_ERROR_INJECTION() macro. + +Requirements for the Error Injectable Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Since function-level error injection forcibly changes the code path +and returns an error even if the input and conditions are proper, it can +cause an unexpected kernel crash if you allow error injection on a function +which is NOT error injectable. Thus, you (and reviewers) must ensure that: + +- The function returns an error code if it fails, and the callers must check + it correctly (and need to recover from it). + +- The function does not execute any code which can change any state before + the first error return. The state includes global or local state, or input + variables. For example, clearing output address storage (e.g. `*ret = NULL`), + incrementing/decrementing a counter, setting a flag, disabling preemption/irqs, or taking + a lock (if those are undone before returning the error, that is OK). + +The first requirement is important; it means that release (object-freeing) functions are usually harder to inject errors into than allocation functions. If errors from such release functions are not correctly handled, they easily cause a memory leak (the caller cannot tell whether the object has been released or is corrupted). + +The second one is for callers which expect the function to always do something. If function error injection skips the whole function, that expectation is violated and causes an unexpected error. + Type of the Error Injectable Functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + Each error injectable function will have the error type specified by the ALLOW_ERROR_INJECTION() macro. You have to choose it carefully if you add a new error injectable function. If the wrong error type is chosen, the kernel may crash because it may not be able to handle the error. There are 4 types of errors defined in include/asm-generic/error-injection.h: EI_ETYPE_NULL This function will return `NULL` if it fails. e.g. return an allocated object address. EI_ETYPE_ERRNO This function will return an `-errno` error code if it fails. e.g. return -EINVAL if the input is wrong. This includes functions which return an address that encodes an `-errno` via the ERR_PTR() macro. EI_ETYPE_ERRNO_NULL This function will return an `-errno` or `NULL` if it fails. If the caller of this function checks the return value with the IS_ERR_OR_NULL() macro, this type is appropriate. EI_ETYPE_TRUE This function will return `true` (non-zero positive value) if it fails. +If you specify a wrong type, for example EI_ETYPE_ERRNO for a function which returns an allocated object, it may cause a problem because the returned value is not an object address and the caller cannot access the address. + + How to add new fault injection capability ----------------------------------------- diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index c0b9d3217ed95..b05253f68eaa5 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -19,8 +19,10 @@ struct pt_regs; #ifdef CONFIG_FUNCTION_ERROR_INJECTION /* - * Whitelist generating macro. Specify functions which can be - * error-injectable using this macro. + * Whitelist generating macro. Specify functions which can be error-injectable + * using this macro. If you are unsure what is required for error-injectable + * functions, please read Documentation/fault-injection/fault-injection.rst + * 'Error Injectable Functions' section. */ #define ALLOW_ERROR_INJECTION(fname, _etype) \ static struct error_injection_entry __used \
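To make the new documentation concrete, here is a minimal sketch of a function that satisfies both requirements, together with its whitelist annotation. The helper and all its names are hypothetical (not part of this series); passing ERRNO to the macro selects EI_ETYPE_ERRNO:

	#include <linux/errno.h>
	#include <linux/error-injection.h>
	#include <linux/slab.h>

	/* Returns 0 or -errno, and changes no caller-visible state before
	 * the first possible error return, so injecting an ERRNO is safe. */
	static int example_alloc_slot(void **out, size_t len)
	{
		void *p;

		if (!len)
			return -EINVAL;		/* nothing has been touched yet */

		p = kzalloc(len, GFP_KERNEL);
		if (!p)
			return -ENOMEM;		/* nothing for the caller to unwind */

		*out = p;			/* state changes only on success */
		return 0;
	}
	/* Expands to an EI_ETYPE_ERRNO entry in the error injection list. */
	ALLOW_ERROR_INJECTION(example_alloc_slot, ERRNO);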
From 9b0b62d719768f1946a0334c80e0341f06c2c70b Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Mon, 12 Dec 2022 10:16:27 +0800 Subject: [PATCH 07/77] hfs: fix missing hfs_bnode_get() in __hfs_bnode_create Syzbot found a kernel BUG in hfs_bnode_put(): kernel BUG at fs/hfs/bnode.c:466! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 3634 Comm: kworker/u4:5 Not tainted 6.1.0-rc7-syzkaller-00190-g97ee9d1c1696 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Workqueue: writeback wb_workfn (flush-7:0) RIP: 0010:hfs_bnode_put+0x46f/0x480 fs/hfs/bnode.c:466 Code: 8a 80 ff e9 73 fe ff ff 89 d9 80 e1 07 80 c1 03 38 c1 0f 8c a0 fe ff ff 48 89 df e8 db 8a 80 ff e9 93 fe ff ff e8 a1 68 2c ff <0f> 0b e8 9a 68 2c ff 0f 0b 0f 1f 84 00 00 00 00 00 55 41 57 41 56 RSP: 0018:ffffc90003b4f258 EFLAGS: 00010293 RAX: ffffffff825e318f RBX: 0000000000000000 RCX: ffff8880739dd7c0 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffc90003b4f430 R08: ffffffff825e2d9b R09: ffffed10045157d1 R10: ffffed10045157d1 R11: 1ffff110045157d0 R12: ffff8880228abe80 R13: ffff88807016c000 R14: dffffc0000000000 R15: ffff8880228abe00 FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa6ebe88718 CR3: 000000001e93d000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: hfs_write_inode+0x1bc/0xb40 write_inode fs/fs-writeback.c:1440 [inline] __writeback_single_inode+0x4d6/0x670 fs/fs-writeback.c:1652 writeback_sb_inodes+0xb3b/0x18f0 fs/fs-writeback.c:1878 __writeback_inodes_wb+0x125/0x420 fs/fs-writeback.c:1949 wb_writeback+0x440/0x7b0 fs/fs-writeback.c:2054 wb_check_start_all fs/fs-writeback.c:2176 [inline] wb_do_writeback fs/fs-writeback.c:2202 [inline] wb_workfn+0x827/0xef0 fs/fs-writeback.c:2235 process_one_work+0x877/0xdb0 kernel/workqueue.c:2289 worker_thread+0xb14/0x1330 kernel/workqueue.c:2436 kthread+0x266/0x300 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 The BUG_ON() is triggered here: /* Dispose of resources used by a node */ void hfs_bnode_put(struct hfs_bnode *node) { if (node) { BUG_ON(!atomic_read(&node->refcnt)); <- we have the issue here!!!! } } By tracing the refcnt, I found the node is created by hfs_bmap_alloc() with refcnt 1. Then the node is used by hfs_btree_write(). A hfs_bnode_get() call is missing after finding the node. The issue happens in the following path: hfs_bmap_alloc hfs_bnode_find __hfs_bnode_create <- allocates a new node with refcnt 1. hfs_bnode_put <- decreases the refcnt hfs_btree_write hfs_bnode_find __hfs_bnode_create hfs_bnode_findhash <- finds the node without increasing the refcnt. hfs_bnode_put <- triggers the BUG_ON() since refcnt is 0. Link: https://lkml.kernel.org/r/20221212021627.3766829-1-liushixin2@huawei.com Reported-by: syzbot+5b04b49a7ec7226c7426@syzkaller.appspotmail.com Signed-off-by: Liu Shixin Cc: Fabio M. De Francesco Cc: Viacheslav Dubeyko Cc: Signed-off-by: Andrew Morton --- fs/hfs/bnode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 2015e42e752a6..6add6ebfef896 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -274,6 +274,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) tree->node_hash[hash] = node; tree->node_hash_cnt++; } else { + hfs_bnode_get(node2); spin_unlock(&tree->hash_lock); kfree(node); wait_event(node2->lock_wq, !test_bit(HFS_BNODE_NEW, &node2->flags));
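The fix follows the usual rule for refcounted objects published in a lock-protected hash table: take a reference while the lock is still held. A generic sketch of that pattern (hypothetical obj/obj_cache types and __obj_findhash() helper, not the hfs code itself):

	#include <linux/refcount.h>
	#include <linux/spinlock.h>

	/* Sketch only: __obj_findhash() is a hypothetical unlocked lookup. */
	static struct obj *obj_lookup(struct obj_cache *cache, u32 id)
	{
		struct obj *o;

		spin_lock(&cache->lock);
		o = __obj_findhash(cache, id);
		if (o)
			refcount_inc(&o->refcnt);  /* cf. the added hfs_bnode_get() */
		spin_unlock(&cache->lock);
		/* The caller now owns a reference and later drops it, cf.
		 * hfs_bnode_put(); without the inc, that put would underflow
		 * the refcount and trip the BUG_ON() shown above. */
		return o;
	}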
From 9f0d17e3cfa1a20b9e22706b1dc6cd75a1a79a52 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 16 Dec 2022 16:04:39 +0100 Subject: [PATCH 08/77] lib/percpu_counter: percpu_counter_add_batch() overflow/underflow Patch series "various irq handling fixes/docu updates". If an interrupt happens between __this_cpu_read(*fbc->counters) and this_cpu_add(*fbc->counters, amount), and that interrupt modifies the per-CPU counter, then the this_cpu_add() after the interrupt returns may under/overflow. Link: https://lkml.kernel.org/r/20221216150155.200389-1-manfred@colorfullife.com Link: https://lkml.kernel.org/r/20221216150441.200533-1-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: "Sun, Jiebin" Cc: <1vier1@web.de> Cc: Alexander Sverdlin Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- lib/percpu_counter.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 42f729c8e56c4..dba56c5c18379 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -73,28 +73,33 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) EXPORT_SYMBOL(percpu_counter_set); /* - * This function is both preempt and irq safe. The former is due to explicit - * preemption disable. The latter is guaranteed by the fact that the slow path - * is explicitly protected by an irq-safe spinlock whereas the fast patch uses - * this_cpu_add which is irq-safe by definition. Hence there is no need muck - * with irq state before calling this one + * local_irq_save() is needed to make the function irq safe: + * - The slow path would be ok as protected by an irq-safe spinlock. + * - this_cpu_add would be ok as it is irq-safe by definition. + * But: + * The decision slow path/fast path and the actual update must be atomic, too. + * Otherwise a call in process context could check the current values and + * decide that the fast path can be used. If now an interrupt occurs before + * the this_cpu_add(), and the interrupt updates this_cpu(*fbc->counters), + * then the this_cpu_add() that is executed after the interrupt has completed + * can produce values larger than "batch" or even overflow. */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; + unsigned long flags; - preempt_disable(); + local_irq_save(flags); count = __this_cpu_read(*fbc->counters) + amount; if (abs(count) >= batch) { - unsigned long flags; - raw_spin_lock_irqsave(&fbc->lock, flags); + raw_spin_lock(&fbc->lock); fbc->count += count; __this_cpu_sub(*fbc->counters, count - amount); - raw_spin_unlock_irqrestore(&fbc->lock, flags); + raw_spin_unlock(&fbc->lock); } else { this_cpu_add(*fbc->counters, amount); } - preempt_enable(); + local_irq_restore(flags); } EXPORT_SYMBOL(percpu_counter_add_batch);
From 30cd6d928c25d22c9b26833b78f708e55e366241 Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 16 Dec 2022 16:04:40 +0100 Subject: [PATCH 09/77] include/linux/percpu_counter.h: race in uniprocessor percpu_counter_add() The percpu interface is supposed to be preempt and irq safe, but the uniprocessor implementation of percpu_counter_add() is not irq safe: if an interrupt happens during the +=, then the result is undefined. Therefore: switch from preempt_disable() to local_irq_save(). This prevents interrupts from interrupting the +=, and as a side effect prevents preemption. Link: https://lkml.kernel.org/r/20221216150441.200533-2-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: "Sun, Jiebin" Cc: <1vier1@web.de> Cc: Alexander Sverdlin Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/percpu_counter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index a3aae8d57a421..521a733e21a92 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -152,9 +152,11 @@ __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); fbc->count += amount; - preempt_enable(); + local_irq_restore(flags); } /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */
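Taken together, the two patches make the documented guarantee hold in both the SMP and UP implementations: counter updates may interleave with updates from an interrupt handler on the same CPU. A hedged usage sketch (the counter, handler, and init function names are hypothetical):

	#include <linux/gfp.h>
	#include <linux/interrupt.h>
	#include <linux/percpu_counter.h>

	static struct percpu_counter example_events;	/* hypothetical counter */

	static irqreturn_t example_irq(int irq, void *dev_id)
	{
		/* May now race with a process-context percpu_counter_add()
		 * on the same CPU without corrupting the batch bookkeeping. */
		percpu_counter_add(&example_events, 1);
		return IRQ_HANDLED;
	}

	static int __init example_init(void)
	{
		/* initial value 0; GFP flags are for the per-CPU allocation */
		return percpu_counter_init(&example_events, 0, GFP_KERNEL);
	}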
From 7c0bd22b18a88382074a2fac40a448255709ff67 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 8 Dec 2022 15:31:28 +0100 Subject: [PATCH 10/77] lib: add Dhrystone benchmark test When working on SoC bring-up, (a full) userspace may not be available, making it hard to benchmark the CPU performance of the system under development. Still, one may want to have a rough idea of the (relative) performance of one or more CPU cores, especially when working on e.g. the clock driver that controls the CPU core clock(s). Hence make the classical Dhrystone 2.1 benchmark available as a Linux kernel test module, based on [1]. When built-in, this benchmark can be run without any userspace present. Parallel runs (run on multiple CPU cores) are supported; just kick the "run" file multiple times. Note that the actual figures depend on the configuration options that control compiler optimization (e.g. CONFIG_CC_OPTIMIZE_FOR_SIZE vs. CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE), and on the compiler options used when building the kernel in general. Hence numbers may differ from those obtained by running similar benchmarks in userspace. [1] https://github.com/qris/dhrystone-deb.git Link: https://lkml.kernel.org/r/4d07ad990740a5f1e426ce4566fb514f60ec9bdd.1670509558.git.geert+renesas@glider.be Signed-off-by: Geert Uytterhoeven Cc: Arnd Bergmann Cc: Brendan Higgins Cc: David Gow Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 35 +++++ lib/Makefile | 2 + lib/dhry.h | 358 ++++++++++++++++++++++++++++++++++++++++ lib/dhry_1.c | 283 ++++++++++++++++++++++++++++++ lib/dhry_2.c | 175 ++++++++++++++++ lib/dhry_run.c | 85 +++++++++++ 6 files changed, 938 insertions(+) create mode 100644 lib/dhry.h create mode 100644 lib/dhry_1.c create mode 100644 lib/dhry_2.c create mode 100644 lib/dhry_run.c diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 881c3f84e88a3..918bda4ee120d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2079,6 +2079,41 @@ menuconfig RUNTIME_TESTING_MENU if RUNTIME_TESTING_MENU +config TEST_DHRY + tristate "Dhrystone benchmark test" + help + Enable this to include the Dhrystone 2.1 benchmark. This test + calculates the number of Dhrystones per second, and the number of + DMIPS (Dhrystone MIPS) obtained when the Dhrystone score is divided + by 1757 (the number of Dhrystones per second obtained on the VAX + 11/780, nominally a 1 MIPS machine). + + To run the benchmark, it needs to be enabled explicitly, either from + the kernel command line (when built-in), or from userspace (when + built-in or modular). + + Run once during kernel boot: + + test_dhry.run + + Set number of iterations from kernel command line: + + test_dhry.iterations= + + Set number of iterations from userspace: + + echo > /sys/module/test_dhry/parameters/iterations + + Trigger manual run from userspace: + + echo y > /sys/module/test_dhry/parameters/run + + If the number of iterations is <= 0, the test will devise a suitable + number of iterations (test runs for at least 2s) automatically. + This process takes ca. 4s. + + If unsure, say N. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_FS diff --git a/lib/Makefile b/lib/Makefile index 4d9461bfea42c..06b9dcd80cdaa 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -57,6 +57,8 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o obj-$(CONFIG_FIND_BIT_BENCHMARK) += find_bit_benchmark.o obj-$(CONFIG_TEST_BPF) += test_bpf.o +test_dhry-objs := dhry_1.o dhry_2.o dhry_run.o +obj-$(CONFIG_TEST_DHRY) += test_dhry.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_BITOPS) += test_bitops.o CFLAGS_test_bitops.o += -Werror diff --git a/lib/dhry.h b/lib/dhry.h new file mode 100644 index 0000000000000..e1a4db8e252c3 --- /dev/null +++ b/lib/dhry.h @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + **************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry.h (part 1 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * Siemens AG, AUT E 51 + * Postfach 3220 + * 8520 Erlangen + * Germany (West) + * Phone: [+49]-9131-7-20330 + * (8-17 Central European Time) + * Usenet: ..!mcsun!unido!estevax!weicker + * + * Original Version (in Ada) published in + * "Communications of the ACM" vol. 27., no. 10 (Oct. 1984), + * pp. 1013 - 1030, together with the statistics + * on which the distribution of statements etc. is based.
+ * + * In this C version, the following C library functions are used: + * - strcpy, strcmp (inside the measurement loop) + * - printf, scanf (outside the measurement loop) + * In addition, Berkeley UNIX system calls "times ()" or "time ()" + * are used for execution time measurement. For measurements + * on other systems, these calls have to be changed. + * + * Collection of Results: + * Reinhold Weicker (address see above) and + * + * Rick Richardson + * PC Research. Inc. + * 94 Apple Orchard Drive + * Tinton Falls, NJ 07724 + * Phone: (201) 389-8963 (9-17 EST) + * Usenet: ...!uunet!pcrat!rick + * + * Please send results to Rick Richardson and/or Reinhold Weicker. + * Complete information should be given on hardware and software used. + * Hardware information includes: Machine type, CPU, type and size + * of caches; for microprocessors: clock frequency, memory speed + * (number of wait states). + * Software information includes: Compiler (and runtime library) + * manufacturer and version, compilation switches, OS version. + * The Operating System version may give an indication about the + * compiler; Dhrystone itself performs no OS calls in the measurement loop. + * + * The complete output generated by the program should be mailed + * such that at least some checks for correctness can be made. + * + *************************************************************************** + * + * History: This version C/2.1 has been made for two reasons: + * + * 1) There is an obvious need for a common C version of + * Dhrystone, since C is at present the most popular system + * programming language for the class of processors + * (microcomputers, minicomputers) where Dhrystone is used most. + * There should be, as far as possible, only one C version of + * Dhrystone such that results can be compared without + * restrictions. In the past, the C versions distributed + * by Rick Richardson (Version 1.1) and by Reinhold Weicker + * had small (though not significant) differences. + * + * 2) As far as it is possible without changes to the Dhrystone + * statistics, optimizing compilers should be prevented from + * removing significant statements. + * + * This C version has been developed in cooperation with + * Rick Richardson (Tinton Falls, NJ), it incorporates many + * ideas from the "Version 1.1" distributed previously by + * him over the UNIX network Usenet. + * I also thank Chaim Benedelac (National Semiconductor), + * David Ditzel (SUN), Earl Killian and John Mashey (MIPS), + * Alan Smith and Rafael Saavedra-Barrera (UC at Berkeley) + * for their help with comments on earlier versions of the + * benchmark. + * + * Changes: In the initialization part, this version follows mostly + * Rick Richardson's version distributed via Usenet, not the + * version distributed earlier via floppy disk by Reinhold Weicker. + * As a concession to older compilers, names have been made + * unique within the first 8 characters. + * Inside the measurement loop, this version follows the + * version previously distributed by Reinhold Weicker. + * + * At several places in the benchmark, code has been added, + * but within the measurement loop only in branches that + * are not executed. The intention is that optimizing compilers + * should be prevented from moving code out of the measurement + * loop, or from removing code altogether. 
Since the statements + * that are executed within the measurement loop have NOT been + * changed, the numbers defining the "Dhrystone distribution" + * (distribution of statements, operand types and locality) + * still hold. Except for sophisticated optimizing compilers, + * execution times for this version should be the same as + * for previous versions. + * + * Since it has proven difficult to subtract the time for the + * measurement loop overhead in a correct way, the loop check + * has been made a part of the benchmark. This does have + * an impact - though a very minor one - on the distribution + * statistics which have been updated for this version. + * + * All changes within the measurement loop are described + * and discussed in the companion paper "Rationale for + * Dhrystone version 2". + * + * Because of the self-imposed limitation that the order and + * distribution of the executed statements should not be + * changed, there are still cases where optimizing compilers + * may not generate code for some statements. To a certain + * degree, this is unavoidable for small synthetic benchmarks. + * Users of the benchmark are advised to check code listings + * whether code is generated for all statements of Dhrystone. + * + * Version 2.1 is identical to version 2.0 distributed via + * the UNIX network Usenet in March 1988 except that it corrects + * some minor deficiencies that were found by users of version 2.0. + * The only change within the measurement loop is that a + * non-executed "else" part was added to the "if" statement in + * Func_3, and a non-executed "else" part removed from Proc_3. + * + *************************************************************************** + * + * Compilation model and measurement (IMPORTANT): + * + * This C version of Dhrystone consists of three files: + * - dhry.h (this file, containing global definitions and comments) + * - dhry_1.c (containing the code corresponding to Ada package Pack_1) + * - dhry_2.c (containing the code corresponding to Ada package Pack_2) + * + * The following "ground rules" apply for measurements: + * - Separate compilation + * - No procedure merging + * - Otherwise, compiler optimizations are allowed but should be indicated + * - Default results are those without register declarations + * See the companion paper "Rationale for Dhrystone Version 2" for a more + * detailed discussion of these ground rules. + * + * For 16-Bit processors (e.g. 80186, 80286), times for all compilation + * models ("small", "medium", "large" etc.) should be given if possible, + * together with a definition of these models for the compiler system used. + * + ************************************************************************** + * + * Dhrystone (C version) statistics: + * + * [Comment from the first distribution, updated for version 2. + * Note that because of language differences, the numbers are slightly + * different from the Ada version.] + * + * The following program contains statements of a high level programming + * language (here: C) in a distribution considered representative: + * + * assignments 52 (51.0 %) + * control statements 33 (32.4 %) + * procedure, function calls 17 (16.7 %) + * + * 103 statements are dynamically executed. The program is balanced with + * respect to the three aspects: + * + * - statement type + * - operand type + * - operand locality + * operand global, local, parameter, or constant. + * + * The combination of these three aspects is balanced only approximately. + * + * 1. 
Statement Type: + * ----------------- number + * + * V1 = V2 9 + * (incl. V1 = F(..) + * V = Constant 12 + * Assignment, 7 + * with array element + * Assignment, 6 + * with record component + * -- + * 34 34 + * + * X = Y +|-|"&&"|"|" Z 5 + * X = Y +|-|"==" Constant 6 + * X = X +|- 1 3 + * X = Y *|/ Z 2 + * X = Expression, 1 + * two operators + * X = Expression, 1 + * three operators + * -- + * 18 18 + * + * if .... 14 + * with "else" 7 + * without "else" 7 + * executed 3 + * not executed 4 + * for ... 7 | counted every time + * while ... 4 | the loop condition + * do ... while 1 | is evaluated + * switch ... 1 + * break 1 + * declaration with 1 + * initialization + * -- + * 34 34 + * + * P (...) procedure call 11 + * user procedure 10 + * library procedure 1 + * X = F (...) + * function call 6 + * user function 5 + * library function 1 + * -- + * 17 17 + * --- + * 103 + * + * The average number of parameters in procedure or function calls + * is 1.82 (not counting the function values as implicit parameters). + * + * + * 2. Operators + * ------------ + * number approximate + * percentage + * + * Arithmetic 32 50.8 + * + * + 21 33.3 + * - 7 11.1 + * * 3 4.8 + * / (int div) 1 1.6 + * + * Comparison 27 42.8 + * + * == 9 14.3 + * /= 4 6.3 + * > 1 1.6 + * < 3 4.8 + * >= 1 1.6 + * <= 9 14.3 + * + * Logic 4 6.3 + * + * && (AND-THEN) 1 1.6 + * | (OR) 1 1.6 + * ! (NOT) 2 3.2 + * + * -- ----- + * 63 100.1 + * + * + * 3. Operand Type (counted once per operand reference): + * --------------- + * number approximate + * percentage + * + * Integer 175 72.3 % + * Character 45 18.6 % + * Pointer 12 5.0 % + * String30 6 2.5 % + * Array 2 0.8 % + * Record 2 0.8 % + * --- ------- + * 242 100.0 % + * + * When there is an access path leading to the final operand (e.g. a record + * component), only the final data type on the access path is counted. + * + * + * 4. Operand Locality: + * ------------------- + * number approximate + * percentage + * + * local variable 114 47.1 % + * global variable 22 9.1 % + * parameter 45 18.6 % + * value 23 9.5 % + * reference 22 9.1 % + * function result 6 2.5 % + * constant 55 22.7 % + * --- ------- + * 242 100.0 % + * + * + * The program does not compute anything meaningful, but it is syntactically + * and semantically correct. All variables have a value assigned to them + * before they are used as a source operand. + * + * There has been no explicit effort to account for the effects of a + * cache, or to balance the use of long or short displacements for code or + * data. 
+ * + *************************************************************************** + */ + +typedef enum { + Ident_1, + Ident_2, + Ident_3, + Ident_4, + Ident_5 +} Enumeration; /* for boolean and enumeration types in Ada, Pascal */ + +/* General definitions: */ + +typedef int One_Thirty; +typedef int One_Fifty; +typedef char Capital_Letter; +typedef int Boolean; +typedef char Str_30[31]; +typedef int Arr_1_Dim[50]; +typedef int Arr_2_Dim[50][50]; + +typedef struct record { + struct record *Ptr_Comp; + Enumeration Discr; + union { + struct { + Enumeration Enum_Comp; + int Int_Comp; + char Str_Comp[31]; + } var_1; + struct { + Enumeration E_Comp_2; + char Str_2_Comp[31]; + } var_2; + struct { + char Ch_1_Comp; + char Ch_2_Comp; + } var_3; + } variant; +} Rec_Type, *Rec_Pointer; + + +extern int Int_Glob; +extern char Ch_1_Glob; + +void Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par); +void Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, + One_Fifty *Int_Par_Ref); +void Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, + int Int_1_Par_Val, int Int_2_Par_Val); +Enumeration Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val); +Boolean Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref); + +int dhry(int n); diff --git a/lib/dhry_1.c b/lib/dhry_1.c new file mode 100644 index 0000000000000..83247106824cc --- /dev/null +++ b/lib/dhry_1.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* + **************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_1.c (part 2 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. Weicker + * + **************************************************************************** + */ + +#include "dhry.h" + +#include +#include +#include + +/* Global Variables: */ + +int Int_Glob; +char Ch_1_Glob; + +static Rec_Pointer Ptr_Glob, Next_Ptr_Glob; +static Boolean Bool_Glob; +static char Ch_2_Glob; +static int Arr_1_Glob[50]; +static int Arr_2_Glob[50][50]; + +static void Proc_3(Rec_Pointer *Ptr_Ref_Par) +/******************/ +/* executed once */ +/* Ptr_Ref_Par becomes Ptr_Glob */ +{ + if (Ptr_Glob) { + /* then, executed */ + *Ptr_Ref_Par = Ptr_Glob->Ptr_Comp; + } + Proc_7(10, Int_Glob, &Ptr_Glob->variant.var_1.Int_Comp); +} /* Proc_3 */ + + +static void Proc_1(Rec_Pointer Ptr_Val_Par) +/******************/ +/* executed once */ +{ + Rec_Pointer Next_Record = Ptr_Val_Par->Ptr_Comp; + /* == Ptr_Glob_Next */ + /* Local variable, initialized with Ptr_Val_Par->Ptr_Comp, */ + /* corresponds to "rename" in Ada, "with" in Pascal */ + + *Ptr_Val_Par->Ptr_Comp = *Ptr_Glob; + Ptr_Val_Par->variant.var_1.Int_Comp = 5; + Next_Record->variant.var_1.Int_Comp = + Ptr_Val_Par->variant.var_1.Int_Comp; + Next_Record->Ptr_Comp = Ptr_Val_Par->Ptr_Comp; + Proc_3(&Next_Record->Ptr_Comp); + /* Ptr_Val_Par->Ptr_Comp->Ptr_Comp == Ptr_Glob->Ptr_Comp */ + if (Next_Record->Discr == Ident_1) { + /* then, executed */ + Next_Record->variant.var_1.Int_Comp = 6; + Proc_6(Ptr_Val_Par->variant.var_1.Enum_Comp, + &Next_Record->variant.var_1.Enum_Comp); + Next_Record->Ptr_Comp = Ptr_Glob->Ptr_Comp; + Proc_7(Next_Record->variant.var_1.Int_Comp, 10, + &Next_Record->variant.var_1.Int_Comp); + } else { + /* not executed */ + *Ptr_Val_Par = *Ptr_Val_Par->Ptr_Comp; + } +} /* Proc_1 */ + + +static void Proc_2(One_Fifty *Int_Par_Ref) +/******************/ +/* executed once */ +/* *Int_Par_Ref == 1, becomes 4 */ +{ + 
One_Fifty Int_Loc; + Enumeration Enum_Loc; + + Int_Loc = *Int_Par_Ref + 10; + do { + /* executed once */ + if (Ch_1_Glob == 'A') { + /* then, executed */ + Int_Loc -= 1; + *Int_Par_Ref = Int_Loc - Int_Glob; + Enum_Loc = Ident_1; + } /* if */ + } while (Enum_Loc != Ident_1); /* true */ +} /* Proc_2 */ + + +static void Proc_4(void) +/*******/ +/* executed once */ +{ + Boolean Bool_Loc; + + Bool_Loc = Ch_1_Glob == 'A'; + Bool_Glob = Bool_Loc | Bool_Glob; + Ch_2_Glob = 'B'; +} /* Proc_4 */ + + +static void Proc_5(void) +/*******/ +/* executed once */ +{ + Ch_1_Glob = 'A'; + Bool_Glob = false; +} /* Proc_5 */ + + +int dhry(int n) +/*****/ + + /* main program, corresponds to procedures */ + /* Main and Proc_0 in the Ada version */ +{ + One_Fifty Int_1_Loc; + One_Fifty Int_2_Loc; + One_Fifty Int_3_Loc; + char Ch_Index; + Enumeration Enum_Loc; + Str_30 Str_1_Loc; + Str_30 Str_2_Loc; + int Run_Index; + int Number_Of_Runs; + ktime_t Begin_Time, End_Time; + u32 User_Time; + + /* Initializations */ + + Next_Ptr_Glob = (Rec_Pointer)kzalloc(sizeof(Rec_Type), GFP_KERNEL); + Ptr_Glob = (Rec_Pointer)kzalloc(sizeof(Rec_Type), GFP_KERNEL); + + Ptr_Glob->Ptr_Comp = Next_Ptr_Glob; + Ptr_Glob->Discr = Ident_1; + Ptr_Glob->variant.var_1.Enum_Comp = Ident_3; + Ptr_Glob->variant.var_1.Int_Comp = 40; + strcpy(Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING"); + strcpy(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING"); + + Arr_2_Glob[8][7] = 10; + /* Was missing in published program. Without this statement, */ + /* Arr_2_Glob[8][7] would have an undefined value. */ + /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */ + /* overflow may occur for this array element. */ + + pr_debug("Dhrystone Benchmark, Version 2.1 (Language: C)\n"); + + Number_Of_Runs = n; + + pr_debug("Execution starts, %d runs through Dhrystone\n", + Number_Of_Runs); + + /***************/ + /* Start timer */ + /***************/ + + Begin_Time = ktime_get(); + + for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index) { + Proc_5(); + Proc_4(); + /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */ + Int_1_Loc = 2; + Int_2_Loc = 3; + strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); + Enum_Loc = Ident_2; + Bool_Glob = !Func_2(Str_1_Loc, Str_2_Loc); + /* Bool_Glob == 1 */ + while (Int_1_Loc < Int_2_Loc) { + /* loop body executed once */ + Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc; + /* Int_3_Loc == 7 */ + Proc_7(Int_1_Loc, Int_2_Loc, &Int_3_Loc); + /* Int_3_Loc == 7 */ + Int_1_Loc += 1; + } /* while */ + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Proc_8(Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc); + /* Int_Glob == 5 */ + Proc_1(Ptr_Glob); + for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index) { + /* loop body executed twice */ + if (Enum_Loc == Func_1(Ch_Index, 'C')) { + /* then, not executed */ + Proc_6(Ident_1, &Enum_Loc); + strcpy(Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING"); + Int_2_Loc = Run_Index; + Int_Glob = Run_Index; + } + } + /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */ + Int_2_Loc = Int_2_Loc * Int_1_Loc; + Int_1_Loc = Int_2_Loc / Int_3_Loc; + Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc; + /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */ + Proc_2(&Int_1_Loc); + /* Int_1_Loc == 5 */ + + } /* loop "for Run_Index" */ + + /**************/ + /* Stop timer */ + /**************/ + + End_Time = ktime_get(); + +#define dhry_assert_int_eq(val, expected) \ + if (val != expected) \ + pr_err("%s: %d (FAIL, expected %d)\n", #val, val, \ + expected); \ + else \ + pr_debug("%s: 
%d (OK)\n", #val, val) + +#define dhry_assert_char_eq(val, expected) \ + if (val != expected) \ + pr_err("%s: %c (FAIL, expected %c)\n", #val, val, \ + expected); \ + else \ + pr_debug("%s: %c (OK)\n", #val, val) + +#define dhry_assert_string_eq(val, expected) \ + if (strcmp(val, expected)) \ + pr_err("%s: %s (FAIL, expected %s)\n", #val, val, \ + expected); \ + else \ + pr_debug("%s: %s (OK)\n", #val, val) + + pr_debug("Execution ends\n"); + pr_debug("Final values of the variables used in the benchmark:\n"); + dhry_assert_int_eq(Int_Glob, 5); + dhry_assert_int_eq(Bool_Glob, 1); + dhry_assert_char_eq(Ch_1_Glob, 'A'); + dhry_assert_char_eq(Ch_2_Glob, 'B'); + dhry_assert_int_eq(Arr_1_Glob[8], 7); + dhry_assert_int_eq(Arr_2_Glob[8][7], Number_Of_Runs + 10); + pr_debug("Ptr_Comp: %px\n", Ptr_Glob->Ptr_Comp); + dhry_assert_int_eq(Ptr_Glob->Discr, 0); + dhry_assert_int_eq(Ptr_Glob->variant.var_1.Enum_Comp, 2); + dhry_assert_int_eq(Ptr_Glob->variant.var_1.Int_Comp, 17); + dhry_assert_string_eq(Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING"); + if (Next_Ptr_Glob->Ptr_Comp != Ptr_Glob->Ptr_Comp) + pr_err("Next_Ptr_Glob->Ptr_Comp: %px (expected %px)\n", + Next_Ptr_Glob->Ptr_Comp, Ptr_Glob->Ptr_Comp); + else + pr_debug("Next_Ptr_Glob->Ptr_Comp: %px\n", + Next_Ptr_Glob->Ptr_Comp); + dhry_assert_int_eq(Next_Ptr_Glob->Discr, 0); + dhry_assert_int_eq(Next_Ptr_Glob->variant.var_1.Enum_Comp, 1); + dhry_assert_int_eq(Next_Ptr_Glob->variant.var_1.Int_Comp, 18); + dhry_assert_string_eq(Next_Ptr_Glob->variant.var_1.Str_Comp, + "DHRYSTONE PROGRAM, SOME STRING"); + dhry_assert_int_eq(Int_1_Loc, 5); + dhry_assert_int_eq(Int_2_Loc, 13); + dhry_assert_int_eq(Int_3_Loc, 7); + dhry_assert_int_eq(Enum_Loc, 1); + dhry_assert_string_eq(Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING"); + dhry_assert_string_eq(Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING"); + + User_Time = ktime_to_ms(ktime_sub(End_Time, Begin_Time)); + + kfree(Ptr_Glob); + kfree(Next_Ptr_Glob); + + /* Measurements should last at least 2 seconds */ + if (User_Time < 2 * MSEC_PER_SEC) + return -EAGAIN; + + return div_u64(mul_u32_u32(MSEC_PER_SEC, Number_Of_Runs), User_Time); +} diff --git a/lib/dhry_2.c b/lib/dhry_2.c new file mode 100644 index 0000000000000..c19e661f37d30 --- /dev/null +++ b/lib/dhry_2.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* + **************************************************************************** + * + * "DHRYSTONE" Benchmark Program + * ----------------------------- + * + * Version: C, Version 2.1 + * + * File: dhry_2.c (part 3 of 3) + * + * Date: May 25, 1988 + * + * Author: Reinhold P. 
Weicker + * + **************************************************************************** + */ + +#include "dhry.h" + +#include + + +static Boolean Func_3(Enumeration Enum_Par_Val) +/***************************/ +/* executed once */ +/* Enum_Par_Val == Ident_3 */ +{ + Enumeration Enum_Loc; + + Enum_Loc = Enum_Par_Val; + if (Enum_Loc == Ident_3) { + /* then, executed */ + return true; + } else { + /* not executed */ + return false; + } +} /* Func_3 */ + + +void Proc_6(Enumeration Enum_Val_Par, Enumeration *Enum_Ref_Par) +/*********************************/ +/* executed once */ +/* Enum_Val_Par == Ident_3, Enum_Ref_Par becomes Ident_2 */ +{ + *Enum_Ref_Par = Enum_Val_Par; + if (!Func_3(Enum_Val_Par)) { + /* then, not executed */ + *Enum_Ref_Par = Ident_4; + } + switch (Enum_Val_Par) { + case Ident_1: + *Enum_Ref_Par = Ident_1; + break; + case Ident_2: + if (Int_Glob > 100) { + /* then */ + *Enum_Ref_Par = Ident_1; + } else { + *Enum_Ref_Par = Ident_4; + } + break; + case Ident_3: /* executed */ + *Enum_Ref_Par = Ident_2; + break; + case Ident_4: + break; + case Ident_5: + *Enum_Ref_Par = Ident_3; + break; + } /* switch */ +} /* Proc_6 */ + + +void Proc_7(One_Fifty Int_1_Par_Val, One_Fifty Int_2_Par_Val, One_Fifty *Int_Par_Ref) +/**********************************************/ +/* executed three times */ +/* first call: Int_1_Par_Val == 2, Int_2_Par_Val == 3, */ +/* Int_Par_Ref becomes 7 */ +/* second call: Int_1_Par_Val == 10, Int_2_Par_Val == 5, */ +/* Int_Par_Ref becomes 17 */ +/* third call: Int_1_Par_Val == 6, Int_2_Par_Val == 10, */ +/* Int_Par_Ref becomes 18 */ +{ + One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 2; + *Int_Par_Ref = Int_2_Par_Val + Int_Loc; +} /* Proc_7 */ + + +void Proc_8(Arr_1_Dim Arr_1_Par_Ref, Arr_2_Dim Arr_2_Par_Ref, int Int_1_Par_Val, int Int_2_Par_Val) +/*********************************************************************/ +/* executed once */ +/* Int_Par_Val_1 == 3 */ +/* Int_Par_Val_2 == 7 */ +{ + One_Fifty Int_Index; + One_Fifty Int_Loc; + + Int_Loc = Int_1_Par_Val + 5; + Arr_1_Par_Ref[Int_Loc] = Int_2_Par_Val; + Arr_1_Par_Ref[Int_Loc+1] = Arr_1_Par_Ref[Int_Loc]; + Arr_1_Par_Ref[Int_Loc+30] = Int_Loc; + for (Int_Index = Int_Loc; Int_Index <= Int_Loc+1; ++Int_Index) + Arr_2_Par_Ref[Int_Loc][Int_Index] = Int_Loc; + Arr_2_Par_Ref[Int_Loc][Int_Loc-1] += 1; + Arr_2_Par_Ref[Int_Loc+20][Int_Loc] = Arr_1_Par_Ref[Int_Loc]; + Int_Glob = 5; +} /* Proc_8 */ + + +Enumeration Func_1(Capital_Letter Ch_1_Par_Val, Capital_Letter Ch_2_Par_Val) +/*************************************************/ +/* executed three times */ +/* first call: Ch_1_Par_Val == 'H', Ch_2_Par_Val == 'R' */ +/* second call: Ch_1_Par_Val == 'A', Ch_2_Par_Val == 'C' */ +/* third call: Ch_1_Par_Val == 'B', Ch_2_Par_Val == 'C' */ +{ + Capital_Letter Ch_1_Loc; + Capital_Letter Ch_2_Loc; + + Ch_1_Loc = Ch_1_Par_Val; + Ch_2_Loc = Ch_1_Loc; + if (Ch_2_Loc != Ch_2_Par_Val) { + /* then, executed */ + return Ident_1; + } else { + /* not executed */ + Ch_1_Glob = Ch_1_Loc; + return Ident_2; + } +} /* Func_1 */ + + +Boolean Func_2(Str_30 Str_1_Par_Ref, Str_30 Str_2_Par_Ref) +/*************************************************/ +/* executed once */ +/* Str_1_Par_Ref == "DHRYSTONE PROGRAM, 1'ST STRING" */ +/* Str_2_Par_Ref == "DHRYSTONE PROGRAM, 2'ND STRING" */ +{ + One_Thirty Int_Loc; + Capital_Letter Ch_Loc; + + Int_Loc = 2; + while (Int_Loc <= 2) { + /* loop body executed once */ + if (Func_1(Str_1_Par_Ref[Int_Loc], + Str_2_Par_Ref[Int_Loc+1]) == Ident_1) { + /* then, executed */ + Ch_Loc = 'A'; + Int_Loc += 1; 
+ } + } /* if, while */ + if (Ch_Loc >= 'W' && Ch_Loc < 'Z') { + /* then, not executed */ + Int_Loc = 7; + } + if (Ch_Loc == 'R') { + /* then, not executed */ + return true; + } else { + /* executed */ + if (strcmp(Str_1_Par_Ref, Str_2_Par_Ref) > 0) { + /* then, not executed */ + Int_Loc += 7; + Int_Glob = Int_Loc; + return true; + } else { + /* executed */ + return false; + } + } /* if Ch_Loc */ +} /* Func_2 */ diff --git a/lib/dhry_run.c b/lib/dhry_run.c new file mode 100644 index 0000000000000..31a1d442e4a0f --- /dev/null +++ b/lib/dhry_run.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Dhrystone benchmark test module + * + * Copyright (C) 2022 Glider bv + */ + +#include "dhry.h" + +#include +#include +#include +#include +#include + +#define DHRY_VAX 1757 + +static int dhry_run_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops run_ops = { + .flags = KERNEL_PARAM_OPS_FL_NOARG, + .set = dhry_run_set, +}; +static bool dhry_run; +module_param_cb(run, &run_ops, &dhry_run, 0200); +MODULE_PARM_DESC(run, "Run the test (default: false)"); + +static int iterations = -1; +module_param(iterations, int, 0644); +MODULE_PARM_DESC(iterations, + "Number of iterations through the benchmark (default: auto)"); + +static void dhry_benchmark(void) +{ + int i, n; + + if (iterations > 0) { + n = dhry(iterations); + goto report; + } + + for (i = DHRY_VAX; i > 0; i <<= 1) { + n = dhry(i); + if (n != -EAGAIN) + break; + } + +report: + if (n >= 0) + pr_info("CPU%u: Dhrystones per Second: %d (%d DMIPS)\n", + smp_processor_id(), n, n / DHRY_VAX); + else if (n == -EAGAIN) + pr_err("Please increase the number of iterations\n"); + else + pr_err("Dhrystone benchmark failed error %pe\n", ERR_PTR(n)); +} + +static int dhry_run_set(const char *val, const struct kernel_param *kp) +{ + int ret; + + if (val) { + ret = param_set_bool(val, kp); + if (ret) + return ret; + } else { + dhry_run = true; + } + + if (dhry_run && system_state == SYSTEM_RUNNING) + dhry_benchmark(); + + return ret; +} + +static int __init dhry_init(void) +{ + if (dhry_run) + dhry_benchmark(); + + return 0; +} +module_init(dhry_init); + +MODULE_AUTHOR("Geert Uytterhoeven "); +MODULE_LICENSE("GPL"); From 4018b11a8a4a996560145750da3c2e29d60c19f4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 18 Dec 2022 10:51:59 +0100 Subject: [PATCH 11/77] lib-add-dhrystone-benchmark-test-fix fix uninitialized use of ret Link: https://lkml.kernel.org/r/alpine.DEB.2.22.394.2212190857310.137329@ramsan.of.borg Reported-by: kernel test robot Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton --- lib/dhry_run.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dhry_run.c b/lib/dhry_run.c index 31a1d442e4a0f..f9d33efa6d090 100644 --- a/lib/dhry_run.c +++ b/lib/dhry_run.c @@ -69,7 +69,7 @@ static int dhry_run_set(const char *val, const struct kernel_param *kp) if (dhry_run && system_state == SYSTEM_RUNNING) dhry_benchmark(); - return ret; + return 0; } static int __init dhry_init(void) From 45f1196931c5ba9cd7e92a201f4cc52f9fd8b128 Mon Sep 17 00:00:00 2001 From: XU pengfei Date: Wed, 21 Dec 2022 11:21:20 +0800 Subject: [PATCH 12/77] hfsplus: remove unnecessary variable initialization Variables are assigned first and then used. Initialization is not required. 
Link: https://lkml.kernel.org/r/20221221032119.10037-1-xupengfei@nfschina.com Signed-off-by: XU pengfei Reviewed-by: Andrew Morton Cc: Christian Brauner Cc: Kees Cook Signed-off-by: Andrew Morton --- fs/hfsplus/xattr.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 49891b12c4156..7c502df7ccacb 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -257,7 +257,7 @@ static int hfsplus_create_attributes_file(struct super_block *sb) int __hfsplus_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - int err = 0; + int err; struct hfs_find_data cat_fd; hfsplus_cat_entry entry; u16 cat_entry_flags, cat_entry_type; @@ -494,7 +494,7 @@ ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, __be32 xattr_record_type; u32 record_type; u16 record_length = 0; - ssize_t res = 0; + ssize_t res; if ((!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) || @@ -606,7 +606,7 @@ static inline int can_list(const char *xattr_name) static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry, char *buffer, size_t size) { - ssize_t res = 0; + ssize_t res; struct inode *inode = d_inode(dentry); struct hfs_find_data fd; u16 entry_type; @@ -674,10 +674,10 @@ static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry, ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) { ssize_t err; - ssize_t res = 0; + ssize_t res; struct inode *inode = d_inode(dentry); struct hfs_find_data fd; - u16 key_len = 0; + u16 key_len; struct hfsplus_attr_key attr_key; char *strbuf; int xattr_name_len; @@ -766,12 +766,12 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) static int hfsplus_removexattr(struct inode *inode, const char *name) { - int err = 0; + int err; struct hfs_find_data cat_fd; u16 flags; u16 cat_entry_type; - int is_xattr_acl_deleted = 0; - int is_all_xattrs_deleted = 0; + int is_xattr_acl_deleted; + int is_all_xattrs_deleted; if (!HFSPLUS_SB(inode->i_sb)->attr_tree) return -EOPNOTSUPP; From 3db812a42d50d7dde3bfffacb829ceb1b1770276 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 21 Dec 2022 13:32:43 -0800 Subject: [PATCH 13/77] hfsplus-remove-unnecessary-variable-initialization-fix give hfsplus_listxattr:key_len narrower scope Cc: XU pengfei Cc: Andrew Morton Cc: Christian Brauner Cc: Kees Cook Signed-off-by: Andrew Morton --- fs/hfsplus/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index 7c502df7ccacb..7f6e8aaea5716 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -677,7 +677,6 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) ssize_t res; struct inode *inode = d_inode(dentry); struct hfs_find_data fd; - u16 key_len; struct hfsplus_attr_key attr_key; char *strbuf; int xattr_name_len; @@ -719,7 +718,8 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size) } for (;;) { - key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset); + u16 key_len = hfs_bnode_read_u16(fd.bnode, fd.keyoffset); + if (key_len == 0 || key_len > fd.tree->max_key_len) { pr_err("invalid xattr key length: %d\n", key_len); res = -EIO; From d899cd97b79fa830c52498b2f3c599792565b75a Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 20 Dec 2022 23:20:30 +0100 Subject: [PATCH 14/77] scripts/spelling.txt: add `permitted' Patch series "spelling: Fix some trivial typos". 
Seems like permitted has two t's :). Let's add that to spelling.txt to help others. This patch (of 3): Add another common typo. Noticed when I sent a patch with the typo; instances also exist in kvm and of. Link: https://lkml.kernel.org/r/20221220-permited-v1-1-52ea9857fa61@chromium.org Signed-off-by: Ricardo Ribalda Cc: Joel Fernandes (Google) Signed-off-by: Andrew Morton --- scripts/spelling.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index ded8bcfc02475..bb788a733722e 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -1121,6 +1121,7 @@ perfomring||performing periperal||peripheral peripherial||peripheral permissons||permissions +permited||permitted peroid||period persistance||persistence persistant||persistent
From a8f51b81501db2fb01b73368132a086dda9505eb Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 20 Dec 2022 23:20:31 +0100 Subject: [PATCH 15/77] KVM: x86: fix trivial typo Permitted is spelled with two t's. Link: https://lkml.kernel.org/r/20221220-permited-v1-2-52ea9857fa61@chromium.org Signed-off-by: Ricardo Ribalda Cc: Joel Fernandes (Google) Signed-off-by: Andrew Morton --- arch/x86/kvm/emulate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 5cc3efa0e21c1..56e1cf7c339ef 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2615,8 +2615,8 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, return true; } -static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, - u16 port, u16 len) +static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt, + u16 port, u16 len) { if (ctxt->perm_ok) return true; @@ -3961,7 +3961,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) static int check_perm_in(struct x86_emulate_ctxt *ctxt) { ctxt->dst.bytes = min(ctxt->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes)) + if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -3970,7 +3970,7 @@ static int check_perm_in(struct x86_emulate_ctxt *ctxt) static int check_perm_out(struct x86_emulate_ctxt *ctxt) { ctxt->src.bytes = min(ctxt->src.bytes, 4u); - if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes)) + if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE;
From 71ef250c9fd3699538c663672388363b1a9e32f9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 29 Dec 2022 14:17:15 -0800 Subject: [PATCH 16/77] checkpatch: mark kunmap() and kunmap_atomic() deprecated It was suggested by Fabio that kunmap() be marked deprecated in checkpatch.[1] This did not seem necessary until an invalid conversion of kmap_local_page() appeared in mainline.[2][3] The introduction of this bug would have been flagged with kunmap() being marked deprecated. Add kunmap() and kunmap_atomic() to checkpatch to help prevent further confusion. [1] https://lore.kernel.org/all/1884934.6tgchFWduM@suse/ [2] d406d26745ab ("cifs: skip alloc when request has no pages") [3] https://lore.kernel.org/r/20221229-cifs-kmap-v1-1-c70d0e9a53eb@intel.com Link: https://lkml.kernel.org/r/20221229-kmap-checkpatch-v2-1-919fc4d4e3c2@intel.com Signed-off-by: Ira Weiny Suggested-by: "Fabio M. De Francesco" Acked-by: Joe Perches Cc: Andy Whitcroft Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 78cc595b98ce1..a6d6d7e1d0cf1 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -823,7 +823,9 @@ sub find_standard_signature { "get_state_synchronize_sched" => "get_state_synchronize_rcu", "cond_synchronize_sched" => "cond_synchronize_rcu", "kmap" => "kmap_local_page", + "kunmap" => "kunmap_local", "kmap_atomic" => "kmap_local_page", + "kunmap_atomic" => "kunmap_local", ); #Create a search pattern for all these strings to speed up a loop below
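For reference, a short sketch of the pairing that checkpatch now steers people toward instead of the deprecated calls (the helper is hypothetical; the mapping returned by kmap_local_page() is thread-local and must be unmapped with kunmap_local()):

	#include <linux/highmem.h>
	#include <linux/string.h>

	/* Copy out of a (possibly high-memory) page with the local API. */
	static void example_copy_from_page(struct page *page, void *dst,
					   size_t len)
	{
		void *src = kmap_local_page(page);

		memcpy(dst, src, len);
		kunmap_local(src);	/* pass the address kmap_local_page() returned */
	}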
De Francesco" Acked-by: Joe Perches Cc: Andy Whitcroft Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 78cc595b98ce1..a6d6d7e1d0cf1 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -823,7 +823,9 @@ sub find_standard_signature { "get_state_synchronize_sched" => "get_state_synchronize_rcu", "cond_synchronize_sched" => "cond_synchronize_rcu", "kmap" => "kmap_local_page", + "kunmap" => "kunmap_local", "kmap_atomic" => "kmap_local_page", + "kunmap_atomic" => "kunmap_local", ); #Create a search pattern for all these strings to speed up a loop below From 1182a27f53841d0a6d934b411c6d0087ba49384d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 30 Dec 2022 12:36:02 +0300 Subject: [PATCH 17/77] proc: mark /proc/cmdline as permanent /proc/cmdline is never removed, mark is as permanent for slightly faster open and close. Link: https://lkml.kernel.org/r/Y66xAveh2yUsP7m9@p183 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/cmdline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c index 91fe1597af7b4..a6f76121955f6 100644 --- a/fs/proc/cmdline.c +++ b/fs/proc/cmdline.c @@ -17,6 +17,7 @@ static int __init proc_cmdline_init(void) struct proc_dir_entry *pde; pde = proc_create_single("cmdline", 0, NULL, cmdline_proc_show); + pde_make_permanent(pde); pde->size = saved_command_line_len + 1; return 0; } From 397cc304d3462b8093d28971c58def227355803f Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Wed, 4 Jan 2023 18:40:17 +0000 Subject: [PATCH 18/77] scripts/spelling: add a few more typos Add a few more typos that found from real patches[1,2] to 'spelling' file. [1] https://lore.kernel.org/linux-mm/4bc4ab74-3ccd-f892-b387-d48451463d3c@huawei.com/ [2] https://lore.kernel.org/damon/20221228174621.34868-1-sj@kernel.org/ Link: https://lkml.kernel.org/r/20230104184017.1724-1-sj@kernel.org Signed-off-by: SeongJae Park Acked-by: Randy Dunlap Cc: Colin Ian King Cc: Drew Fustini Cc: Joe Perches Cc: SeongJae Park Signed-off-by: Andrew Morton --- scripts/spelling.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index bb788a733722e..597a680d53b45 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -664,11 +664,13 @@ feauture||feature feautures||features fetaure||feature fetaures||features +fetcing||fetching fileystem||filesystem fimrware||firmware fimware||firmware firmare||firmware firmaware||firmware +firtly||firstly firware||firmware firwmare||firmware finanize||finalize @@ -1559,6 +1561,7 @@ tunning||tuning ture||true tyep||type udpate||update +updtes||updates uesd||used unknwon||unknown uknown||unknown From b49bd9aee56b7139e49e85f349e45cf02afc00ab Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 4 Jan 2023 22:42:30 +0800 Subject: [PATCH 19/77] kthread_worker: check all delayed works when destroy kthread worker When destroying a kthread worker warn if there are still some pending delayed works. This indicates that the caller should clear all pending delayed works before destroying the kthread worker. 
Link: https://lkml.kernel.org/r/20230104144230.938521-1-qiang1.zhang@intel.com Signed-off-by: Zqiang Acked-by: Tejun Heo Reviewed-by: Petr Mladek Signed-off-by: Andrew Morton --- kernel/kthread.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/kthread.c b/kernel/kthread.c index f97fd01a29325..7e6751b29101e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1382,6 +1382,10 @@ EXPORT_SYMBOL_GPL(kthread_flush_worker); * Flush and destroy @worker. The simple flush is enough because the kthread * worker API is used only in trivial scenarios. There are no multi-step state * machines needed. + * + * Note that this function is not responsible for handling delayed work, so + * the caller should queue or cancel all delayed work items + * before invoking this function. */ void kthread_destroy_worker(struct kthread_worker *worker) { @@ -1393,6 +1397,7 @@ void kthread_destroy_worker(struct kthread_worker *worker) kthread_flush_worker(worker); kthread_stop(task); + WARN_ON(!list_empty(&worker->delayed_work_list)); WARN_ON(!list_empty(&worker->work_list)); kfree(worker); } From fa3212f1f4da812abf2e68957376d550453eaf4c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Jan 2023 14:19:37 +0200 Subject: [PATCH 20/77] util_macros.h: add missing inclusion The header directly uses definitions from math.h, so include it. Link: https://lkml.kernel.org/r/20230103121937.32085-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/util_macros.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 72299f261b253..b641ec00be3e7 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -2,6 +2,8 @@ #ifndef _LINUX_HELPER_MACROS_H_ #define _LINUX_HELPER_MACROS_H_ +#include <linux/math.h> + #define __find_closest(x, a, as, op) \ ({ \ typeof(as) __fc_i, __fc_as = (as) - 1; \ From 8554f251c0071e64140d9838ca67c648ce762617 Mon Sep 17 00:00:00 2001 From: Dmitrii Bundin Date: Mon, 2 Jan 2023 20:10:14 +0300 Subject: [PATCH 21/77] scripts/gdb: add mm introspection utils This command provides a way to traverse the entire page hierarchy for a given virtual address on x86. In addition to qemu's commands 'info tlb'/'info mem', it provides the complete information about the paging structure for an arbitrary virtual address. It supports 4KB/2MB/1GB pages and 5-level paging.
Here is example output for a successful 2MB translation: (gdb) translate-vm address cr3: cr3 binary data 0x1085be003 next entry physical address 0x1085be000 --- bit 3 page level write through False bit 4 page level cache disabled False level 4: entry address 0xffff8881085be7f8 page entry binary data 0x800000010ac83067 next entry physical address 0x10ac83000 --- bit 0 entry present True bit 1 read/write access allowed True bit 2 user access allowed True bit 3 page level write through False bit 4 page level cache disabled False bit 5 entry has been accessed True bit 7 page size False bit 11 restart to ordinary False bit 63 execute disable True level 3: entry address 0xffff88810ac83a48 page entry binary data 0x101af7067 next entry physical address 0x101af7000 --- bit 0 entry present True bit 1 read/write access allowed True bit 2 user access allowed True bit 3 page level write through False bit 4 page level cache disabled False bit 5 entry has been accessed True bit 7 page size False bit 11 restart to ordinary False bit 63 execute disable False level 2: entry address 0xffff888101af7368 page entry binary data 0x80000001634008e7 page size 2MB page physical address 0x163400000 --- bit 0 entry present True bit 1 read/write access allowed True bit 2 user access allowed True bit 3 page level write through False bit 4 page level cache disabled False bit 5 entry has been accessed True bit 7 page size True bit 6 page dirty True bit 8 global translation False bit 11 restart to ordinary True bit 12 pat False bits (59, 62) protection key 0 bit 63 execute disable True Link: https://lkml.kernel.org/r/20230102171014.31408-1-dmitrii.bundin.a@gmail.com Signed-off-by: Dmitrii Bundin Acked-by: Mike Rapoport (IBM) Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Jan Kiszka Cc: Kieran Bingham Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 223 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 scripts/gdb/linux/mm.py diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py new file mode 100644 index 0000000000000..2a0b6cb6a1f89 --- /dev/null +++ b/scripts/gdb/linux/mm.py @@ -0,0 +1,223 @@ +# +# gdb helper commands and functions for Linux kernel debugging +# +# routines to introspect page table +# +# Authors: +# Dmitrii Bundin +# +# This work is licensed under the terms of the GNU GPL version 2.
+# + +import gdb + +from linux import utils + +PHYSICAL_ADDRESS_MASK = gdb.parse_and_eval('0xfffffffffffff') + + +def page_mask(level=1): + # 4KB + if level == 1: + return gdb.parse_and_eval('(u64) ~0xfff') + # 2MB + elif level == 2: + return gdb.parse_and_eval('(u64) ~0x1fffff') + # 1GB + elif level == 3: + return gdb.parse_and_eval('(u64) ~0x3fffffff') + else: + raise Exception(f'Unknown page level: {level}') + + +#page_offset_base in case CONFIG_DYNAMIC_MEMORY_LAYOUT is disabled +POB_NO_DYNAMIC_MEM_LAYOUT = '0xffff888000000000' +def _page_offset_base(): + pob_symbol = gdb.lookup_global_symbol('page_offset_base') + pob = pob_symbol.name if pob_symbol else POB_NO_DYNAMIC_MEM_LAYOUT + return gdb.parse_and_eval(pob) + + +def is_bit_defined_tupled(data, offset): + return offset, bool(data >> offset & 1) + +def content_tupled(data, bit_start, bit_end): + return (bit_start, bit_end), data >> bit_start & ((1 << (1 + bit_end - bit_start)) - 1) + +def entry_va(level, phys_addr, translating_va): + def start_bit(level): + if level == 5: + return 48 + elif level == 4: + return 39 + elif level == 3: + return 30 + elif level == 2: + return 21 + elif level == 1: + return 12 + else: + raise Exception(f'Unknown level {level}') + + entry_offset = ((translating_va >> start_bit(level)) & 511) * 8 + entry_va = _page_offset_base() + phys_addr + entry_offset + return entry_va + +class Cr3(): + def __init__(self, cr3, page_levels): + self.cr3 = cr3 + self.page_levels = page_levels + self.page_level_write_through = is_bit_defined_tupled(cr3, 3) + self.page_level_cache_disabled = is_bit_defined_tupled(cr3, 4) + self.next_entry_physical_address = cr3 & PHYSICAL_ADDRESS_MASK & page_mask() + + def next_entry(self, va): + next_level = self.page_levels + return PageHierarchyEntry(entry_va(next_level, self.next_entry_physical_address, va), next_level) + + def mk_string(self): + return f"""\ +cr3: + {'cr3 binary data': <30} {hex(self.cr3)} + {'next entry physicall address': <30} {hex(self.next_entry_physical_address)} + --- + {'bit' : <4} {self.page_level_write_through[0]: <10} {'page level write through': <30} {self.page_level_write_through[1]} + {'bit' : <4} {self.page_level_cache_disabled[0]: <10} {'page level cache disabled': <30} {self.page_level_cache_disabled[1]} +""" + + +class PageHierarchyEntry(): + def __init__(self, address, level): + data = int.from_bytes( + memoryview(gdb.selected_inferior().read_memory(address, 8)), + "little" + ) + if level == 1: + self.is_page = True + self.entry_present = is_bit_defined_tupled(data, 0) + self.read_write = is_bit_defined_tupled(data, 1) + self.user_access_allowed = is_bit_defined_tupled(data, 2) + self.page_level_write_through = is_bit_defined_tupled(data, 3) + self.page_level_cache_disabled = is_bit_defined_tupled(data, 4) + self.entry_was_accessed = is_bit_defined_tupled(data, 5) + self.dirty = is_bit_defined_tupled(data, 6) + self.pat = is_bit_defined_tupled(data, 7) + self.global_translation = is_bit_defined_tupled(data, 8) + self.page_physical_address = data & PHYSICAL_ADDRESS_MASK & page_mask(level) + self.next_entry_physical_address = None + self.hlat_restart_with_ordinary = is_bit_defined_tupled(data, 11) + self.protection_key = content_tupled(data, 59, 62) + self.executed_disable = is_bit_defined_tupled(data, 63) + else: + page_size = is_bit_defined_tupled(data, 7) + page_size_bit = page_size[1] + self.is_page = page_size_bit + self.entry_present = is_bit_defined_tupled(data, 0) + self.read_write = is_bit_defined_tupled(data, 1) + self.user_access_allowed = 
is_bit_defined_tupled(data, 2) + self.page_level_write_through = is_bit_defined_tupled(data, 3) + self.page_level_cache_disabled = is_bit_defined_tupled(data, 4) + self.entry_was_accessed = is_bit_defined_tupled(data, 5) + self.page_size = page_size + self.dirty = is_bit_defined_tupled( + data, 6) if page_size_bit else None + self.global_translation = is_bit_defined_tupled( + data, 8) if page_size_bit else None + self.pat = is_bit_defined_tupled( + data, 12) if page_size_bit else None + self.page_physical_address = data & PHYSICAL_ADDRESS_MASK & page_mask(level) if page_size_bit else None + self.next_entry_physical_address = None if page_size_bit else data & PHYSICAL_ADDRESS_MASK & page_mask() + self.hlat_restart_with_ordinary = is_bit_defined_tupled(data, 11) + self.protection_key = content_tupled(data, 59, 62) if page_size_bit else None + self.executed_disable = is_bit_defined_tupled(data, 63) + self.address = address + self.page_entry_binary_data = data + self.page_hierarchy_level = level + + def next_entry(self, va): + if self.is_page or not self.entry_present[1]: + return None + + next_level = self.page_hierarchy_level - 1 + return PageHierarchyEntry(entry_va(next_level, self.next_entry_physical_address, va), next_level) + + + def mk_string(self): + if not self.entry_present[1]: + return f"""\ +level {self.page_hierarchy_level}: + {'entry address': <30} {hex(self.address)} + {'page entry binary data': <30} {hex(self.page_entry_binary_data)} + --- + PAGE ENTRY IS NOT PRESENT! +""" + elif self.is_page: + def page_size_line(ps_bit, ps, level): + return "" if level == 1 else f"{'bit': <3} {ps_bit: <5} {'page size': <30} {ps}" + + return f"""\ +level {self.page_hierarchy_level}: + {'entry address': <30} {hex(self.address)} + {'page entry binary data': <30} {hex(self.page_entry_binary_data)} + {'page size': <30} {'1GB' if self.page_hierarchy_level == 3 else '2MB' if self.page_hierarchy_level == 2 else '4KB' if self.page_hierarchy_level == 1 else 'Unknown page size for level:' + self.page_hierarchy_level} + {'page physicall address': <30} {hex(self.page_physical_address)} + --- + {'bit': <4} {self.entry_present[0]: <10} {'entry present': <30} {self.entry_present[1]} + {'bit': <4} {self.read_write[0]: <10} {'read/write access allowed': <30} {self.read_write[1]} + {'bit': <4} {self.user_access_allowed[0]: <10} {'user access allowed': <30} {self.user_access_allowed[1]} + {'bit': <4} {self.page_level_write_through[0]: <10} {'page level write through': <30} {self.page_level_write_through[1]} + {'bit': <4} {self.page_level_cache_disabled[0]: <10} {'page level cache disabled': <30} {self.page_level_cache_disabled[1]} + {'bit': <4} {self.entry_was_accessed[0]: <10} {'entry has been accessed': <30} {self.entry_was_accessed[1]} + {"" if self.page_hierarchy_level == 1 else f"{'bit': <4} {self.page_size[0]: <10} {'page size': <30} {self.page_size[1]}"} + {'bit': <4} {self.dirty[0]: <10} {'page dirty': <30} {self.dirty[1]} + {'bit': <4} {self.global_translation[0]: <10} {'global translation': <30} {self.global_translation[1]} + {'bit': <4} {self.hlat_restart_with_ordinary[0]: <10} {'restart to ordinary': <30} {self.hlat_restart_with_ordinary[1]} + {'bit': <4} {self.pat[0]: <10} {'pat': <30} {self.pat[1]} + {'bits': <4} {str(self.protection_key[0]): <10} {'protection key': <30} {self.protection_key[1]} + {'bit': <4} {self.executed_disable[0]: <10} {'execute disable': <30} {self.executed_disable[1]} +""" + else: + return f"""\ +level {self.page_hierarchy_level}: + {'entry address': <30} {hex(self.address)} 
+ {'page entry binary data': <30} {hex(self.page_entry_binary_data)} + {'next entry physicall address': <30} {hex(self.next_entry_physical_address)} + --- + {'bit': <4} {self.entry_present[0]: <10} {'entry present': <30} {self.entry_present[1]} + {'bit': <4} {self.read_write[0]: <10} {'read/write access allowed': <30} {self.read_write[1]} + {'bit': <4} {self.user_access_allowed[0]: <10} {'user access allowed': <30} {self.user_access_allowed[1]} + {'bit': <4} {self.page_level_write_through[0]: <10} {'page level write through': <30} {self.page_level_write_through[1]} + {'bit': <4} {self.page_level_cache_disabled[0]: <10} {'page level cache disabled': <30} {self.page_level_cache_disabled[1]} + {'bit': <4} {self.entry_was_accessed[0]: <10} {'entry has been accessed': <30} {self.entry_was_accessed[1]} + {'bit': <4} {self.page_size[0]: <10} {'page size': <30} {self.page_size[1]} + {'bit': <4} {self.hlat_restart_with_ordinary[0]: <10} {'restart to ordinary': <30} {self.hlat_restart_with_ordinary[1]} + {'bit': <4} {self.executed_disable[0]: <10} {'execute disable': <30} {self.executed_disable[1]} +""" + + +class TranslateVM(gdb.Command): + """Prints the entire paging structure used to translate a given virtual address. + +Having an address space of the currently executed process translates the virtual address +and prints detailed information of all paging structure levels used for the translation. +Currently supported arch: x86""" + + def __init__(self): + super(TranslateVM, self).__init__('translate-vm', gdb.COMMAND_USER) + + def invoke(self, arg, from_tty): + if utils.is_target_arch("x86"): + vm_address = gdb.parse_and_eval(f'{arg}') + cr3_data = gdb.parse_and_eval('$cr3') + cr4 = gdb.parse_and_eval('$cr4') + page_levels = 5 if cr4 & (1 << 12) else 4 + page_entry = Cr3(cr3_data, page_levels) + while page_entry: + gdb.write(page_entry.mk_string()) + page_entry = page_entry.next_entry(vm_address) + else: + raise gdb.GdbError("Virtual address translation is not " + "supported for this arch") + + +TranslateVM() From f26474e4a4e845e3b7cbc4f97cb732ad116e359a Mon Sep 17 00:00:00 2001 From: Dmitrii Bundin Date: Fri, 13 Jan 2023 20:51:51 +0300 Subject: [PATCH 22/77] scripts/gdb: add mm introspection utils added SPDX line, other tweaks Link: https://lkml.kernel.org/r/20230113175151.22278-1-dmitrii.bundin.a@gmail.com Signed-off-by: Dmitrii Bundin Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Jan Kiszka Cc: Kieran Bingham Cc: Mike Rapoport (IBM) Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 3 +-- scripts/gdb/vmlinux-gdb.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index 2a0b6cb6a1f89..7456893ffa9cf 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # gdb helper commands and functions for Linux kernel debugging # @@ -6,8 +7,6 @@ # Authors: # Dmitrii Bundin # -# This work is licensed under the terms of the GNU GPL version 2.
-# import gdb diff --git a/scripts/gdb/vmlinux-gdb.py b/scripts/gdb/vmlinux-gdb.py index 3e8d3669f0ce0..3a5b44cd6bfe4 100644 --- a/scripts/gdb/vmlinux-gdb.py +++ b/scripts/gdb/vmlinux-gdb.py @@ -37,3 +37,4 @@ import linux.clk import linux.genpd import linux.device + import linux.mm From 020e56da2e0b594ee5aaf85602f38c48ab91041c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 13 Jan 2023 20:04:59 -0800 Subject: [PATCH 23/77] scripts-gdb-add-mm-introspection-utils-fix s/physicall/physical/ Cc: Dmitrii Bundin Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Jan Kiszka Cc: Kieran Bingham Cc: Mike Rapoport (IBM) Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- scripts/gdb/linux/mm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/gdb/linux/mm.py b/scripts/gdb/linux/mm.py index 7456893ffa9cf..30d837f3dfae1 100644 --- a/scripts/gdb/linux/mm.py +++ b/scripts/gdb/linux/mm.py @@ -78,7 +78,7 @@ def mk_string(self): return f"""\ cr3: {'cr3 binary data': <30} {hex(self.cr3)} - {'next entry physicall address': <30} {hex(self.next_entry_physical_address)} + {'next entry physical address': <30} {hex(self.next_entry_physical_address)} --- {'bit' : <4} {self.page_level_write_through[0]: <10} {'page level write through': <30} {self.page_level_write_through[1]} {'bit' : <4} {self.page_level_cache_disabled[0]: <10} {'page level cache disabled': <30} {self.page_level_cache_disabled[1]} @@ -159,7 +159,7 @@ def page_size_line(ps_bit, ps, level): {'entry address': <30} {hex(self.address)} {'page entry binary data': <30} {hex(self.page_entry_binary_data)} {'page size': <30} {'1GB' if self.page_hierarchy_level == 3 else '2MB' if self.page_hierarchy_level == 2 else '4KB' if self.page_hierarchy_level == 1 else 'Unknown page size for level:' + self.page_hierarchy_level} - {'page physicall address': <30} {hex(self.page_physical_address)} + {'page physical address': <30} {hex(self.page_physical_address)} --- {'bit': <4} {self.entry_present[0]: <10} {'entry present': <30} {self.entry_present[1]} {'bit': <4} {self.read_write[0]: <10} {'read/write access allowed': <30} {self.read_write[1]} @@ -180,7 +180,7 @@ def page_size_line(ps_bit, ps, level): level {self.page_hierarchy_level}: {'entry address': <30} {hex(self.address)} {'page entry binary data': <30} {hex(self.page_entry_binary_data)} - {'next entry physicall address': <30} {hex(self.next_entry_physical_address)} + {'next entry physical address': <30} {hex(self.next_entry_physical_address)} --- {'bit': <4} {self.entry_present[0]: <10} {'entry present': <30} {self.entry_present[1]} {'bit': <4} {self.read_write[0]: <10} {'read/write access allowed': <30} {self.read_write[1]} From c97d885483aed0e5364a26c849ebffc7e9b35b89 Mon Sep 17 00:00:00 2001 From: Alexander Pantyukhin Date: Fri, 6 Jan 2023 14:13:19 +0500 Subject: [PATCH 24/77] scripts/bloat-o-meter: use the reverse flag for sort The sort function has the inbuilt reversal option. We can use it to save some time. 
Link: https://lkml.kernel.org/r/20230106091319.3824-1-apantykhin@gmail.com Signed-off-by: Alexander Pantyukhin Cc: Nikolay Borisov Cc: Paul Gortmaker Signed-off-by: Andrew Morton --- scripts/bloat-o-meter | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter index f9553f60a14a8..36303afa9dfc3 100755 --- a/scripts/bloat-o-meter +++ b/scripts/bloat-o-meter @@ -80,8 +80,7 @@ def calc(oldfile, newfile, format): if d<0: shrink, down = shrink+1, down-d delta.append((d, name)) - delta.sort() - delta.reverse() + delta.sort(reverse=True) return grow, shrink, add, remove, up, down, delta, old, new, otot, ntot def print_result(symboltype, symbolformat): From 6f60a5fb7ef9493a57bbe73af9b6d4b2d616ba7e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Jan 2023 18:29:15 -0800 Subject: [PATCH 25/77] freevxfs: fix kernel-doc warnings Fix multiple kernel-doc warnings in freevxfs: fs/freevxfs/vxfs_subr.c:45: warning: Function parameter or member 'mapping' not described in 'vxfs_get_page' fs/freevxfs/vxfs_subr.c:45: warning: Excess function parameter 'ip' description in 'vxfs_get_page' 2 warnings fs/freevxfs/vxfs_subr.c:101: warning: expecting prototype for vxfs_get_block(). Prototype was for vxfs_getblk() instead fs/freevxfs/vxfs_super.c:184: warning: expecting prototype for vxfs_read_super(). Prototype was for vxfs_fill_super() instead Link: https://lkml.kernel.org/r/20230109022915.17504-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reviewed-by: Christoph Hellwig Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- fs/freevxfs/vxfs_subr.c | 6 +++--- fs/freevxfs/vxfs_super.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c index c99282df7761a..f439877ea6e80 100644 --- a/fs/freevxfs/vxfs_subr.c +++ b/fs/freevxfs/vxfs_subr.c @@ -31,7 +31,7 @@ vxfs_put_page(struct page *pp) /** * vxfs_get_page - read a page into memory. - * @ip: inode to read from + * @mapping: mapping to read from * @n: page number * * Description: @@ -81,14 +81,14 @@ vxfs_bread(struct inode *ip, int block) } /** - * vxfs_get_block - locate buffer for given inode,block tuple + * vxfs_getblk - locate buffer for given inode,block tuple * @ip: inode * @iblock: logical block * @bp: buffer skeleton * @create: %TRUE if blocks may be newly allocated. * * Description: - * The vxfs_get_block function fills @bp with the right physical + * The vxfs_getblk function fills @bp with the right physical * block and device number to perform a lowlevel read/write on * it. * diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index c3b82f716f9a7..310d73e254df2 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -165,7 +165,7 @@ static int vxfs_try_sb_magic(struct super_block *sbp, int silent, } /** - * vxfs_read_super - read superblock into memory and initialize filesystem + * vxfs_fill_super - read superblock into memory and initialize filesystem * @sbp: VFS superblock (to fill) * @dp: fs private mount data * @silent: do not complain loudly when sth is wrong From 53e99e0122ebd2cc68b27df08c4fc137159bc116 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Jan 2023 17:00:41 -0800 Subject: [PATCH 26/77] ntfs: fix multiple kernel-doc warnings Fix many W=1 kernel-doc warnings in fs/ntfs/: fs/ntfs/aops.c:30: warning: Incorrect use of kernel-doc format: * ntfs_end_buffer_async_read - async io completion for reading attributes fs/ntfs/aops.c:46: warning: expecting prototype for aops.c(). 
Prototype was for ntfs_end_buffer_async_read() instead fs/ntfs/aops.c:1655: warning: cannot understand function prototype: 'const struct address_space_operations ntfs_normal_aops = ' fs/ntfs/aops.c:1670: warning: cannot understand function prototype: 'const struct address_space_operations ntfs_compressed_aops = ' fs/ntfs/aops.c:1685: warning: cannot understand function prototype: 'const struct address_space_operations ntfs_mst_aops = ' fs/ntfs/compress.c:22: warning: Incorrect use of kernel-doc format: * ntfs_compression_constants - enum of constants used in the compression code fs/ntfs/compress.c:24: warning: cannot understand function prototype: 'typedef enum ' fs/ntfs/compress.c:47: warning: cannot understand function prototype: 'u8 *ntfs_compression_buffer; ' fs/ntfs/compress.c:52: warning: expecting prototype for ntfs_cb_lock(). Prototype was for DEFINE_SPINLOCK() instead fs/ntfs/dir.c:21: warning: Incorrect use of kernel-doc format: * The little endian Unicode string $I30 as a global constant. fs/ntfs/dir.c:23: warning: cannot understand function prototype: 'ntfschar I30[5] = ' fs/ntfs/inode.c:31: warning: Incorrect use of kernel-doc format: * ntfs_test_inode - compare two (possibly fake) inodes for equality fs/ntfs/inode.c:47: warning: expecting prototype for inode.c(). Prototype was for ntfs_test_inode() instead fs/ntfs/inode.c:2956: warning: expecting prototype for ntfs_write_inode(). Prototype was for __ntfs_write_inode() instead fs/ntfs/mft.c:24: warning: expecting prototype for mft.c - NTFS kernel mft record operations. Part of the Linux(). Prototype was for MAX_BHS() instead fs/ntfs/namei.c:263: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Inode operations for directories. fs/ntfs/namei.c:368: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Export operations allowing NFS exporting of mounted NTFS partitions. fs/ntfs/runlist.c:16: warning: Incorrect use of kernel-doc format: * ntfs_rl_mm - runlist memmove fs/ntfs/runlist.c:22: warning: expecting prototype for runlist.c - NTFS runlist handling code. Part of the Linux(). Prototype was for ntfs_rl_mm() instead fs/ntfs/super.c:61: warning: missing initial short description on line: * simple_getbool - fs/ntfs/super.c:2661: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * The complete super operations. Link: https://lkml.kernel.org/r/20230109010041.21442-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Anton Altaparmakov Signed-off-by: Andrew Morton --- fs/ntfs/aops.c | 10 +++++----- fs/ntfs/aops.h | 2 +- fs/ntfs/compress.c | 6 +++--- fs/ntfs/dir.c | 4 ++-- fs/ntfs/inode.c | 6 +++--- fs/ntfs/mft.c | 2 +- fs/ntfs/namei.c | 4 ++-- fs/ntfs/runlist.c | 2 +- fs/ntfs/super.c | 12 ++++++++++-- 9 files changed, 28 insertions(+), 20 deletions(-) diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 9364d35b4a10e..e8aeba124a954 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * aops.c - NTFS kernel address space operations and page cache handling. * * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. 
@@ -1646,7 +1646,7 @@ static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) return block; } -/** +/* * ntfs_normal_aops - address space operations for normal inodes and attributes * * Note these are not used for compressed or mst protected inodes and @@ -1664,7 +1664,7 @@ const struct address_space_operations ntfs_normal_aops = { .error_remove_page = generic_error_remove_page, }; -/** +/* * ntfs_compressed_aops - address space operations for compressed inodes */ const struct address_space_operations ntfs_compressed_aops = { @@ -1678,9 +1678,9 @@ const struct address_space_operations ntfs_compressed_aops = { .error_remove_page = generic_error_remove_page, }; -/** +/* * ntfs_mst_aops - general address space operations for mst protecteed inodes - * and attributes + * and attributes */ const struct address_space_operations ntfs_mst_aops = { .read_folio = ntfs_read_folio, /* Fill page with data. */ diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h index 0cac5458c023e..8d0958a149cb2 100644 --- a/fs/ntfs/aops.h +++ b/fs/ntfs/aops.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/** +/* * aops.h - Defines for NTFS kernel address space operations and page cache * handling. Part of the Linux-NTFS project. * diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 587e9b1878738..f9cb180b6f6be 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * compress.c - NTFS kernel compressed attributes handling. * Part of the Linux-NTFS project. * @@ -41,12 +41,12 @@ typedef enum { NTFS_MAX_CB_SIZE = 64 * 1024, } ntfs_compression_constants; -/** +/* * ntfs_compression_buffer - one buffer for the decompression engine */ static u8 *ntfs_compression_buffer; -/** +/* * ntfs_cb_lock - spinlock which protects ntfs_compression_buffer */ static DEFINE_SPINLOCK(ntfs_cb_lock); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index cd96083a12c8e..518c3a21a556d 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. * * Copyright (c) 2001-2007 Anton Altaparmakov @@ -17,7 +17,7 @@ #include "debug.h" #include "ntfs.h" -/** +/* * The little endian Unicode string $I30 as a global constant. */ ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'), diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 08c659332e26b..cbbf3ffe12209 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * inode.c - NTFS kernel inode handling. * * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc. @@ -2935,7 +2935,7 @@ int ntfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, } /** - * ntfs_write_inode - write out a dirty inode + * __ntfs_write_inode - write out a dirty inode * @vi: inode to write out * @sync: if true, write out synchronously * @@ -3033,7 +3033,7 @@ int __ntfs_write_inode(struct inode *vi, int sync) * might not need to be written out. * NOTE: It is not a problem when the inode for $MFT itself is being * written out as mark_ntfs_record_dirty() will only set I_DIRTY_PAGES - * on the $MFT inode and hence ntfs_write_inode() will not be + * on the $MFT inode and hence __ntfs_write_inode() will not be * re-invoked because of it which in turn is ok since the dirtied mft * record will be cleaned and written out to disk below, i.e. before * this function returns. 
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index f7bf5ce960ccc..48030899dc6ec 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc. diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 4e6a44bc654ce..ab44f2db533be 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -259,7 +259,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, } } -/** +/* * Inode operations for directories. */ const struct inode_operations ntfs_dir_inode_ops = { @@ -364,7 +364,7 @@ static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid, ntfs_nfs_get_inode); } -/** +/* * Export operations allowing NFS exporting of mounted NTFS partitions. * * We use the default ->encode_fh() for now. Note that they diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index 97932fb5179cd..0d448e9881f71 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/** +/* * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. * * Copyright (c) 2001-2007 Anton Altaparmakov diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 001f4e053c85a..2643a08182e18 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -58,9 +58,17 @@ const option_t on_errors_arr[] = { }; /** - * simple_getbool - + * simple_getbool - convert input string to a boolean value + * @s: input string to convert + * @setval: where to store the output boolean value * * Copied from old ntfs driver (which copied from vfat driver). + * + * "1", "yes", "true", or an empty string are converted to %true. + * "0", "no", and "false" are converted to %false. + * + * Return: %1 if the string is converted or was empty and *setval contains it; + * %0 if the string was not valid. */ static int simple_getbool(char *s, bool *setval) { @@ -2657,7 +2665,7 @@ static int ntfs_write_inode(struct inode *vi, struct writeback_control *wbc) } #endif -/** +/* * The complete super operations. */ static const struct super_operations ntfs_sops = { From 4936bacc38e9f8c618f113bdeceb5f48f5465ab6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 7 Jan 2023 18:12:43 -0800 Subject: [PATCH 27/77] userns: fix a struct's kernel-doc notation Use the 'struct' keyword for a struct's kernel-doc notation to avoid a kernel-doc warning: kernel/user_namespace.c:232: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * idmap_key struct holds the information necessary to find an idmapping in a Link: https://lkml.kernel.org/r/20230108021243.16683-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Eric Biederman Signed-off-by: Andrew Morton --- kernel/user_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 54211dbd516c5..1d8e47bed3f11 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -229,7 +229,7 @@ void __put_user_ns(struct user_namespace *ns) EXPORT_SYMBOL(__put_user_ns); /** - * idmap_key struct holds the information necessary to find an idmapping in a + * struct idmap_key - holds the information necessary to find an idmapping in a * sorted idmap array. It is passed to cmp_map_id() as first argument. 
*/ struct idmap_key { From 33a4de892c770d3efb6814ae3e557edb71c67e67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 26 Dec 2022 15:25:12 +0100 Subject: [PATCH 28/77] fat: fix return value of vfat_bad_char() and vfat_replace_char() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions return a boolean value, not a wide character. Link: https://lkml.kernel.org/r/20221226142512.13848-1-pali@kernel.org Signed-off-by: Pali Rohár Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton --- fs/fat/namei_vfat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 21620054e1c44..0735e4f0aeede 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -200,7 +200,7 @@ static const struct dentry_operations vfat_dentry_ops = { /* Characters that are undesirable in an MS-DOS file name */ -static inline wchar_t vfat_bad_char(wchar_t w) +static inline bool vfat_bad_char(wchar_t w) { return (w < 0x0020) || (w == '*') || (w == '?') || (w == '<') || (w == '>') @@ -208,7 +208,7 @@ static inline wchar_t vfat_bad_char(wchar_t w) || (w == '\\'); } -static inline wchar_t vfat_replace_char(wchar_t w) +static inline bool vfat_replace_char(wchar_t w) { return (w == '[') || (w == ']') || (w == ';') || (w == ',') || (w == '+') || (w == '='); From 5e41c64499aad389d771c8896909fd7e5e2619cc Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 4 Jan 2023 15:38:46 +0100 Subject: [PATCH 29/77] Documentation: sysctl: correct kexec_load_disabled Patch series "kexec: Add new parameter to limit the access to kexec", v6. Add two parameters to specify how many times a kexec kernel can be loaded. These parameters allow hardening the system. While we are at it, fix a documentation issue and refactor some code. This patch (of 3): kexec_load_disabled affects both ``kexec_load`` and ``kexec_file_load`` syscalls. Make it explicit. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-0-6a8531a09b9a@chromium.org Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-1-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda Reviewed-by: Steven Rostedt (Google) Acked-by: Baoquan He Cc: Bagas Sanjaya Cc: "Eric W. Biederman" Cc: Guilherme G. Piccoli Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Philipp Rudo Cc: Ross Zwisler Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- Documentation/admin-guide/sysctl/kernel.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 46e3d62c0eea8..b53c0235cb433 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -453,9 +453,10 @@ this allows system administrators to override the kexec_load_disabled =================== -A toggle indicating if the ``kexec_load`` syscall has been disabled. -This value defaults to 0 (false: ``kexec_load`` enabled), but can be -set to 1 (true: ``kexec_load`` disabled). +A toggle indicating if the syscalls ``kexec_load`` and +``kexec_file_load`` have been disabled. +This value defaults to 0 (false: ``kexec_*load`` enabled), but can be +set to 1 (true: ``kexec_*load`` disabled). Once true, kexec can no longer be used, and the toggle cannot be set back to false.
This allows a kexec image to be loaded before disabling the syscall, From 682cec4f3e03f6d6ef182ec4c81497744834d6be Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 4 Jan 2023 15:38:47 +0100 Subject: [PATCH 30/77] kexec: factor out kexec_load_permitted Both syscalls (kexec and kexec_file) do the same check, let's factor it out. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-2-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda Reviewed-by: Steven Rostedt (Google) Acked-by: Baoquan He Cc: Bagas Sanjaya Cc: "Eric W. Biederman" Cc: Guilherme G. Piccoli Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Philipp Rudo Cc: Ross Zwisler Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/kexec.h | 3 ++- kernel/kexec.c | 2 +- kernel/kexec_core.c | 11 ++++++++++- kernel/kexec_file.c | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 5dd4343c1bbe5..f18a3c9e813b5 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -403,7 +403,8 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -extern int kexec_load_disabled; + +bool kexec_load_permitted(void); #ifndef kexec_flush_icache_page #define kexec_flush_icache_page(page) diff --git a/kernel/kexec.c b/kernel/kexec.c index cb8e6e6f983c7..ce1bca874a8d0 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -193,7 +193,7 @@ static inline int kexec_load_check(unsigned long nr_segments, int result; /* We only trust the superuser with rebooting the system. */ - if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + if (!kexec_load_permitted()) return -EPERM; /* Permit LSMs and IMA to fail the kexec */ diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 969e8f52f7da9..d51ebbaeb1b29 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -922,7 +922,7 @@ int kimage_load_segment(struct kimage *image, struct kimage *kexec_image; struct kimage *kexec_crash_image; -int kexec_load_disabled; +static int kexec_load_disabled; #ifdef CONFIG_SYSCTL static struct ctl_table kexec_core_sysctls[] = { { @@ -946,6 +946,15 @@ static int __init kexec_core_sysctl_init(void) late_initcall(kexec_core_sysctl_init); #endif +bool kexec_load_permitted(void) +{ + /* + * Only the superuser can use the kexec syscall and if it has not + * been disabled. + */ + return capable(CAP_SYS_BOOT) && !kexec_load_disabled; +} + /* * No panic_cpu check version of crash_kexec(). This function is called * only when panic_cpu holds the current CPU number; this is the only CPU diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index dd5983010b7b0..c897eb4b8c8cd 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -330,7 +330,7 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, struct kimage **dest_image, *image; /* We only trust the superuser with rebooting the system. */ - if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + if (!kexec_load_permitted()) return -EPERM; /* Make sure we have a legal set of flags */ From 7c89b46c661cb3d1cf922b79758f3439e94bf770 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 4 Jan 2023 15:38:48 +0100 Subject: [PATCH 31/77] kexec: introduce sysctl parameters kexec_load_limit_* kexec allows replacing the current kernel with a different one. This is usually a source of concerns for sysadmins that want to harden a system. 
Linux already provides a way to disable loading a new kexec kernel via kexec_load_disabled, but that control is very coarse: it is all or nothing, and it does not distinguish between a panic kexec and a normal kexec. This patch introduces new sysctl parameters, with finer tuning, to specify how many times a kexec kernel can be loaded. The sysadmin can set different limits for kexec panic and kexec reboot kernels. The value can be modified at runtime via sysctl, but only to a stricter value. With these new parameters in place, a system with loadpin and verity enabled, using the following kernel parameters: sysctl.kexec_load_limit_reboot=0 sysctl.kexec_load_limit_panic=1 has a good guarantee that, if the initrd tries to load a panic kernel, a malicious user will have little chance of replacing that kernel with a different one, even if they can trigger timeouts on the disk where the panic kernel lives. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-3-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda Reviewed-by: Steven Rostedt (Google) Acked-by: Baoquan He Cc: Bagas Sanjaya Cc: "Eric W. Biederman" Cc: Guilherme G. Piccoli # Steam Deck Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Philipp Rudo Cc: Ross Zwisler Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- Documentation/admin-guide/sysctl/kernel.rst | 18 +++++ include/linux/kexec.h | 2 +- kernel/kexec.c | 4 +- kernel/kexec_core.c | 87 ++++++++++++++++++++- kernel/kexec_file.c | 11 ++- 5 files changed, 114 insertions(+), 8 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index b53c0235cb433..4b7bfea28cd77 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -464,6 +464,24 @@ allowing a system to set up (and later use) an image without it being altered. Generally used together with the `modules_disabled`_ sysctl. +kexec_load_limit_panic +====================== + +This parameter specifies a limit to the number of times the syscalls +``kexec_load`` and ``kexec_file_load`` can be called with a crash +image. It can only be set with a more restrictive value than the +current one. + +== ====================================================== +-1 Unlimited calls to kexec. This is the default setting. +N Number of calls left. +== ====================================================== + +kexec_load_limit_reboot +======================= + +Similar functionality as ``kexec_load_limit_panic``, but for a normal +image. kptr_restrict ============= diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f18a3c9e813b5..6883c59227015 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -404,7 +404,7 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -bool kexec_load_permitted(void); +bool kexec_load_permitted(int kexec_image_type); #ifndef kexec_flush_icache_page #define kexec_flush_icache_page(page) diff --git a/kernel/kexec.c b/kernel/kexec.c index ce1bca874a8d0..92d301f987766 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -190,10 +190,12 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments, static inline int kexec_load_check(unsigned long nr_segments, unsigned long flags) { + int image_type = (flags & KEXEC_ON_CRASH) ? + KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT; int result; /* We only trust the superuser with rebooting the system.
*/ - if (!kexec_load_permitted()) + if (!kexec_load_permitted(image_type)) return -EPERM; /* Permit LSMs and IMA to fail the kexec */ diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index d51ebbaeb1b29..ab140098c3add 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -920,10 +920,64 @@ int kimage_load_segment(struct kimage *image, return result; } +struct kexec_load_limit { + /* Mutex protects the limit count. */ + struct mutex mutex; + int limit; +}; + +static struct kexec_load_limit load_limit_reboot = { + .mutex = __MUTEX_INITIALIZER(load_limit_reboot.mutex), + .limit = -1, +}; + +static struct kexec_load_limit load_limit_panic = { + .mutex = __MUTEX_INITIALIZER(load_limit_panic.mutex), + .limit = -1, +}; + struct kimage *kexec_image; struct kimage *kexec_crash_image; static int kexec_load_disabled; + #ifdef CONFIG_SYSCTL +static int kexec_limit_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct kexec_load_limit *limit = table->data; + int val; + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(val), + .mode = table->mode, + }; + int ret; + + if (write) { + ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); + if (ret) + return ret; + + if (val < 0) + return -EINVAL; + + mutex_lock(&limit->mutex); + if (limit->limit != -1 && val >= limit->limit) + ret = -EINVAL; + else + limit->limit = val; + mutex_unlock(&limit->mutex); + + return ret; + } + + mutex_lock(&limit->mutex); + val = limit->limit; + mutex_unlock(&limit->mutex); + + return proc_dointvec(&tmp, write, buffer, lenp, ppos); +} + static struct ctl_table kexec_core_sysctls[] = { { .procname = "kexec_load_disabled", @@ -935,6 +989,18 @@ static struct ctl_table kexec_core_sysctls[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_ONE, }, + { + .procname = "kexec_load_limit_panic", + .data = &load_limit_panic, + .mode = 0644, + .proc_handler = kexec_limit_handler, + }, + { + .procname = "kexec_load_limit_reboot", + .data = &load_limit_reboot, + .mode = 0644, + .proc_handler = kexec_limit_handler, + }, { } }; @@ -946,13 +1012,30 @@ static int __init kexec_core_sysctl_init(void) late_initcall(kexec_core_sysctl_init); #endif -bool kexec_load_permitted(void) +bool kexec_load_permitted(int kexec_image_type) { + struct kexec_load_limit *limit; + /* * Only the superuser can use the kexec syscall and if it has not * been disabled. */ - return capable(CAP_SYS_BOOT) && !kexec_load_disabled; + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) + return false; + + /* Check limit counter and decrease it.*/ + limit = (kexec_image_type == KEXEC_TYPE_CRASH) ? + &load_limit_panic : &load_limit_reboot; + mutex_lock(&limit->mutex); + if (!limit->limit) { + mutex_unlock(&limit->mutex); + return false; + } + if (limit->limit != -1) + limit->limit--; + mutex_unlock(&limit->mutex); + + return true; } /* diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index c897eb4b8c8cd..f1a0e4e3fb5cd 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -326,11 +326,13 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) { - int ret = 0, i; + int image_type = (flags & KEXEC_FILE_ON_CRASH) ? + KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT; struct kimage **dest_image, *image; + int ret = 0, i; /* We only trust the superuser with rebooting the system. 
*/ - if (!kexec_load_permitted()) + if (!kexec_load_permitted(image_type)) return -EPERM; /* Make sure we have a legal set of flags */ @@ -342,11 +344,12 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, if (!kexec_trylock()) return -EBUSY; - dest_image = &kexec_image; - if (flags & KEXEC_FILE_ON_CRASH) { + if (image_type == KEXEC_TYPE_CRASH) { dest_image = &kexec_crash_image; if (kexec_crash_image) arch_kexec_unprotect_crashkres(); + } else { + dest_image = &kexec_image; } if (flags & KEXEC_FILE_UNLOAD) From 98ec668609d305dc7ba5f245132d5bb2194a1b11 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 14 Jan 2023 18:21:38 +0100 Subject: [PATCH 32/77] initramfs: use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more widely used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file (<linux/kstrtox.h>) Link: https://lkml.kernel.org/r/2597e80cb7059ec6ad63a01b77d7c944dcc99195.1673716768.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Cc: Christian Brauner Cc: David Disseldorp Cc: "Eric W. Biederman" Cc: Martin Wilck Cc: wuchi Cc: XU pengfei Signed-off-by: Andrew Morton --- init/initramfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/initramfs.c b/init/initramfs.c index 62321883fe613..f6c112e30bd47 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -11,6 +11,7 @@ #include #include #include +#include <linux/kstrtox.h> #include #include #include @@ -571,8 +572,7 @@ __setup("keepinitrd", keepinitrd_setup); static bool __initdata initramfs_async = true; static int __init initramfs_async_setup(char *str) { - strtobool(str, &initramfs_async); - return 1; + return kstrtobool(str, &initramfs_async) == 0; } __setup("initramfs_async=", initramfs_async_setup); From 8089bb0b2368b1e89d0efffe65277b0fd973caa9 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 18 Jan 2023 16:07:03 +0100 Subject: [PATCH 33/77] lib/genalloc: use try_cmpxchg in {set,clear}_bits_ll Use try_cmpxchg() instead of the cmpxchg(*ptr, old, new) == old pattern in {set,clear}_bits_ll. The x86 CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after cmpxchg (and a related move instruction in front of cmpxchg). Also, try_cmpxchg implicitly assigns the old *ptr value to "old" when cmpxchg fails. Note that the value from *ptr should be read using READ_ONCE to prevent the compiler from merging, refetching or reordering the read. The patch also declares these two functions inline, to ensure inlining. No functional change intended.
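For reference, here is the general shape of the conversion; this is a sketch only, with transform() standing in for whatever new value each caller computes, and is not code taken verbatim from this patch:

	/* Before: loop until cmpxchg() returns the expected old value. */
	unsigned long val, nval;

	nval = *addr;
	do {
		val = nval;
	} while ((nval = cmpxchg(addr, val, transform(val))) != val);

	/* After: on failure, try_cmpxchg() stores the current value of
	 * *addr back into 'val', so the second variable and the explicit
	 * comparison disappear. */
	unsigned long val = READ_ONCE(*addr);
	unsigned long new;

	do {
		new = transform(val);
	} while (!try_cmpxchg(addr, &val, new));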
Link: https://lkml.kernel.org/r/20230118150703.4024-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Signed-off-by: Andrew Morton --- lib/genalloc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/genalloc.c b/lib/genalloc.c index 00fc50d0a640e..0c883d6fbd44d 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -40,32 +40,30 @@ static inline size_t chunk_size(const struct gen_pool_chunk *chunk) return chunk->end_addr - chunk->start_addr + 1; } -static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) +static inline int +set_bits_ll(unsigned long *addr, unsigned long mask_to_set) { - unsigned long val, nval; + unsigned long val = READ_ONCE(*addr); - nval = *addr; do { - val = nval; if (val & mask_to_set) return -EBUSY; cpu_relax(); - } while ((nval = cmpxchg(addr, val, val | mask_to_set)) != val); + } while (!try_cmpxchg(addr, &val, val | mask_to_set)); return 0; } -static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) +static inline int +clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) { - unsigned long val, nval; + unsigned long val = READ_ONCE(*addr); - nval = *addr; do { - val = nval; if ((val & mask_to_clear) != mask_to_clear) return -EBUSY; cpu_relax(); - } while ((nval = cmpxchg(addr, val, val & ~mask_to_clear)) != val); + } while (!try_cmpxchg(addr, &val, val & ~mask_to_clear)); return 0; } From aa5adcd72ead54c6f2e585f6b110ce756c0f5986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20Wasserb=C3=A4ch?= Date: Fri, 20 Jan 2023 13:35:18 +0100 Subject: [PATCH 34/77] checkpatch: warn when unknown tags are used for links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "checkpatch.pl: warn about discouraged tags and missing Link: tags", v4. The first two changes make checkpatch.pl check for a few mistakes with regard to links to bug reports that Linus recently complained about a few times. Avoiding those also matters a lot for my regression tracking efforts, as the automated tracking performed by regzbot relies on the proper usage of the Link: tag. The third patch fixes a few small oddities noticed in existing code during review of the two changes. This patch (of 3): Issue a warning when encountering URLs behind unknown tags, as Linus recently stated ```please stop making up random tags that make no sense. Just use "Link:"```[1]. That statement was triggered by a use of 'BugLink', but that's not the only tag people invented: $ git log -100000 --no-merges --format=email -P \ --grep='^\w+:[ ]*http' | grep -Poh '^\w+:[ ]*http' | \ sort | uniq -c | sort -rn | head -n 20 103958 Link: http 418 BugLink: http 372 Patchwork: http 280 Closes: http 224 Bug: http 123 References: http 84 Bugzilla: http 61 URL: http 42 v1: http 38 Datasheet: http 20 v2: http 9 Ref: http 9 Fixes: http 9 Buglink: http 8 v3: http 8 Reference: http 7 See: http 6 1: http 5 link: http 3 Link:http Some of these non-standard tags make it harder for external tools that rely on use of proper tags. One of those tools is the regression tracking bot 'regzbot', which looks out for "Link:" tags pointing to reports of tracked regressions. The initial idea was to use a disallow list to raise an error when encountering known unwanted tags like BugLink:; during review it was requested to use a list of allowed tags instead[2].
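For example (made-up trailer lines; the bug tracker URL is purely illustrative), the new check warns on the first form and accepts the second:

	BugLink: https://bugzilla.example.org/show_bug.cgi?id=1234	<- warns (COMMIT_LOG_USE_LINK)
	Link: https://bugzilla.example.org/show_bug.cgi?id=1234		<- accepted

Version markers such as 'v2: https://...' instead get the separate COMMIT_LOG_VERSIONING warning, as versioning information belongs below the --- line.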
Link: https://lkml.kernel.org/r/cover.1674217480.git.linux@leemhuis.info Link: https://lore.kernel.org/all/CAHk-=wgs38ZrfPvy=nOwVkVzjpM3VFU1zobP37Fwd_h9iAD5JQ@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/15f7df96d49082fb7799dda6e187b33c84f38831.camel@perches.com/ [2] Link: https://lkml.kernel.org/r/3b036087d80b8c0e07a46a1dbaaf4ad0d018f8d5.1674217480.git.linux@leemhuis.info Signed-off-by: Kai Wasserbäch Co-developed-by: Thorsten Leemhuis Signed-off-by: Thorsten Leemhuis Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a6d6d7e1d0cf1..facebdc6ee076 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3252,6 +3252,18 @@ sub process { $commit_log_possible_stack_dump = 0; } +# Check for odd tags before a URI/URL + if ($in_commit_log && + $line =~ /^\s*(\w+):\s*http/ && $1 ne 'Link') { + if ($1 =~ /^v(?:ersion)?\d+/i) { + WARN("COMMIT_LOG_VERSIONING", + "Patch version information should be after the --- line\n" . $herecurr); + } else { + WARN("COMMIT_LOG_USE_LINK", + "Unknown link reference '$1:', use 'Link:' instead\n" . $herecurr); + } + } + # Check for lines starting with a # if ($in_commit_log && $line =~ /^#/) { if (WARN("COMMIT_COMMENT_SYMBOL", From ddff2ad7480eb2176fc3b8931c31d22e109e20b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20Wasserb=C3=A4ch?= Date: Fri, 20 Jan 2023 13:35:19 +0100 Subject: [PATCH 35/77] checkpatch: warn when Reported-by: is not followed by Link: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Encourage patch authors to link to reports by issuing a warning, if a Reported-by: is not accompanied by a link to the report. Those links are often extremely useful for any code archaeologist that wants to know more about the backstory of a change than the commit message provides. That includes maintainers higher up in the patch-flow hierarchy, which is why Linus asks developers to add such links [1, 2, 3]. To quote [1]: > Again, the commit has a link to the patch *submission*, which is > almost entirely useless. There's no link to the actual problem the > patch fixes. > > [...] > > Put another way: I can see that > > Reported-by: Zhangfei Gao > > in the commit, but I don't have a clue what the actual report was, and > there really isn't enough information in the commit itself, except for > a fairly handwavy "Device drivers might, for instance, still need to > flush operations.." > > I don't want to know what device drivers _might_ do. I would want to > have an actual pointer to what they do and where. Another reason why these links are wanted: the ongoing regression tracking efforts can only scale with them, as they allow the regression tracking bot 'regzbot' to automatically connect tracked reports with patches that are posted or committed to fix tracked regressions. 
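For illustration, a trailer block that satisfies the new check could look like this (reporter name and message-id invented for the example):

	Reported-by: Jane Doe <jane.doe@example.org>
	Link: https://lore.kernel.org/r/some-report-message-id@example.org/

A Reported-by: (or Reported-and-tested-by:) that ends the commit log, or that is followed by anything other than a Link: carrying a URL, now triggers the BAD_REPORTED_BY_LINK warning.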
Link: https://lore.kernel.org/all/CAHk-=wjMmSZzMJ3Xnskdg4+GGz=5p5p+GSYyFBTh0f-DgvdBWg@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/CAHk-=wgs38ZrfPvy=nOwVkVzjpM3VFU1zobP37Fwd_h9iAD5JQ@mail.gmail.com/ [2] Link: https://lore.kernel.org/all/CAHk-=wjxzafG-=J8oT30s7upn4RhBs6TX-uVFZ5rME+L5_DoJA@mail.gmail.com/ [3] Link: https://lkml.kernel.org/r/bb5dfd55ea2026303ab2296f4a6df3da7dd64006.1674217480.git.linux@leemhuis.info Signed-off-by: Kai Wasserbäch Co-developed-by: Thorsten Leemhuis Signed-off-by: Thorsten Leemhuis Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Joe Perches Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index facebdc6ee076..10c82f8f3636e 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3157,8 +3157,20 @@ sub process { "Co-developed-by and Signed-off-by: name/email do not match \n" . "$here\n" . $rawline . "\n" .$rawlines[$linenr]); } } + +# check if Reported-by: is followed by a Link: + if ($sign_off =~ /^reported(?:|-and-tested)-by:$/i) { + if (!defined $lines[$linenr]) { + WARN("BAD_REPORTED_BY_LINK", + "Reported-by: should be immediately followed by Link: to the report\n" . $herecurr . $rawlines[$linenr] . "\n"); + } elsif ($rawlines[$linenr] !~ m{^link:\s*https?://}i) { + WARN("BAD_REPORTED_BY_LINK", + "Reported-by: should be immediately followed by Link: with a URL to the report\n" . $herecurr . $rawlines[$linenr] . "\n"); + } + } } + # Check Fixes: styles is correct if (!$in_header_lines && $line =~ /^\s*fixes:?\s*(?:commit\s*)?[0-9a-f]{5,}\b/i) { From 09742ace927723cb795ddc8c62ce6f2c1ce8f08c Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Fri, 20 Jan 2023 13:35:20 +0100 Subject: [PATCH 36/77] checkpatch: use proper way to show problematic line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using an unnecessarily complicated approach to print a line that is warned about, use `$herecurr` instead, just like everywhere else in checkpatch. While at it, remove a superfluous space in one of the changed lines, too. In an unmodified line also remove a superfluous check for a space before a signed-off-by tag, to be consistent with the check at the start of the section. All three problems were found by Joe Perches during review of new code inspired by the code modified here. Link: https://lkml.kernel.org/r/a6d455c5196219b2095c2ac3645498052845f32e.1674217480.git.linux@leemhuis.info Signed-off-by: Thorsten Leemhuis Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Joe Perches Cc: Kai Wasserbäch Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 10c82f8f3636e..a2fc7d5561267 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3144,17 +3144,17 @@ sub process { if ($sign_off =~ /^co-developed-by:$/i) { if ($email eq $author) { WARN("BAD_SIGN_OFF", - "Co-developed-by: should not be used to attribute nominal patch author '$author'\n" . "$here\n" . $rawline); + "Co-developed-by: should not be used to attribute nominal patch author '$author'\n" . $herecurr); } if (!defined $lines[$linenr]) { WARN("BAD_SIGN_OFF", - "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" .
$rawline); - } elsif ($rawlines[$linenr] !~ /^\s*signed-off-by:\s*(.*)/i) { + "Co-developed-by: must be immediately followed by Signed-off-by:\n" . $herecurr); + } elsif ($rawlines[$linenr] !~ /^signed-off-by:\s*(.*)/i) { WARN("BAD_SIGN_OFF", - "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline . "\n" .$rawlines[$linenr]); + "Co-developed-by: must be immediately followed by Signed-off-by:\n" . $herecurr . $rawlines[$linenr] . "\n"); } elsif ($1 ne $email) { WARN("BAD_SIGN_OFF", - "Co-developed-by and Signed-off-by: name/email do not match \n" . "$here\n" . $rawline . "\n" .$rawlines[$linenr]); + "Co-developed-by and Signed-off-by: name/email do not match\n" . $herecurr . $rawlines[$linenr] . "\n"); } } From ffab61a390ecba4f078be8b26d569ebbbd6a6253 Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Sun, 22 Jan 2023 18:32:56 +0100 Subject: [PATCH 37/77] scripts/spelling.txt: add more spelling corrections Current Debian lintian tool flagged several (more) spelling errors, so add them so they can hopefully be prevented in the future. Link: https://lkml.kernel.org/r/20230122173256.52280-1-didi.debian@cknow.org Signed-off-by: Diederik de Haas Reviewed-by: Randy Dunlap Acked-by: Ian Rogers Cc: Colin Ian King Signed-off-by: Andrew Morton --- scripts/spelling.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 597a680d53b45..b3fa70e93dd38 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -65,6 +65,7 @@ acumulative||accumulative acumulator||accumulator acutally||actually adapater||adapter +adderted||asserted addional||additional additionaly||additionally additonal||additional @@ -122,6 +123,7 @@ alue||value ambigious||ambiguous ambigous||ambiguous amoung||among +amount of times||number of times amout||amount amplifer||amplifier amplifyer||amplifier @@ -287,6 +289,7 @@ capapbilities||capabilities caputure||capture carefuly||carefully cariage||carriage +casued||caused catagory||category cehck||check challange||challenge @@ -370,6 +373,7 @@ conbination||combination conditionaly||conditionally conditon||condition condtion||condition +condtional||conditional conected||connected conector||connector configration||configuration @@ -423,6 +427,7 @@ cound||could couter||counter coutner||counter cryptocraphic||cryptographic +cummulative||cumulative cunter||counter curently||currently cylic||cyclic @@ -627,6 +632,7 @@ existant||existent exixt||exist exlcude||exclude exlcusive||exclusive +exlusive||exclusive exmaple||example expecially||especially experies||expires @@ -840,6 +846,7 @@ integrety||integrity integrey||integrity intendet||intended intented||intended +interal||internal interanl||internal interchangable||interchangeable interferring||interfering @@ -1025,6 +1032,7 @@ negotation||negotiation nerver||never nescessary||necessary nessessary||necessary +none existent||non-existent noticable||noticeable notication||notification notications||notifications @@ -1046,6 +1054,7 @@ occured||occurred occurence||occurrence occure||occurred occuring||occurring +ocurrence||occurrence offser||offset offet||offset offlaod||offload @@ -1057,6 +1066,7 @@ omitt||omit ommiting||omitting ommitted||omitted onself||oneself +onthe||on the ony||only openning||opening operatione||operation @@ -1337,6 +1347,7 @@ sacrifying||sacrificing safly||safely safty||safety satify||satisfy +satisifed||satisfied savable||saveable scaleing||scaling scaned||scanned @@ -1618,6 +1629,7 @@ unuseful||useless unvalid||invalid 
upate||update upsupported||unsupported +upto||up to useable||usable usefule||useful usefull||useful From 85ae0de4dceb4d79aab8a8729323f003d855e3ea Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 21 Nov 2022 12:21:33 +0100 Subject: [PATCH 38/77] fs: hfs: initialize fsdata in hfs_file_truncate() When aops->write_begin() does not initialize fsdata, KMSAN may report an error passing the latter to aops->write_end(). Fix this by unconditionally initializing fsdata. Link: https://lkml.kernel.org/r/20221121112134.407362-4-glider@google.com Signed-off-by: Alexander Potapenko Suggested-by: Eric Biggers Cc: Andreas Dilger Cc: Chao Yu Cc: Jaegeuk Kim Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- fs/hfs/extent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 3f7e9bef98743..6d1878b99b305 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -486,7 +486,7 @@ void hfs_file_truncate(struct inode *inode) inode->i_size); if (inode->i_size > HFS_I(inode)->phys_size) { struct address_space *mapping = inode->i_mapping; - void *fsdata; + void *fsdata = NULL; struct page *page; /* XXX: Can use generic_cont_expand? */ From 633df7bc129cabec416c106a4ed9315839545170 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 21 Nov 2022 12:21:34 +0100 Subject: [PATCH 39/77] fs: hfsplus: initialize fsdata in hfsplus_file_truncate() When aops->write_begin() does not initialize fsdata, KMSAN may report an error passing the latter to aops->write_end(). Fix this by unconditionally initializing fsdata. Link: https://lkml.kernel.org/r/20221121112134.407362-5-glider@google.com Signed-off-by: Alexander Potapenko Suggested-by: Eric Biggers Cc: Andreas Dilger Cc: Chao Yu Cc: Jaegeuk Kim Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- fs/hfsplus/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 721f779b4ec3e..7a542f3dbe502 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -554,7 +554,7 @@ void hfsplus_file_truncate(struct inode *inode) if (inode->i_size > hip->phys_size) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; loff_t size = inode->i_size; res = hfsplus_write_begin(NULL, mapping, size, 0, From 48a585965c84c04c5ee3afd7f7a42d7d19b91148 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 2 Nov 2022 08:11:47 +0100 Subject: [PATCH 40/77] fs/ext4: use try_cmpxchg in ext4_update_bh_state Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in ext4_update_bh_state. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails. There is no need to re-read the value in the loop. No functional change intended. Link: https://lkml.kernel.org/r/20221102071147.6642-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Cc: "Theodore Ts'o" Cc: Andreas Dilger Signed-off-by: Andrew Morton --- fs/ext4/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9d9f414f99fec..dc8da7bbc240a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -786,11 +786,10 @@ static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) * once we get rid of using bh as a container for mapping information * to pass to / from get_block functions, this can go away. 
*/ + old_state = READ_ONCE(bh->b_state); do { - old_state = READ_ONCE(bh->b_state); new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; - } while (unlikely( - cmpxchg(&bh->b_state, old_state, new_state) != old_state)); + } while (unlikely(!try_cmpxchg(&bh->b_state, &old_state, new_state))); } static int _ext4_get_block(struct inode *inode, sector_t iblock, From 9ecd4c4de2c6ea4da513a3575ce7fc257c6f9201 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Jan 2023 15:16:55 -0800 Subject: [PATCH 41/77] checkpatch: improve EMBEDDED_FILENAME test Privately, Heinz Mauelshagen showed that the embedded filename test is not specific enough. > WARNING: It's generally not useful to have the filename in the file > #113: FILE: errors.c:113: > + block < registered_errors.blocks + registered_errors.count; Extend the test to use the appropriate word boundary tests. Link: https://lkml.kernel.org/r/36069dac5d07509dab1c7f1238f8cbb08db80ac6.camel@perches.com Signed-off-by: Joe Perches Reported-by: Heinz Mauelshagen Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a2fc7d5561267..bd44d12965c98 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3751,7 +3751,7 @@ sub process { } # check for embedded filenames - if ($rawline =~ /^\+.*\Q$realfile\E/) { + if ($rawline =~ /^\+.*\b\Q$realfile\E\b/) { WARN("EMBEDDED_FILENAME", "It's generally not useful to have the filename in the file\n" . $herecurr); } From de0c5e7c057523154a845efb7fe8317308900431 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 24 Jan 2023 10:16:31 -0800 Subject: [PATCH 42/77] cramfs: Kconfig: fix spelling & punctuation Fix spelling and hyphenation in cramfs Kconfig. Link: https://lkml.kernel.org/r/20230124181631.15204-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Acked-by: Nicolas Pitre Signed-off-by: Andrew Morton --- fs/cramfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig index d98cef0dbb6bb..4612c9bbf1022 100644 --- a/fs/cramfs/Kconfig +++ b/fs/cramfs/Kconfig @@ -38,7 +38,7 @@ config CRAMFS_MTD default y if !CRAMFS_BLOCKDEV help This option allows the CramFs driver to load data directly from - a linear adressed memory range (usually non volatile memory + a linear addressed memory range (usually non-volatile memory like flash) instead of going through the block device layer. This saves some memory since no intermediate buffering is necessary. From fdecf7549bef13a91bacd06e34fcf7c454ed6467 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 Jan 2023 09:51:55 +0100 Subject: [PATCH 43/77] fs: gracefully handle ->get_block not mapping bh in __mpage_writepage When filesystem's ->get_block function does not map the buffer head when called from __mpage_writepage(), __mpage_writepage() will happily go and pass bogus bdev and block number to bio allocation routines which leads to crashes sooner or later. E.g. UDF can do this because it doesn't want to allocate blocks from ->writepages callbacks. It allocates blocks on write or page fault but writeback can still spot dirty buffers without underlying blocks allocated e.g. if blocksize < pagesize, the tail page is dirtied (which means all its buffers are dirtied), and truncate extends the file so that some buffer starts to be within i_size. 
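To make the failure mode concrete, here is a minimal sketch (simplified, not the verbatim fs/mpage.c code) of the pattern the fix below establishes: ->get_block may return 0 yet leave the buffer unmapped, so the bdev/blocknr fields must not be trusted until buffer_mapped() has been checked:

	struct buffer_head map_bh = { };

	map_bh.b_size = 1 << blkbits;
	if (mpd->get_block(inode, block_in_file, &map_bh, 1))
		goto confused;	/* hard error: fall back to the fs writepage path */
	if (!buffer_mapped(&map_bh))
		goto confused;	/* fs declined to allocate (e.g. UDF): bail out too */
	/* only now are map_bh.b_bdev and map_bh.b_blocknr meaningful */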
Link: https://lkml.kernel.org/r/20230126085155.26395-1-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- fs/mpage.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/mpage.c b/fs/mpage.c index 0f8ae954a5790..ce53179428db4 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -532,6 +532,8 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, map_bh.b_size = 1 << blkbits; if (mpd->get_block(inode, block_in_file, &map_bh, 1)) goto confused; + if (!buffer_mapped(&map_bh)) + goto confused; if (buffer_new(&map_bh)) clean_bdev_bh_alias(&map_bh); if (buffer_boundary(&map_bh)) { From e57b23ca6e4161d32fa21e5957c6d7908fb0a0cd Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 26 Jan 2023 16:22:05 +0100 Subject: [PATCH 44/77] scripts/spelling.txt: add "exsits" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: exsits||exists Link: https://lkml.kernel.org/r/20230126152205.959277-1-luca.ceresoli@bootlin.com Signed-off-by: Luca Ceresoli Signed-off-by: Andrew Morton --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 +- net/openvswitch/flow_table.c | 2 +- scripts/spelling.txt | 1 + sound/soc/fsl/fsl-asoc-card.c | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 620ae5b2d80dc..6b76037653839 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -446,7 +446,7 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, * @is_leading: indicate if this is the session leading connection (MCS) * * Return: zero on success, $error if iscsi_conn_bind fails and - * -EINVAL in case end-point doesn't exsits anymore or iser connection + * -EINVAL in case end-point doesn't exists anymore or iser connection * state is not UP (teardown already started). */ static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 0a0e4c283f02e..cfac54cbafdf9 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -1012,7 +1012,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, mask = flow_mask_find(tbl, new); if (!mask) { - /* Allocate a new mask if none exsits. */ + /* Allocate a new mask if none exists. 
*/ mask = mask_alloc(); if (!mask) return -ENOMEM; diff --git a/scripts/spelling.txt b/scripts/spelling.txt index b3fa70e93dd38..f8bd6178d17b0 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -630,6 +630,7 @@ exeuction||execution existance||existence existant||existent exixt||exist +exsits||exists exlcude||exclude exlcusive||exclusive exlusive||exclusive diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 8d14b5593658d..2f25358196eee 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -811,7 +811,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) priv->card.num_links = 1; if (asrc_pdev) { - /* DPCM DAI Links only if ASRC exsits */ + /* DPCM DAI Links only if ASRC exists */ priv->dai_link[1].cpus->of_node = asrc_np; priv->dai_link[1].platforms->of_node = asrc_np; priv->dai_link[2].codecs->dai_name = codec_dai_name; From d26ef95febb1ac540d425e16c5dbbee64d32a2e9 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Jan 2023 01:41:14 +0900 Subject: [PATCH 45/77] nilfs2: replace WARN_ONs for invalid DAT metadata block requests If DAT metadata file block access fails due to corruption of the DAT file or abnormal virtual block numbers held by b-trees or inodes, a kernel warning is generated. This replaces the WARN_ONs by error output, so that a kernel, booted with panic_on_warn, does not panic. This patch also replaces the detected return code -ENOENT with another internal code -EINVAL to notify the bmap layer of metadata corruption. When the bmap layer sees -EINVAL, it handles the abnormal situation with nilfs_bmap_convert_error() and finally returns code -EIO as it should. Link: https://lkml.kernel.org/r/0000000000005cc3d205ea23ddcf@google.com Link: https://lkml.kernel.org/r/20230126164114.6911-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/dat.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 9930fa901039f..1e7f653c1df7e 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { - return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, - create, &req->pr_entry_bh); + int ret; + + ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr, + create, &req->pr_entry_bh); + if (unlikely(ret == -ENOENT)) { + nilfs_err(dat->i_sb, + "DAT doesn't have a block to manage vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + /* + * Return internal code -EINVAL to notify bmap layer of + * metadata corruption. 
+ */ + ret = -EINVAL; + } + return ret; } static void nilfs_dat_commit_entry(struct inode *dat, @@ -123,11 +136,7 @@ static void nilfs_dat_commit_free(struct inode *dat, int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) { - int ret; - - ret = nilfs_dat_prepare_entry(dat, req, 0); - WARN_ON(ret == -ENOENT); - return ret; + return nilfs_dat_prepare_entry(dat, req, 0); } void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, @@ -154,10 +163,8 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) int ret; ret = nilfs_dat_prepare_entry(dat, req, 0); - if (ret < 0) { - WARN_ON(ret == -ENOENT); + if (ret < 0) return ret; - } kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, From 34ecea99b5b554d5f0fbb69d83262da85d9460dc Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 26 Jan 2023 14:14:21 +0100 Subject: [PATCH 46/77] lib/zlib: adjust offset calculation for dfltcc_state Patch series "lib/zlib: Set of s390 DFLTCC related patches for kernel zlib". Patches 1-7 represent a set of s390 zlib hardware support (DFLTCC) related fixes and enhancements relevant to kernel zlib, integrated from the zlib-ng repo (https://github.com/zlib-ng/zlib-ng). Since the official zlib repository never got DFLTCC support code merged, all the patches have been picked from the zlib-ng fork (zlib data compression library for the next generation systems). This repo contains new optimizations and fixes that have not been merged into the official zlib repository, and it falls under the same zlib License. All of the original patches from zlib-ng were authored by Ilya Leoshkevich . Coding style has been preserved for future maintainability. Patches 1-2 should have no effect on the kernel zlib but make the code closer to zlib-ng for future maintainability. Only Patch 3 touches common zlib_deflate code; the other patches are relevant to the s390 tree only. Patch 8 is separate and intends to resolve an issue with the kernel PPP driver, which can use kernel zlib for packet compression. Without this patch, PPP decompression can fail due to an error code returned by the hardware (dfltcc_inflate), and PPP then disables zlib compression for further packets.
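As background for the series, the core layout trick can be sketched as follows (simplified; the real macros live in lib/zlib_dfltcc/dfltcc.h): the DFLTCC state sits in the same allocation directly behind the software zlib state, so locating it is pure pointer arithmetic, and the offset must be aligned -- which is what the first patch adjusts:

	#define ALIGN_UP(p, size) \
		(__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))

	/* resides right after inflate_state or deflate_state */
	static struct dfltcc_state *get_dfltcc_state(deflate_state *state)
	{
		return (struct dfltcc_state *)
			((char *)state + ALIGN_UP(sizeof(*state), 8));
	}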
This patch (of 8): This commit is based on: https://github.com/zlib-ng/zlib-ng/commit/d8b67f5 Link: https://lkml.kernel.org/r/20230126131428.1222214-1-zaslonko@linux.ibm.com Link: https://lkml.kernel.org/r/20230126131428.1222214-2-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Acked-by: Ilya Leoshkevich Cc: Heiko Carstens (s390) Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/zlib_dfltcc/dfltcc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/zlib_dfltcc/dfltcc.h b/lib/zlib_dfltcc/dfltcc.h index 2a2fac1d050a2..1f63094366e93 100644 --- a/lib/zlib_dfltcc/dfltcc.h +++ b/lib/zlib_dfltcc/dfltcc.h @@ -100,8 +100,9 @@ struct dfltcc_state { char msg[64]; /* Buffer for strm->msg */ }; +#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) /* Resides right after inflate_state or deflate_state */ -#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((state) + 1)) +#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((char *)(state) + ALIGN_UP(sizeof(*state), 8))) /* External functions */ int dfltcc_can_deflate(z_streamp strm); From d0a2b6816be297977a7ac8f8dec5d64333e3be5a Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 26 Jan 2023 14:14:22 +0100 Subject: [PATCH 47/77] lib/zlib: implement switching between DFLTCC and software This commit is based on: https://github.com/zlib-ng/zlib-ng/commit/fc04275 Link: https://lkml.kernel.org/r/20230126131428.1222214-3-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Acked-by: Ilya Leoshkevich Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/zlib_dfltcc/dfltcc_deflate.c | 38 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c index 6c946e8532eec..dc85ec8901636 100644 --- a/lib/zlib_dfltcc/dfltcc_deflate.c +++ b/lib/zlib_dfltcc/dfltcc_deflate.c @@ -112,30 +112,36 @@ int dfltcc_deflate( int soft_bcc; int no_flush; - if (!dfltcc_can_deflate(strm)) + if (!dfltcc_can_deflate(strm)) { + /* Clear history. */ + if (flush == Z_FULL_FLUSH) + param->hl = 0; return 0; + } again: masked_avail_in = 0; soft_bcc = 0; no_flush = flush == Z_NO_FLUSH; - /* Trailing empty block. Switch to software, except when Continuation Flag - * is set, which means that DFLTCC has buffered some output in the - * parameter block and needs to be called again in order to flush it. + /* No input data. Return, except when Continuation Flag is set, which means + * that DFLTCC has buffered some output in the parameter block and needs to + * be called again in order to flush it. */ - if (flush == Z_FINISH && strm->avail_in == 0 && !param->cf) { - if (param->bcf) { - /* A block is still open, and the hardware does not support closing - * blocks without adding data. Thus, close it manually. - */ + if (strm->avail_in == 0 && !param->cf) { + /* A block is still open, and the hardware does not support closing + * blocks without adding data. Thus, close it manually. + */ + if (!no_flush && param->bcf) { send_eobs(strm, param); param->bcf = 0; } - return 0; - } - - if (strm->avail_in == 0 && !param->cf) { + /* Let one of deflate_* functions write a trailing empty block. */ + if (flush == Z_FINISH) + return 0; + /* Clear history. */ + if (flush == Z_FULL_FLUSH) + param->hl = 0; *result = need_more; return 1; } @@ -189,7 +195,7 @@ int dfltcc_deflate( param->cvt = CVT_ADLER32; if (!no_flush) /* We need to close a block. 
Always do this in software - when there is - * no input data, the hardware will not nohor BCC. */ + * no input data, the hardware will not honor BCC. */ soft_bcc = 1; if (flush == Z_FINISH && !param->bcf) /* We are about to open a BFINAL block, set Block Header Final bit @@ -204,8 +210,8 @@ int dfltcc_deflate( param->sbb = (unsigned int)state->bi_valid; if (param->sbb > 0) *strm->next_out = (Byte)state->bi_buf; - if (param->hl) - param->nt = 0; /* Honor history */ + /* Honor history and check value */ + param->nt = 0; param->cv = strm->adler; /* When opening a block, choose a Huffman-Table Type */ From 5320b810621d9122ef0d143e2db763ecec249f3f Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 26 Jan 2023 14:14:23 +0100 Subject: [PATCH 48/77] lib/zlib: fix DFLTCC not flushing EOBS when creating raw streams This commit is based on: https://github.com/zlib-ng/zlib-ng/commit/ca99a88 Link: https://lkml.kernel.org/r/20230126131428.1222214-4-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Acked-by: Ilya Leoshkevich Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/zlib_deflate/deflate.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index 8a878d0d892ce..f30d6b8a69b29 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -451,17 +451,24 @@ int zlib_deflate( Assert(strm->avail_out > 0, "bug2"); if (flush != Z_FINISH) return Z_OK; - if (s->noheader) return Z_STREAM_END; - /* Write the zlib trailer (adler32) */ - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); + if (!s->noheader) { + /* Write zlib trailer (adler32) */ + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } flush_pending(strm); /* If avail_out is zero, the application will call deflate again * to flush the rest. */ - s->noheader = -1; /* write the trailer only once! */ - return s->pending != 0 ? Z_OK : Z_STREAM_END; + if (!s->noheader) { + s->noheader = -1; /* write the trailer only once! */ + } + if (s->pending == 0) { + Assert(s->bi_valid == 0, "bi_buf not flushed"); + return Z_STREAM_END; + } + return Z_OK; } /* ========================================================================= */ From cbde1abf0271003880e83fe27f3297b764316ec9 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 26 Jan 2023 14:14:24 +0100 Subject: [PATCH 49/77] lib/zlib: fix DFLTCC ignoring flush modes when avail_in == 0 This commit is based on: https://github.com/zlib-ng/zlib-ng/commit/40acb3f Link: https://lkml.kernel.org/r/20230126131428.1222214-5-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Acked-by: Ilya Leoshkevich Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/zlib_dfltcc/dfltcc_deflate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c index dc85ec8901636..211d344710d5d 100644 --- a/lib/zlib_dfltcc/dfltcc_deflate.c +++ b/lib/zlib_dfltcc/dfltcc_deflate.c @@ -142,7 +142,8 @@ int dfltcc_deflate( /* Clear history. */ if (flush == Z_FULL_FLUSH) param->hl = 0; - *result = need_more; + /* Trigger block post-processing if necessary. */ + *result = no_flush ?
need_more : block_done; return 1; } From 345c2bc3bb29b6b98c49688a5821957920433c4a Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 26 Jan 2023 14:14:25 +0100 Subject: [PATCH 50/77] lib/zlib: DFLTCC not writing header bits when avail_out == 0 This commit is based on: https://github.com/zlib-ng/zlib-ng/commit/ce409c6 Link: https://lkml.kernel.org/r/20230126131428.1222214-6-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Acked-by: Ilya Leoshkevich Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/zlib_dfltcc/dfltcc_deflate.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c index 211d344710d5d..d4c92f99808e3 100644 --- a/lib/zlib_dfltcc/dfltcc_deflate.c +++ b/lib/zlib_dfltcc/dfltcc_deflate.c @@ -170,13 +170,18 @@ int dfltcc_deflate( param->bcf = 0; dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size; - if (strm->avail_out == 0) { - *result = need_more; - return 1; - } } } + /* No space for compressed data. If we proceed, dfltcc_cmpr() will return + * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still + * set BCF=1, which is wrong. Avoid complications and return early. + */ + if (strm->avail_out == 0) { + *result = need_more; + return 1; + } + /* The caller gave us too much data. Pass only one block worth of * uncompressed data to DFLTCC and mask the rest, so that on the next * iteration we start a new block. From e6a090c1aea2be2642c0999a992d452c7115524f Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 26 Jan 2023 14:14:26 +0100 Subject: [PATCH 51/77] lib/zlib: Split deflate and inflate states for DFLTCC Currently deflate and inflate both use a common state struct. There are several variables in this struct that we don't need for inflate, and more may be coming in the future. Therefore split them in two separate structs. Apart from that, introduce separate headers for dfltcc_deflate and dfltcc_inflate. 
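The split relies on a common C layout idiom (a sketch mirroring the hunks below): the shared struct is embedded as the first member of the deflate-specific one, so the address returned by GET_DFLTCC_STATE() is valid for both views, and shared helpers simply take the embedded part:

	#define GET_DFLTCC_DEFLATE_STATE(state) \
		((struct dfltcc_deflate_state *)GET_DFLTCC_STATE(state))

	/* the deflate-only reset delegates the common fields: */
	dfltcc_reset_state(&dfltcc_state->common);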
This commit is based on: https://github.com/zlib-ng/zlib-ng/commit/c592b1b Link: https://lkml.kernel.org/r/20230126131428.1222214-7-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Acked-by: Ilya Leoshkevich Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/zlib_deflate/deflate.c | 4 +-- lib/zlib_dfltcc/dfltcc.c | 25 ++------------- lib/zlib_dfltcc/dfltcc.h | 52 ++++++-------------------------- lib/zlib_dfltcc/dfltcc_deflate.c | 35 ++++++++++++++++----- lib/zlib_dfltcc/dfltcc_deflate.h | 21 +++++++++++++ lib/zlib_dfltcc/dfltcc_inflate.c | 10 +++++- lib/zlib_dfltcc/dfltcc_inflate.h | 37 +++++++++++++++++++++++ lib/zlib_inflate/inflate.c | 2 +- 8 files changed, 110 insertions(+), 76 deletions(-) create mode 100644 lib/zlib_dfltcc/dfltcc_deflate.h create mode 100644 lib/zlib_dfltcc/dfltcc_inflate.h diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c index f30d6b8a69b29..3a1d8d34182e4 100644 --- a/lib/zlib_deflate/deflate.c +++ b/lib/zlib_deflate/deflate.c @@ -54,7 +54,7 @@ /* architecture-specific bits */ #ifdef CONFIG_ZLIB_DFLTCC -# include "../zlib_dfltcc/dfltcc.h" +# include "../zlib_dfltcc/dfltcc_deflate.h" #else #define DEFLATE_RESET_HOOK(strm) do {} while (0) #define DEFLATE_HOOK(strm, flush, bstate) 0 @@ -106,7 +106,7 @@ typedef struct deflate_workspace { deflate_state deflate_memory; #ifdef CONFIG_ZLIB_DFLTCC /* State memory for s390 hardware deflate */ - struct dfltcc_state dfltcc_memory; + struct dfltcc_deflate_state dfltcc_memory; #endif Byte *window_memory; Pos *prev_memory; diff --git a/lib/zlib_dfltcc/dfltcc.c b/lib/zlib_dfltcc/dfltcc.c index 782f76e9d4dab..ac6ac9739f5b8 100644 --- a/lib/zlib_dfltcc/dfltcc.c +++ b/lib/zlib_dfltcc/dfltcc.c @@ -23,37 +23,18 @@ char *oesc_msg( } } -void dfltcc_reset( - z_streamp strm, - uInt size -) -{ - struct dfltcc_state *dfltcc_state = - (struct dfltcc_state *)((char *)strm->state + size); - struct dfltcc_qaf_param *param = - (struct dfltcc_qaf_param *)&dfltcc_state->param; - +void dfltcc_reset_state(struct dfltcc_state *dfltcc_state) { /* Initialize available functions */ if (is_dfltcc_enabled()) { - dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); - memmove(&dfltcc_state->af, param, sizeof(dfltcc_state->af)); + dfltcc(DFLTCC_QAF, &dfltcc_state->param, NULL, NULL, NULL, NULL, NULL); + memmove(&dfltcc_state->af, &dfltcc_state->param, sizeof(dfltcc_state->af)); } else memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); /* Initialize parameter block */ memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); dfltcc_state->param.nt = 1; - - /* Initialize tuning parameters */ - if (zlib_dfltcc_support == ZLIB_DFLTCC_FULL_DEBUG) - dfltcc_state->level_mask = DFLTCC_LEVEL_MASK_DEBUG; - else - dfltcc_state->level_mask = DFLTCC_LEVEL_MASK; - dfltcc_state->block_size = DFLTCC_BLOCK_SIZE; - dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE; - dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; dfltcc_state->param.ribm = DFLTCC_RIBM; } -EXPORT_SYMBOL(dfltcc_reset); MODULE_LICENSE("GPL"); diff --git a/lib/zlib_dfltcc/dfltcc.h b/lib/zlib_dfltcc/dfltcc.h index 1f63094366e93..b96232bdd44d2 100644 --- a/lib/zlib_dfltcc/dfltcc.h +++ b/lib/zlib_dfltcc/dfltcc.h @@ -93,64 +93,32 @@ static_assert(sizeof(struct dfltcc_param_v0) == 1536); struct dfltcc_state { struct dfltcc_param_v0 param; /* Parameter block */ struct dfltcc_qaf_param af; /* Available functions */ + char msg[64]; /* Buffer for strm->msg */ +}; + +/* + * Extension of inflate_state and deflate_state for DFLTCC. 
+ */ +struct dfltcc_deflate_state { + struct dfltcc_state common; /* Parameter block */ uLong level_mask; /* Levels on which to use DFLTCC */ uLong block_size; /* New block each X bytes */ uLong block_threshold; /* New block after total_in > X */ uLong dht_threshold; /* New block only if avail_in >= X */ - char msg[64]; /* Buffer for strm->msg */ }; #define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) /* Resides right after inflate_state or deflate_state */ #define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((char *)(state) + ALIGN_UP(sizeof(*state), 8))) -/* External functions */ -int dfltcc_can_deflate(z_streamp strm); -int dfltcc_deflate(z_streamp strm, - int flush, - block_state *result); -void dfltcc_reset(z_streamp strm, uInt size); -int dfltcc_can_inflate(z_streamp strm); -typedef enum { - DFLTCC_INFLATE_CONTINUE, - DFLTCC_INFLATE_BREAK, - DFLTCC_INFLATE_SOFTWARE, -} dfltcc_inflate_action; -dfltcc_inflate_action dfltcc_inflate(z_streamp strm, - int flush, int *ret); +void dfltcc_reset_state(struct dfltcc_state *dfltcc_state); + static inline int is_dfltcc_enabled(void) { return (zlib_dfltcc_support != ZLIB_DFLTCC_DISABLED && test_facility(DFLTCC_FACILITY)); } -#define DEFLATE_RESET_HOOK(strm) \ - dfltcc_reset((strm), sizeof(deflate_state)) - -#define DEFLATE_HOOK dfltcc_deflate - -#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) - #define DEFLATE_DFLTCC_ENABLED() is_dfltcc_enabled() -#define INFLATE_RESET_HOOK(strm) \ - dfltcc_reset((strm), sizeof(struct inflate_state)) - -#define INFLATE_TYPEDO_HOOK(strm, flush) \ - if (dfltcc_can_inflate((strm))) { \ - dfltcc_inflate_action action; \ -\ - RESTORE(); \ - action = dfltcc_inflate((strm), (flush), &ret); \ - LOAD(); \ - if (action == DFLTCC_INFLATE_CONTINUE) \ - break; \ - else if (action == DFLTCC_INFLATE_BREAK) \ - goto inf_leave; \ - } - -#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm))) - -#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm))) - #endif /* DFLTCC_H */ diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c index d4c92f99808e3..80924f067c248 100644 --- a/lib/zlib_dfltcc/dfltcc_deflate.c +++ b/lib/zlib_dfltcc/dfltcc_deflate.c @@ -2,11 +2,13 @@ #include "../zlib_deflate/defutil.h" #include "dfltcc_util.h" -#include "dfltcc.h" +#include "dfltcc_deflate.h" #include #include #include +#define GET_DFLTCC_DEFLATE_STATE(state) ((struct dfltcc_deflate_state *)GET_DFLTCC_STATE(state)) + /* * Compress. 
*/ @@ -15,7 +17,7 @@ int dfltcc_can_deflate( ) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); /* Check for kernel dfltcc command line parameter */ if (zlib_dfltcc_support == ZLIB_DFLTCC_DISABLED || @@ -28,15 +30,32 @@ int dfltcc_can_deflate( return 0; /* Unsupported hardware */ - if (!is_bit_set(dfltcc_state->af.fns, DFLTCC_GDHT) || - !is_bit_set(dfltcc_state->af.fns, DFLTCC_CMPR) || - !is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0)) + if (!is_bit_set(dfltcc_state->common.af.fns, DFLTCC_GDHT) || + !is_bit_set(dfltcc_state->common.af.fns, DFLTCC_CMPR) || + !is_bit_set(dfltcc_state->common.af.fmts, DFLTCC_FMT0)) return 0; return 1; } EXPORT_SYMBOL(dfltcc_can_deflate); +void dfltcc_reset_deflate_state(z_streamp strm) { + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); + + dfltcc_reset_state(&dfltcc_state->common); + + /* Initialize tuning parameters */ + if (zlib_dfltcc_support == ZLIB_DFLTCC_FULL_DEBUG) + dfltcc_state->level_mask = DFLTCC_LEVEL_MASK_DEBUG; + else + dfltcc_state->level_mask = DFLTCC_LEVEL_MASK; + dfltcc_state->block_size = DFLTCC_BLOCK_SIZE; + dfltcc_state->block_threshold = DFLTCC_FIRST_FHT_BLOCK_SIZE; + dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; +} +EXPORT_SYMBOL(dfltcc_reset_deflate_state); + static void dfltcc_gdht( z_streamp strm ) @@ -104,8 +123,8 @@ int dfltcc_deflate( ) { deflate_state *state = (deflate_state *)strm->state; - struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); - struct dfltcc_param_v0 *param = &dfltcc_state->param; + struct dfltcc_deflate_state *dfltcc_state = GET_DFLTCC_DEFLATE_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->common.param; uInt masked_avail_in; dfltcc_cc cc; int need_empty_block; @@ -244,7 +263,7 @@ int dfltcc_deflate( } while (cc == DFLTCC_CC_AGAIN); /* Translate parameter block to stream */ - strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); + strm->msg = oesc_msg(dfltcc_state->common.msg, param->oesc); state->bi_valid = param->sbb; if (state->bi_valid == 0) state->bi_buf = 0; /* Avoid accessing next_out */ diff --git a/lib/zlib_dfltcc/dfltcc_deflate.h b/lib/zlib_dfltcc/dfltcc_deflate.h new file mode 100644 index 0000000000000..be44b43833b1c --- /dev/null +++ b/lib/zlib_dfltcc/dfltcc_deflate.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: Zlib +#ifndef DFLTCC_DEFLATE_H +#define DFLTCC_DEFLATE_H + +#include "dfltcc.h" + +/* External functions */ +int dfltcc_can_deflate(z_streamp strm); +int dfltcc_deflate(z_streamp strm, + int flush, + block_state *result); +void dfltcc_reset_deflate_state(z_streamp strm); + +#define DEFLATE_RESET_HOOK(strm) \ + dfltcc_reset_deflate_state((strm)) + +#define DEFLATE_HOOK dfltcc_deflate + +#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) + +#endif /* DFLTCC_DEFLATE_H */ diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c index fb60b5a6a1cb6..a16f416c88117 100644 --- a/lib/zlib_dfltcc/dfltcc_inflate.c +++ b/lib/zlib_dfltcc/dfltcc_inflate.c @@ -2,7 +2,7 @@ #include "../zlib_inflate/inflate.h" #include "dfltcc_util.h" -#include "dfltcc.h" +#include "dfltcc_inflate.h" #include #include #include @@ -32,6 +32,14 @@ int dfltcc_can_inflate( } EXPORT_SYMBOL(dfltcc_can_inflate); +void dfltcc_reset_inflate_state(z_streamp strm) { + struct inflate_state *state = (struct inflate_state 
*)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + + dfltcc_reset_state(dfltcc_state); +} +EXPORT_SYMBOL(dfltcc_reset_inflate_state); + static int dfltcc_was_inflate_used( z_streamp strm ) diff --git a/lib/zlib_dfltcc/dfltcc_inflate.h b/lib/zlib_dfltcc/dfltcc_inflate.h new file mode 100644 index 0000000000000..98d4bc42e5267 --- /dev/null +++ b/lib/zlib_dfltcc/dfltcc_inflate.h @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Zlib +#ifndef DFLTCC_INFLATE_H +#define DFLTCC_INFLATE_H + +#include "dfltcc.h" + +/* External functions */ +void dfltcc_reset_inflate_state(z_streamp strm); +int dfltcc_can_inflate(z_streamp strm); +typedef enum { + DFLTCC_INFLATE_CONTINUE, + DFLTCC_INFLATE_BREAK, + DFLTCC_INFLATE_SOFTWARE, +} dfltcc_inflate_action; +dfltcc_inflate_action dfltcc_inflate(z_streamp strm, + int flush, int *ret); +#define INFLATE_RESET_HOOK(strm) \ + dfltcc_reset_inflate_state((strm)) + +#define INFLATE_TYPEDO_HOOK(strm, flush) \ + if (dfltcc_can_inflate((strm))) { \ + dfltcc_inflate_action action; \ +\ + RESTORE(); \ + action = dfltcc_inflate((strm), (flush), &ret); \ + LOAD(); \ + if (action == DFLTCC_INFLATE_CONTINUE) \ + break; \ + else if (action == DFLTCC_INFLATE_BREAK) \ + goto inf_leave; \ + } + +#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm))) + +#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm))) + +#endif /* DFLTCC_DEFLATE_H */ diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c index ee39b5eb71f7b..d1efad69f02b3 100644 --- a/lib/zlib_inflate/inflate.c +++ b/lib/zlib_inflate/inflate.c @@ -17,7 +17,7 @@ /* architecture-specific bits */ #ifdef CONFIG_ZLIB_DFLTCC -# include "../zlib_dfltcc/dfltcc.h" +# include "../zlib_dfltcc/dfltcc_inflate.h" #else #define INFLATE_RESET_HOOK(strm) do {} while (0) #define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) From b2f2197ed39d9534d629d0969cb2c2d77e2758cb Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 26 Jan 2023 14:14:27 +0100 Subject: [PATCH 52/77] lib/zlib: DFLTCC support inflate with small window There is no hardware control for DFLTCC window size. After this change, software and hardware window formats no longer match: the software will use wbits and wsize, and the hardware will use HB_BITS and HB_SIZE. Since neither dictionary manipulation nor internal allocation functions are relevant to kernel zlib and zlib_inflate_workspacesize() always use MAX_WBITS for window size calculation, only dfltcc_can_inflate() and dfltcc_inflate() functions are affected by this patch. 
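For context, kernel callers already size the inflate workspace for the largest possible window regardless of the stream's wbits, roughly like this (a hedged sketch of typical usage, not code from this patch):

	strm->workspace = kvmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
	if (!strm->workspace)
		return -ENOMEM;
	if (zlib_inflateInit2(strm, -MAX_WBITS) != Z_OK)	/* raw deflate stream */
		return -EINVAL;

which is why dropping the wbits restriction only touches dfltcc_can_inflate() and dfltcc_inflate().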
This commit is based on: https://github.com/zlib-ng/zlib-ng/commit/3eab317 Link: https://lkml.kernel.org/r/20230126131428.1222214-8-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Acked-by: Ilya Leoshkevich Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/zlib_dfltcc/dfltcc_inflate.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c index a16f416c88117..5786030c6dc20 100644 --- a/lib/zlib_dfltcc/dfltcc_inflate.c +++ b/lib/zlib_dfltcc/dfltcc_inflate.c @@ -22,10 +22,6 @@ int dfltcc_can_inflate( zlib_dfltcc_support == ZLIB_DFLTCC_DEFLATE_ONLY) return 0; - /* Unsupported compression settings */ - if (state->wbits != HB_BITS) - return 0; - /* Unsupported hardware */ return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); @@ -129,8 +125,6 @@ dfltcc_inflate_action dfltcc_inflate( /* Translate stream to parameter block */ param->cvt = CVT_ADLER32; param->sbb = state->bits; - param->hl = state->whave; /* Software and hardware history formats match */ - param->ho = (state->write - state->whave) & ((1 << HB_BITS) - 1); if (param->hl) param->nt = 0; /* Honor history for the first block */ param->cv = state->check; @@ -144,8 +138,6 @@ dfltcc_inflate_action dfltcc_inflate( strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); state->last = cc == DFLTCC_CC_OK; state->bits = param->sbb; - state->whave = param->hl; - state->write = (param->ho + param->hl) & ((1 << HB_BITS) - 1); state->check = param->cv; if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { /* Report an error if stream is corrupted */ From b0016c7eeeb86107688687f4ffad9065f9b4f0f7 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 26 Jan 2023 14:14:28 +0100 Subject: [PATCH 53/77] lib/zlib: DFLTCC always switch to software inflate for Z_PACKET_FLUSH option Since hardware inflate does not support Z_PACKET_FLUSH option (used exclusively by kernel PPP driver), always switch to software like we already do for Z_BLOCK flush option. Without this patch, PPP might get Z_DATA_ERROR return code from zlib_inflate() and disable zlib compression for the packets. 
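For illustration, the PPP decompressor's calling pattern looks roughly like this (a simplified sketch, not verbatim drivers/net/ppp code):

	strm->next_in = ibuf;
	strm->avail_in = ilen;
	strm->next_out = obuf;
	strm->avail_out = olen;
	if (zlib_inflate(strm, Z_PACKET_FLUSH) != Z_OK)
		return DECOMP_ERROR;	/* before this patch, DFLTCC could land here */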
Link: https://lkml.kernel.org/r/20230126131428.1222214-9-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Acked-by: Ilya Leoshkevich Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- lib/zlib_dfltcc/dfltcc_inflate.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c index 5786030c6dc20..437cd34c84906 100644 --- a/lib/zlib_dfltcc/dfltcc_inflate.c +++ b/lib/zlib_dfltcc/dfltcc_inflate.c @@ -95,8 +95,10 @@ dfltcc_inflate_action dfltcc_inflate( struct dfltcc_param_v0 *param = &dfltcc_state->param; dfltcc_cc cc; - if (flush == Z_BLOCK) { - /* DFLTCC does not support stopping on block boundaries */ + if (flush == Z_BLOCK || flush == Z_PACKET_FLUSH) { + /* DFLTCC does not support stopping on block boundaries (Z_BLOCK flush option) + * as well as the use of Z_PACKET_FLUSH option (used exclusively by PPP driver) + */ if (dfltcc_inflate_disable(strm)) { *ret = Z_STREAM_ERROR; return DFLTCC_INFLATE_BREAK; From a34c4098901dea4fafc115e21f9f38cec3935b75 Mon Sep 17 00:00:00 2001 From: Hyeonggon Yoo <42.hyeyoo@gmail.com> Date: Sat, 21 Jan 2023 12:39:42 +0900 Subject: [PATCH 54/77] lib/Kconfig.debug: do not enable DEBUG_PREEMPT by default In workloads where this_cpu operations are frequently performed, enabling DEBUG_PREEMPT may result in a significant increase in runtime overhead due to frequent invocation of the __this_cpu_preempt_check() function. This can be demonstrated through benchmarks such as hackbench where this configuration results in a 10% reduction in performance, primarily due to the added overhead within the memcg charging path. Therefore, do not enable DEBUG_PREEMPT by default and make users aware of its potential impact on performance in some workloads. hackbench-process-sockets debug_preempt no_debug_preempt Amean 1 0.4743 ( 0.00%) 0.4295 * 9.45%* Amean 4 1.4191 ( 0.00%) 1.2650 * 10.86%* Amean 7 2.2677 ( 0.00%) 2.0094 * 11.39%* Amean 12 3.6821 ( 0.00%) 3.2115 * 12.78%* Amean 21 6.6752 ( 0.00%) 5.7956 * 13.18%* Amean 30 9.6646 ( 0.00%) 8.5197 * 11.85%* Amean 48 15.3363 ( 0.00%) 13.5559 * 11.61%* Amean 79 24.8603 ( 0.00%) 22.0597 * 11.27%* Amean 96 30.1240 ( 0.00%) 26.8073 * 11.01%* Link: https://lkml.kernel.org/r/20230121033942.350387-1-42.hyeyoo@gmail.com Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Roman Gushchin Acked-by: Mel Gorman Cc: Ben Segall Cc: Christoph Lameter Cc: Daniel Bristot de Oliveira Cc: David Rientjes Cc: Dennis Zhou Cc: Dietmar Eggemann Cc: Ingo Molnar Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Juri Lelli Cc: Matthew Wilcox Cc: Muchun Song Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Shakeel Butt Cc: Steven Rostedt (Google) Cc: Tejun Heo Cc: Valentin Schneider Cc: Vincent Guittot Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 918bda4ee120d..a97aca81db248 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1246,13 +1246,16 @@ config DEBUG_TIMEKEEPING config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT - default y help If you say Y here then the kernel will use a debug variant of the commonly used smp_processor_id() function and will print warnings if kernel code uses it in a preemption-unsafe way. Also, the kernel will detect preemption count underflows.
+ This option has potential to introduce high runtime overhead, + depending on workload as it triggers debugging routines for each + this_cpu operation. It should only be used for debugging purposes. + menu "Lock Debugging (spinlocks, mutexes, etc...)" config LOCK_DEBUGGING_SUPPORT From 19f189731111d8d91d317807d4eb4a45ba68ee06 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 28 Jan 2023 08:50:48 -0800 Subject: [PATCH 55/77] lib/zlib: remove redundant assignment of avail_in in dfltcc_gdht() cppcheck reports lib/zlib_dfltcc/dfltcc_deflate.c:65:21: warning: Redundant assignment of 'avail_in' to itself. [selfAssignment] size_t avail_in = avail_in = strm->avail_in; Only setting avail_in once is needed. Link: https://lkml.kernel.org/r/20230128165048.1245792-1-trix@redhat.com Fixes: aa5b395b69b6 ("lib/zlib: add s390 hardware support for kernel zlib_deflate") Signed-off-by: Tom Rix Acked-by: Ilya Leoshkevich Acked-by: Mikhail Zaslonko Signed-off-by: Andrew Morton --- lib/zlib_dfltcc/dfltcc_deflate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c index 80924f067c248..b732b6d9e35d6 100644 --- a/lib/zlib_dfltcc/dfltcc_deflate.c +++ b/lib/zlib_dfltcc/dfltcc_deflate.c @@ -62,7 +62,7 @@ static void dfltcc_gdht( { deflate_state *state = (deflate_state *)strm->state; struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; - size_t avail_in = avail_in = strm->avail_in; + size_t avail_in = strm->avail_in; dfltcc(DFLTCC_GDHT, param, NULL, NULL, From c777fcd532517a198e7ec423c69e3e59535759f8 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 27 Jan 2023 22:22:02 +0900 Subject: [PATCH 56/77] nilfs2: prevent WARNING in nilfs_dat_commit_end() If nilfs2 reads a corrupted disk image and its DAT metadata file contains invalid lifetime data for a virtual block number, a kernel warning can be generated by the WARN_ON check in nilfs_dat_commit_end(), and the kernel can panic if it is booted with panic_on_warn. This patch avoids the issue with a sanity check that treats it as an error. Since error return is not allowed in the execution phase of nilfs_dat_commit_end(), this inserts that sanity check in nilfs_dat_prepare_end(), which prepares for nilfs_dat_commit_end(). As the error code, -EINVAL is returned to notify the bmap layer of the metadata corruption. When the bmap layer sees this code, it handles the abnormal situation and replaces the return code with -EIO as it should.
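For context, the bmap-layer conversion mentioned above works roughly like this (a simplified sketch of nilfs_bmap_convert_error(); details may differ):

	static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
					    const char *fname, int err)
	{
		struct inode *inode = bmap->b_inode;

		if (err == -EINVAL) {
			__nilfs_error(inode->i_sb, fname,
				      "broken bmap (inode number=%lu)",
				      inode->i_ino);
			err = -EIO;
		}
		return err;
	}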
Link: https://lkml.kernel.org/r/000000000000154d2c05e9ec7df6@google.com Link: https://lkml.kernel.org/r/20230127132202.6083-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: Tested-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/dat.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 1e7f653c1df7e..9cf6ba58f5859 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -158,6 +158,7 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; + __u64 start; sector_t blocknr; void *kaddr; int ret; @@ -169,6 +170,7 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); + start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); kunmap_atomic(kaddr); @@ -179,6 +181,15 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) return ret; } } + if (unlikely(start > nilfs_mdt_cno(dat))) { + nilfs_err(dat->i_sb, + "vblocknr = %llu has abnormal lifetime: start cno (= %llu) > current cno (= %llu)", + (unsigned long long)req->pr_entry_nr, + (unsigned long long)start, + (unsigned long long)nilfs_mdt_cno(dat)); + nilfs_dat_abort_entry(dat, req); + return -EINVAL; + } return 0; } From e01b56c9c2070a6aee97e639559bd85832d65ad3 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sat, 28 Jan 2023 14:49:16 +0800 Subject: [PATCH 57/77] scripts/tags.sh: fix the Kconfig tags generation when using latest ctags The Kconfig language has already been built-in in the latest ctags, so it would error exit if we try to define it as an user-defined language via '--langdef=kconfig'. This results that there is no Kconfig tags in the final tag file. Fix this by skipping the user Kconfig definition for the latest ctags. Link: https://lkml.kernel.org/r/20230128064916.912744-1-haokexin@gmail.com Signed-off-by: Kevin Hao Reviewed-by: Cristian Ciocaltea Cc: Greg Kroah-Hartman Cc: Masahiro Yamada Cc: Nicolas Schier Cc: Paulo Miguel Almeida Cc: Vipin Sharma Signed-off-by: Andrew Morton --- scripts/tags.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index e137cf15aae9d..84775f08260f3 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -264,10 +264,12 @@ exuberant() --$CTAGS_EXTRA=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \ "${regex[@]}" - setup_regex exuberant kconfig - all_kconfigs | xargs $1 -a \ - --langdef=kconfig --language-force=kconfig "${regex[@]}" - + KCONFIG_ARGS=() + if ! $1 --list-languages | grep -iq kconfig; then + setup_regex exuberant kconfig + KCONFIG_ARGS=(--langdef=kconfig --language-force=kconfig "${regex[@]}") + fi + all_kconfigs | xargs $1 -a "${KCONFIG_ARGS[@]}" } emacs() From e4315a63f2d98d27e3df2cd1d3b47146ae602cfe Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:25 +0100 Subject: [PATCH 58/77] lib/stackdepot: fix setting next_slab_inited in init_stack_slab Patch series "lib/stackdepot: fixes and clean-ups". A set of fixes, comments, and clean-ups I came up with while reading the stack depot code. 
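For readers new to the API, typical stack depot usage looks roughly like this (a sketch; assumes the depot was set up via stack_depot_init() or stack_depot_want_early_init()):

	unsigned long entries[16], *saved;
	unsigned int nr, saved_nr;
	depot_stack_handle_t handle;

	/* save: traces are deduplicated, only a 32-bit handle is kept */
	nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	handle = stack_depot_save(entries, nr, GFP_NOWAIT);

	/* report: expand the handle back into a trace and print it */
	saved_nr = stack_depot_fetch(handle, &saved);
	stack_trace_print(saved, saved_nr, 0);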
This patch (of 18): In commit 305e519ce48e ("lib/stackdepot.c: fix global out-of-bounds in stack_slabs"), init_stack_slab was changed to only use preallocated memory for the next slab if the slab number limit is not reached. However, setting next_slab_inited was not moved together with updating stack_slabs. Set next_slab_inited only if the preallocated memory was used for the next slab. Link: https://lkml.kernel.org/r/cover.1675111415.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/9fbb4d2bf9b2676a29b120980b5ffbda8e2304ee.1675111415.git.andreyknvl@google.com Fixes: 305e519ce48e ("lib/stackdepot.c: fix global out-of-bounds in stack_slabs") Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 79e894cf84064..0eed9bbcf23e1 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -105,12 +105,13 @@ static bool init_stack_slab(void **prealloc) if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { stack_slabs[depot_index + 1] = *prealloc; *prealloc = NULL; + /* + * This smp_store_release pairs with smp_load_acquire() + * from |next_slab_inited| above and in + * stack_depot_save(). + */ + smp_store_release(&next_slab_inited, 1); } - /* - * This smp_store_release pairs with smp_load_acquire() from - * |next_slab_inited| above and in stack_depot_save(). - */ - smp_store_release(&next_slab_inited, 1); } return true; } From f934faffd408227299c9405f3f2b31dac2cadcd3 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:26 +0100 Subject: [PATCH 59/77] lib/stackdepot: put functions in logical order Put stack depot functions' declarations and definitions in a more logical order: 1. Functions that save stack traces into stack depot. 2. Functions that fetch and print stack traces. 3. stack_depot_get_extra_bits that operates on stack depot handles and does not interact with the stack depot storage. No functional changes. Link: https://lkml.kernel.org/r/632393332c364171c69b7c054b3b2233acbfa996.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 15 +- lib/stackdepot.c | 316 ++++++++++++++++++------------------- 2 files changed, 166 insertions(+), 165 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 9ca7798d7a318..1296a6eeaec0a 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -14,17 +14,13 @@ #include typedef u32 depot_stack_handle_t; + /* * Number of bits in the handle that stack depot doesn't use. Users may store * information in them. */ #define STACK_DEPOT_EXTRA_BITS 5 -depot_stack_handle_t __stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - unsigned int extra_bits, - gfp_t gfp_flags, bool can_alloc); - /* * Every user of stack depot has to call stack_depot_init() during its own init * when it's decided that it will be calling stack_depot_save() later. 
This is @@ -59,17 +55,22 @@ static inline void stack_depot_want_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + unsigned int extra_bits, + gfp_t gfp_flags, bool can_alloc); + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); -unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); +void stack_depot_print(depot_stack_handle_t stack); int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); -void stack_depot_print(depot_stack_handle_t stack); +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 0eed9bbcf23e1..23d2a68a587b0 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -79,85 +79,6 @@ static int next_slab_inited; static size_t depot_offset; static DEFINE_RAW_SPINLOCK(depot_lock); -unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) -{ - union handle_parts parts = { .handle = handle }; - - return parts.extra; -} -EXPORT_SYMBOL(stack_depot_get_extra_bits); - -static bool init_stack_slab(void **prealloc) -{ - if (!*prealloc) - return false; - /* - * This smp_load_acquire() pairs with smp_store_release() to - * |next_slab_inited| below and in depot_alloc_stack(). - */ - if (smp_load_acquire(&next_slab_inited)) - return true; - if (stack_slabs[depot_index] == NULL) { - stack_slabs[depot_index] = *prealloc; - *prealloc = NULL; - } else { - /* If this is the last depot slab, do not touch the next one. */ - if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { - stack_slabs[depot_index + 1] = *prealloc; - *prealloc = NULL; - /* - * This smp_store_release pairs with smp_load_acquire() - * from |next_slab_inited| above and in - * stack_depot_save(). - */ - smp_store_release(&next_slab_inited, 1); - } - } - return true; -} - -/* Allocation of a new stack in raw storage */ -static struct stack_record * -depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) -{ - struct stack_record *stack; - size_t required_size = struct_size(stack, entries, size); - - required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); - - if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { - if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { - WARN_ONCE(1, "Stack depot reached limit capacity"); - return NULL; - } - depot_index++; - depot_offset = 0; - /* - * smp_store_release() here pairs with smp_load_acquire() from - * |next_slab_inited| in stack_depot_save() and - * init_stack_slab(). 
- */ - if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) - smp_store_release(&next_slab_inited, 0); - } - init_stack_slab(prealloc); - if (stack_slabs[depot_index] == NULL) - return NULL; - - stack = stack_slabs[depot_index] + depot_offset; - - stack->hash = hash; - stack->size = size; - stack->handle.slabindex = depot_index; - stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; - stack->handle.valid = 1; - stack->handle.extra = 0; - memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); - depot_offset += required_size; - - return stack; -} - /* one hash table bucket entry per 16kB of memory */ #define STACK_HASH_SCALE 14 /* limited between 4k and 1M buckets */ @@ -271,6 +192,77 @@ int stack_depot_init(void) } EXPORT_SYMBOL_GPL(stack_depot_init); +static bool init_stack_slab(void **prealloc) +{ + if (!*prealloc) + return false; + /* + * This smp_load_acquire() pairs with smp_store_release() to + * |next_slab_inited| below and in depot_alloc_stack(). + */ + if (smp_load_acquire(&next_slab_inited)) + return true; + if (stack_slabs[depot_index] == NULL) { + stack_slabs[depot_index] = *prealloc; + *prealloc = NULL; + } else { + /* If this is the last depot slab, do not touch the next one. */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[depot_index + 1] = *prealloc; + *prealloc = NULL; + /* + * This smp_store_release pairs with smp_load_acquire() + * from |next_slab_inited| above and in + * stack_depot_save(). + */ + smp_store_release(&next_slab_inited, 1); + } + } + return true; +} + +/* Allocation of a new stack in raw storage */ +static struct stack_record * +depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) +{ + struct stack_record *stack; + size_t required_size = struct_size(stack, entries, size); + + required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); + + if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { + if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + WARN_ONCE(1, "Stack depot reached limit capacity"); + return NULL; + } + depot_index++; + depot_offset = 0; + /* + * smp_store_release() here pairs with smp_load_acquire() from + * |next_slab_inited| in stack_depot_save() and + * init_stack_slab(). + */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) + smp_store_release(&next_slab_inited, 0); + } + init_stack_slab(prealloc); + if (stack_slabs[depot_index] == NULL) + return NULL; + + stack = stack_slabs[depot_index] + depot_offset; + + stack->hash = hash; + stack->size = size; + stack->handle.slabindex = depot_index; + stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + stack->handle.valid = 1; + stack->handle.extra = 0; + memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); + depot_offset += required_size; + + return stack; +} + /* Calculate hash for a stack */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) { @@ -310,85 +302,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } -/** - * stack_depot_snprint - print stack entries from a depot into a buffer - * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). - * @buf: Pointer to the print buffer - * - * @size: Size of the print buffer - * - * @spaces: Number of leading spaces to print - * - * Return: Number of bytes printed. 
- */ -int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, - int spaces) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(handle, &entries); - return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, - spaces) : 0; -} -EXPORT_SYMBOL_GPL(stack_depot_snprint); - -/** - * stack_depot_print - print stack entries from a depot - * - * @stack: Stack depot handle which was returned from - * stack_depot_save(). - * - */ -void stack_depot_print(depot_stack_handle_t stack) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(stack, &entries); - if (nr_entries > 0) - stack_trace_print(entries, nr_entries, 0); -} -EXPORT_SYMBOL_GPL(stack_depot_print); - -/** - * stack_depot_fetch - Fetch stack entries from a depot - * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). - * @entries: Pointer to store the entries address - * - * Return: The number of trace entries for this depot. - */ -unsigned int stack_depot_fetch(depot_stack_handle_t handle, - unsigned long **entries) -{ - union handle_parts parts = { .handle = handle }; - void *slab; - size_t offset = parts.offset << STACK_ALLOC_ALIGN; - struct stack_record *stack; - - *entries = NULL; - if (!handle) - return 0; - - if (parts.slabindex > depot_index) { - WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", - parts.slabindex, depot_index, handle); - return 0; - } - slab = stack_slabs[parts.slabindex]; - if (!slab) - return 0; - stack = slab + offset; - - *entries = stack->entries; - return stack->size; -} -EXPORT_SYMBOL_GPL(stack_depot_fetch); - /** * __stack_depot_save - Save a stack trace from an array * @@ -534,3 +447,90 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, return __stack_depot_save(entries, nr_entries, 0, alloc_flags, true); } EXPORT_SYMBOL_GPL(stack_depot_save); + +/** + * stack_depot_fetch - Fetch stack entries from a depot + * + * @handle: Stack depot handle which was returned from + * stack_depot_save(). + * @entries: Pointer to store the entries address + * + * Return: The number of trace entries for this depot. + */ +unsigned int stack_depot_fetch(depot_stack_handle_t handle, + unsigned long **entries) +{ + union handle_parts parts = { .handle = handle }; + void *slab; + size_t offset = parts.offset << STACK_ALLOC_ALIGN; + struct stack_record *stack; + + *entries = NULL; + if (!handle) + return 0; + + if (parts.slabindex > depot_index) { + WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", + parts.slabindex, depot_index, handle); + return 0; + } + slab = stack_slabs[parts.slabindex]; + if (!slab) + return 0; + stack = slab + offset; + + *entries = stack->entries; + return stack->size; +} +EXPORT_SYMBOL_GPL(stack_depot_fetch); + +/** + * stack_depot_print - print stack entries from a depot + * + * @stack: Stack depot handle which was returned from + * stack_depot_save(). + * + */ +void stack_depot_print(depot_stack_handle_t stack) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(stack, &entries); + if (nr_entries > 0) + stack_trace_print(entries, nr_entries, 0); +} +EXPORT_SYMBOL_GPL(stack_depot_print); + +/** + * stack_depot_snprint - print stack entries from a depot into a buffer + * + * @handle: Stack depot handle which was returned from + * stack_depot_save(). 
+ * @buf: Pointer to the print buffer + * + * @size: Size of the print buffer + * + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed. + */ +int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, + int spaces) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(handle, &entries); + return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, + spaces) : 0; +} +EXPORT_SYMBOL_GPL(stack_depot_snprint); + +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) +{ + union handle_parts parts = { .handle = handle }; + + return parts.extra; +} +EXPORT_SYMBOL(stack_depot_get_extra_bits); From 73af337245330cea62c5f32ce45887c1082c9083 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:27 +0100 Subject: [PATCH 60/77] lib/stackdepot: use pr_fmt to define message format Use pr_fmt to define the format for printing stack depot messages instead of duplicating the "Stack Depot" prefix in each message. Link: https://lkml.kernel.org/r/3600069e0b0b3df602999ec8a2d4fc14fcc56a01.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 23d2a68a587b0..90c4dd48d75e2 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -19,6 +19,8 @@ * Based on code by Dmitry Chernenkov. */ +#define pr_fmt(fmt) "stackdepot: " fmt + #include #include #include @@ -98,7 +100,7 @@ static int __init is_stack_depot_disabled(char *str) ret = kstrtobool(str, &stack_depot_disable); if (!ret && stack_depot_disable) { - pr_info("Stack Depot is disabled\n"); + pr_info("disabled\n"); stack_table = NULL; } return 0; @@ -142,7 +144,7 @@ int __init stack_depot_early_init(void) 1UL << STACK_HASH_ORDER_MAX); if (!stack_table) { - pr_err("Stack Depot hash table allocation failed, disabling\n"); + pr_err("hash table allocation failed, disabling\n"); stack_depot_disable = true; return -ENOMEM; } @@ -177,11 +179,11 @@ int stack_depot_init(void) if (entries > 1UL << STACK_HASH_ORDER_MAX) entries = 1UL << STACK_HASH_ORDER_MAX; - pr_info("Stack Depot allocating hash table of %lu entries with kvcalloc\n", + pr_info("allocating hash table of %lu entries with kvcalloc\n", entries); stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); if (!stack_table) { - pr_err("Stack Depot hash table allocation failed, disabling\n"); + pr_err("hash table allocation failed, disabling\n"); stack_depot_disable = true; ret = -ENOMEM; } From c68b69ab9cd2b5fb9aefbf3fc058108414061a74 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:28 +0100 Subject: [PATCH 61/77] lib/stackdepot, mm: rename stack_depot_want_early_init Rename stack_depot_want_early_init to stack_depot_request_early_init. The old name is confusing, as it hints at returning some kind of intention of stack depot. The new name reflects that this function requests an action from stack depot instead. No functional changes. 
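As an illustration of the intended calling pattern, a minimal sketch of a hypothetical subsystem that decides from an early boot parameter that it will need stack depot (the names below are invented for illustration; page_owner and slub in the diff that follows are the real in-tree callers):

	static bool my_feature_enabled;

	static int __init early_my_feature_param(char *buf)
	{
		int ret = kstrtobool(buf, &my_feature_enabled);

		/* Runs before mm_init(), so requesting early init is still allowed. */
		if (my_feature_enabled)
			stack_depot_request_early_init();

		return ret;
	}
	early_param("my_feature", early_my_feature_param);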
Link: https://lkml.kernel.org/r/cb34925852c81be2ec6aac75766292e4e590523e.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 14 +++++++------- lib/stackdepot.c | 10 +++++----- mm/page_owner.c | 2 +- mm/slub.c | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 1296a6eeaec0a..c4e3abc16b16a 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -31,26 +31,26 @@ typedef u32 depot_stack_handle_t; * enabled as part of mm_init(), for subsystems where it's known at compile time * that stack depot will be used. * - * Another alternative is to call stack_depot_want_early_init(), when the + * Another alternative is to call stack_depot_request_early_init(), when the * decision to use stack depot is taken e.g. when evaluating kernel boot * parameters, which precedes the enablement point in mm_init(). * - * stack_depot_init() and stack_depot_want_early_init() can be called regardless - * of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print - * functions should only be called from code that makes sure CONFIG_STACKDEPOT - * is enabled. + * stack_depot_init() and stack_depot_request_early_init() can be called + * regardless of CONFIG_STACKDEPOT and are no-op when disabled. The actual + * save/fetch/print functions should only be called from code that makes sure + * CONFIG_STACKDEPOT is enabled. */ #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); -void __init stack_depot_want_early_init(void); +void __init stack_depot_request_early_init(void); /* This is supposed to be called only from mm_init() */ int __init stack_depot_early_init(void); #else static inline int stack_depot_init(void) { return 0; } -static inline void stack_depot_want_early_init(void) { } +static inline void stack_depot_request_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 90c4dd48d75e2..8743fad1485f0 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -71,7 +71,7 @@ struct stack_record { unsigned long entries[]; /* Variable-sized array of entries. */ }; -static bool __stack_depot_want_early_init __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); +static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; @@ -107,12 +107,12 @@ static int __init is_stack_depot_disabled(char *str) } early_param("stack_depot_disable", is_stack_depot_disabled); -void __init stack_depot_want_early_init(void) +void __init stack_depot_request_early_init(void) { - /* Too late to request early init now */ + /* Too late to request early init now. 
*/ WARN_ON(__stack_depot_early_init_passed); - __stack_depot_want_early_init = true; + __stack_depot_early_init_requested = true; } int __init stack_depot_early_init(void) @@ -128,7 +128,7 @@ int __init stack_depot_early_init(void) if (kasan_enabled() && !stack_hash_order) stack_hash_order = STACK_HASH_ORDER_MAX; - if (!__stack_depot_want_early_init || stack_depot_disable) + if (!__stack_depot_early_init_requested || stack_depot_disable) return 0; if (stack_hash_order) diff --git a/mm/page_owner.c b/mm/page_owner.c index 2d27f532df4c1..90a4a087e6c7e 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -48,7 +48,7 @@ static int __init early_page_owner_param(char *buf) int ret = kstrtobool(buf, &page_owner_enabled); if (page_owner_enabled) - stack_depot_want_early_init(); + stack_depot_request_early_init(); return ret; } diff --git a/mm/slub.c b/mm/slub.c index 13459c69095a2..f2c6c356bc360 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1592,7 +1592,7 @@ static int __init setup_slub_debug(char *str) } else { slab_list_specified = true; if (flags & SLAB_STORE_USER) - stack_depot_want_early_init(); + stack_depot_request_early_init(); } } @@ -1611,7 +1611,7 @@ static int __init setup_slub_debug(char *str) out: slub_debug = global_flags; if (slub_debug & SLAB_STORE_USER) - stack_depot_want_early_init(); + stack_depot_request_early_init(); if (slub_debug != 0 || slub_debug_string) static_branch_enable(&slub_debug_enabled); else From 1940450c3a3c7506752cab1b592a377a2a7c07fa Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 30 Jan 2023 17:43:08 -0800 Subject: [PATCH 62/77] lib-stackdepot-mm-rename-stack_depot_want_early_init-fix fix it for "mm: use stack_depot_early_init for kmemleak" Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Cc: Zhaoyang Huang Signed-off-by: Andrew Morton --- mm/kmemleak.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 92f670edbf518..2d4f685f77ea3 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -104,6 +104,9 @@ #include #include +/* akpm: stupid temporary hack */ +#define stack_depot_want_early_init stack_depot_request_early_init + /* * Kmemleak configuration and common defines. */ From 237df218842454cbf347b153b8986f0d96e907fc Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:29 +0100 Subject: [PATCH 63/77] lib/stackdepot: rename stack_depot_disable Rename stack_depot_disable to stack_depot_disabled to make its name look similar to the names of other stack depot flags. Also put stack_depot_disabled's definition together with the other flags. Also rename is_stack_depot_disabled to disable_stack_depot: this name looks more conventional for a function that processes a boot parameter. No functional changes. Link: https://lkml.kernel.org/r/293567627b0d59f1ae5a27ac9537c027a5ff729d.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 8743fad1485f0..6e8aef12cf890 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -71,6 +71,7 @@ struct stack_record { unsigned long entries[]; /* Variable-sized array of entries. 
*/ }; +static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; @@ -91,21 +92,20 @@ static DEFINE_RAW_SPINLOCK(depot_lock); static unsigned int stack_hash_order; static unsigned int stack_hash_mask; -static bool stack_depot_disable; static struct stack_record **stack_table; -static int __init is_stack_depot_disabled(char *str) +static int __init disable_stack_depot(char *str) { int ret; - ret = kstrtobool(str, &stack_depot_disable); - if (!ret && stack_depot_disable) { + ret = kstrtobool(str, &stack_depot_disabled); + if (!ret && stack_depot_disabled) { pr_info("disabled\n"); stack_table = NULL; } return 0; } -early_param("stack_depot_disable", is_stack_depot_disabled); +early_param("stack_depot_disable", disable_stack_depot); void __init stack_depot_request_early_init(void) { @@ -128,7 +128,7 @@ int __init stack_depot_early_init(void) if (kasan_enabled() && !stack_hash_order) stack_hash_order = STACK_HASH_ORDER_MAX; - if (!__stack_depot_early_init_requested || stack_depot_disable) + if (!__stack_depot_early_init_requested || stack_depot_disabled) return 0; if (stack_hash_order) @@ -145,7 +145,7 @@ int __init stack_depot_early_init(void) if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); - stack_depot_disable = true; + stack_depot_disabled = true; return -ENOMEM; } @@ -158,7 +158,7 @@ int stack_depot_init(void) int ret = 0; mutex_lock(&stack_depot_init_mutex); - if (!stack_depot_disable && !stack_table) { + if (!stack_depot_disabled && !stack_table) { unsigned long entries; int scale = STACK_HASH_SCALE; @@ -184,7 +184,7 @@ int stack_depot_init(void) stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); - stack_depot_disable = true; + stack_depot_disabled = true; ret = -ENOMEM; } stack_hash_mask = entries - 1; @@ -354,7 +354,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, */ nr_entries = filter_irq_stacks(entries, nr_entries); - if (unlikely(nr_entries == 0) || stack_depot_disable) + if (unlikely(nr_entries == 0) || stack_depot_disabled) goto fast_exit; hash = hash_stack(entries, nr_entries); From 27f8683f55d8a4babd890aff4c7f0eebd2eb016c Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:30 +0100 Subject: [PATCH 64/77] lib/stackdepot: annotate init and early init functions Add comments to stack_depot_early_init and stack_depot_init to explain certain parts of their implementation. Also add a pr_info message to stack_depot_early_init similar to the one in stack_depot_init. Also move the scale variable in stack_depot_init to the scope where it is being used. Link: https://lkml.kernel.org/r/be09b64fb196ffe0c19ce7afc4130efba5425df9.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 6e8aef12cf890..b06f6a5caa833 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -115,24 +115,34 @@ void __init stack_depot_request_early_init(void) __stack_depot_early_init_requested = true; } +/* Allocates a hash table via memblock. Can only be used during early boot. 
*/ int __init stack_depot_early_init(void) { unsigned long entries = 0; - /* This is supposed to be called only once, from mm_init() */ + /* This function must be called only once, from mm_init(). */ if (WARN_ON(__stack_depot_early_init_passed)) return 0; - __stack_depot_early_init_passed = true; + /* + * If KASAN is enabled, use the maximum order: KASAN is frequently used + * in fuzzing scenarios, which leads to a large number of different + * stack traces being stored in stack depot. + */ if (kasan_enabled() && !stack_hash_order) stack_hash_order = STACK_HASH_ORDER_MAX; if (!__stack_depot_early_init_requested || stack_depot_disabled) return 0; + /* + * If stack_hash_order is not set, leave entries as 0 to rely on the + * automatic calculations performed by alloc_large_system_hash. + */ if (stack_hash_order) - entries = 1UL << stack_hash_order; + entries = 1UL << stack_hash_order; + pr_info("allocating hash table via alloc_large_system_hash\n"); stack_table = alloc_large_system_hash("stackdepot", sizeof(struct stack_record *), entries, @@ -142,7 +152,6 @@ int __init stack_depot_early_init(void) &stack_hash_mask, 1UL << STACK_HASH_ORDER_MIN, 1UL << STACK_HASH_ORDER_MAX); - if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); stack_depot_disabled = true; @@ -152,6 +161,7 @@ int __init stack_depot_early_init(void) return 0; } +/* Allocates a hash table via kvmalloc. Can be used after boot. */ int stack_depot_init(void) { static DEFINE_MUTEX(stack_depot_init_mutex); @@ -160,11 +170,16 @@ int stack_depot_init(void) mutex_lock(&stack_depot_init_mutex); if (!stack_depot_disabled && !stack_table) { unsigned long entries; - int scale = STACK_HASH_SCALE; + /* + * Similarly to stack_depot_early_init, use stack_hash_order + * if assigned, and rely on automatic scaling otherwise. + */ if (stack_hash_order) { entries = 1UL << stack_hash_order; } else { + int scale = STACK_HASH_SCALE; + entries = nr_free_buffer_pages(); entries = roundup_pow_of_two(entries); @@ -179,7 +194,7 @@ int stack_depot_init(void) if (entries > 1UL << STACK_HASH_ORDER_MAX) entries = 1UL << STACK_HASH_ORDER_MAX; - pr_info("allocating hash table of %lu entries with kvcalloc\n", + pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); if (!stack_table) { From 59ec33def4b730c96479bd176dd199ff04a673f0 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:31 +0100 Subject: [PATCH 65/77] lib/stackdepot: lower the indentation in stack_depot_init stack_depot_init does most things inside an if check. Move them out and use a goto statement instead. No functional changes. 
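Reduced to a sketch (allocation details elided, names as in this file), the resulting function has the familiar guard-clause shape:

	int stack_depot_init(void)
	{
		static DEFINE_MUTEX(stack_depot_init_mutex);
		int ret = 0;

		mutex_lock(&stack_depot_init_mutex);
		if (stack_depot_disabled || stack_table)
			goto out_unlock;

		/* ... compute the number of entries and allocate the table ... */

	out_unlock:
		mutex_unlock(&stack_depot_init_mutex);
		return ret;
	}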
Link: https://lkml.kernel.org/r/eb6f0a014b8d0bfa73a8bbd358c627dc66cf51b7.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 70 +++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 33 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index b06f6a5caa833..cb098bc992863 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -165,46 +165,50 @@ int __init stack_depot_early_init(void) int stack_depot_init(void) { static DEFINE_MUTEX(stack_depot_init_mutex); + unsigned long entries; int ret = 0; mutex_lock(&stack_depot_init_mutex); - if (!stack_depot_disabled && !stack_table) { - unsigned long entries; - /* - * Similarly to stack_depot_early_init, use stack_hash_order - * if assigned, and rely on automatic scaling otherwise. - */ - if (stack_hash_order) { - entries = 1UL << stack_hash_order; - } else { - int scale = STACK_HASH_SCALE; - - entries = nr_free_buffer_pages(); - entries = roundup_pow_of_two(entries); - - if (scale > PAGE_SHIFT) - entries >>= (scale - PAGE_SHIFT); - else - entries <<= (PAGE_SHIFT - scale); - } + if (stack_depot_disabled || stack_table) + goto out_unlock; - if (entries < 1UL << STACK_HASH_ORDER_MIN) - entries = 1UL << STACK_HASH_ORDER_MIN; - if (entries > 1UL << STACK_HASH_ORDER_MAX) - entries = 1UL << STACK_HASH_ORDER_MAX; - - pr_info("allocating hash table of %lu entries via kvcalloc\n", - entries); - stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); - if (!stack_table) { - pr_err("hash table allocation failed, disabling\n"); - stack_depot_disabled = true; - ret = -ENOMEM; - } - stack_hash_mask = entries - 1; + /* + * Similarly to stack_depot_early_init, use stack_hash_order + * if assigned, and rely on automatic scaling otherwise. + */ + if (stack_hash_order) { + entries = 1UL << stack_hash_order; + } else { + int scale = STACK_HASH_SCALE; + + entries = nr_free_buffer_pages(); + entries = roundup_pow_of_two(entries); + + if (scale > PAGE_SHIFT) + entries >>= (scale - PAGE_SHIFT); + else + entries <<= (PAGE_SHIFT - scale); } + + if (entries < 1UL << STACK_HASH_ORDER_MIN) + entries = 1UL << STACK_HASH_ORDER_MIN; + if (entries > 1UL << STACK_HASH_ORDER_MAX) + entries = 1UL << STACK_HASH_ORDER_MAX; + + pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); + stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); + if (!stack_table) { + pr_err("hash table allocation failed, disabling\n"); + stack_depot_disabled = true; + ret = -ENOMEM; + goto out_unlock; + } + stack_hash_mask = entries - 1; + +out_unlock: mutex_unlock(&stack_depot_init_mutex); + return ret; } EXPORT_SYMBOL_GPL(stack_depot_init); From bc5a3d2e7bd763c6378d472539b7c9806b5048a7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:32 +0100 Subject: [PATCH 66/77] lib/stackdepot: reorder and annotate global variables Group stack depot global variables by their purpose: 1. Hash table-related variables, 2. Slab-related variables, and add comments. Also clean up comments for hash table-related constants. 
Link: https://lkml.kernel.org/r/4ed1d0828e837e15566a7cfa7688a47006e3f4b3.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index cb098bc992863..89aee133303a5 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -75,24 +75,31 @@ static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); static bool __stack_depot_early_init_passed __initdata; -static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; - -static int depot_index; -static int next_slab_inited; -static size_t depot_offset; -static DEFINE_RAW_SPINLOCK(depot_lock); - -/* one hash table bucket entry per 16kB of memory */ +/* Use one hash table bucket per 16 KB of memory. */ #define STACK_HASH_SCALE 14 -/* limited between 4k and 1M buckets */ +/* Limit the number of buckets between 4K and 1M. */ #define STACK_HASH_ORDER_MIN 12 #define STACK_HASH_ORDER_MAX 20 +/* Initial seed for jhash2. */ #define STACK_HASH_SEED 0x9747b28c +/* Hash table of pointers to stored stack traces. */ +static struct stack_record **stack_table; +/* Fixed order of the number of table buckets. Used when KASAN is enabled. */ static unsigned int stack_hash_order; +/* Hash mask for indexing the table. */ static unsigned int stack_hash_mask; -static struct stack_record **stack_table; +/* Array of memory regions that store stack traces. */ +static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; +/* Currently used slab in stack_slabs. */ +static int depot_index; +/* Offset to the unused space in the currently used slab. */ +static size_t depot_offset; +/* Lock that protects the variables above. */ +static DEFINE_RAW_SPINLOCK(depot_lock); +/* Whether the next slab is initialized. */ +static int next_slab_inited; static int __init disable_stack_depot(char *str) { From 8e6934376f7769ab7cd410fd55037846fbe66fc4 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:33 +0100 Subject: [PATCH 67/77] lib/stackdepot: rename hash table constants and variables Give more meaningful names to hash table-related constants and variables: 1. Rename STACK_HASH_SCALE to STACK_TABLE_SCALE to point out that it is related to scaling the hash table. 2. Rename STACK_HASH_ORDER_MIN/MAX to STACK_BUCKET_NUMBER_ORDER_MIN/MAX to point out that it is related to the number of hash table buckets. 3. Rename stack_hash_order to stack_bucket_number_order for the same reason as #2. No functional changes. Link: https://lkml.kernel.org/r/5456286e2c9f3cd5abf25ad2e7e60dc997c71f66.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 89aee133303a5..cddcf029e3073 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -76,17 +76,17 @@ static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_ST static bool __stack_depot_early_init_passed __initdata; /* Use one hash table bucket per 16 KB of memory. */ -#define STACK_HASH_SCALE 14 +#define STACK_TABLE_SCALE 14 /* Limit the number of buckets between 4K and 1M. 
*/ -#define STACK_HASH_ORDER_MIN 12 -#define STACK_HASH_ORDER_MAX 20 +#define STACK_BUCKET_NUMBER_ORDER_MIN 12 +#define STACK_BUCKET_NUMBER_ORDER_MAX 20 /* Initial seed for jhash2. */ #define STACK_HASH_SEED 0x9747b28c /* Hash table of pointers to stored stack traces. */ static struct stack_record **stack_table; /* Fixed order of the number of table buckets. Used when KASAN is enabled. */ -static unsigned int stack_hash_order; +static unsigned int stack_bucket_number_order; /* Hash mask for indexing the table. */ static unsigned int stack_hash_mask; @@ -137,28 +137,28 @@ int __init stack_depot_early_init(void) * in fuzzing scenarios, which leads to a large number of different * stack traces being stored in stack depot. */ - if (kasan_enabled() && !stack_hash_order) - stack_hash_order = STACK_HASH_ORDER_MAX; + if (kasan_enabled() && !stack_bucket_number_order) + stack_bucket_number_order = STACK_BUCKET_NUMBER_ORDER_MAX; if (!__stack_depot_early_init_requested || stack_depot_disabled) return 0; /* - * If stack_hash_order is not set, leave entries as 0 to rely on the - * automatic calculations performed by alloc_large_system_hash. + * If stack_bucket_number_order is not set, leave entries as 0 to rely + * on the automatic calculations performed by alloc_large_system_hash. */ - if (stack_hash_order) - entries = 1UL << stack_hash_order; + if (stack_bucket_number_order) + entries = 1UL << stack_bucket_number_order; pr_info("allocating hash table via alloc_large_system_hash\n"); stack_table = alloc_large_system_hash("stackdepot", sizeof(struct stack_record *), entries, - STACK_HASH_SCALE, + STACK_TABLE_SCALE, HASH_EARLY | HASH_ZERO, NULL, &stack_hash_mask, - 1UL << STACK_HASH_ORDER_MIN, - 1UL << STACK_HASH_ORDER_MAX); + 1UL << STACK_BUCKET_NUMBER_ORDER_MIN, + 1UL << STACK_BUCKET_NUMBER_ORDER_MAX); if (!stack_table) { pr_err("hash table allocation failed, disabling\n"); stack_depot_disabled = true; @@ -181,13 +181,13 @@ int stack_depot_init(void) goto out_unlock; /* - * Similarly to stack_depot_early_init, use stack_hash_order + * Similarly to stack_depot_early_init, use stack_bucket_number_order * if assigned, and rely on automatic scaling otherwise. */ - if (stack_hash_order) { - entries = 1UL << stack_hash_order; + if (stack_bucket_number_order) { + entries = 1UL << stack_bucket_number_order; } else { - int scale = STACK_HASH_SCALE; + int scale = STACK_TABLE_SCALE; entries = nr_free_buffer_pages(); entries = roundup_pow_of_two(entries); @@ -198,10 +198,10 @@ int stack_depot_init(void) entries <<= (PAGE_SHIFT - scale); } - if (entries < 1UL << STACK_HASH_ORDER_MIN) - entries = 1UL << STACK_HASH_ORDER_MIN; - if (entries > 1UL << STACK_HASH_ORDER_MAX) - entries = 1UL << STACK_HASH_ORDER_MAX; + if (entries < 1UL << STACK_BUCKET_NUMBER_ORDER_MIN) + entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MIN; + if (entries > 1UL << STACK_BUCKET_NUMBER_ORDER_MAX) + entries = 1UL << STACK_BUCKET_NUMBER_ORDER_MAX; pr_info("allocating hash table of %lu entries via kvcalloc\n", entries); stack_table = kvcalloc(entries, sizeof(struct stack_record *), GFP_KERNEL); From 763385b5819a98ccfdf09e3fbaddabe8fbec14e9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:34 +0100 Subject: [PATCH 68/77] lib/stackdepot: rename init_stack_slab Rename init_stack_slab to depot_init_slab to align the name with depot_alloc_stack. No functional changes. 
Link: https://lkml.kernel.org/r/b756e381a3526c6e59cb68c53ac0f172ddd22776.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index cddcf029e3073..69b9316b0d4b5 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -220,7 +220,7 @@ int stack_depot_init(void) } EXPORT_SYMBOL_GPL(stack_depot_init); -static bool init_stack_slab(void **prealloc) +static bool depot_init_slab(void **prealloc) { if (!*prealloc) return false; @@ -268,12 +268,12 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) /* * smp_store_release() here pairs with smp_load_acquire() from * |next_slab_inited| in stack_depot_save() and - * init_stack_slab(). + * depot_init_slab(). */ if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) smp_store_release(&next_slab_inited, 0); } - init_stack_slab(prealloc); + depot_init_slab(prealloc); if (stack_slabs[depot_index] == NULL) return NULL; @@ -402,7 +402,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * lock. * * The smp_load_acquire() here pairs with smp_store_release() to - * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). + * |next_slab_inited| in depot_alloc_stack() and depot_init_slab(). */ if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) { /* @@ -438,7 +438,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * We didn't need to store this stack trace, but let's keep * the preallocated memory for the future. */ - WARN_ON(!init_stack_slab(&prealloc)); + WARN_ON(!depot_init_slab(&prealloc)); } raw_spin_unlock_irqrestore(&depot_lock, flags); From 8d9815c9893d170587de67cfb26634877b1229e3 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:35 +0100 Subject: [PATCH 69/77] lib/stackdepot: rename slab variables Give better names to slab-related global variables: change "depot_" prefix to "slab_" to point out that these variables are related to stack depot slabs. Also rename the slabindex field in handle_parts to align its name with the slab_index global variable. No functional changes. Link: https://lkml.kernel.org/r/fc73ab8b1469d476363a918cbdfe28e1388c043a.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 69b9316b0d4b5..023f299bedf6c 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -56,7 +56,7 @@ union handle_parts { depot_stack_handle_t handle; struct { - u32 slabindex : STACK_ALLOC_INDEX_BITS; + u32 slab_index : STACK_ALLOC_INDEX_BITS; u32 offset : STACK_ALLOC_OFFSET_BITS; u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS; u32 extra : STACK_DEPOT_EXTRA_BITS; @@ -93,11 +93,11 @@ static unsigned int stack_hash_mask; /* Array of memory regions that store stack traces. */ static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; /* Currently used slab in stack_slabs. */ -static int depot_index; +static int slab_index; /* Offset to the unused space in the currently used slab. */ -static size_t depot_offset; +static size_t slab_offset; /* Lock that protects the variables above. 
*/ -static DEFINE_RAW_SPINLOCK(depot_lock); +static DEFINE_RAW_SPINLOCK(slab_lock); /* Whether the next slab is initialized. */ static int next_slab_inited; @@ -230,13 +230,13 @@ static bool depot_init_slab(void **prealloc) */ if (smp_load_acquire(&next_slab_inited)) return true; - if (stack_slabs[depot_index] == NULL) { - stack_slabs[depot_index] = *prealloc; + if (stack_slabs[slab_index] == NULL) { + stack_slabs[slab_index] = *prealloc; *prealloc = NULL; } else { /* If this is the last depot slab, do not touch the next one. */ - if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { - stack_slabs[depot_index + 1] = *prealloc; + if (slab_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[slab_index + 1] = *prealloc; *prealloc = NULL; /* * This smp_store_release pairs with smp_load_acquire() @@ -258,35 +258,35 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); - if (unlikely(depot_offset + required_size > STACK_ALLOC_SIZE)) { - if (unlikely(depot_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + if (unlikely(slab_offset + required_size > STACK_ALLOC_SIZE)) { + if (unlikely(slab_index + 1 >= STACK_ALLOC_MAX_SLABS)) { WARN_ONCE(1, "Stack depot reached limit capacity"); return NULL; } - depot_index++; - depot_offset = 0; + slab_index++; + slab_offset = 0; /* * smp_store_release() here pairs with smp_load_acquire() from * |next_slab_inited| in stack_depot_save() and * depot_init_slab(). */ - if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) + if (slab_index + 1 < STACK_ALLOC_MAX_SLABS) smp_store_release(&next_slab_inited, 0); } depot_init_slab(prealloc); - if (stack_slabs[depot_index] == NULL) + if (stack_slabs[slab_index] == NULL) return NULL; - stack = stack_slabs[depot_index] + depot_offset; + stack = stack_slabs[slab_index] + slab_offset; stack->hash = hash; stack->size = size; - stack->handle.slabindex = depot_index; - stack->handle.offset = depot_offset >> STACK_ALLOC_ALIGN; + stack->handle.slab_index = slab_index; + stack->handle.offset = slab_offset >> STACK_ALLOC_ALIGN; stack->handle.valid = 1; stack->handle.extra = 0; memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); - depot_offset += required_size; + slab_offset += required_size; return stack; } @@ -418,7 +418,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, prealloc = page_address(page); } - raw_spin_lock_irqsave(&depot_lock, flags); + raw_spin_lock_irqsave(&slab_lock, flags); found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { @@ -441,7 +441,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, WARN_ON(!depot_init_slab(&prealloc)); } - raw_spin_unlock_irqrestore(&depot_lock, flags); + raw_spin_unlock_irqrestore(&slab_lock, flags); exit: if (prealloc) { /* Nobody used this memory, ok to free it. 
*/ @@ -497,12 +497,12 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle) return 0; - if (parts.slabindex > depot_index) { + if (parts.slab_index > slab_index) { WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", - parts.slabindex, depot_index, handle); + parts.slab_index, slab_index, handle); return 0; } - slab = stack_slabs[parts.slabindex]; + slab = stack_slabs[parts.slab_index]; if (!slab) return 0; stack = slab + offset; From 190eb8235a64689002fa9919cb860352ba100c15 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:36 +0100 Subject: [PATCH 70/77] lib/stackdepot: rename handle and slab constants Change the "STACK_ALLOC_" prefix to "DEPOT_" for the constants that define the number of bits in stack depot handles and the maximum number of slabs. The old prefix is unclear and makes one wonder how these constants are related to stack allocations. The new prefix is also shorter. Also simplify the comment for DEPOT_SLAB_ORDER. No functional changes. Link: https://lkml.kernel.org/r/d9c6d1fa0ae6e1e65577ee81444656c99eb598d8.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 56 +++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 023f299bedf6c..b946ba74fea08 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -36,30 +36,28 @@ #include #include -#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8) - -#define STACK_ALLOC_NULL_PROTECTION_BITS 1 -#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ -#define STACK_ALLOC_SIZE (1LL << (PAGE_SHIFT + STACK_ALLOC_ORDER)) -#define STACK_ALLOC_ALIGN 4 -#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGE_SHIFT - \ - STACK_ALLOC_ALIGN) -#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ - STACK_ALLOC_NULL_PROTECTION_BITS - \ - STACK_ALLOC_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS) -#define STACK_ALLOC_SLABS_CAP 8192 -#define STACK_ALLOC_MAX_SLABS \ - (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ - (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP) +#define DEPOT_HANDLE_BITS (sizeof(depot_stack_handle_t) * 8) + +#define DEPOT_VALID_BITS 1 +#define DEPOT_SLAB_ORDER 2 /* Slab size order, 4 pages */ +#define DEPOT_SLAB_SIZE (1LL << (PAGE_SHIFT + DEPOT_SLAB_ORDER)) +#define DEPOT_STACK_ALIGN 4 +#define DEPOT_OFFSET_BITS (DEPOT_SLAB_ORDER + PAGE_SHIFT - DEPOT_STACK_ALIGN) +#define DEPOT_SLAB_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_VALID_BITS - \ + DEPOT_OFFSET_BITS - STACK_DEPOT_EXTRA_BITS) +#define DEPOT_SLABS_CAP 8192 +#define DEPOT_MAX_SLABS \ + (((1LL << (DEPOT_SLAB_INDEX_BITS)) < DEPOT_SLABS_CAP) ? \ + (1LL << (DEPOT_SLAB_INDEX_BITS)) : DEPOT_SLABS_CAP) /* The compact structure to store the reference to stacks. */ union handle_parts { depot_stack_handle_t handle; struct { - u32 slab_index : STACK_ALLOC_INDEX_BITS; - u32 offset : STACK_ALLOC_OFFSET_BITS; - u32 valid : STACK_ALLOC_NULL_PROTECTION_BITS; - u32 extra : STACK_DEPOT_EXTRA_BITS; + u32 slab_index : DEPOT_SLAB_INDEX_BITS; + u32 offset : DEPOT_OFFSET_BITS; + u32 valid : DEPOT_VALID_BITS; + u32 extra : STACK_DEPOT_EXTRA_BITS; }; }; @@ -91,7 +89,7 @@ static unsigned int stack_bucket_number_order; static unsigned int stack_hash_mask; /* Array of memory regions that store stack traces. 
*/ -static void *stack_slabs[STACK_ALLOC_MAX_SLABS]; +static void *stack_slabs[DEPOT_MAX_SLABS]; /* Currently used slab in stack_slabs. */ static int slab_index; /* Offset to the unused space in the currently used slab. */ @@ -235,7 +233,7 @@ static bool depot_init_slab(void **prealloc) *prealloc = NULL; } else { /* If this is the last depot slab, do not touch the next one. */ - if (slab_index + 1 < STACK_ALLOC_MAX_SLABS) { + if (slab_index + 1 < DEPOT_MAX_SLABS) { stack_slabs[slab_index + 1] = *prealloc; *prealloc = NULL; /* @@ -256,10 +254,10 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) struct stack_record *stack; size_t required_size = struct_size(stack, entries, size); - required_size = ALIGN(required_size, 1 << STACK_ALLOC_ALIGN); + required_size = ALIGN(required_size, 1 << DEPOT_STACK_ALIGN); - if (unlikely(slab_offset + required_size > STACK_ALLOC_SIZE)) { - if (unlikely(slab_index + 1 >= STACK_ALLOC_MAX_SLABS)) { + if (unlikely(slab_offset + required_size > DEPOT_SLAB_SIZE)) { + if (unlikely(slab_index + 1 >= DEPOT_MAX_SLABS)) { WARN_ONCE(1, "Stack depot reached limit capacity"); return NULL; } @@ -270,7 +268,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) * |next_slab_inited| in stack_depot_save() and * depot_init_slab(). */ - if (slab_index + 1 < STACK_ALLOC_MAX_SLABS) + if (slab_index + 1 < DEPOT_MAX_SLABS) smp_store_release(&next_slab_inited, 0); } depot_init_slab(prealloc); @@ -282,7 +280,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) stack->hash = hash; stack->size = size; stack->handle.slab_index = slab_index; - stack->handle.offset = slab_offset >> STACK_ALLOC_ALIGN; + stack->handle.offset = slab_offset >> DEPOT_STACK_ALIGN; stack->handle.valid = 1; stack->handle.extra = 0; memcpy(stack->entries, entries, flex_array_size(stack, entries, size)); @@ -413,7 +411,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, alloc_flags &= ~GFP_ZONEMASK; alloc_flags &= (GFP_ATOMIC | GFP_KERNEL); alloc_flags |= __GFP_NOWARN; - page = alloc_pages(alloc_flags, STACK_ALLOC_ORDER); + page = alloc_pages(alloc_flags, DEPOT_SLAB_ORDER); if (page) prealloc = page_address(page); } @@ -445,7 +443,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, exit: if (prealloc) { /* Nobody used this memory, ok to free it. */ - free_pages((unsigned long)prealloc, STACK_ALLOC_ORDER); + free_pages((unsigned long)prealloc, DEPOT_SLAB_ORDER); } if (found) retval.handle = found->handle.handle; @@ -490,7 +488,7 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, { union handle_parts parts = { .handle = handle }; void *slab; - size_t offset = parts.offset << STACK_ALLOC_ALIGN; + size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; *entries = NULL; From dbbe4edc9303c4b98e411eb55ed92aa7280de4ed Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:37 +0100 Subject: [PATCH 71/77] lib/stacktrace: drop impossible WARN_ON for depot_init_slab depot_init_slab has two call sites: 1. In depot_alloc_stack with a potentially NULL prealloc. 2. In __stack_depot_save with a non-NULL prealloc. At the same time depot_init_slab can only return false when prealloc is NULL. As the second call site makes sure that prealloc is not NULL, the WARN_ON there can never trigger. Thus, drop the WARN_ON and also move the prealloc check from depot_init_slab to its first call site. 
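In sketch form, the two call sites after this change carry the reasoning above directly in code:

	/* Call site 1, depot_alloc_stack(): prealloc may be NULL, so the
	 * check now lives here. */
	if (*prealloc)
		depot_init_slab(prealloc);

	/* Call site 2, __stack_depot_save(): prealloc is known to be
	 * non-NULL at this point, so the old
	 * WARN_ON(!depot_init_slab(&prealloc)) could never fire. */
	depot_init_slab(&prealloc);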
Also change the return type of depot_init_slab to void, as it would now always return true. Link: https://lkml.kernel.org/r/7e7434a0d4e8a71138aec2c8a3c69a4eebf49935.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index b946ba74fea08..d6be82a5c2234 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -218,16 +218,14 @@ int stack_depot_init(void) } EXPORT_SYMBOL_GPL(stack_depot_init); -static bool depot_init_slab(void **prealloc) +static void depot_init_slab(void **prealloc) { - if (!*prealloc) - return false; /* * This smp_load_acquire() pairs with smp_store_release() to * |next_slab_inited| below and in depot_alloc_stack(). */ if (smp_load_acquire(&next_slab_inited)) - return true; + return; if (stack_slabs[slab_index] == NULL) { stack_slabs[slab_index] = *prealloc; *prealloc = NULL; @@ -244,7 +242,6 @@ static bool depot_init_slab(void **prealloc) smp_store_release(&next_slab_inited, 1); } } - return true; } /* Allocation of a new stack in raw storage */ @@ -271,7 +268,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) if (slab_index + 1 < DEPOT_MAX_SLABS) smp_store_release(&next_slab_inited, 0); } - depot_init_slab(prealloc); + if (*prealloc) + depot_init_slab(prealloc); if (stack_slabs[slab_index] == NULL) return NULL; @@ -436,7 +434,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * We didn't need to store this stack trace, but let's keep * the preallocated memory for the future. */ - WARN_ON(!depot_init_slab(&prealloc)); + depot_init_slab(&prealloc); } raw_spin_unlock_irqrestore(&slab_lock, flags); From 55cc53101c0db7c589bbce4f11f3c07a2c431b88 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:38 +0100 Subject: [PATCH 72/77] lib/stackdepot: annotate depot_init_slab and depot_alloc_stack Clean up the existing comments and add new ones to depot_init_slab and depot_alloc_stack. As part of the clean-up, remove mentions of which variable is accessed by smp_store_release and smp_load_acquire: this is already clear from the code. Link: https://lkml.kernel.org/r/3cf68a23da43cef0f8737f7a2c07f35ce6d841a7.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index d6be82a5c2234..7282565722f29 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -218,33 +218,41 @@ int stack_depot_init(void) } EXPORT_SYMBOL_GPL(stack_depot_init); +/* Uses preallocated memory to initialize a new stack depot slab. */ static void depot_init_slab(void **prealloc) { /* - * This smp_load_acquire() pairs with smp_store_release() to - * |next_slab_inited| below and in depot_alloc_stack(). + * If the next slab is already initialized, do not use the + * preallocated memory. + * smp_load_acquire() here pairs with smp_store_release() below and + * in depot_alloc_stack(). */ if (smp_load_acquire(&next_slab_inited)) return; + + /* Check if the current slab is not yet allocated. */ if (stack_slabs[slab_index] == NULL) { + /* Use the preallocated memory for the current slab. 
*/ stack_slabs[slab_index] = *prealloc; *prealloc = NULL; } else { - /* If this is the last depot slab, do not touch the next one. */ + /* + * Otherwise, use the preallocated memory for the next slab + * as long as we do not exceed the maximum number of slabs. + */ if (slab_index + 1 < DEPOT_MAX_SLABS) { stack_slabs[slab_index + 1] = *prealloc; *prealloc = NULL; /* * This smp_store_release pairs with smp_load_acquire() - * from |next_slab_inited| above and in - * stack_depot_save(). + * above and in stack_depot_save(). */ smp_store_release(&next_slab_inited, 1); } } } -/* Allocation of a new stack in raw storage */ +/* Allocates a new stack in a stack depot slab. */ static struct stack_record * depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { @@ -253,28 +261,35 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) required_size = ALIGN(required_size, 1 << DEPOT_STACK_ALIGN); + /* Check if there is not enough space in the current slab. */ if (unlikely(slab_offset + required_size > DEPOT_SLAB_SIZE)) { + /* Bail out if we reached the slab limit. */ if (unlikely(slab_index + 1 >= DEPOT_MAX_SLABS)) { WARN_ONCE(1, "Stack depot reached limit capacity"); return NULL; } + + /* Move on to the next slab. */ slab_index++; slab_offset = 0; /* - * smp_store_release() here pairs with smp_load_acquire() from - * |next_slab_inited| in stack_depot_save() and - * depot_init_slab(). + * smp_store_release() here pairs with smp_load_acquire() in + * stack_depot_save() and depot_init_slab(). */ if (slab_index + 1 < DEPOT_MAX_SLABS) smp_store_release(&next_slab_inited, 0); } + + /* Assign the preallocated memory to a slab if required. */ if (*prealloc) depot_init_slab(prealloc); + + /* Check if we have a slab to save the stack trace. */ if (stack_slabs[slab_index] == NULL) return NULL; + /* Save the stack trace. */ stack = stack_slabs[slab_index] + slab_offset; - stack->hash = hash; stack->size = size; stack->handle.slab_index = slab_index; From 87f5b2a7e315b72a404632202f4662aa8eb4ced6 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:39 +0100 Subject: [PATCH 73/77] lib/stacktrace, kasan, kmsan: rework extra_bits interface The current implementation of the extra_bits interface is confusing: passing extra_bits to __stack_depot_save makes it seem that the extra bits are somehow stored in stack depot. In reality, they are only embedded into a stack depot handle and are not used within stack depot. Drop the extra_bits argument from __stack_depot_save and instead provide a new stack_depot_set_extra_bits function (similar to the existing stack_depot_get_extra_bits) that saves extra bits into a stack depot handle. Update the callers of __stack_depot_save to use the new interface. This change also fixes a minor issue in the old code: __stack_depot_save does not return NULL if saving the stack trace fails and extra_bits is used.
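A caller that previously passed extra bits to __stack_depot_save now composes the two calls, roughly as follows (a sketch mirroring the kmsan update in the diff below):

	depot_stack_handle_t handle;

	handle = __stack_depot_save(entries, nr_entries, GFP_ATOMIC, true);
	/* Embed the caller's bits into the handle; stack depot storage
	 * itself is not affected. */
	handle = stack_depot_set_extra_bits(handle, extra_bits);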
Link: https://lkml.kernel.org/r/fbe58d38b7d93a9ef8500a72c0c4f103222418e6.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 4 +++- lib/stackdepot.c | 38 +++++++++++++++++++++++++++++--------- mm/kasan/common.c | 2 +- mm/kmsan/core.c | 10 +++++++--- 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index c4e3abc16b16a..f999811c66d78 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -57,7 +57,6 @@ static inline int stack_depot_early_init(void) { return 0; } depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, - unsigned int extra_bits, gfp_t gfp_flags, bool can_alloc); depot_stack_handle_t stack_depot_save(unsigned long *entries, @@ -71,6 +70,9 @@ void stack_depot_print(depot_stack_handle_t stack); int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); +depot_stack_handle_t stack_depot_set_extra_bits(depot_stack_handle_t handle, + unsigned int extra_bits); + unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 7282565722f29..f291ad6a4e72d 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -346,7 +346,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, * * @entries: Pointer to storage array * @nr_entries: Size of the storage array - * @extra_bits: Flags to store in unused bits of depot_stack_handle_t * @alloc_flags: Allocation gfp flags * @can_alloc: Allocate stack slabs (increased chance of failure if false) * @@ -358,10 +357,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, * If the stack trace in @entries is from an interrupt, only the portion up to * interrupt entry is saved. * - * Additional opaque flags can be passed in @extra_bits, stored in the unused - * bits of the stack handle, and retrieved using stack_depot_get_extra_bits() - * without calling stack_depot_fetch(). - * * Context: Any context, but setting @can_alloc to %false is required if * alloc_pages() cannot be used from the current context. 
Currently * this is the case from contexts where neither %GFP_ATOMIC nor @@ -371,7 +366,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, - unsigned int extra_bits, gfp_t alloc_flags, bool can_alloc) { struct stack_record *found = NULL, **bucket; @@ -461,8 +455,6 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, if (found) retval.handle = found->handle.handle; fast_exit: - retval.extra = extra_bits; - return retval.handle; } EXPORT_SYMBOL_GPL(__stack_depot_save); @@ -483,7 +475,7 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags) { - return __stack_depot_save(entries, nr_entries, 0, alloc_flags, true); + return __stack_depot_save(entries, nr_entries, alloc_flags, true); } EXPORT_SYMBOL_GPL(stack_depot_save); @@ -566,6 +558,34 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, } EXPORT_SYMBOL_GPL(stack_depot_snprint); +/** + * stack_depot_set_extra_bits - Set extra bits in a stack depot handle + * + * @handle: Stack depot handle + * @extra_bits: Value to set the extra bits + * + * Return: Stack depot handle with extra bits set + * + * Stack depot handles have a few unused bits, which can be used for storing + * user-specific information. These bits are transparent to the stack depot. + */ +depot_stack_handle_t stack_depot_set_extra_bits(depot_stack_handle_t handle, + unsigned int extra_bits) +{ + union handle_parts parts = { .handle = handle }; + + parts.extra = extra_bits; + return parts.handle; +} +EXPORT_SYMBOL(stack_depot_set_extra_bits); + +/** + * stack_depot_get_extra_bits - Retrieve extra bits from a stack depot handle + * + * @handle: Stack depot handle with extra bits saved + * + * Return: Extra bits retrieved from the stack depot handle + */ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) { union handle_parts parts = { .handle = handle }; diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 833bf2cfd2a39..50f4338b477f2 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -43,7 +43,7 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc) unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); - return __stack_depot_save(entries, nr_entries, 0, flags, can_alloc); + return __stack_depot_save(entries, nr_entries, flags, can_alloc); } void kasan_set_track(struct kasan_track *track, gfp_t flags) diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c index 112dce135c7f6..f710257d68670 100644 --- a/mm/kmsan/core.c +++ b/mm/kmsan/core.c @@ -69,13 +69,15 @@ depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags, { unsigned long entries[KMSAN_STACK_DEPTH]; unsigned int nr_entries; + depot_stack_handle_t handle; nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0); /* Don't sleep (see might_sleep_if() in __alloc_pages_nodemask()). */ flags &= ~__GFP_DIRECT_RECLAIM; - return __stack_depot_save(entries, nr_entries, extra, flags, true); + handle = __stack_depot_save(entries, nr_entries, flags, true); + return stack_depot_set_extra_bits(handle, extra); } /* Copy the metadata following the memmove() behavior. 
*/ @@ -215,6 +217,7 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id) u32 extra_bits; int depth; bool uaf; + depot_stack_handle_t handle; if (!id) return id; @@ -250,8 +253,9 @@ depot_stack_handle_t kmsan_internal_chain_origin(depot_stack_handle_t id) * positives when __stack_depot_save() passes it to instrumented code. */ kmsan_internal_unpoison_memory(entries, sizeof(entries), false); - return __stack_depot_save(entries, ARRAY_SIZE(entries), extra_bits, - GFP_ATOMIC, true); + handle = __stack_depot_save(entries, ARRAY_SIZE(entries), GFP_ATOMIC, + true); + return stack_depot_set_extra_bits(handle, extra_bits); } void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b, From c479a059fdf103435c0368173a00229602242d22 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:40 +0100 Subject: [PATCH 74/77] lib/stackdepot: annotate racy slab_index accesses Accesses to slab_index are protected by slab_lock everywhere except in a sanity check in stack_depot_fetch. The read access there can race with the write access in depot_alloc_stack. Use WRITE/READ_ONCE() to annotate the racy accesses. As the sanity check is only used to print a warning in case of a violation of the stack depot interface usage, it does not make a lot of sense to use proper synchronization. Link: https://lkml.kernel.org/r/19512bb03eed27ced5abeb5bd03f9a8381742cb1.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f291ad6a4e72d..cc2fe8563af41 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -269,8 +269,11 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) return NULL; } - /* Move on to the next slab. */ - slab_index++; + /* + * Move on to the next slab. + * WRITE_ONCE annotates a race with stack_depot_fetch. + */ + WRITE_ONCE(slab_index, slab_index + 1); slab_offset = 0; /* * smp_store_release() here pairs with smp_load_acquire() in @@ -492,6 +495,8 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { union handle_parts parts = { .handle = handle }; + /* READ_ONCE annotates a race with depot_alloc_stack. 
*/ + int slab_index_cached = READ_ONCE(slab_index); void *slab; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; @@ -500,9 +505,9 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle) return 0; - if (parts.slab_index > slab_index) { + if (parts.slab_index > slab_index_cached) { WARN(1, "slab index %d out of bounds (%d) for stack id %08x\n", - parts.slab_index, slab_index, handle); + parts.slab_index, slab_index_cached, handle); return 0; } slab = stack_slabs[parts.slab_index]; From dc196d5651fb8c115851e6e26b77c7fa17e5c898 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 31 Jan 2023 13:10:50 -0800 Subject: [PATCH 75/77] lib-stackdepot-annotate-racy-slab_index-accesses-fix enhance comment, per Marco Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/stackdepot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index cc2fe8563af41..fe37b8f896380 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -271,7 +271,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) /* * Move on to the next slab. - * WRITE_ONCE annotates a race with stack_depot_fetch. + * WRITE_ONCE pairs with potential concurrent read in + * stack_depot_fetch(). */ WRITE_ONCE(slab_index, slab_index + 1); slab_offset = 0; From bbb2f17b4edde0b64b293ef38e561f3e21045a64 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:41 +0100 Subject: [PATCH 76/77] lib/stackdepot: various comments clean-ups Clean up comments in include/linux/stackdepot.h and lib/stackdepot.c: 1. Rework the initialization comment in stackdepot.h. 2. Rework the header comment in stackdepot.c. 3. Various clean-ups for other comments. Also adjust whitespace for find_stack and depot_alloc_stack call sites. No functional changes. Link: https://lkml.kernel.org/r/e6d509e769bacc1842de868084e157787e6a3817.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 36 +++++------ lib/stackdepot.c | 120 ++++++++++++++++++------------------- 2 files changed, 78 insertions(+), 78 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index f999811c66d78..173740987d8b2 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * A generic stack depot implementation + * Stack depot - a stack trace storage that avoids duplication. * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * - * Based on code by Dmitry Chernenkov. + * Based on the code by Dmitry Chernenkov. */ #ifndef _LINUX_STACKDEPOT_H @@ -17,35 +17,37 @@ typedef u32 depot_stack_handle_t; /* * Number of bits in the handle that stack depot doesn't use. Users may store - * information in them. + * information in them via stack_depot_set/get_extra_bits. */ #define STACK_DEPOT_EXTRA_BITS 5 /* - * Every user of stack depot has to call stack_depot_init() during its own init - * when it's decided that it will be calling stack_depot_save() later. This is - * recommended for e.g. modules initialized later in the boot process, when - * slab_is_available() is true.
+ * Using stack depot requires its initialization, which can be done in 3 ways: * - * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot - * enabled as part of mm_init(), for subsystems where it's known at compile time - * that stack depot will be used. + * 1. Selecting CONFIG_STACKDEPOT_ALWAYS_INIT. This option is suitable in + * scenarios where it's known at compile time that stack depot will be used. + * Enabling this config makes the kernel initialize stack depot in mm_init(). * - * Another alternative is to call stack_depot_request_early_init(), when the - * decision to use stack depot is taken e.g. when evaluating kernel boot - * parameters, which precedes the enablement point in mm_init(). + * 2. Calling stack_depot_request_early_init() during early boot, before + * stack_depot_early_init() in mm_init() completes. For example, this can + * be done when evaluating kernel boot parameters. + * + * 3. Calling stack_depot_init(). Possible after boot is complete. This option + * is recommended for modules initialized later in the boot process, after + * mm_init() completes. * * stack_depot_init() and stack_depot_request_early_init() can be called - * regardless of CONFIG_STACKDEPOT and are no-op when disabled. The actual - * save/fetch/print functions should only be called from code that makes sure - * CONFIG_STACKDEPOT is enabled. + * regardless of whether CONFIG_STACKDEPOT is enabled and are no-op when this + * config is disabled. The save/fetch/print stack depot functions can only be + * called from the code that makes sure CONFIG_STACKDEPOT is enabled _and_ + * initializes stack depot via one of the ways listed above. */ #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); void __init stack_depot_request_early_init(void); -/* This is supposed to be called only from mm_init() */ +/* Must be only called from mm_init(). */ int __init stack_depot_early_init(void); #else static inline int stack_depot_init(void) { return 0; } diff --git a/lib/stackdepot.c b/lib/stackdepot.c index fe37b8f896380..cb0dc6144c97d 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -1,22 +1,26 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Generic stack depot for storing stack traces. + * Stack depot - a stack trace storage that avoids duplication. * - * Some debugging tools need to save stack traces of certain events which can - * be later presented to the user. For example, KASAN needs to safe alloc and - * free stacks for each object, but storing two stack traces per object - * requires too much memory (e.g. SLUB_DEBUG needs 256 bytes per object for - * that). + * Stack depot is intended to be used by subsystems that need to store and + * later retrieve many potentially duplicated stack traces without wasting + * memory. * - * Instead, stack depot maintains a hashtable of unique stacktraces. Since alloc - * and free stacks repeat a lot, we save about 100x space. - * Stacks are never removed from depot, so we store them contiguously one after - * another in a contiguous memory allocation. + * For example, KASAN needs to save allocation and free stack traces for each + * object. Storing two stack traces per object requires a lot of memory (e.g. + * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free + * stack traces often repeat, using stack depot allows to save about 100x space. + * + * Internally, stack depot maintains a hash table of unique stacktraces. 
The + * stack traces themselves are stored contiguously one after another in a set + * of separate page allocations. + * + * Stack traces are never removed from stack depot. * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * - * Based on code by Dmitry Chernenkov. + * Based on the code by Dmitry Chernenkov. */ #define pr_fmt(fmt) "stackdepot: " fmt @@ -50,7 +54,7 @@ (((1LL << (DEPOT_SLAB_INDEX_BITS)) < DEPOT_SLABS_CAP) ? \ (1LL << (DEPOT_SLAB_INDEX_BITS)) : DEPOT_SLABS_CAP) -/* The compact structure to store the reference to stacks. */ +/* Compact structure that stores a reference to a stack. */ union handle_parts { depot_stack_handle_t handle; struct { @@ -62,11 +66,11 @@ union handle_parts { }; struct stack_record { - struct stack_record *next; /* Link in the hashtable */ - u32 hash; /* Hash in the hastable */ - u32 size; /* Number of frames in the stack */ + struct stack_record *next; /* Link in the hash table */ + u32 hash; /* Hash in the hash table */ + u32 size; /* Number of stored frames */ union handle_parts handle; - unsigned long entries[]; /* Variable-sized array of entries. */ + unsigned long entries[]; /* Variable-sized array of frames */ }; static bool stack_depot_disabled; @@ -306,7 +310,7 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) return stack; } -/* Calculate hash for a stack */ +/* Calculates the hash for a stack. */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) { return jhash2((u32 *)entries, @@ -314,9 +318,9 @@ static inline u32 hash_stack(unsigned long *entries, unsigned int size) STACK_HASH_SEED); } -/* Use our own, non-instrumented version of memcmp(). - * - * We actually don't care about the order, just the equality. +/* + * Non-instrumented version of memcmp(). + * Does not check the lexicographical order, only the equality. */ static inline int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, @@ -329,7 +333,7 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, return 0; } -/* Find a stack that is equal to the one stored in entries in the hash */ +/* Finds a stack in a bucket of the hash table. */ static inline struct stack_record *find_stack(struct stack_record *bucket, unsigned long *entries, int size, u32 hash) @@ -346,27 +350,27 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, } /** - * __stack_depot_save - Save a stack trace from an array + * __stack_depot_save - Save a stack trace to stack depot * - * @entries: Pointer to storage array - * @nr_entries: Size of the storage array - * @alloc_flags: Allocation gfp flags + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags * @can_alloc: Allocate stack slabs (increased chance of failure if false) * * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is - * %true, is allowed to replenish the stack slab pool in case no space is left + * %true, stack depot can replenish the stack slab pool in case no space is left * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids - * any allocations and will fail if no space is left to store the stack trace. + * any allocations and fails if no space is left to store the stack trace. * - * If the stack trace in @entries is from an interrupt, only the portion up to - * interrupt entry is saved. 
+ * If the provided stack trace comes from the interrupt context, only the part + * up to the interrupt entry is saved. * * Context: Any context, but setting @can_alloc to %false is required if * alloc_pages() cannot be used from the current context. Currently - * this is the case from contexts where neither %GFP_ATOMIC nor + * this is the case for contexts where neither %GFP_ATOMIC nor * %GFP_NOWAIT can be used (NMI, raw_spin_lock). * - * Return: The handle of the stack struct stored in depot, 0 on failure. + * Return: Handle of the stack struct stored in depot, 0 on failure */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, @@ -381,11 +385,11 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, /* * If this stack trace is from an interrupt, including anything before - * interrupt entry usually leads to unbounded stackdepot growth. + * interrupt entry usually leads to unbounded stack depot growth. * - * Because use of filter_irq_stacks() is a requirement to ensure - * stackdepot can efficiently deduplicate interrupt stacks, always - * filter_irq_stacks() to simplify all callers' use of stackdepot. + * Since use of filter_irq_stacks() is a requirement to ensure stack + * depot can efficiently deduplicate interrupt stacks, always + * filter_irq_stacks() to simplify all callers' use of stack depot. */ nr_entries = filter_irq_stacks(entries, nr_entries); @@ -400,8 +404,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, * The smp_load_acquire() here pairs with smp_store_release() to * |bucket| below. */ - found = find_stack(smp_load_acquire(bucket), entries, - nr_entries, hash); + found = find_stack(smp_load_acquire(bucket), entries, nr_entries, hash); if (found) goto exit; @@ -431,7 +434,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { - struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); + struct stack_record *new = + depot_alloc_stack(entries, nr_entries, hash, &prealloc); if (new) { new->next = *bucket; @@ -444,8 +448,8 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, } } else if (prealloc) { /* - * We didn't need to store this stack trace, but let's keep - * the preallocated memory for the future. + * Stack depot already contains this stack trace, but let's + * keep the preallocated memory for the future. */ depot_init_slab(&prealloc); } @@ -453,7 +457,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, raw_spin_unlock_irqrestore(&slab_lock, flags); exit: if (prealloc) { - /* Nobody used this memory, ok to free it. */ + /* Stack depot didn't use this memory, free it. */ free_pages((unsigned long)prealloc, DEPOT_SLAB_ORDER); } if (found) @@ -464,16 +468,16 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, EXPORT_SYMBOL_GPL(__stack_depot_save); /** - * stack_depot_save - Save a stack trace from an array + * stack_depot_save - Save a stack trace to stack depot * - * @entries: Pointer to storage array - * @nr_entries: Size of the storage array - * @alloc_flags: Allocation gfp flags + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags * * Context: Contexts where allocations via alloc_pages() are allowed. * See __stack_depot_save() for more details. * - * Return: The handle of the stack struct stored in depot, 0 on failure. 
+ * Return: Handle of the stack trace stored in depot, 0 on failure */ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, @@ -484,13 +488,12 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, EXPORT_SYMBOL_GPL(stack_depot_save); /** - * stack_depot_fetch - Fetch stack entries from a depot + * stack_depot_fetch - Fetch a stack trace from stack depot * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). - * @entries: Pointer to store the entries address + * @handle: Stack depot handle returned from stack_depot_save() + * @entries: Pointer to store the address of the stack trace * - * Return: The number of trace entries for this depot. + * Return: Number of frames for the fetched stack */ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) @@ -522,11 +525,9 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, EXPORT_SYMBOL_GPL(stack_depot_fetch); /** - * stack_depot_print - print stack entries from a depot - * - * @stack: Stack depot handle which was returned from - * stack_depot_save(). + * stack_depot_print - Print a stack trace from stack depot * + * @stack: Stack depot handle returned from stack_depot_save() */ void stack_depot_print(depot_stack_handle_t stack) { @@ -540,17 +541,14 @@ void stack_depot_print(depot_stack_handle_t stack) EXPORT_SYMBOL_GPL(stack_depot_print); /** - * stack_depot_snprint - print stack entries from a depot into a buffer + * stack_depot_snprint - Print a stack trace from stack depot into a buffer * - * @handle: Stack depot handle which was returned from - * stack_depot_save(). + * @handle: Stack depot handle returned from stack_depot_save() * @buf: Pointer to the print buffer - * * @size: Size of the print buffer - * * @spaces: Number of leading spaces to print * - * Return: Number of bytes printed. + * Return: Number of bytes printed */ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces) From 0304ec313a3eb35af55bfe98071269db36aee259 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 30 Jan 2023 21:49:42 +0100 Subject: [PATCH 77/77] lib/stackdepot: move documentation comments to stackdepot.h Move all interface- and usage-related documentation comments to include/linux/stackdepot.h. It makes sense to have them in the header where they are available to the interface users. Link: https://lkml.kernel.org/r/341353394ec1134c5a92a2b298348ddc4c48c8a0.1675111415.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 87 ++++++++++++++++++++++++++++++++++++++ lib/stackdepot.c | 87 -------------------------------------- 2 files changed, 87 insertions(+), 87 deletions(-) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 173740987d8b2..a828fbece1ba3 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -2,6 +2,17 @@ /* * Stack depot - a stack trace storage that avoids duplication. * + * Stack depot is intended to be used by subsystems that need to store and + * later retrieve many potentially duplicated stack traces without wasting + * memory. + * + * For example, KASAN needs to save allocation and free stack traces for each + * object. Storing two stack traces per object requires a lot of memory (e.g. + * SLUB_DEBUG needs 256 bytes per object for that). 
Since allocation and free + * stack traces often repeat, using stack depot allows to save about 100x space. + * + * Stack traces are never removed from stack depot. + * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * @@ -57,24 +68,100 @@ static inline void stack_depot_request_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif +/** + * __stack_depot_save - Save a stack trace to stack depot + * + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags + * @can_alloc: Allocate stack slabs (increased chance of failure if false) + * + * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is + * %true, stack depot can replenish the stack slab pool in case no space is left + * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids + * any allocations and fails if no space is left to store the stack trace. + * + * If the provided stack trace comes from the interrupt context, only the part + * up to the interrupt entry is saved. + * + * Context: Any context, but setting @can_alloc to %false is required if + * alloc_pages() cannot be used from the current context. Currently + * this is the case for contexts where neither %GFP_ATOMIC nor + * %GFP_NOWAIT can be used (NMI, raw_spin_lock). + * + * Return: Handle of the stack struct stored in depot, 0 on failure + */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags, bool can_alloc); +/** + * stack_depot_save - Save a stack trace to stack depot + * + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags + * + * Context: Contexts where allocations via alloc_pages() are allowed. + * See __stack_depot_save() for more details. + * + * Return: Handle of the stack trace stored in depot, 0 on failure + */ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); +/** + * stack_depot_fetch - Fetch a stack trace from stack depot + * + * @handle: Stack depot handle returned from stack_depot_save() + * @entries: Pointer to store the address of the stack trace + * + * Return: Number of frames for the fetched stack + */ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +/** + * stack_depot_print - Print a stack trace from stack depot + * + * @stack: Stack depot handle returned from stack_depot_save() + */ void stack_depot_print(depot_stack_handle_t stack); +/** + * stack_depot_snprint - Print a stack trace from stack depot into a buffer + * + * @handle: Stack depot handle returned from stack_depot_save() + * @buf: Pointer to the print buffer + * @size: Size of the print buffer + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed + */ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); +/** + * stack_depot_set_extra_bits - Set extra bits in a stack depot handle + * + * @handle: Stack depot handle + * @extra_bits: Value to set the extra bits + * + * Return: Stack depot handle with extra bits set + * + * Stack depot handles have a few unused bits, which can be used for storing + * user-specific information. These bits are transparent to the stack depot. 
+ */ depot_stack_handle_t stack_depot_set_extra_bits(depot_stack_handle_t handle, unsigned int extra_bits); +/** + * stack_depot_get_extra_bits - Retrieve extra bits from a stack depot handle + * + * @handle: Stack depot handle with extra bits saved + * + * Return: Extra bits retrieved from the stack depot handle + */ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif diff --git a/lib/stackdepot.c b/lib/stackdepot.c index cb0dc6144c97d..d2ab4d29fa1a3 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -2,21 +2,10 @@ /* * Stack depot - a stack trace storage that avoids duplication. * - * Stack depot is intended to be used by subsystems that need to store and - * later retrieve many potentially duplicated stack traces without wasting - * memory. - * - * For example, KASAN needs to save allocation and free stack traces for each - * object. Storing two stack traces per object requires a lot of memory (e.g. - * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free - * stack traces often repeat, using stack depot allows to save about 100x space. - * * Internally, stack depot maintains a hash table of unique stacktraces. The * stack traces themselves are stored contiguously one after another in a set * of separate page allocations. * - * Stack traces are never removed from stack depot. - * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * @@ -349,29 +338,6 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } -/** - * __stack_depot_save - Save a stack trace to stack depot - * - * @entries: Pointer to the stack trace - * @nr_entries: Number of frames in the stack - * @alloc_flags: Allocation GFP flags - * @can_alloc: Allocate stack slabs (increased chance of failure if false) - * - * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is - * %true, stack depot can replenish the stack slab pool in case no space is left - * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids - * any allocations and fails if no space is left to store the stack trace. - * - * If the provided stack trace comes from the interrupt context, only the part - * up to the interrupt entry is saved. - * - * Context: Any context, but setting @can_alloc to %false is required if - * alloc_pages() cannot be used from the current context. Currently - * this is the case for contexts where neither %GFP_ATOMIC nor - * %GFP_NOWAIT can be used (NMI, raw_spin_lock). - * - * Return: Handle of the stack struct stored in depot, 0 on failure - */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags, bool can_alloc) @@ -467,18 +433,6 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, } EXPORT_SYMBOL_GPL(__stack_depot_save); -/** - * stack_depot_save - Save a stack trace to stack depot - * - * @entries: Pointer to the stack trace - * @nr_entries: Number of frames in the stack - * @alloc_flags: Allocation GFP flags - * - * Context: Contexts where allocations via alloc_pages() are allowed. - * See __stack_depot_save() for more details. 
- * - * Return: Handle of the stack trace stored in depot, 0 on failure - */ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t alloc_flags) @@ -487,14 +441,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, } EXPORT_SYMBOL_GPL(stack_depot_save); -/** - * stack_depot_fetch - Fetch a stack trace from stack depot - * - * @handle: Stack depot handle returned from stack_depot_save() - * @entries: Pointer to store the address of the stack trace - * - * Return: Number of frames for the fetched stack - */ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { @@ -524,11 +470,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, } EXPORT_SYMBOL_GPL(stack_depot_fetch); -/** - * stack_depot_print - Print a stack trace from stack depot - * - * @stack: Stack depot handle returned from stack_depot_save() - */ void stack_depot_print(depot_stack_handle_t stack) { unsigned long *entries; @@ -540,16 +481,6 @@ void stack_depot_print(depot_stack_handle_t stack) } EXPORT_SYMBOL_GPL(stack_depot_print); -/** - * stack_depot_snprint - Print a stack trace from stack depot into a buffer - * - * @handle: Stack depot handle returned from stack_depot_save() - * @buf: Pointer to the print buffer - * @size: Size of the print buffer - * @spaces: Number of leading spaces to print - * - * Return: Number of bytes printed - */ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces) { @@ -562,17 +493,6 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, } EXPORT_SYMBOL_GPL(stack_depot_snprint); -/** - * stack_depot_set_extra_bits - Set extra bits in a stack depot handle - * - * @handle: Stack depot handle - * @extra_bits: Value to set the extra bits - * - * Return: Stack depot handle with extra bits set - * - * Stack depot handles have a few unused bits, which can be used for storing - * user-specific information. These bits are transparent to the stack depot. - */ depot_stack_handle_t stack_depot_set_extra_bits(depot_stack_handle_t handle, unsigned int extra_bits) { @@ -583,13 +503,6 @@ depot_stack_handle_t stack_depot_set_extra_bits(depot_stack_handle_t handle, } EXPORT_SYMBOL(stack_depot_set_extra_bits); -/** - * stack_depot_get_extra_bits - Retrieve extra bits from a stack depot handle - * - * @handle: Stack depot handle with extra bits saved - * - * Return: Extra bits retrieved from the stack depot handle - */ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) { union handle_parts parts = { .handle = handle };
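As a final illustration of the interface whose documentation now lives in the header: a minimal, self-contained sketch. The helpers record_stack and print_recorded_stack and the 32-entry buffer are assumptions for the example, not part of this series; the stack depot and stacktrace calls follow the kernel-doc moved above.

#include <linux/gfp.h>
#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

/* Assumes stack depot was initialized via one of the three documented ways. */
static depot_stack_handle_t record_stack(void)
{
	unsigned long entries[32];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	/* GFP_KERNEL is fine here: stack_depot_save() may allocate pages. */
	return stack_depot_save(entries, nr_entries, GFP_KERNEL);
}

static void print_recorded_stack(depot_stack_handle_t handle)
{
	/* A zero handle means the save failed; nothing to print. */
	if (!handle)
		return;
	stack_depot_print(handle);
}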