From 0e1ff67d164be45e8ddfea5aaf5803224ede0805 Mon Sep 17 00:00:00 2001
From: Maksim Davydov
Date: Tue, 25 Mar 2025 11:58:07 +0300
Subject: [PATCH 1/3] x86/split_lock: Simplify reenabling

When split_lock_mitigate is disabled, each CPU needs its own delayed_work
structure. They are used to re-enable split lock detection after it has
been disabled. But a delayed_work structure must be correctly initialized
after its allocation.

The current implementation uses deferred initialization, which makes the
split lock handler code unclear. The code can be simplified a bit if the
initialization is moved to an appropriate initcall.

sld_setup() is called before setup_per_cpu_areas() and thus can't be used
for this purpose, so introduce an independent initcall for the
initialization.

[ mingo: Simplified the 'work' assignment line a bit more. ]

Signed-off-by: Maksim Davydov
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/20250325085807.171885-1-davydov-max@yandex-team.ru
---
 arch/x86/kernel/cpu/bus_lock.c | 35 +++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/bus_lock.c b/arch/x86/kernel/cpu/bus_lock.c
index 97222efb4d2a..237faf7e700c 100644
--- a/arch/x86/kernel/cpu/bus_lock.c
+++ b/arch/x86/kernel/cpu/bus_lock.c
@@ -200,6 +200,26 @@ static void __split_lock_reenable(struct work_struct *work)
  */
 static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
 
+/*
+ * Per-CPU delayed_work can't be statically initialized properly because
+ * the struct address is unknown. Thus per-CPU delayed_work structures
+ * have to be initialized during kernel initialization and after calling
+ * setup_per_cpu_areas().
+ */
+static int __init setup_split_lock_delayed_work(void)
+{
+        unsigned int cpu;
+
+        for_each_possible_cpu(cpu) {
+                struct delayed_work *work = per_cpu_ptr(&sl_reenable, cpu);
+
+                INIT_DELAYED_WORK(work, __split_lock_reenable);
+        }
+
+        return 0;
+}
+pure_initcall(setup_split_lock_delayed_work);
+
 /*
  * If a CPU goes offline with pending delayed work to re-enable split lock
  * detection then the delayed work will be executed on some other CPU. That
@@ -219,15 +239,16 @@ static int splitlock_cpu_offline(unsigned int cpu)
 
 static void split_lock_warn(unsigned long ip)
 {
-        struct delayed_work *work = NULL;
+        struct delayed_work *work;
         int cpu;
+        unsigned int saved_sld_mitigate = READ_ONCE(sysctl_sld_mitigate);
 
         if (!current->reported_split_lock)
                 pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
                                     current->comm, current->pid, ip);
         current->reported_split_lock = 1;
 
-        if (sysctl_sld_mitigate) {
+        if (saved_sld_mitigate) {
                 /*
                  * misery factor #1:
                  * sleep 10ms before trying to execute split lock.
@@ -240,18 +261,10 @@ static void split_lock_warn(unsigned long ip)
                  */
                 if (down_interruptible(&buslock_sem) == -EINTR)
                         return;
-                work = &sl_reenable_unlock;
         }
 
         cpu = get_cpu();
-
-        if (!work) {
-                work = this_cpu_ptr(&sl_reenable);
-                /* Deferred initialization of per-CPU struct */
-                if (!work->work.func)
-                        INIT_DELAYED_WORK(work, __split_lock_reenable);
-        }
-
+        work = saved_sld_mitigate ? &sl_reenable_unlock : per_cpu_ptr(&sl_reenable, cpu);
         schedule_delayed_work_on(cpu, work, 2);
 
         /* Disable split lock detection on this CPU to make progress */
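
The patch above settles on a general pattern: a statically defined per-CPU
delayed_work whose instances are initialized from an initcall that runs
after setup_per_cpu_areas(). A minimal sketch of the same idiom, with the
hypothetical names my_percpu_work/my_work_fn standing in for
sl_reenable/__split_lock_reenable:

  #include <linux/cpumask.h>
  #include <linux/init.h>
  #include <linux/percpu.h>
  #include <linux/workqueue.h>

  /* Hypothetical per-CPU work item, declared like sl_reenable above. */
  static DEFINE_PER_CPU(struct delayed_work, my_percpu_work);

  static void my_work_fn(struct work_struct *work)
  {
          /* Runs later on the CPU the work was scheduled on. */
  }

  /*
   * The per-CPU area only exists after setup_per_cpu_areas(), so every
   * instance is initialized from an initcall instead of statically or
   * lazily in the handler.
   */
  static int __init my_percpu_work_init(void)
  {
          unsigned int cpu;

          for_each_possible_cpu(cpu)
                  INIT_DELAYED_WORK(per_cpu_ptr(&my_percpu_work, cpu), my_work_fn);

          return 0;
  }
  pure_initcall(my_percpu_work_init);

With the initialization done once at boot, callers can simply do
schedule_delayed_work_on(cpu, per_cpu_ptr(&my_percpu_work, cpu), delay)
without an "is it initialized yet?" check, which is exactly what the
simplified split_lock_warn() above relies on.
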
From 61c39d8c83e2077f33e0a2c8980a76a7f323f0ce Mon Sep 17 00:00:00 2001
From: Ryo Takakura
Date: Fri, 21 Mar 2025 07:33:22 -0700
Subject: [PATCH 2/3] lockdep: Fix wait context check on softirq for PREEMPT_RT

Since:

  0c1d7a2c2d32 ("lockdep: Remove softirq accounting on PREEMPT_RT.")

the wait context test for mutex usage within "in softirq context" fails
as it references @softirq_context:

 | wait context tests |
 --------------------------------------------------------------------------
                                    | rcu  | raw  | spin |mutex |
 --------------------------------------------------------------------------
                 in hardirq context:  ok  |  ok  |  ok  |  ok  |
  in hardirq context (not threaded):  ok  |  ok  |  ok  |  ok  |
                 in softirq context:  ok  |  ok  |  ok  |FAILED|

As a fix, add a lockdep map for BH disabled sections. This fixes the
issue by letting us catch cases where local_bh_disable() gets called with
preemption disabled and the local_lock therefore doesn't get acquired. In
the case of the "in softirq context" selftest, local_bh_disable() was
being called with preemption disabled as it runs early in the boot.

[ boqun: Move the lockdep annotations into __local_bh_*() to avoid false
  positives because of unpaired local_bh_disable() reported by Borislav
  Petkov and Peter Zijlstra, and make bh_lock_map only exist for
  PREEMPT_RT. ]

[ mingo: Restored authorship and improved the bh_lock_map definition. ]

Signed-off-by: Ryo Takakura
Signed-off-by: Boqun Feng
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/20250321143322.79651-1-boqun.feng@gmail.com
---
 kernel/softirq.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 4dae6ac2e83f..513b1945987c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -126,6 +126,18 @@ static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
         .lock   = INIT_LOCAL_LOCK(softirq_ctrl.lock),
 };
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key bh_lock_key;
+struct lockdep_map bh_lock_map = {
+        .name                   = "local_bh",
+        .key                    = &bh_lock_key,
+        .wait_type_outer        = LD_WAIT_FREE,
+        .wait_type_inner        = LD_WAIT_CONFIG, /* PREEMPT_RT makes BH preemptible. */
+        .lock_type              = LD_LOCK_PERCPU,
+};
+EXPORT_SYMBOL_GPL(bh_lock_map);
+#endif
+
 /**
  * local_bh_blocked() - Check for idle whether BH processing is blocked
  *
@@ -148,6 +160,8 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 
         WARN_ON_ONCE(in_hardirq());
 
+        lock_map_acquire_read(&bh_lock_map);
+
         /* First entry of a task into a BH disabled section? */
         if (!current->softirq_disable_cnt) {
                 if (preemptible()) {
@@ -211,6 +225,8 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
         WARN_ON_ONCE(in_hardirq());
         lockdep_assert_irqs_enabled();
 
+        lock_map_release(&bh_lock_map);
+
         local_irq_save(flags);
         curcnt = __this_cpu_read(softirq_ctrl.cnt);
 
@@ -261,6 +277,8 @@ static inline void ksoftirqd_run_begin(void)
 /* Counterpart to ksoftirqd_run_begin() */
 static inline void ksoftirqd_run_end(void)
 {
+        /* pairs with the lock_map_acquire_read() in ksoftirqd_run_begin() */
+        lock_map_release(&bh_lock_map);
         __local_bh_enable(SOFTIRQ_OFFSET, true);
         WARN_ON_ONCE(in_interrupt());
         local_irq_enable();
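
The fix above relies on a standalone lockdep map rather than a real lock:
lock_map_acquire_read() and lock_map_release() tell lockdep that the code
between them runs in a context with the map's wait type, so the
wait-context checks apply even though nothing is actually locked. A
minimal sketch of that annotation idiom for some other constrained region
(the my_region_* names are hypothetical; the patch's real map is
bh_lock_map, marked LD_LOCK_PERCPU and exported for use elsewhere):

  #include <linux/lockdep.h>

  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  static struct lock_class_key my_region_key;
  static struct lockdep_map my_region_map = {
          .name            = "my_region",
          .key             = &my_region_key,
          .wait_type_outer = LD_WAIT_FREE,
          .wait_type_inner = LD_WAIT_CONFIG, /* spinlock_t level: sleeps only on PREEMPT_RT */
  };
  #endif

  static void my_region_enter(void)
  {
          /* Annotation only, no lock is taken; with lock debugging off this compiles away. */
          lock_map_acquire_read(&my_region_map);
  }

  static void my_region_exit(void)
  {
          lock_map_release(&my_region_map);
  }

Inside such a region lockdep will flag the acquisition of any lock whose
wait type is higher than LD_WAIT_CONFIG, for example a mutex
(LD_WAIT_SLEEP), which is what makes the "in softirq context" mutex
selftest above report the expected violation again.
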
From 495f53d5cca0f939eaed9dca90b67e7e6fb0e30c Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Wed, 26 Mar 2025 11:08:30 -0700
Subject: [PATCH 3/3] locking/lockdep: Decrease nr_unused_locks if lock unused in zap_class()

Currently, when a lock class is allocated, nr_unused_locks will be
increased by 1, until it gets used: nr_unused_locks will be decreased by
1 in mark_lock(). However, one scenario is missed: a lock class may be
zapped without even being used once. This could result in a situation
where nr_unused_locks != 0 but no unused lock class is active in the
system, and when `cat /proc/lockdep_stats`, a WARN_ON() will be triggered
in a CONFIG_DEBUG_LOCKDEP=y kernel:

  [...] DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused)
  [...] WARNING: CPU: 41 PID: 1121 at kernel/locking/lockdep_proc.c:283 lockdep_stats_show+0xba9/0xbd0

And as a result, lockdep will be disabled after this.

Therefore, nr_unused_locks needs to be accounted correctly at zap_class()
time.

Signed-off-by: Boqun Feng
Signed-off-by: Ingo Molnar
Reviewed-by: Waiman Long
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20250326180831.510348-1-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index b15757e63626..58d78a33ac65 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -6264,6 +6264,9 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
                 hlist_del_rcu(&class->hash_entry);
                 WRITE_ONCE(class->key, NULL);
                 WRITE_ONCE(class->name, NULL);
+                /* Class allocated but not used, -1 in nr_unused_locks */
+                if (class->usage_mask == 0)
+                        debug_atomic_dec(nr_unused_locks);
                 nr_lock_classes--;
                 __clear_bit(class - lock_classes, lock_classes_in_use);
                 if (class - lock_classes == max_lock_class_idx)
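
The invariant the patch above restores is that nr_unused_locks always
equals the number of classes that have been allocated but never had a
usage bit set. A tiny userspace model of the three accounting events
(purely illustrative, not kernel code) shows why the zap-time decrement
is needed:

  #include <assert.h>
  #include <stdbool.h>

  /* Toy model of lockdep's class accounting. */
  static int nr_lock_classes;   /* allocated classes               */
  static int nr_unused_locks;   /* allocated but never marked used */

  static void alloc_class(void)     { nr_lock_classes++; nr_unused_locks++; }
  static void mark_class_used(void) { nr_unused_locks--; }  /* mark_lock() */

  static void zap_class(bool was_used)
  {
          /* The fix: a class zapped before first use is still "unused". */
          if (!was_used)
                  nr_unused_locks--;
          nr_lock_classes--;
  }

  int main(void)
  {
          alloc_class();        /* class A, used normally     */
          mark_class_used();
          alloc_class();        /* class B, zapped before use */
          zap_class(false);
          zap_class(true);      /* class A torn down later    */

          /*
           * Without the decrement in zap_class(), nr_unused_locks stays
           * at 1 here and the /proc/lockdep_stats consistency check warns.
           */
          assert(nr_unused_locks == 0);
          return 0;
  }

In the kernel the "was it ever used?" test is class->usage_mask == 0 at
zap_class() time, exactly as in the hunk above.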