From a1ff03cd6fb9c501fff63a4a2bface9adcfa81cd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker <frederic@kernel.org> Date: Wed, 2 Feb 2022 01:01:07 +0100 Subject: [PATCH 1/4] tick: Detect and fix jiffies update stall On some rare cases, the timekeeper CPU may be delaying its jiffies update duty for a while. Known causes include: * The timekeeper is waiting on stop_machine in a MULTI_STOP_DISABLE_IRQ or MULTI_STOP_RUN state. Disabled interrupts prevent from timekeeping updates while waiting for the target CPU to complete its stop_machine() callback. * The timekeeper vcpu has VMEXIT'ed for a long while due to some overload on the host. Detect and fix these situations with emergency timekeeping catchups. Original-patch-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> --- kernel/time/tick-sched.c | 17 +++++++++++++++++ kernel/time/tick-sched.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 17a283ce2b20f..c89f50a7e6903 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void) return period; } +#define MAX_STALLED_JIFFIES 5 + static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) { int cpu = smp_processor_id(); @@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) if (tick_do_timer_cpu == cpu) tick_do_update_jiffies64(now); + /* + * If jiffies update stalled for too long (timekeeper in stop_machine() + * or VMEXIT'ed for several msecs), force an update. + */ + if (ts->last_tick_jiffies != jiffies) { + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } else { + if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) { + tick_do_update_jiffies64(now); + ts->stalled_jiffies = 0; + ts->last_tick_jiffies = READ_ONCE(jiffies); + } + } + if (ts->inidle) ts->got_idle_tick = 1; } diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index d952ae3934236..504649513399b 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -49,6 +49,8 @@ enum tick_nohz_mode { * @timer_expires_base: Base time clock monotonic for @timer_expires * @next_timer: Expiry time of next expiring timer for debugging purpose only * @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick + * @last_tick_jiffies: Value of jiffies seen on last tick + * @stalled_jiffies: Number of stalled jiffies detected across ticks */ struct tick_sched { struct hrtimer sched_timer; @@ -77,6 +79,8 @@ struct tick_sched { u64 next_timer; ktime_t idle_expires; atomic_t tick_dep_mask; + unsigned long last_tick_jiffies; + unsigned int stalled_jiffies; }; extern struct tick_sched *tick_get_tick_sched(int cpu); From 2984539959dbaf4e65e19bf90c2419304a81a985 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker <frederic@kernel.org> Date: Tue, 8 Feb 2022 17:16:33 +0100 Subject: [PATCH 2/4] tick/rcu: Remove obsolete rcu_needs_cpu() parameters With the removal of CONFIG_RCU_FAST_NO_HZ, the parameters in rcu_needs_cpu() are not necessary anymore. Simply remove them. Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Paul Menzel <pmenzel@molgen.mpg.de> --- include/linux/rcutiny.h | 3 +-- include/linux/rcutree.h | 2 +- kernel/rcu/tree.c | 3 +-- kernel/time/tick-sched.c | 10 ++++------ 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 858f4d429946d..5fed476f977f6 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -64,9 +64,8 @@ static inline void rcu_softirq_qs(void) rcu_tasks_qs(current, (preempt)); \ } while (0) -static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt) +static inline int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return 0; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 53209d6694001..6cc91291d0782 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -19,7 +19,7 @@ void rcu_softirq_qs(void); void rcu_note_context_switch(bool preempt); -int rcu_needs_cpu(u64 basem, u64 *nextevt); +int rcu_needs_cpu(void); void rcu_cpu_stall_reset(void); /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283b0b..80faf2273ce9d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1086,9 +1086,8 @@ void rcu_irq_enter_irqson(void) * Just check whether or not this CPU has non-offloaded RCU callbacks * queued. */ -int rcu_needs_cpu(u64 basemono, u64 *nextevt) +int rcu_needs_cpu(void) { - *nextevt = KTIME_MAX; return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) && !rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data)); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c89f50a7e6903..566ad5bd83e96 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -785,7 +785,7 @@ static inline bool local_timer_softirq_pending(void) static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) { - u64 basemono, next_tick, next_tmr, next_rcu, delta, expires; + u64 basemono, next_tick, delta, expires; unsigned long basejiff; unsigned int seq; @@ -808,7 +808,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * minimal delta which brings us back to this place * immediately. Lather, rinse and repeat... */ - if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() || + if (rcu_needs_cpu() || arch_needs_cpu() || irq_work_needs_cpu() || local_timer_softirq_pending()) { next_tick = basemono + TICK_NSEC; } else { @@ -819,10 +819,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * disabled this also looks at the next expiring * hrtimer. */ - next_tmr = get_next_timer_interrupt(basejiff, basemono); - ts->next_timer = next_tmr; - /* Take the next rcu event into account */ - next_tick = next_rcu < next_tmr ? next_rcu : next_tmr; + next_tick = get_next_timer_interrupt(basejiff, basemono); + ts->next_timer = next_tick; } /* From 0345691b24c076655ce8f0f4bfd24cba3467ccbd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker <frederic@kernel.org> Date: Tue, 8 Feb 2022 17:16:34 +0100 Subject: [PATCH 3/4] tick/rcu: Stop allowing RCU_SOFTIRQ in idle RCU_SOFTIRQ used to be special in that it could be raised on purpose within the idle path to prevent from stopping the tick. Some code still prevents from unnecessary warnings related to this specific behaviour while entering in dynticks-idle mode. However the nohz layout has changed quite a bit in ten years, and the removal of CONFIG_RCU_FAST_NO_HZ has been the final straw to this safe-conduct. Now the RCU_SOFTIRQ vector is expected to be raised from sane places. A remaining corner case is admitted though when the vector is invoked in fragile hotplug path. Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Paul Menzel <pmenzel@molgen.mpg.de> --- include/linux/interrupt.h | 8 ++++++- kernel/time/tick-sched.c | 50 +++++++++++++++++++++++++++++++-------- 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9367f1cb2e3c4..9613326d2f8af 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -579,7 +579,13 @@ enum NR_SOFTIRQS }; -#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ)) +/* + * Ignoring the RCU vector after ksoftirqd is parked is fine + * because: + * 1) rcutree_migrate_callbacks() takes care of the queue. + * 2) rcu_report_dead() reports the final quiescent states. + */ +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 566ad5bd83e96..2d76c91b85de4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -999,6 +999,45 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) __tick_nohz_full_update_tick(ts, ktime_get()); } +/* + * A pending softirq outside an IRQ (or softirq disabled section) context + * should be waiting for ksoftirqd to handle it. Therefore we shouldn't + * reach here due to the need_resched() early check in can_stop_idle_tick(). + * + * However if we are between CPUHP_AP_SMPBOOT_THREADS and CPU_TEARDOWN_CPU on the + * cpu_down() process, softirqs can still be raised while ksoftirqd is parked, + * triggering the below since wakep_softirqd() is ignored. + * + */ +static bool report_idle_softirq(void) +{ + static int ratelimit; + unsigned int pending = local_softirq_pending(); + + if (likely(!pending)) + return false; + + /* Some softirqs claim to be safe against hotplug and ksoftirqd parking */ + if (!cpu_active(smp_processor_id())) { + pending &= ~SOFTIRQ_HOTPLUG_SAFE_MASK; + if (!pending) + return false; + } + + if (ratelimit < 10) + return false; + + /* On RT, softirqs handling may be waiting on some lock */ + if (!local_bh_blocked()) + return false; + + pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n", + pending); + ratelimit++; + + return true; +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -1025,17 +1064,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (need_resched()) return false; - if (unlikely(local_softirq_pending())) { - static int ratelimit; - - if (ratelimit < 10 && !local_bh_blocked() && - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { - pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", - (unsigned int) local_softirq_pending()); - ratelimit++; - } + if (unlikely(report_idle_softirq())) return false; - } if (tick_nohz_full_enabled()) { /* From f96272a90d9eaea9933aaab704ddbd258feb3841 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker <frederic@kernel.org> Date: Tue, 8 Feb 2022 17:16:35 +0100 Subject: [PATCH 4/4] lib/irq_poll: Declare IRQ_POLL softirq vector as ksoftirqd-parking safe The following warning may appear while setting a CPU down: NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #20!!! The IRQ_POLL_SOFTIRQ vector can be raised during the hotplug cpu_down() path after ksoftirqd is parked and before the CPU actually dies. However this is handled afterward at the CPUHP_IRQ_POLL_DEAD stage where the queue gets migrated. Hence this warning can be considered spurious and the vector can join the "hotplug-safe" list. Reported-and-tested-by: Paul Menzel <pmenzel@molgen.mpg.de> Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Paul Menzel <pmenzel@molgen.mpg.de> --- include/linux/interrupt.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9613326d2f8af..f40754caaefa4 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -580,12 +580,15 @@ enum }; /* - * Ignoring the RCU vector after ksoftirqd is parked is fine - * because: - * 1) rcutree_migrate_callbacks() takes care of the queue. + * The following vectors can be safely ignored after ksoftirqd is parked: + * + * _ RCU: + * 1) rcutree_migrate_callbacks() migrates the queue. * 2) rcu_report_dead() reports the final quiescent states. + * + * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue */ -#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ)) +#define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(RCU_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ)) /* map softirq index to softirq name. update 'softirq_to_name' in * kernel/softirq.c when adding a new softirq.