Skip to content

Commit

Permalink
Merge branch 'stall.2023.01.09a' into HEAD
Browse files Browse the repository at this point in the history
stall.2023.01.09a: RCU CPU stall-warning updates.
  • Loading branch information
Paul E. McKenney committed Feb 3, 2023
2 parents 8e1704b + 84ec7c2 commit bba8d3d
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 5 deletions.
6 changes: 6 additions & 0 deletions Documentation/admin-guide/kernel-parameters.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5113,6 +5113,12 @@
rcupdate.rcu_cpu_stall_timeout to be used (after
conversion from seconds to milliseconds).

rcupdate.rcu_cpu_stall_cputime= [KNL]
Provide statistics on the cputime and the number of
interrupts and task switches during the sampling
period. For multiple continuous RCU stalls, all
sampling periods begin at half of the first RCU stall
timeout.

rcupdate.rcu_exp_stall_task_details= [KNL]
Print stack dumps of any tasks blocking the
current expedited RCU grace period during an
Expand Down
14 changes: 13 additions & 1 deletion include/linux/kernel_stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
#define kstat_cpu(cpu) per_cpu(kstat, cpu)
#define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu)

extern unsigned long long nr_context_switches_cpu(int cpu);
extern unsigned long long nr_context_switches(void);

extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
Expand All @@ -67,6 +68,17 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
return kstat_cpu(cpu).softirqs[irq];
}

/*
 * Add up the per-vector softirq counters of @cpu, as reported by
 * kstat_softirqs_cpu() for each of the NR_SOFTIRQS vectors.
 *
 * The total is accumulated in an unsigned int and therefore wraps on
 * overflow; the difference between two samples is still meaningful as
 * long as fewer than 2^32 softirqs occurred in between.
 */
static inline unsigned int kstat_cpu_softirqs_sum(int cpu)
{
	unsigned int total = 0;
	int vec = 0;

	while (vec < NR_SOFTIRQS) {
		total += kstat_softirqs_cpu(vec, cpu);
		vec++;
	}

	return total;
}

/*
* Number of interrupts per specific IRQ source, since bootup
*/
Expand All @@ -75,7 +87,7 @@ extern unsigned int kstat_irqs_usr(unsigned int irq);
/*
* Number of interrupts per cpu, since bootup
*/
static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
static inline unsigned long kstat_cpu_irqs_sum(unsigned int cpu)
{
	/* Running interrupt total stored in the kstat of @cpu. */
	unsigned long total = kstat_cpu(cpu).irqs_sum;

	return total;
}
Expand Down
15 changes: 14 additions & 1 deletion kernel/rcu/Kconfig.debug
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ config RCU_CPU_STALL_TIMEOUT
config RCU_EXP_CPU_STALL_TIMEOUT
int "Expedited RCU CPU stall timeout in milliseconds"
depends on RCU_STALL_COMMON
range 0 21000
range 0 300000
default 0
help
If a given expedited RCU grace period extends more than the
Expand All @@ -92,6 +92,19 @@ config RCU_EXP_CPU_STALL_TIMEOUT
says to use the RCU_CPU_STALL_TIMEOUT value converted from
seconds to milliseconds.

config RCU_CPU_STALL_CPUTIME
bool "Provide additional RCU stall debug information"
depends on RCU_STALL_COMMON
default n
help
Collect statistics during the sampling period, such as the number of
hard interrupts, soft interrupts, and task switches, and the cputime
of hard interrupts, soft interrupts, and kernel tasks, and add them
to the RCU stall report. For multiple continuous RCU stalls, all
sampling periods begin at half of the first RCU stall timeout.
The boot option rcupdate.rcu_cpu_stall_cputime has the same function
as this option, and overrides it when specified.

config RCU_TRACE
bool "Enable tracing for RCU"
depends on DEBUG_KERNEL
Expand Down
1 change: 1 addition & 0 deletions kernel/rcu/rcu.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_exp_cpu_stall_timeout;
extern int rcu_cpu_stall_cputime;
extern bool rcu_exp_stall_task_details __read_mostly;
int rcu_jiffies_till_stall_check(void);
int rcu_exp_jiffies_till_stall_check(void);
Expand Down
18 changes: 18 additions & 0 deletions kernel/rcu/tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,24 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
rdp->rcu_iw_gp_seq = rnp->gp_seq;
irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
}

if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) {
int cpu = rdp->cpu;
struct rcu_snap_record *rsrp;
struct kernel_cpustat *kcsp;

kcsp = &kcpustat_cpu(cpu);

rsrp = &rdp->snap_record;
rsrp->cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
rsrp->cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
rsrp->jiffies = jiffies;
rsrp->gp_seq = rdp->gp_seq;
}
}

return 0;
Expand Down
19 changes: 19 additions & 0 deletions kernel/rcu/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,23 @@ union rcu_noqs {
u16 s; /* Set of bits, aggregate OR here. */
};

/*
* Record the snapshot of the core stats at half of the first RCU stall timeout.
* The member gp_seq is used to ensure that all members are updated only once
* during the sampling period. The snapshot is taken only if this gp_seq is not
* equal to rdp->gp_seq.
*/
struct rcu_snap_record {
/*
 * Copy of rdp->gp_seq at the time the snapshot was taken.  The
 * snapshot is refreshed only while this differs from rdp->gp_seq,
 * and it is reported only while the two are equal.
 */
unsigned long gp_seq; /* Track rdp->gp_seq counter */
/* The three cputime_* members hold kcpustat_field() values. */
u64 cputime_irq; /* Accumulated cputime of hard irqs */
u64 cputime_softirq;/* Accumulated cputime of soft irqs */
u64 cputime_system; /* Accumulated cputime of kernel tasks */
/* The nr_* members hold counter values; reports print deltas. */
unsigned long nr_hardirqs; /* Accumulated number of hard irqs */
unsigned int nr_softirqs; /* Accumulated number of soft irqs */
unsigned long long nr_csw; /* Accumulated number of task switches */
/* Value of jiffies when the snapshot was taken. */
unsigned long jiffies; /* Track jiffies value */
};

/* Per-CPU data for read-copy update. */
struct rcu_data {
/* 1) quiescent-state and grace-period handling : */
Expand Down Expand Up @@ -262,6 +279,8 @@ struct rcu_data {
short rcu_onl_gp_flags; /* ->gp_flags at last online. */
unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */
struct rcu_snap_record snap_record; /* Snapshot of core stats at half of */
/* the first RCU stall timeout */

long lazy_len; /* Length of buffered lazy callbacks. */
int cpu;
Expand Down
37 changes: 34 additions & 3 deletions kernel/rcu/tree_stall.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ int rcu_exp_jiffies_till_stall_check(void)
// CONFIG_RCU_EXP_CPU_STALL_TIMEOUT, so check the allowed range.
// The minimum clamped value is "2UL", because at least one full
// tick has to be guaranteed.
till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 21UL * HZ);
till_stall_check = clamp(msecs_to_jiffies(cpu_stall_timeout), 2UL, 300UL * HZ);

if (cpu_stall_timeout && jiffies_to_msecs(till_stall_check) != cpu_stall_timeout)
WRITE_ONCE(rcu_exp_cpu_stall_timeout, jiffies_to_msecs(till_stall_check));
Expand Down Expand Up @@ -428,6 +428,35 @@ static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp
return j > 2 * HZ;
}

static void print_cpu_stat_info(int cpu)
{
struct rcu_snap_record rsr, *rsrp;
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
struct kernel_cpustat *kcsp = &kcpustat_cpu(cpu);

if (!rcu_cpu_stall_cputime)
return;

rsrp = &rdp->snap_record;
if (rsrp->gp_seq != rdp->gp_seq)
return;

rsr.cputime_irq = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
rsr.cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
rsr.cputime_system = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);

pr_err("\t hardirqs softirqs csw/system\n");
pr_err("\t number: %8ld %10d %12lld\n",
kstat_cpu_irqs_sum(cpu) - rsrp->nr_hardirqs,
kstat_cpu_softirqs_sum(cpu) - rsrp->nr_softirqs,
nr_context_switches_cpu(cpu) - rsrp->nr_csw);
pr_err("\tcputime: %8lld %10lld %12lld ==> %d(ms)\n",
div_u64(rsr.cputime_irq - rsrp->cputime_irq, NSEC_PER_MSEC),
div_u64(rsr.cputime_softirq - rsrp->cputime_softirq, NSEC_PER_MSEC),
div_u64(rsr.cputime_system - rsrp->cputime_system, NSEC_PER_MSEC),
jiffies_to_msecs(jiffies - rsrp->jiffies));
}

/*
* Print out diagnostic information for the specified stalled CPU.
*
Expand Down Expand Up @@ -484,6 +513,8 @@ static void print_cpu_stall_info(int cpu)
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
rcuc_starved ? buf : "",
falsepositive ? " (false positive?)" : "");

print_cpu_stat_info(cpu);
}

/* Complain about starvation of grace-period kthread. */
Expand Down Expand Up @@ -588,7 +619,7 @@ static void print_other_cpu_stall(unsigned long gp_seq, unsigned long gps)

for_each_possible_cpu(cpu)
totqlen += rcu_get_n_cbs_cpu(cpu);
pr_cont("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
pr_err("\t(detected by %d, t=%ld jiffies, g=%ld, q=%lu ncpus=%d)\n",
smp_processor_id(), (long)(jiffies - gps),
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);
if (ndetected) {
Expand Down Expand Up @@ -649,7 +680,7 @@ static void print_cpu_stall(unsigned long gps)
raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
for_each_possible_cpu(cpu)
totqlen += rcu_get_n_cbs_cpu(cpu);
pr_cont("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
pr_err("\t(t=%lu jiffies g=%ld q=%lu ncpus=%d)\n",
jiffies - gps,
(long)rcu_seq_current(&rcu_state.gp_seq), totqlen, rcu_state.n_online_cpus);

Expand Down
2 changes: 2 additions & 0 deletions kernel/rcu/update.c
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,8 @@ int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
module_param(rcu_cpu_stall_timeout, int, 0644);
int rcu_exp_cpu_stall_timeout __read_mostly = CONFIG_RCU_EXP_CPU_STALL_TIMEOUT;
module_param(rcu_exp_cpu_stall_timeout, int, 0644);
/*
 * Nonzero enables the extra cputime/interrupt/context-switch statistics
 * in RCU CPU stall reports.  The initial value follows
 * CONFIG_RCU_CPU_STALL_CPUTIME, and the variable is also registered as
 * an int module parameter with mode 0644.
 */
int rcu_cpu_stall_cputime __read_mostly = IS_ENABLED(CONFIG_RCU_CPU_STALL_CPUTIME);
module_param(rcu_cpu_stall_cputime, int, 0644);
bool rcu_exp_stall_task_details __read_mostly;
module_param(rcu_exp_stall_task_details, bool, 0644);
#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
Expand Down
5 changes: 5 additions & 0 deletions kernel/sched/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -5282,6 +5282,11 @@ bool single_task_running(void)
}
EXPORT_SYMBOL(single_task_running);

/*
 * Return the ->nr_switches counter of the object that cpu_rq() yields
 * for the specified CPU.  The value is read directly; this function
 * itself takes no lock.
 */
unsigned long long nr_context_switches_cpu(int cpu)
{
return cpu_rq(cpu)->nr_switches;
}

unsigned long long nr_context_switches(void)
{
int i;
Expand Down

0 comments on commit bba8d3d

Please sign in to comment.