sched/core: add forced idle accounting for cgroups
Commit 4feee7d previously added per-task forced idle accounting. This
patch extends that accounting to cgroups.

rstat is used for cgroup accounting, except for the root cgroup, which
uses kcpustat so that reading root stats does not require an rstat
flush.

Only cgroup v2 is supported. As with the task accounting, the cgroup
accounting requires that schedstats be enabled.

Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lkml.kernel.org/r/20220629211426.3329954-1-joshdon@google.com
Josh Don authored and Peter Zijlstra committed Jul 4, 2022
1 parent b812fc9 commit 1fcf54d
Showing 5 changed files with 69 additions and 7 deletions.
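
With this patch applied, cpu.stat for a cgroup on the v2 hierarchy gains one
extra line when CONFIG_SCHED_CORE is enabled. The values below are
illustrative only, not taken from the commit:

    usage_usec 527831
    user_usec 361401
    system_usec 166430
    core_sched.force_idle_usec 1403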
include/linux/cgroup-defs.h (4 additions, 0 deletions)
@@ -287,6 +287,10 @@ struct css_set {
 
 struct cgroup_base_stat {
 	struct task_cputime cputime;
+
+#ifdef CONFIG_SCHED_CORE
+	u64 forceidle_sum;
+#endif
 };
 
 /*
include/linux/kernel_stat.h (7 additions, 0 deletions)
@@ -28,6 +28,9 @@ enum cpu_usage_stat {
 	CPUTIME_STEAL,
 	CPUTIME_GUEST,
 	CPUTIME_GUEST_NICE,
+#ifdef CONFIG_SCHED_CORE
+	CPUTIME_FORCEIDLE,
+#endif
 	NR_STATS,
 };
 
@@ -115,4 +118,8 @@ extern void account_process_tick(struct task_struct *, int user);
 
 extern void account_idle_ticks(unsigned long ticks);
 
+#ifdef CONFIG_SCHED_CORE
+extern void __account_forceidle_time(struct task_struct *tsk, u64 delta);
+#endif
+
 #endif /* _LINUX_KERNEL_STAT_H */
kernel/cgroup/rstat.c (38 additions, 6 deletions)
@@ -310,6 +310,9 @@ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
 	dst_bstat->cputime.utime += src_bstat->cputime.utime;
 	dst_bstat->cputime.stime += src_bstat->cputime.stime;
 	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
+#ifdef CONFIG_SCHED_CORE
+	dst_bstat->forceidle_sum += src_bstat->forceidle_sum;
+#endif
 }
 
 static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
@@ -318,6 +321,9 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
 	dst_bstat->cputime.utime -= src_bstat->cputime.utime;
 	dst_bstat->cputime.stime -= src_bstat->cputime.stime;
 	dst_bstat->cputime.sum_exec_runtime -= src_bstat->cputime.sum_exec_runtime;
+#ifdef CONFIG_SCHED_CORE
+	dst_bstat->forceidle_sum -= src_bstat->forceidle_sum;
+#endif
 }
 
 static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
@@ -398,6 +404,11 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
 	case CPUTIME_SOFTIRQ:
 		rstatc->bstat.cputime.stime += delta_exec;
 		break;
+#ifdef CONFIG_SCHED_CORE
+	case CPUTIME_FORCEIDLE:
+		rstatc->bstat.forceidle_sum += delta_exec;
+		break;
+#endif
 	default:
 		break;
 	}
@@ -411,8 +422,9 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
  * with how it is done by __cgroup_account_cputime_field for each bit of
  * cpu time attributed to a cgroup.
  */
-static void root_cgroup_cputime(struct task_cputime *cputime)
+static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
 {
+	struct task_cputime *cputime = &bstat->cputime;
 	int i;
 
 	cputime->stime = 0;
@@ -438,34 +450,54 @@ static void root_cgroup_cputime(struct task_cputime *cputime)
 		cputime->sum_exec_runtime += user;
 		cputime->sum_exec_runtime += sys;
 		cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL];
+
+#ifdef CONFIG_SCHED_CORE
+		bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
+#endif
 	}
 }
 
 void cgroup_base_stat_cputime_show(struct seq_file *seq)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 	u64 usage, utime, stime;
-	struct task_cputime cputime;
+	struct cgroup_base_stat bstat;
+#ifdef CONFIG_SCHED_CORE
+	u64 forceidle_time;
+#endif
 
 	if (cgroup_parent(cgrp)) {
 		cgroup_rstat_flush_hold(cgrp);
 		usage = cgrp->bstat.cputime.sum_exec_runtime;
 		cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
 			       &utime, &stime);
+#ifdef CONFIG_SCHED_CORE
+		forceidle_time = cgrp->bstat.forceidle_sum;
+#endif
 		cgroup_rstat_flush_release();
 	} else {
-		root_cgroup_cputime(&cputime);
-		usage = cputime.sum_exec_runtime;
-		utime = cputime.utime;
-		stime = cputime.stime;
+		root_cgroup_cputime(&bstat);
+		usage = bstat.cputime.sum_exec_runtime;
+		utime = bstat.cputime.utime;
+		stime = bstat.cputime.stime;
+#ifdef CONFIG_SCHED_CORE
+		forceidle_time = bstat.forceidle_sum;
+#endif
 	}
 
 	do_div(usage, NSEC_PER_USEC);
 	do_div(utime, NSEC_PER_USEC);
 	do_div(stime, NSEC_PER_USEC);
+#ifdef CONFIG_SCHED_CORE
+	do_div(forceidle_time, NSEC_PER_USEC);
+#endif
 
 	seq_printf(seq, "usage_usec %llu\n"
 		   "user_usec %llu\n"
		   "system_usec %llu\n",
 		   usage, utime, stime);
+
+#ifdef CONFIG_SCHED_CORE
+	seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time);
+#endif
 }
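
As a usage sketch (not part of this commit), the new field can be read from
userspace like any other cpu.stat entry. The cgroup name "mygroup" below is
hypothetical, and this assumes the v2 hierarchy is mounted at /sys/fs/cgroup;
the field only appears on CONFIG_SCHED_CORE kernels and only advances while
schedstats is enabled:

#include <stdio.h>

/*
 * Userspace sketch: scan a cgroup v2 cpu.stat file for the
 * core_sched.force_idle_usec field added by this patch. The cgroup
 * name "mygroup" is hypothetical.
 */
int main(void)
{
	char line[256];
	unsigned long long usec;
	FILE *f = fopen("/sys/fs/cgroup/mygroup/cpu.stat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}

	while (fgets(line, sizeof(line), f)) {
		/* Match the exact key written by cgroup_base_stat_cputime_show(). */
		if (sscanf(line, "core_sched.force_idle_usec %llu", &usec) == 1)
			printf("forced idle: %llu usec\n", usec);
	}

	fclose(f);
	return 0;
}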
kernel/sched/core_sched.c (5 additions, 1 deletion)
@@ -277,7 +277,11 @@ void __sched_core_account_forceidle(struct rq *rq)
 		if (p == rq_i->idle)
 			continue;
 
-		__schedstat_add(p->stats.core_forceidle_sum, delta);
+		/*
+		 * Note: this will account forceidle to the current cpu, even
+		 * if it comes from our SMT sibling.
+		 */
+		__account_forceidle_time(p, delta);
 	}
 }
 
kernel/sched/cputime.c (15 additions, 0 deletions)
@@ -226,6 +226,21 @@ void account_idle_time(u64 cputime)
 	cpustat[CPUTIME_IDLE] += cputime;
 }
 
+
+#ifdef CONFIG_SCHED_CORE
+/*
+ * Account for forceidle time due to core scheduling.
+ *
+ * REQUIRES: schedstat is enabled.
+ */
+void __account_forceidle_time(struct task_struct *p, u64 delta)
+{
+	__schedstat_add(p->stats.core_forceidle_sum, delta);
+
+	task_group_account_field(p, CPUTIME_FORCEIDLE, delta);
+}
+#endif
+
 /*
  * When a guest is interrupted for a longer amount of time, missed clock
  * ticks are not redelivered later. Due to that, this function may on
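
For context, task_group_account_field() (not modified by this commit) is what
fans the delta out to both sinks described in the commit message. Roughly,
simplified from the kernel sources:

static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
	/* The root cgroup read path uses kcpustat, so charge it directly... */
	__this_cpu_add(kernel_cpustat.cpustat[index], tmp);

	/* ...while non-root cgroups accumulate the delta through rstat. */
	cgroup_account_cputime_field(p, index, tmp);
}

This split is why the root read path in rstat.c above can take
CPUTIME_FORCEIDLE straight from kcpustat without an rstat flush.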
