Skip to content

Commit

Permalink
sched/vtime: Bring up complete kcpustat accessor
Browse files Browse the repository at this point in the history
Many callsites want to fetch the values of system, user, user_nice, guest
or guest_nice kcpustat fields all together, or at least a pair of these.

In that case, calling kcpustat_field() for each requested field brings
unnecessary overhead when we could fetch all of them in a row.

So provide kcpustat_cpu_fetch() that fetches the whole kcpustat array
in a vtime safe way under the same RCU and seqcount block.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Link: https://lkml.kernel.org/r/20191121024430.19938-3-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Frederic Weisbecker authored and Ingo Molnar committed Nov 21, 2019
1 parent 5a1c955 commit 74722bb
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 20 deletions.
7 changes: 7 additions & 0 deletions include/linux/kernel_stat.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,19 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
/*
 * Variants used when CONFIG_VIRT_CPU_ACCOUNTING_GEN is set; both are
 * defined out of line in kernel/sched/cputime.c.
 */

/* Read the single cpustat field @usage on behalf of @cpu. */
extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
enum cpu_usage_stat usage, int cpu);
/* Copy the whole kernel_cpustat of @cpu into @dst in one call. */
extern void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu);
#else
/*
 * Fallback used when CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set: the
 * requested field is read straight out of @kcpustat. @cpu is accepted
 * for interface parity only and is not used here.
 */
static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
				 enum cpu_usage_stat usage, int cpu)
{
	const u64 *fields = kcpustat->cpustat;

	return fields[usage];
}

static inline void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
{
*dst = kcpustat_cpu(cpu);
}

#endif

extern void account_user_time(struct task_struct *, u64);
Expand Down
136 changes: 116 additions & 20 deletions kernel/sched/cputime.c
Original file line number Diff line number Diff line change
Expand Up @@ -912,6 +912,30 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
} while (read_seqcount_retry(&vtime->seqcount, seq));
}

/*
 * Decide whether @vtime may be sampled on behalf of @cpu.
 *
 * Returns 0 when it may, or -EAGAIN when the caller must fetch the
 * kcpustat task again.
 */
static int vtime_state_check(struct vtime *vtime, int cpu)
{
	/*
	 * vtime->cpu is neither @cpu nor -1: we raced against a context
	 * switch, so the kcpustat task has to be fetched again.
	 */
	if (!(vtime->cpu == cpu || vtime->cpu == -1))
		return -EAGAIN;

	/*
	 * An inactive vtime can mean two things:
	 * 1) We are seeing the scheduling out task (prev) or any past one.
	 * 2) We are seeing the scheduling in task (next) but it hasn't
	 *    passed through vtime_task_switch() yet, so the pending
	 *    cputime of the prev task may not be flushed yet.
	 *
	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
	 */
	return vtime->state == VTIME_INACTIVE ? -EAGAIN : 0;
}

static u64 kcpustat_user_vtime(struct vtime *vtime)
{
if (vtime->state == VTIME_USER)
Expand All @@ -933,26 +957,9 @@ static int kcpustat_field_vtime(u64 *cpustat,
do {
seq = read_seqcount_begin(&vtime->seqcount);

/*
* We raced against context switch, fetch the
* kcpustat task again.
*/
if (vtime->cpu != cpu && vtime->cpu != -1)
return -EAGAIN;

/*
* Two possible things here:
* 1) We are seeing the scheduling out task (prev) or any past one.
* 2) We are seeing the scheduling in task (next) but it hasn't
* passed though vtime_task_switch() yet so the pending
* cputime of the prev task may not be flushed yet.
*
* Case 1) is ok but 2) is not. So wait for a safe VTIME state.
*/
if (vtime->state == VTIME_INACTIVE)
return -EAGAIN;

err = 0;
err = vtime_state_check(vtime, cpu);
if (err < 0)
return err;

*val = cpustat[usage];

Expand Down Expand Up @@ -1025,4 +1032,93 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
}
}
EXPORT_SYMBOL_GPL(kcpustat_field);

/*
 * Build in @dst a copy of @src with the pending, not yet flushed vtime
 * of @tsk added to the matching cpustat fields.
 *
 * @dst: destination snapshot, fully overwritten on every attempt
 * @src: the kernel_cpustat to start from
 * @tsk: the task whose vtime is sampled
 * @cpu: the CPU the snapshot is taken for
 *
 * The sampling runs inside a seqcount read section on tsk->vtime and is
 * repeated until a consistent view is obtained.
 *
 * Returns 0 on success, or the negative error from vtime_state_check()
 * (-EAGAIN) when the caller has to look up the task again.
 */
static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
				    const struct kernel_cpustat *src,
				    struct task_struct *tsk, int cpu)
{
	struct vtime *vtime = &tsk->vtime;
	unsigned int seq;
	int err;

	do {
		u64 *cpustat;
		u64 delta;
		int state;

		seq = read_seqcount_begin(&vtime->seqcount);

		err = vtime_state_check(vtime, cpu);
		if (err < 0)
			return err;

		/*
		 * The seqcount read side is lockless, so vtime->state may
		 * change while we run. Sample it exactly once so that every
		 * test below, including the WARN, judges the same value
		 * instead of whatever a reload happens to observe. A sample
		 * that raced with a writer is thrown away by the retry check
		 * at the bottom of the loop.
		 */
		state = READ_ONCE(vtime->state);

		*dst = *src;
		cpustat = dst->cpustat;

		/* Task is sleeping, dead or idle, nothing to add */
		if (state < VTIME_SYS)
			continue;

		delta = vtime_delta(vtime);

		/*
		 * Task runs either in user (including guest) or kernel space,
		 * add pending nohz time to the right place.
		 */
		if (state == VTIME_SYS) {
			cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
		} else if (state == VTIME_USER) {
			if (task_nice(tsk) > 0)
				cpustat[CPUTIME_NICE] += vtime->utime + delta;
			else
				cpustat[CPUTIME_USER] += vtime->utime + delta;
		} else {
			WARN_ON_ONCE(state != VTIME_GUEST);
			/* Guest time is reported in the user fields as well */
			if (task_nice(tsk) > 0) {
				cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
				cpustat[CPUTIME_NICE] += vtime->gtime + delta;
			} else {
				cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
				cpustat[CPUTIME_USER] += vtime->gtime + delta;
			}
		}
	} while (read_seqcount_retry(&vtime->seqcount, seq));

	return err;
}

/*
 * Fill @dst with the kernel_cpustat of @cpu.
 *
 * When vtime accounting is not enabled for @cpu the per-CPU structure is
 * copied as is. Otherwise the task currently installed in the runqueue
 * of @cpu is looked up under RCU and handed to
 * kcpustat_cpu_fetch_vtime(); the lookup is repeated, with a
 * cpu_relax() in between, for as long as that helper reports an error.
 */
void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
{
	const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
	struct task_struct *curr;
	struct rq *rq;
	int err;

	if (!vtime_accounting_enabled_cpu(cpu))
		goto raw_copy;

	rq = cpu_rq(cpu);

	do {
		rcu_read_lock();
		curr = rcu_dereference(rq->curr);
		if (WARN_ON_ONCE(!curr)) {
			/* No current task to sample: fall back to the raw values */
			rcu_read_unlock();
			goto raw_copy;
		}

		err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
		rcu_read_unlock();

		/* Let the remote CPU make progress before trying again */
		if (err)
			cpu_relax();
	} while (err);

	return;

raw_copy:
	*dst = *src;
}
EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);

#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */

0 comments on commit 74722bb

Please sign in to comment.