timers, sched/clock: Avoid deadlock during read from NMI
Currently it is possible for an NMI (or FIQ on ARM) to come in
and read sched_clock() whilst update_sched_clock() has locked
the seqcount for writing. This results in the NMI handler
locking up when it calls raw_read_seqcount_begin().
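
To make the failure mode concrete, here is a simplified sketch of the
classic seqcount protocol that the old read path relied on (an odd
sequence count means a write is in progress). The helper names are
illustrative only, not the kernel's implementation:

/* Simplified seqcount sketch -- illustrative, not the kernel code. */
static volatile unsigned int seq;

static void write_begin_sketch(void) { seq++; /* odd: write in progress  */ }
static void write_end_sketch(void)   { seq++; /* even: data stable again */ }

static unsigned int read_begin_sketch(void)
{
	unsigned int s;

	/* Readers spin until no write is in progress. */
	while ((s = seq) & 1)
		;
	return s;
}

/*
 * The deadlock, all on one CPU:
 *
 *   update_sched_clock()
 *     write_begin_sketch();      <- seq becomes odd
 *     ... NMI (or FIQ) fires ...
 *       sched_clock()
 *         read_begin_sketch();   <- spins forever: the interrupted writer
 *                                   can never reach write_end_sketch()
 */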

This patch fixes the NMI safety issues by providing banked clock
data. This is a similar approach to the one used in Thomas
Gleixner's 4396e05("timekeeping: Provide fast and NMI safe
access to CLOCK_MONOTONIC").
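
For readers unfamiliar with the latch technique, below is a minimal
user-space sketch of the odd/even "banked data" idea, written with C11
atomics rather than the kernel's seqcount primitives. The names and the
simplified memory ordering are illustrative assumptions, not the code
added by this patch:

/* Minimal sketch of banked (latched) clock data -- illustrative only. */
#include <inttypes.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct snapshot {
	uint64_t epoch_ns;
	uint64_t epoch_cyc;
};

static _Atomic unsigned int seq;	/* lowest bit selects the bank     */
static struct snapshot bank[2];		/* [0] = normal copy, [1] = backup */

/* Writer: never makes readers spin, so an NMI reader cannot deadlock. */
static void publish(const struct snapshot *snap)
{
	bank[1] = *snap;	/* 1. fill the backup (odd) copy */
	atomic_fetch_add_explicit(&seq, 1, memory_order_release);
				/* 2. odd seq: steer readers to bank[1] */
	bank[0] = *snap;	/* 3. fill the normal (even) copy */
	atomic_fetch_add_explicit(&seq, 1, memory_order_release);
				/* 4. even seq: steer readers back to bank[0] */
}

/* Reader: picks a bank by the low bit of seq, retries if it raced. */
static struct snapshot read_snapshot(void)
{
	struct snapshot snap;
	unsigned int s;

	do {
		s = atomic_load_explicit(&seq, memory_order_acquire);
		snap = bank[s & 1];
		atomic_thread_fence(memory_order_acquire);
	} while (atomic_load_explicit(&seq, memory_order_relaxed) != s);

	return snap;
}

int main(void)
{
	struct snapshot s = { .epoch_ns = 1000000, .epoch_cyc = 42 };

	publish(&s);
	s = read_snapshot();
	printf("epoch_ns=%" PRIu64 " epoch_cyc=%" PRIu64 "\n",
	       s.epoch_ns, s.epoch_cyc);
	return 0;
}

In the patch itself this role is played by update_clock_read_data(),
which issues the two sequence bumps with raw_write_seqcount_latch(),
while sched_clock() keeps its existing retry loop and simply indexes
read_data[] with the low bit of the sequence count.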

Suggested-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@codeaurora.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/1427397806-20889-6-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Daniel Thompson authored and Ingo Molnar committed Mar 27, 2015
1 parent 9fee69a commit 1809bfa
 kernel/time/sched_clock.c | 103 ++++++++++++++++++++----------
 1 file changed, 68 insertions(+), 35 deletions(-)
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -47,19 +47,20 @@ struct clock_read_data {
  * struct clock_data - all data needed for sched_clock (including
  * registration of a new clock source)
  *
- * @seq:		Sequence counter for protecting updates.
+ * @seq:		Sequence counter for protecting updates. The lowest
+ *			bit is the index for @read_data.
  * @read_data:		Data required to read from sched_clock.
  * @wrap_kt:		Duration for which clock can run before wrapping
  * @rate:		Tick rate of the registered clock
  * @actual_read_sched_clock: Registered clock read function
  *
  * The ordering of this structure has been chosen to optimize cache
- * performance. In particular seq and read_data (combined) should fit
+ * performance. In particular seq and read_data[0] (combined) should fit
  * into a single 64 byte cache line.
  */
 struct clock_data {
 	seqcount_t seq;
-	struct clock_read_data read_data;
+	struct clock_read_data read_data[2];
 	ktime_t wrap_kt;
 	unsigned long rate;
 	u64 (*actual_read_sched_clock)(void);
@@ -80,10 +81,9 @@ static u64 notrace jiffy_sched_clock_read(void)
 }
 
 static struct clock_data cd ____cacheline_aligned = {
-	.read_data = { .mult = NSEC_PER_SEC / HZ,
-		       .read_sched_clock = jiffy_sched_clock_read, },
+	.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+			  .read_sched_clock = jiffy_sched_clock_read, },
 	.actual_read_sched_clock = jiffy_sched_clock_read,
-
 };
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
@@ -95,10 +95,11 @@ unsigned long long notrace sched_clock(void)
 {
 	u64 cyc, res;
 	unsigned long seq;
-	struct clock_read_data *rd = &cd.read_data;
+	struct clock_read_data *rd;
 
 	do {
-		seq = raw_read_seqcount_begin(&cd.seq);
+		seq = raw_read_seqcount(&cd.seq);
+		rd = cd.read_data + (seq & 1);
 
 		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
 		      rd->sched_clock_mask;
@@ -108,27 +109,51 @@ unsigned long long notrace sched_clock(void)
 	return res;
 }
 
+/*
+ * Updating the data required to read the clock.
+ *
+ * sched_clock will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
+ */
+static void update_clock_read_data(struct clock_read_data *rd)
+{
+	/* update the backup (odd) copy with the new data */
+	cd.read_data[1] = *rd;
+
+	/* steer readers towards the odd copy */
+	raw_write_seqcount_latch(&cd.seq);
+
+	/* now its safe for us to update the normal (even) copy */
+	cd.read_data[0] = *rd;
+
+	/* switch readers back to the even copy */
+	raw_write_seqcount_latch(&cd.seq);
+}
+
 /*
  * Atomically update the sched_clock epoch.
  */
 static void update_sched_clock(void)
 {
-	unsigned long flags;
 	u64 cyc;
 	u64 ns;
-	struct clock_read_data *rd = &cd.read_data;
+	struct clock_read_data rd;
+
+	rd = cd.read_data[0];
 
 	cyc = cd.actual_read_sched_clock();
-	ns = rd->epoch_ns +
-	     cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
-		       rd->mult, rd->shift);
-
-	raw_local_irq_save(flags);
-	raw_write_seqcount_begin(&cd.seq);
-	rd->epoch_ns = ns;
-	rd->epoch_cyc = cyc;
-	raw_write_seqcount_end(&cd.seq);
-	raw_local_irq_restore(flags);
+	ns = rd.epoch_ns +
+	     cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask,
+		       rd.mult, rd.shift);
+
+	rd.epoch_ns = ns;
+	rd.epoch_cyc = cyc;
+
+	update_clock_read_data(&rd);
 }
 
 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
@@ -145,7 +170,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 	u32 new_mult, new_shift;
 	unsigned long r;
 	char r_unit;
-	struct clock_read_data *rd = &cd.read_data;
+	struct clock_read_data rd;
 
 	if (cd.rate > rate)
 		return;
@@ -162,22 +187,23 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
 	cd.wrap_kt = ns_to_ktime(wrap);
 
+	rd = cd.read_data[0];
+
 	/* update epoch for new counter and update epoch_ns from old counter*/
 	new_epoch = read();
 	cyc = cd.actual_read_sched_clock();
-	ns = rd->epoch_ns +
-	     cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask,
-		       rd->mult, rd->shift);
+	ns = rd.epoch_ns +
+	     cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask,
+		       rd.mult, rd.shift);
 	cd.actual_read_sched_clock = read;
 
-	raw_write_seqcount_begin(&cd.seq);
-	rd->read_sched_clock = read;
-	rd->sched_clock_mask = new_mask;
-	rd->mult = new_mult;
-	rd->shift = new_shift;
-	rd->epoch_cyc = new_epoch;
-	rd->epoch_ns = ns;
-	raw_write_seqcount_end(&cd.seq);
+	rd.read_sched_clock = read;
+	rd.sched_clock_mask = new_mask;
+	rd.mult = new_mult;
+	rd.shift = new_shift;
+	rd.epoch_cyc = new_epoch;
+	rd.epoch_ns = ns;
+	update_clock_read_data(&rd);
 
 	r = rate;
 	if (r >= 4000000) {
@@ -227,15 +253,22 @@ void __init sched_clock_postinit(void)
  *
  * This function makes it appear to sched_clock() as if the clock
  * stopped counting at its last update.
+ *
+ * This function must only be called from the critical
+ * section in sched_clock(). It relies on the read_seqcount_retry()
+ * at the end of the critical section to be sure we observe the
+ * correct copy of epoch_cyc.
  */
 static u64 notrace suspended_sched_clock_read(void)
 {
-	return cd.read_data.epoch_cyc;
+	unsigned long seq = raw_read_seqcount(&cd.seq);
+
+	return cd.read_data[seq & 1].epoch_cyc;
 }
 
 static int sched_clock_suspend(void)
 {
-	struct clock_read_data *rd = &cd.read_data;
+	struct clock_read_data *rd = &cd.read_data[0];
 
 	update_sched_clock();
 	hrtimer_cancel(&sched_clock_timer);
@@ -245,7 +278,7 @@ static int sched_clock_suspend(void)
 
 static void sched_clock_resume(void)
 {
-	struct clock_read_data *rd = &cd.read_data;
+	struct clock_read_data *rd = &cd.read_data[0];
 
 	rd->epoch_cyc = cd.actual_read_sched_clock();
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
