Skip to content

Commit

Permalink
ntp: Fix leap-second hrtimer livelock
Browse files Browse the repository at this point in the history
This is a backport of 6b43ae8

This should have been backported when it was commited, but I
mistook the problem as requiring the ntp_lock changes
that landed in 3.4 in order for it to occur.

Unfortunately the same issue can happen (with only one cpu)
as follows:
do_adjtimex()
 write_seqlock_irq(&xtime_lock);
  process_adjtimex_modes()
   process_adj_status()
    ntp_start_leap_timer()
     hrtimer_start()
      hrtimer_reprogram()
       tick_program_event()
        clockevents_program_event()
         ktime_get()
          seq = req_seqbegin(xtime_lock); [DEADLOCK]

This deadlock will no always occur, as it requires the
leap_timer to force a hrtimer_reprogram which only happens
if its set and there's no sooner timer to expire.

NOTE: This patch, being faithful to the original commit,
introduces a bug (we don't update wall_to_monotonic),
which will be resovled by backporting a following fix.

Original commit message below:

Since commit 7dffa3c the ntp
subsystem has used an hrtimer for triggering the leapsecond
adjustment. However, this can cause a potential livelock.

Thomas diagnosed this as the following pattern:
CPU 0                                                    CPU 1
do_adjtimex()
  spin_lock_irq(&ntp_lock);
    process_adjtimex_modes();				 timer_interrupt()
      process_adj_status();                                do_timer()
        ntp_start_leap_timer();                             write_lock(&xtime_lock);
          hrtimer_start();                                  update_wall_time();
             hrtimer_reprogram();                            ntp_tick_length()
               tick_program_event()                            spin_lock(&ntp_lock);
                 clockevents_program_event()
		   ktime_get()
                     seq = req_seqbegin(xtime_lock);

This patch tries to avoid the problem by reverting back to not using
an hrtimer to inject leapseconds, and instead we handle the leapsecond
processing in the second_overflow() function.

The downside to this change is that on systems that support highres
timers, the leap second processing will occur on a HZ tick boundary,
(ie: ~1-10ms, depending on HZ)  after the leap second instead of
possibly sooner (~34us in my tests w/ x86_64 lapic).

This patch applies on top of tip/timers/core.

CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Diagnoised-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
John Stultz authored and Greg Kroah-Hartman committed Jul 19, 2012
1 parent 31b83ef commit 9c24771
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 94 deletions.
2 changes: 1 addition & 1 deletion include/linux/timex.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ static inline int ntp_synced(void)
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 tick_length;

extern void second_overflow(void);
extern int second_overflow(unsigned long secs);
extern void update_ntp_one_tick(void);
extern int do_adjtimex(struct timex *);
extern void hardpps(const struct timespec *, const struct timespec *);
Expand Down
122 changes: 40 additions & 82 deletions kernel/time/ntp.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ unsigned long tick_nsec;
u64 tick_length;
static u64 tick_length_base;

static struct hrtimer leap_timer;

#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
Expand Down Expand Up @@ -350,60 +348,60 @@ void ntp_clear(void)
}

/*
* Leap second processing. If in leap-insert state at the end of the
* day, the system clock is set back one second; if in leap-delete
* state, the system clock is set ahead one second.
* this routine handles the overflow of the microsecond field
*
* The tricky bits of code to handle the accurate clock support
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
* They were originally developed for SUN and DEC kernels.
* All the kudos should go to Dave for this stuff.
*
* Also handles leap second processing, and returns leap offset
*/
static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
int second_overflow(unsigned long secs)
{
enum hrtimer_restart res = HRTIMER_NORESTART;

write_seqlock(&xtime_lock);
int leap = 0;
s64 delta;

/*
* Leap second processing. If in leap-insert state at the end of the
* day, the system clock is set back one second; if in leap-delete
* state, the system clock is set ahead one second.
*/
switch (time_state) {
case TIME_OK:
if (time_status & STA_INS)
time_state = TIME_INS;
else if (time_status & STA_DEL)
time_state = TIME_DEL;
break;
case TIME_INS:
timekeeping_leap_insert(-1);
time_state = TIME_OOP;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
res = HRTIMER_RESTART;
if (secs % 86400 == 0) {
leap = -1;
time_state = TIME_OOP;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
}
break;
case TIME_DEL:
timekeeping_leap_insert(1);
time_tai--;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
if ((secs + 1) % 86400 == 0) {
leap = 1;
time_tai--;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
}
break;
case TIME_OOP:
time_tai++;
time_state = TIME_WAIT;
/* fall through */
break;

case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
}

write_sequnlock(&xtime_lock);

return res;
}

/*
* this routine handles the overflow of the microsecond field
*
* The tricky bits of code to handle the accurate clock support
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
* They were originally developed for SUN and DEC kernels.
* All the kudos should go to Dave for this stuff.
*/
void second_overflow(void)
{
s64 delta;

/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
Expand All @@ -423,23 +421,25 @@ void second_overflow(void)
pps_dec_valid();

if (!time_adjust)
return;
goto out;

if (time_adjust > MAX_TICKADJ) {
time_adjust -= MAX_TICKADJ;
tick_length += MAX_TICKADJ_SCALED;
return;
goto out;
}

if (time_adjust < -MAX_TICKADJ) {
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
return;
goto out;
}

tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
<< NTP_SCALE_SHIFT;
time_adjust = 0;
out:
return leap;
}

#ifdef CONFIG_GENERIC_CMOS_UPDATE
Expand Down Expand Up @@ -501,27 +501,6 @@ static void notify_cmos_timer(void)
static inline void notify_cmos_timer(void) { }
#endif

/*
* Start the leap seconds timer:
*/
static inline void ntp_start_leap_timer(struct timespec *ts)
{
long now = ts->tv_sec;

if (time_status & STA_INS) {
time_state = TIME_INS;
now += 86400 - now % 86400;
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);

return;
}

if (time_status & STA_DEL) {
time_state = TIME_DEL;
now += 86400 - (now + 1) % 86400;
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
}
}

/*
* Propagate a new txc->status value into the NTP state:
Expand All @@ -546,22 +525,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;

switch (time_state) {
case TIME_OK:
ntp_start_leap_timer(ts);
break;
case TIME_INS:
case TIME_DEL:
time_state = TIME_OK;
ntp_start_leap_timer(ts);
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
case TIME_OOP:
hrtimer_restart(&leap_timer);
break;
}
}
/*
* Called with the xtime lock held, so we can access and modify
Expand Down Expand Up @@ -643,9 +606,6 @@ int do_adjtimex(struct timex *txc)
(txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ))
return -EINVAL;

if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
hrtimer_cancel(&leap_timer);
}

if (txc->modes & ADJ_SETOFFSET) {
Expand Down Expand Up @@ -967,6 +927,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
ntp_clear();
hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
leap_timer.function = ntp_leap_second;
}
18 changes: 7 additions & 11 deletions kernel/time/timekeeping.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,6 @@ static struct timespec raw_time;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;

/* must hold xtime_lock */
void timekeeping_leap_insert(int leapsecond)
{
xtime.tv_sec += leapsecond;
wall_to_monotonic.tv_sec -= leapsecond;
update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
timekeeper.mult);
}

/**
* timekeeping_forward_now - update clock to the current time
*
Expand Down Expand Up @@ -828,9 +819,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)

timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
while (timekeeper.xtime_nsec >= nsecps) {
int leap;
timekeeper.xtime_nsec -= nsecps;
xtime.tv_sec++;
second_overflow();
leap = second_overflow(xtime.tv_sec);
xtime.tv_sec += leap;
}

/* Accumulate raw time */
Expand Down Expand Up @@ -936,9 +929,12 @@ static void update_wall_time(void)
* xtime.tv_nsec isn't larger then NSEC_PER_SEC
*/
if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
int leap;
xtime.tv_nsec -= NSEC_PER_SEC;
xtime.tv_sec++;
second_overflow();
leap = second_overflow(xtime.tv_sec);
xtime.tv_sec += leap;

}

/* check to see if there is a new clocksource to use */
Expand Down

0 comments on commit 9c24771

Please sign in to comment.