ntp: Fix leap-second hrtimer livelock
Since commit 7dffa3c673
the ntp
subsystem has used an hrtimer for triggering the leapsecond
adjustment. However, this can cause a potential livelock.
Thomas diagnosed this as the following pattern:
CPU 0 CPU 1
do_adjtimex()
spin_lock_irq(&ntp_lock);
process_adjtimex_modes(); timer_interrupt()
process_adj_status(); do_timer()
ntp_start_leap_timer(); write_lock(&xtime_lock);
hrtimer_start(); update_wall_time();
hrtimer_reprogram(); ntp_tick_length()
tick_program_event() spin_lock(&ntp_lock);
clockevents_program_event()
ktime_get()
seq = req_seqbegin(xtime_lock);
This patch tries to avoid the problem by reverting back to not using
an hrtimer to inject leapseconds, and instead we handle the leapsecond
processing in the second_overflow() function.
The downside to this change is that on systems that support highres
timers, the leap second processing will occur on a HZ tick boundary,
(ie: ~1-10ms, depending on HZ) after the leap second instead of
possibly sooner (~34us in my tests w/ x86_64 lapic).
This patch applies on top of tip/timers/core.
CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Diagnoised-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
This commit is contained in:
parent
57779dc2b3
commit
6b43ae8a61
@ -252,7 +252,7 @@ extern void ntp_clear(void);
|
|||||||
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
|
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
|
||||||
extern u64 ntp_tick_length(void);
|
extern u64 ntp_tick_length(void);
|
||||||
|
|
||||||
extern void second_overflow(void);
|
extern int second_overflow(unsigned long secs);
|
||||||
extern int do_adjtimex(struct timex *);
|
extern int do_adjtimex(struct timex *);
|
||||||
extern void hardpps(const struct timespec *, const struct timespec *);
|
extern void hardpps(const struct timespec *, const struct timespec *);
|
||||||
|
|
||||||
|
@ -34,8 +34,6 @@ unsigned long tick_nsec;
|
|||||||
static u64 tick_length;
|
static u64 tick_length;
|
||||||
static u64 tick_length_base;
|
static u64 tick_length_base;
|
||||||
|
|
||||||
static struct hrtimer leap_timer;
|
|
||||||
|
|
||||||
#define MAX_TICKADJ 500LL /* usecs */
|
#define MAX_TICKADJ 500LL /* usecs */
|
||||||
#define MAX_TICKADJ_SCALED \
|
#define MAX_TICKADJ_SCALED \
|
||||||
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
|
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
|
||||||
@ -380,57 +378,6 @@ u64 ntp_tick_length(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Leap second processing. If in leap-insert state at the end of the
|
|
||||||
* day, the system clock is set back one second; if in leap-delete
|
|
||||||
* state, the system clock is set ahead one second.
|
|
||||||
*/
|
|
||||||
static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
|
|
||||||
{
|
|
||||||
enum hrtimer_restart res = HRTIMER_NORESTART;
|
|
||||||
unsigned long flags;
|
|
||||||
int leap = 0;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&ntp_lock, flags);
|
|
||||||
switch (time_state) {
|
|
||||||
case TIME_OK:
|
|
||||||
break;
|
|
||||||
case TIME_INS:
|
|
||||||
leap = -1;
|
|
||||||
time_state = TIME_OOP;
|
|
||||||
printk(KERN_NOTICE
|
|
||||||
"Clock: inserting leap second 23:59:60 UTC\n");
|
|
||||||
hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
|
|
||||||
res = HRTIMER_RESTART;
|
|
||||||
break;
|
|
||||||
case TIME_DEL:
|
|
||||||
leap = 1;
|
|
||||||
time_tai--;
|
|
||||||
time_state = TIME_WAIT;
|
|
||||||
printk(KERN_NOTICE
|
|
||||||
"Clock: deleting leap second 23:59:59 UTC\n");
|
|
||||||
break;
|
|
||||||
case TIME_OOP:
|
|
||||||
time_tai++;
|
|
||||||
time_state = TIME_WAIT;
|
|
||||||
/* fall through */
|
|
||||||
case TIME_WAIT:
|
|
||||||
if (!(time_status & (STA_INS | STA_DEL)))
|
|
||||||
time_state = TIME_OK;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
spin_unlock_irqrestore(&ntp_lock, flags);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We have to call this outside of the ntp_lock to keep
|
|
||||||
* the proper locking hierarchy
|
|
||||||
*/
|
|
||||||
if (leap)
|
|
||||||
timekeeping_leap_insert(leap);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this routine handles the overflow of the microsecond field
|
* this routine handles the overflow of the microsecond field
|
||||||
*
|
*
|
||||||
@ -438,14 +385,58 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
|
|||||||
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
|
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
|
||||||
* They were originally developed for SUN and DEC kernels.
|
* They were originally developed for SUN and DEC kernels.
|
||||||
* All the kudos should go to Dave for this stuff.
|
* All the kudos should go to Dave for this stuff.
|
||||||
|
*
|
||||||
|
* Also handles leap second processing, and returns leap offset
|
||||||
*/
|
*/
|
||||||
void second_overflow(void)
|
int second_overflow(unsigned long secs)
|
||||||
{
|
{
|
||||||
s64 delta;
|
s64 delta;
|
||||||
|
int leap = 0;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
spin_lock_irqsave(&ntp_lock, flags);
|
spin_lock_irqsave(&ntp_lock, flags);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Leap second processing. If in leap-insert state at the end of the
|
||||||
|
* day, the system clock is set back one second; if in leap-delete
|
||||||
|
* state, the system clock is set ahead one second.
|
||||||
|
*/
|
||||||
|
switch (time_state) {
|
||||||
|
case TIME_OK:
|
||||||
|
if (time_status & STA_INS)
|
||||||
|
time_state = TIME_INS;
|
||||||
|
else if (time_status & STA_DEL)
|
||||||
|
time_state = TIME_DEL;
|
||||||
|
break;
|
||||||
|
case TIME_INS:
|
||||||
|
if (secs % 86400 == 0) {
|
||||||
|
leap = -1;
|
||||||
|
time_state = TIME_OOP;
|
||||||
|
printk(KERN_NOTICE
|
||||||
|
"Clock: inserting leap second 23:59:60 UTC\n");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case TIME_DEL:
|
||||||
|
if ((secs + 1) % 86400 == 0) {
|
||||||
|
leap = 1;
|
||||||
|
time_tai--;
|
||||||
|
time_state = TIME_WAIT;
|
||||||
|
printk(KERN_NOTICE
|
||||||
|
"Clock: deleting leap second 23:59:59 UTC\n");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case TIME_OOP:
|
||||||
|
time_tai++;
|
||||||
|
time_state = TIME_WAIT;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TIME_WAIT:
|
||||||
|
if (!(time_status & (STA_INS | STA_DEL)))
|
||||||
|
time_state = TIME_OK;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Bump the maxerror field */
|
/* Bump the maxerror field */
|
||||||
time_maxerror += MAXFREQ / NSEC_PER_USEC;
|
time_maxerror += MAXFREQ / NSEC_PER_USEC;
|
||||||
if (time_maxerror > NTP_PHASE_LIMIT) {
|
if (time_maxerror > NTP_PHASE_LIMIT) {
|
||||||
@ -481,8 +472,13 @@ void second_overflow(void)
|
|||||||
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
|
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
|
||||||
<< NTP_SCALE_SHIFT;
|
<< NTP_SCALE_SHIFT;
|
||||||
time_adjust = 0;
|
time_adjust = 0;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
out:
|
out:
|
||||||
spin_unlock_irqrestore(&ntp_lock, flags);
|
spin_unlock_irqrestore(&ntp_lock, flags);
|
||||||
|
|
||||||
|
return leap;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_GENERIC_CMOS_UPDATE
|
#ifdef CONFIG_GENERIC_CMOS_UPDATE
|
||||||
@ -544,27 +540,6 @@ static void notify_cmos_timer(void)
|
|||||||
static inline void notify_cmos_timer(void) { }
|
static inline void notify_cmos_timer(void) { }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* Start the leap seconds timer:
|
|
||||||
*/
|
|
||||||
static inline void ntp_start_leap_timer(struct timespec *ts)
|
|
||||||
{
|
|
||||||
long now = ts->tv_sec;
|
|
||||||
|
|
||||||
if (time_status & STA_INS) {
|
|
||||||
time_state = TIME_INS;
|
|
||||||
now += 86400 - now % 86400;
|
|
||||||
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (time_status & STA_DEL) {
|
|
||||||
time_state = TIME_DEL;
|
|
||||||
now += 86400 - (now + 1) % 86400;
|
|
||||||
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Propagate a new txc->status value into the NTP state:
|
* Propagate a new txc->status value into the NTP state:
|
||||||
@ -589,22 +564,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
|
|||||||
time_status &= STA_RONLY;
|
time_status &= STA_RONLY;
|
||||||
time_status |= txc->status & ~STA_RONLY;
|
time_status |= txc->status & ~STA_RONLY;
|
||||||
|
|
||||||
switch (time_state) {
|
|
||||||
case TIME_OK:
|
|
||||||
ntp_start_leap_timer(ts);
|
|
||||||
break;
|
|
||||||
case TIME_INS:
|
|
||||||
case TIME_DEL:
|
|
||||||
time_state = TIME_OK;
|
|
||||||
ntp_start_leap_timer(ts);
|
|
||||||
case TIME_WAIT:
|
|
||||||
if (!(time_status & (STA_INS | STA_DEL)))
|
|
||||||
time_state = TIME_OK;
|
|
||||||
break;
|
|
||||||
case TIME_OOP:
|
|
||||||
hrtimer_restart(&leap_timer);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* Called with the xtime lock held, so we can access and modify
|
* Called with the xtime lock held, so we can access and modify
|
||||||
@ -686,9 +645,6 @@ int do_adjtimex(struct timex *txc)
|
|||||||
(txc->tick < 900000/USER_HZ ||
|
(txc->tick < 900000/USER_HZ ||
|
||||||
txc->tick > 1100000/USER_HZ))
|
txc->tick > 1100000/USER_HZ))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
|
|
||||||
hrtimer_cancel(&leap_timer);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (txc->modes & ADJ_SETOFFSET) {
|
if (txc->modes & ADJ_SETOFFSET) {
|
||||||
@ -1010,6 +966,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
|
|||||||
void __init ntp_init(void)
|
void __init ntp_init(void)
|
||||||
{
|
{
|
||||||
ntp_clear();
|
ntp_clear();
|
||||||
hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
|
|
||||||
leap_timer.function = ntp_leap_second;
|
|
||||||
}
|
}
|
||||||
|
@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void timekeeping_leap_insert(int leapsecond)
|
|
||||||
{
|
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
write_seqlock_irqsave(&timekeeper.lock, flags);
|
|
||||||
timekeeper.xtime.tv_sec += leapsecond;
|
|
||||||
timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
|
|
||||||
timekeeping_update(false);
|
|
||||||
write_sequnlock_irqrestore(&timekeeper.lock, flags);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* timekeeping_forward_now - update clock to the current time
|
* timekeeping_forward_now - update clock to the current time
|
||||||
*
|
*
|
||||||
@ -969,9 +957,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
|
|||||||
|
|
||||||
timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
|
timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
|
||||||
while (timekeeper.xtime_nsec >= nsecps) {
|
while (timekeeper.xtime_nsec >= nsecps) {
|
||||||
|
int leap;
|
||||||
timekeeper.xtime_nsec -= nsecps;
|
timekeeper.xtime_nsec -= nsecps;
|
||||||
timekeeper.xtime.tv_sec++;
|
timekeeper.xtime.tv_sec++;
|
||||||
second_overflow();
|
leap = second_overflow(timekeeper.xtime.tv_sec);
|
||||||
|
timekeeper.xtime.tv_sec += leap;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Accumulate raw time */
|
/* Accumulate raw time */
|
||||||
@ -1082,9 +1072,11 @@ static void update_wall_time(void)
|
|||||||
* xtime.tv_nsec isn't larger then NSEC_PER_SEC
|
* xtime.tv_nsec isn't larger then NSEC_PER_SEC
|
||||||
*/
|
*/
|
||||||
if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
|
if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
|
||||||
|
int leap;
|
||||||
timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
|
timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
|
||||||
timekeeper.xtime.tv_sec++;
|
timekeeper.xtime.tv_sec++;
|
||||||
second_overflow();
|
leap = second_overflow(timekeeper.xtime.tv_sec);
|
||||||
|
timekeeper.xtime.tv_sec += leap;
|
||||||
}
|
}
|
||||||
|
|
||||||
timekeeping_update(false);
|
timekeeping_update(false);
|
||||||
|
Loading…
Reference in New Issue
Block a user