ntp: Fix leap-second hrtimer livelock
Since commit 7dffa3c673 the ntp
subsystem has used an hrtimer for triggering the leapsecond
adjustment. However, this can cause a potential livelock.
Thomas diagnosed this as the following pattern:
CPU 0                                                    CPU 1
do_adjtimex()
  spin_lock_irq(&ntp_lock);
    process_adjtimex_modes();				 timer_interrupt()
      process_adj_status();                                do_timer()
        ntp_start_leap_timer();                             write_lock(&xtime_lock);
          hrtimer_start();                                  update_wall_time();
             hrtimer_reprogram();                            ntp_tick_length()
               tick_program_event()                            spin_lock(&ntp_lock);
                 clockevents_program_event()
		   ktime_get()
                     seq = req_seqbegin(xtime_lock);
This patch tries to avoid the problem by reverting back to not using
an hrtimer to inject leapseconds, and instead we handle the leapsecond
processing in the second_overflow() function.
The downside to this change is that on systems that support highres
timers, the leap second processing will occur on a HZ tick boundary,
(ie: ~1-10ms, depending on HZ)  after the leap second instead of
possibly sooner (~34us in my tests w/ x86_64 lapic).
This patch applies on top of tip/timers/core.
CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Diagnoised-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
			
			
This commit is contained in:
		
							parent
							
								
									57779dc2b3
								
							
						
					
					
						commit
						6b43ae8a61
					
				| @ -252,7 +252,7 @@ extern void ntp_clear(void); | ||||
| /* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */ | ||||
| extern u64 ntp_tick_length(void); | ||||
| 
 | ||||
| extern void second_overflow(void); | ||||
| extern int second_overflow(unsigned long secs); | ||||
| extern int do_adjtimex(struct timex *); | ||||
| extern void hardpps(const struct timespec *, const struct timespec *); | ||||
| 
 | ||||
|  | ||||
| @ -34,8 +34,6 @@ unsigned long			tick_nsec; | ||||
| static u64			tick_length; | ||||
| static u64			tick_length_base; | ||||
| 
 | ||||
| static struct hrtimer		leap_timer; | ||||
| 
 | ||||
| #define MAX_TICKADJ		500LL		/* usecs */ | ||||
| #define MAX_TICKADJ_SCALED \ | ||||
| 	(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) | ||||
| @ -380,57 +378,6 @@ u64 ntp_tick_length(void) | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * Leap second processing. If in leap-insert state at the end of the | ||||
|  * day, the system clock is set back one second; if in leap-delete | ||||
|  * state, the system clock is set ahead one second. | ||||
|  */ | ||||
| static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) | ||||
| { | ||||
| 	enum hrtimer_restart res = HRTIMER_NORESTART; | ||||
| 	unsigned long flags; | ||||
| 	int leap = 0; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&ntp_lock, flags); | ||||
| 	switch (time_state) { | ||||
| 	case TIME_OK: | ||||
| 		break; | ||||
| 	case TIME_INS: | ||||
| 		leap = -1; | ||||
| 		time_state = TIME_OOP; | ||||
| 		printk(KERN_NOTICE | ||||
| 			"Clock: inserting leap second 23:59:60 UTC\n"); | ||||
| 		hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); | ||||
| 		res = HRTIMER_RESTART; | ||||
| 		break; | ||||
| 	case TIME_DEL: | ||||
| 		leap = 1; | ||||
| 		time_tai--; | ||||
| 		time_state = TIME_WAIT; | ||||
| 		printk(KERN_NOTICE | ||||
| 			"Clock: deleting leap second 23:59:59 UTC\n"); | ||||
| 		break; | ||||
| 	case TIME_OOP: | ||||
| 		time_tai++; | ||||
| 		time_state = TIME_WAIT; | ||||
| 		/* fall through */ | ||||
| 	case TIME_WAIT: | ||||
| 		if (!(time_status & (STA_INS | STA_DEL))) | ||||
| 			time_state = TIME_OK; | ||||
| 		break; | ||||
| 	} | ||||
| 	spin_unlock_irqrestore(&ntp_lock, flags); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We have to call this outside of the ntp_lock to keep | ||||
| 	 * the proper locking hierarchy | ||||
| 	 */ | ||||
| 	if (leap) | ||||
| 		timekeeping_leap_insert(leap); | ||||
| 
 | ||||
| 	return res; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * this routine handles the overflow of the microsecond field | ||||
|  * | ||||
| @ -438,14 +385,58 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) | ||||
|  * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame. | ||||
|  * They were originally developed for SUN and DEC kernels. | ||||
|  * All the kudos should go to Dave for this stuff. | ||||
|  * | ||||
|  * Also handles leap second processing, and returns leap offset | ||||
|  */ | ||||
| void second_overflow(void) | ||||
| int second_overflow(unsigned long secs) | ||||
| { | ||||
| 	s64 delta; | ||||
| 	int leap = 0; | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	spin_lock_irqsave(&ntp_lock, flags); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Leap second processing. If in leap-insert state at the end of the | ||||
| 	 * day, the system clock is set back one second; if in leap-delete | ||||
| 	 * state, the system clock is set ahead one second. | ||||
| 	 */ | ||||
| 	switch (time_state) { | ||||
| 	case TIME_OK: | ||||
| 		if (time_status & STA_INS) | ||||
| 			time_state = TIME_INS; | ||||
| 		else if (time_status & STA_DEL) | ||||
| 			time_state = TIME_DEL; | ||||
| 		break; | ||||
| 	case TIME_INS: | ||||
| 		if (secs % 86400 == 0) { | ||||
| 			leap = -1; | ||||
| 			time_state = TIME_OOP; | ||||
| 			printk(KERN_NOTICE | ||||
| 				"Clock: inserting leap second 23:59:60 UTC\n"); | ||||
| 		} | ||||
| 		break; | ||||
| 	case TIME_DEL: | ||||
| 		if ((secs + 1) % 86400 == 0) { | ||||
| 			leap = 1; | ||||
| 			time_tai--; | ||||
| 			time_state = TIME_WAIT; | ||||
| 			printk(KERN_NOTICE | ||||
| 				"Clock: deleting leap second 23:59:59 UTC\n"); | ||||
| 		} | ||||
| 		break; | ||||
| 	case TIME_OOP: | ||||
| 		time_tai++; | ||||
| 		time_state = TIME_WAIT; | ||||
| 		break; | ||||
| 
 | ||||
| 	case TIME_WAIT: | ||||
| 		if (!(time_status & (STA_INS | STA_DEL))) | ||||
| 			time_state = TIME_OK; | ||||
| 		break; | ||||
| 	} | ||||
| 
 | ||||
| 
 | ||||
| 	/* Bump the maxerror field */ | ||||
| 	time_maxerror += MAXFREQ / NSEC_PER_USEC; | ||||
| 	if (time_maxerror > NTP_PHASE_LIMIT) { | ||||
| @ -481,8 +472,13 @@ void second_overflow(void) | ||||
| 	tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) | ||||
| 							 << NTP_SCALE_SHIFT; | ||||
| 	time_adjust = 0; | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| out: | ||||
| 	spin_unlock_irqrestore(&ntp_lock, flags); | ||||
| 
 | ||||
| 	return leap; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_GENERIC_CMOS_UPDATE | ||||
| @ -544,27 +540,6 @@ static void notify_cmos_timer(void) | ||||
| static inline void notify_cmos_timer(void) { } | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  * Start the leap seconds timer: | ||||
|  */ | ||||
| static inline void ntp_start_leap_timer(struct timespec *ts) | ||||
| { | ||||
| 	long now = ts->tv_sec; | ||||
| 
 | ||||
| 	if (time_status & STA_INS) { | ||||
| 		time_state = TIME_INS; | ||||
| 		now += 86400 - now % 86400; | ||||
| 		hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||||
| 
 | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	if (time_status & STA_DEL) { | ||||
| 		time_state = TIME_DEL; | ||||
| 		now += 86400 - (now + 1) % 86400; | ||||
| 		hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Propagate a new txc->status value into the NTP state: | ||||
| @ -589,22 +564,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts) | ||||
| 	time_status &= STA_RONLY; | ||||
| 	time_status |= txc->status & ~STA_RONLY; | ||||
| 
 | ||||
| 	switch (time_state) { | ||||
| 	case TIME_OK: | ||||
| 		ntp_start_leap_timer(ts); | ||||
| 		break; | ||||
| 	case TIME_INS: | ||||
| 	case TIME_DEL: | ||||
| 		time_state = TIME_OK; | ||||
| 		ntp_start_leap_timer(ts); | ||||
| 	case TIME_WAIT: | ||||
| 		if (!(time_status & (STA_INS | STA_DEL))) | ||||
| 			time_state = TIME_OK; | ||||
| 		break; | ||||
| 	case TIME_OOP: | ||||
| 		hrtimer_restart(&leap_timer); | ||||
| 		break; | ||||
| 	} | ||||
| } | ||||
| /*
 | ||||
|  * Called with the xtime lock held, so we can access and modify | ||||
| @ -686,9 +645,6 @@ int do_adjtimex(struct timex *txc) | ||||
| 		    (txc->tick <  900000/USER_HZ || | ||||
| 		     txc->tick > 1100000/USER_HZ)) | ||||
| 			return -EINVAL; | ||||
| 
 | ||||
| 		if (txc->modes & ADJ_STATUS && time_state != TIME_OK) | ||||
| 			hrtimer_cancel(&leap_timer); | ||||
| 	} | ||||
| 
 | ||||
| 	if (txc->modes & ADJ_SETOFFSET) { | ||||
| @ -1010,6 +966,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); | ||||
| void __init ntp_init(void) | ||||
| { | ||||
| 	ntp_clear(); | ||||
| 	hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); | ||||
| 	leap_timer.function = ntp_leap_second; | ||||
| } | ||||
|  | ||||
| @ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp) | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| void timekeeping_leap_insert(int leapsecond) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	write_seqlock_irqsave(&timekeeper.lock, flags); | ||||
| 	timekeeper.xtime.tv_sec += leapsecond; | ||||
| 	timekeeper.wall_to_monotonic.tv_sec -= leapsecond; | ||||
| 	timekeeping_update(false); | ||||
| 	write_sequnlock_irqrestore(&timekeeper.lock, flags); | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * timekeeping_forward_now - update clock to the current time | ||||
|  * | ||||
| @ -969,9 +957,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | ||||
| 
 | ||||
| 	timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; | ||||
| 	while (timekeeper.xtime_nsec >= nsecps) { | ||||
| 		int leap; | ||||
| 		timekeeper.xtime_nsec -= nsecps; | ||||
| 		timekeeper.xtime.tv_sec++; | ||||
| 		second_overflow(); | ||||
| 		leap = second_overflow(timekeeper.xtime.tv_sec); | ||||
| 		timekeeper.xtime.tv_sec += leap; | ||||
| 	} | ||||
| 
 | ||||
| 	/* Accumulate raw time */ | ||||
| @ -1082,9 +1072,11 @@ static void update_wall_time(void) | ||||
| 	 * xtime.tv_nsec isn't larger then NSEC_PER_SEC | ||||
| 	 */ | ||||
| 	if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) { | ||||
| 		int leap; | ||||
| 		timekeeper.xtime.tv_nsec -= NSEC_PER_SEC; | ||||
| 		timekeeper.xtime.tv_sec++; | ||||
| 		second_overflow(); | ||||
| 		leap = second_overflow(timekeeper.xtime.tv_sec); | ||||
| 		timekeeper.xtime.tv_sec += leap; | ||||
| 	} | ||||
| 
 | ||||
| 	timekeeping_update(false); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user