diff options
-rw-r--r-- | drivers/rtc/Makefile | 1 | ||||
-rw-r--r-- | drivers/rtc/class.c | 9 | ||||
-rw-r--r-- | drivers/rtc/rtc-cmos.c | 3 | ||||
-rw-r--r-- | drivers/rtc/rtc-mc146818-lib.c | 70 | ||||
-rw-r--r-- | drivers/rtc/systohc.c | 61 | ||||
-rw-r--r-- | include/linux/rtc.h | 69 | ||||
-rw-r--r-- | include/linux/timex.h | 1 | ||||
-rw-r--r-- | kernel/time/ntp.c | 229 | ||||
-rw-r--r-- | kernel/time/ntp_internal.h | 7 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 74 |
10 files changed, 273 insertions, 251 deletions
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index bfb57464118d..bb8f319b09fb 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -6,7 +6,6 @@ ccflags-$(CONFIG_RTC_DEBUG) := -DDEBUG obj-$(CONFIG_RTC_LIB) += lib.o -obj-$(CONFIG_RTC_SYSTOHC) += systohc.o obj-$(CONFIG_RTC_CLASS) += rtc-core.o obj-$(CONFIG_RTC_MC146818_LIB) += rtc-mc146818-lib.o rtc-core-y := class.o interface.o diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 7c88d190c51f..5855aa2eef62 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -200,8 +200,13 @@ static struct rtc_device *rtc_allocate_device(void) device_initialize(&rtc->dev); - /* Drivers can revise this default after allocating the device. */ - rtc->set_offset_nsec = NSEC_PER_SEC / 2; + /* + * Drivers can revise this default after allocating the device. + * The default is what most RTCs do: Increment seconds exactly one + * second after the write happened. This adds a default transport + * time of 5ms which is at least halfways close to reality. + */ + rtc->set_offset_nsec = NSEC_PER_SEC + 5 * NSEC_PER_MSEC; rtc->irq_freq = 1; rtc->max_user_freq = 64; diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index c633319cdb91..c5bcd2adc9fe 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -868,6 +868,9 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) if (retval) goto cleanup2; + /* Set the sync offset for the periodic 11min update correct */ + cmos_rtc.rtc->set_offset_nsec = NSEC_PER_SEC / 2; + /* export at least the first block of NVRAM */ nvmem_cfg.size = address_space - NVRAM_OFFSET; if (rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg)) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 2ecd8752b088..972a5b9a629d 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -8,41 +8,41 @@ #include <linux/acpi.h> #endif -/* - * Returns true if a clock update is in progress - */ -static inline unsigned char mc146818_is_updating(void) -{ - unsigned char uip; - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); - spin_unlock_irqrestore(&rtc_lock, flags); - return uip; -} - unsigned int mc146818_get_time(struct rtc_time *time) { unsigned char ctrl; unsigned long flags; unsigned char century = 0; + bool retry; #ifdef CONFIG_MACH_DECSTATION unsigned int real_year; #endif +again: + spin_lock_irqsave(&rtc_lock, flags); /* - * read RTC once any update in progress is done. The update - * can take just over 2ms. We wait 20ms. There is no need to - * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP. - * If you need to know *exactly* when a second has started, enable - * periodic update complete interrupts, (via ioctl) and then - * immediately read /dev/rtc which will block until you get the IRQ. - * Once the read clears, read the RTC time (again via ioctl). Easy. + * Check whether there is an update in progress during which the + * readout is unspecified. The maximum update time is ~2ms. Poll + * every msec for completion. + * + * Store the second value before checking UIP so a long lasting NMI + * which happens to hit after the UIP check cannot make an update + * cycle invisible. */ - if (mc146818_is_updating()) - mdelay(20); + time->tm_sec = CMOS_READ(RTC_SECONDS); + + if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { + spin_unlock_irqrestore(&rtc_lock, flags); + mdelay(1); + goto again; + } + + /* Revalidate the above readout */ + if (time->tm_sec != CMOS_READ(RTC_SECONDS)) { + spin_unlock_irqrestore(&rtc_lock, flags); + goto again; + } /* * Only the values that we read from the RTC are set. We leave @@ -50,8 +50,6 @@ unsigned int mc146818_get_time(struct rtc_time *time) * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated * by the RTC when initially set to a non-zero value. */ - spin_lock_irqsave(&rtc_lock, flags); - time->tm_sec = CMOS_READ(RTC_SECONDS); time->tm_min = CMOS_READ(RTC_MINUTES); time->tm_hour = CMOS_READ(RTC_HOURS); time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); @@ -66,8 +64,24 @@ unsigned int mc146818_get_time(struct rtc_time *time) century = CMOS_READ(acpi_gbl_FADT.century); #endif ctrl = CMOS_READ(RTC_CONTROL); + /* + * Check for the UIP bit again. If it is set now then + * the above values may contain garbage. + */ + retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; + /* + * A NMI might have interrupted the above sequence so check whether + * the seconds value has changed which indicates that the NMI took + * longer than the UIP bit was set. Unlikely, but possible and + * there is also virt... + */ + retry |= time->tm_sec != CMOS_READ(RTC_SECONDS); + spin_unlock_irqrestore(&rtc_lock, flags); + if (retry) + goto again; + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { time->tm_sec = bcd2bin(time->tm_sec); @@ -121,7 +135,6 @@ int mc146818_set_time(struct rtc_time *time) if (yrs > 255) /* They are unsigned */ return -EINVAL; - spin_lock_irqsave(&rtc_lock, flags); #ifdef CONFIG_MACH_DECSTATION real_yrs = yrs; leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) || @@ -150,10 +163,8 @@ int mc146818_set_time(struct rtc_time *time) /* These limits and adjustments are independent of * whether the chip is in binary mode or not. */ - if (yrs > 169) { - spin_unlock_irqrestore(&rtc_lock, flags); + if (yrs > 169) return -EINVAL; - } if (yrs >= 100) yrs -= 100; @@ -169,6 +180,7 @@ int mc146818_set_time(struct rtc_time *time) century = bin2bcd(century); } + spin_lock_irqsave(&rtc_lock, flags); save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c deleted file mode 100644 index 8b70f0520e13..000000000000 --- a/drivers/rtc/systohc.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/rtc.h> -#include <linux/time.h> - -/** - * rtc_set_ntp_time - Save NTP synchronized time to the RTC - * @now: Current time of day - * @target_nsec: pointer for desired now->tv_nsec value - * - * Replacement for the NTP platform function update_persistent_clock64 - * that stores time for later retrieval by rtc_hctosys. - * - * Returns 0 on successful RTC update, -ENODEV if a RTC update is not - * possible at all, and various other -errno for specific temporary failure - * cases. - * - * -EPROTO is returned if now.tv_nsec is not close enough to *target_nsec. - * - * If temporary failure is indicated the caller should try again 'soon' - */ -int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec) -{ - struct rtc_device *rtc; - struct rtc_time tm; - struct timespec64 to_set; - int err = -ENODEV; - bool ok; - - rtc = rtc_class_open(CONFIG_RTC_SYSTOHC_DEVICE); - if (!rtc) - goto out_err; - - if (!rtc->ops || !rtc->ops->set_time) - goto out_close; - - /* Compute the value of tv_nsec we require the caller to supply in - * now.tv_nsec. This is the value such that (now + - * set_offset_nsec).tv_nsec == 0. - */ - set_normalized_timespec64(&to_set, 0, -rtc->set_offset_nsec); - *target_nsec = to_set.tv_nsec; - - /* The ntp code must call this with the correct value in tv_nsec, if - * it does not we update target_nsec and return EPROTO to make the ntp - * code try again later. - */ - ok = rtc_tv_nsec_ok(rtc->set_offset_nsec, &to_set, &now); - if (!ok) { - err = -EPROTO; - goto out_close; - } - - rtc_time64_to_tm(to_set.tv_sec, &tm); - - err = rtc_set_time(rtc, &tm); - -out_close: - rtc_class_close(rtc); -out_err: - return err; -} diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 22d1575e4991..b829382de6c3 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -110,13 +110,36 @@ struct rtc_device { /* Some hardware can't support UIE mode */ int uie_unsupported; - /* Number of nsec it takes to set the RTC clock. This influences when - * the set ops are called. An offset: - * - of 0.5 s will call RTC set for wall clock time 10.0 s at 9.5 s - * - of 1.5 s will call RTC set for wall clock time 10.0 s at 8.5 s - * - of -0.5 s will call RTC set for wall clock time 10.0 s at 10.5 s + /* + * This offset specifies the update timing of the RTC. + * + * tsched t1 write(t2.tv_sec - 1sec)) t2 RTC increments seconds + * + * The offset defines how tsched is computed so that the write to + * the RTC (t2.tv_sec - 1sec) is correct versus the time required + * for the transport of the write and the time which the RTC needs + * to increment seconds the first time after the write (t2). + * + * For direct accessible RTCs tsched ~= t1 because the write time + * is negligible. For RTCs behind slow busses the transport time is + * significant and has to be taken into account. + * + * The time between the write (t1) and the first increment after + * the write (t2) is RTC specific. For a MC146818 RTC it's 500ms, + * for many others it's exactly 1 second. Consult the datasheet. + * + * The value of this offset is also used to calculate the to be + * written value (t2.tv_sec - 1sec) at tsched. + * + * The default value for this is NSEC_PER_SEC + 10 msec default + * transport time. The offset can be adjusted by drivers so the + * calculation for the to be written value at tsched becomes + * correct: + * + * newval = tsched + set_offset_nsec - NSEC_PER_SEC + * and (tsched + set_offset_nsec) % NSEC_PER_SEC == 0 */ - long set_offset_nsec; + unsigned long set_offset_nsec; bool registered; @@ -165,7 +188,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); -extern int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); @@ -205,39 +227,6 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } -/* Determine if we can call to driver to set the time. Drivers can only be - * called to set a second aligned time value, and the field set_offset_nsec - * specifies how far away from the second aligned time to call the driver. - * - * This also computes 'to_set' which is the time we are trying to set, and has - * a zero in tv_nsecs, such that: - * to_set - set_delay_nsec == now +/- FUZZ - * - */ -static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec, - struct timespec64 *to_set, - const struct timespec64 *now) -{ - /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */ - const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5; - struct timespec64 delay = {.tv_sec = 0, - .tv_nsec = set_offset_nsec}; - - *to_set = timespec64_add(*now, delay); - - if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) { - to_set->tv_nsec = 0; - return true; - } - - if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) { - to_set->tv_sec++; - to_set->tv_nsec = 0; - return true; - } - return false; -} - #define rtc_register_device(device) \ __rtc_register_device(THIS_MODULE, device) diff --git a/include/linux/timex.h b/include/linux/timex.h index ce0859763670..9c2e54faf9b7 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -157,7 +157,6 @@ extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * extern void hardpps(const struct timespec64 *, const struct timespec64 *); int read_current_timer(unsigned long *timer_val); -void ntp_notify_cmos_timer(void); /* The clock frequency of the i8253/i8254 PIT */ #define PIT_TICK_RATE 1193182ul diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 069ca78fb0bf..7404d3831527 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -494,65 +494,74 @@ out: return leap; } +#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) static void sync_hw_clock(struct work_struct *work); -static DECLARE_DELAYED_WORK(sync_work, sync_hw_clock); - -static void sched_sync_hw_clock(struct timespec64 now, - unsigned long target_nsec, bool fail) +static DECLARE_WORK(sync_work, sync_hw_clock); +static struct hrtimer sync_hrtimer; +#define SYNC_PERIOD_NS (11UL * 60 * NSEC_PER_SEC) +static enum hrtimer_restart sync_timer_callback(struct hrtimer *timer) { - struct timespec64 next; - - ktime_get_real_ts64(&next); - if (!fail) - next.tv_sec = 659; - else { - /* - * Try again as soon as possible. Delaying long periods - * decreases the accuracy of the work queue timer. Due to this - * the algorithm is very likely to require a short-sleep retry - * after the above long sleep to synchronize ts_nsec. - */ - next.tv_sec = 0; - } - - /* Compute the needed delay that will get to tv_nsec == target_nsec */ - next.tv_nsec = target_nsec - next.tv_nsec; - if (next.tv_nsec <= 0) - next.tv_nsec += NSEC_PER_SEC; - if (next.tv_nsec >= NSEC_PER_SEC) { - next.tv_sec++; - next.tv_nsec -= NSEC_PER_SEC; - } + queue_work(system_power_efficient_wq, &sync_work); - queue_delayed_work(system_power_efficient_wq, &sync_work, - timespec64_to_jiffies(&next)); + return HRTIMER_NORESTART; } -static void sync_rtc_clock(void) +static void sched_sync_hw_clock(unsigned long offset_nsec, bool retry) { - unsigned long target_nsec; - struct timespec64 adjust, now; - int rc; + ktime_t exp = ktime_set(ktime_get_real_seconds(), 0); - if (!IS_ENABLED(CONFIG_RTC_SYSTOHC)) - return; + if (retry) + exp = ktime_add_ns(exp, 2 * NSEC_PER_SEC - offset_nsec); + else + exp = ktime_add_ns(exp, SYNC_PERIOD_NS - offset_nsec); - ktime_get_real_ts64(&now); + hrtimer_start(&sync_hrtimer, exp, HRTIMER_MODE_ABS); +} - adjust = now; - if (persistent_clock_is_local) - adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); +/* + * Check whether @now is correct versus the required time to update the RTC + * and calculate the value which needs to be written to the RTC so that the + * next seconds increment of the RTC after the write is aligned with the next + * seconds increment of clock REALTIME. + * + * tsched t1 write(t2.tv_sec - 1sec)) t2 RTC increments seconds + * + * t2.tv_nsec == 0 + * tsched = t2 - set_offset_nsec + * newval = t2 - NSEC_PER_SEC + * + * ==> neval = tsched + set_offset_nsec - NSEC_PER_SEC + * + * As the execution of this code is not guaranteed to happen exactly at + * tsched this allows it to happen within a fuzzy region: + * + * abs(now - tsched) < FUZZ + * + * If @now is not inside the allowed window the function returns false. + */ +static inline bool rtc_tv_nsec_ok(unsigned long set_offset_nsec, + struct timespec64 *to_set, + const struct timespec64 *now) +{ + /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */ + const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5; + struct timespec64 delay = {.tv_sec = -1, + .tv_nsec = set_offset_nsec}; - /* - * The current RTC in use will provide the target_nsec it wants to be - * called at, and does rtc_tv_nsec_ok internally. - */ - rc = rtc_set_ntp_time(adjust, &target_nsec); - if (rc == -ENODEV) - return; + *to_set = timespec64_add(*now, delay); + + if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) { + to_set->tv_nsec = 0; + return true; + } - sched_sync_hw_clock(now, target_nsec, rc); + if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) { + to_set->tv_sec++; + to_set->tv_nsec = 0; + return true; + } + return false; } #ifdef CONFIG_GENERIC_CMOS_UPDATE @@ -560,48 +569,47 @@ int __weak update_persistent_clock64(struct timespec64 now64) { return -ENODEV; } +#else +static inline int update_persistent_clock64(struct timespec64 now64) +{ + return -ENODEV; +} #endif -static bool sync_cmos_clock(void) +#ifdef CONFIG_RTC_SYSTOHC +/* Save NTP synchronized time to the RTC */ +static int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec) { - static bool no_cmos; - struct timespec64 now; - struct timespec64 adjust; - int rc = -EPROTO; - long target_nsec = NSEC_PER_SEC / 2; + struct rtc_device *rtc; + struct rtc_time tm; + int err = -ENODEV; - if (!IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE)) - return false; + rtc = rtc_class_open(CONFIG_RTC_SYSTOHC_DEVICE); + if (!rtc) + return -ENODEV; - if (no_cmos) - return false; + if (!rtc->ops || !rtc->ops->set_time) + goto out_close; - /* - * Historically update_persistent_clock64() has followed x86 - * semantics, which match the MC146818A/etc RTC. This RTC will store - * 'adjust' and then in .5s it will advance once second. - * - * Architectures are strongly encouraged to use rtclib and not - * implement this legacy API. - */ - ktime_get_real_ts64(&now); - if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) { - if (persistent_clock_is_local) - adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); - rc = update_persistent_clock64(adjust); - /* - * The machine does not support update_persistent_clock64 even - * though it defines CONFIG_GENERIC_CMOS_UPDATE. - */ - if (rc == -ENODEV) { - no_cmos = true; - return false; - } + /* First call might not have the correct offset */ + if (*offset_nsec == rtc->set_offset_nsec) { + rtc_time64_to_tm(to_set->tv_sec, &tm); + err = rtc_set_time(rtc, &tm); + } else { + /* Store the update offset and let the caller try again */ + *offset_nsec = rtc->set_offset_nsec; + err = -EAGAIN; } - - sched_sync_hw_clock(now, target_nsec, rc); - return true; +out_close: + rtc_class_close(rtc); + return err; +} +#else +static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec) +{ + return -ENODEV; } +#endif /* * If we have an externally synchronized Linux clock, then update RTC clock @@ -613,24 +621,64 @@ static bool sync_cmos_clock(void) */ static void sync_hw_clock(struct work_struct *work) { - if (!ntp_synced()) - return; + /* + * The default synchronization offset is 500ms for the deprecated + * update_persistent_clock64() under the assumption that it uses + * the infamous CMOS clock (MC146818). + */ + static unsigned long offset_nsec = NSEC_PER_SEC / 2; + struct timespec64 now, to_set; + int res = -EAGAIN; - if (sync_cmos_clock()) + /* + * Don't update if STA_UNSYNC is set and if ntp_notify_cmos_timer() + * managed to schedule the work between the timer firing and the + * work being able to rearm the timer. Wait for the timer to expire. + */ + if (!ntp_synced() || hrtimer_is_queued(&sync_hrtimer)) return; - sync_rtc_clock(); + ktime_get_real_ts64(&now); + /* If @now is not in the allowed window, try again */ + if (!rtc_tv_nsec_ok(offset_nsec, &to_set, &now)) + goto rearm; + + /* Take timezone adjusted RTCs into account */ + if (persistent_clock_is_local) + to_set.tv_sec -= (sys_tz.tz_minuteswest * 60); + + /* Try the legacy RTC first. */ + res = update_persistent_clock64(to_set); + if (res != -ENODEV) + goto rearm; + + /* Try the RTC class */ + res = update_rtc(&to_set, &offset_nsec); + if (res == -ENODEV) + return; +rearm: + sched_sync_hw_clock(offset_nsec, res != 0); } void ntp_notify_cmos_timer(void) { - if (!ntp_synced()) - return; + /* + * When the work is currently executed but has not yet the timer + * rearmed this queues the work immediately again. No big issue, + * just a pointless work scheduled. + */ + if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer)) + queue_work(system_power_efficient_wq, &sync_work); +} - if (IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE) || - IS_ENABLED(CONFIG_RTC_SYSTOHC)) - queue_delayed_work(system_power_efficient_wq, &sync_work, 0); +static void __init ntp_init_cmos_sync(void) +{ + hrtimer_init(&sync_hrtimer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + sync_hrtimer.function = sync_timer_callback; } +#else /* CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */ +static inline void __init ntp_init_cmos_sync(void) { } +#endif /* !CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */ /* * Propagate a new txc->status value into the NTP state: @@ -1044,4 +1092,5 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup); void __init ntp_init(void) { ntp_clear(); + ntp_init_cmos_sync(); } diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index 908ecaa65fc3..23d1b74c3065 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -12,4 +12,11 @@ extern int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai, struct audit_ntp_data *ad); extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts); + +#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) +extern void ntp_notify_cmos_timer(void); +#else +static inline void ntp_notify_cmos_timer(void) { } +#endif + #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index cc7cba20382e..a9e68936822d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -57,36 +57,42 @@ static ktime_t last_jiffies_update; static void tick_do_update_jiffies64(ktime_t now) { unsigned long ticks = 1; - ktime_t delta; + ktime_t delta, nextp; /* - * Do a quick check without holding jiffies_lock. The READ_ONCE() + * 64bit can do a quick check without holding jiffies lock and + * without looking at the sequence count. The smp_load_acquire() * pairs with the update done later in this function. * - * This is also an intentional data race which is even safe on - * 32bit in theory. If there is a concurrent update then the check - * might give a random answer. It does not matter because if it - * returns then the concurrent update is already taking care, if it - * falls through then it will pointlessly contend on jiffies_lock. - * - * Though there is one nasty case on 32bit due to store tearing of - * the 64bit value. If the first 32bit store makes the quick check - * return on all other CPUs and the writing CPU context gets - * delayed to complete the second store (scheduled out on virt) - * then jiffies can become stale for up to ~2^32 nanoseconds - * without noticing. After that point all CPUs will wait for - * jiffies lock. - * - * OTOH, this is not any different than the situation with NOHZ=off - * where one CPU is responsible for updating jiffies and - * timekeeping. If that CPU goes out for lunch then all other CPUs - * will operate on stale jiffies until it decides to come back. + * 32bit cannot do that because the store of tick_next_period + * consists of two 32bit stores and the first store could move it + * to a random point in the future. */ - if (ktime_before(now, READ_ONCE(tick_next_period))) - return; + if (IS_ENABLED(CONFIG_64BIT)) { + if (ktime_before(now, smp_load_acquire(&tick_next_period))) + return; + } else { + unsigned int seq; - /* Reevaluate with jiffies_lock held */ + /* + * Avoid contention on jiffies_lock and protect the quick + * check with the sequence count. + */ + do { + seq = read_seqcount_begin(&jiffies_seq); + nextp = tick_next_period; + } while (read_seqcount_retry(&jiffies_seq, seq)); + + if (ktime_before(now, nextp)) + return; + } + + /* Quick check failed, i.e. update is required. */ raw_spin_lock(&jiffies_lock); + /* + * Reevaluate with the lock held. Another CPU might have done the + * update already. + */ if (ktime_before(now, tick_next_period)) { raw_spin_unlock(&jiffies_lock); return; @@ -112,11 +118,25 @@ static void tick_do_update_jiffies64(ktime_t now) jiffies_64 += ticks; /* - * Keep the tick_next_period variable up to date. WRITE_ONCE() - * pairs with the READ_ONCE() in the lockless quick check above. + * Keep the tick_next_period variable up to date. */ - WRITE_ONCE(tick_next_period, - ktime_add_ns(last_jiffies_update, TICK_NSEC)); + nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC); + + if (IS_ENABLED(CONFIG_64BIT)) { + /* + * Pairs with smp_load_acquire() in the lockless quick + * check above and ensures that the update to jiffies_64 is + * not reordered vs. the store to tick_next_period, neither + * by the compiler nor by the CPU. + */ + smp_store_release(&tick_next_period, nextp); + } else { + /* + * A plain store is good enough on 32bit as the quick check + * above is protected by the sequence count. + */ + tick_next_period = nextp; + } /* * Release the sequence count. calc_global_load() below is not |