-rw-r--r--  drivers/rtc/Makefile            |   1
-rw-r--r--  drivers/rtc/class.c             |   9
-rw-r--r--  drivers/rtc/rtc-cmos.c          |   3
-rw-r--r--  drivers/rtc/rtc-mc146818-lib.c  |  70
-rw-r--r--  drivers/rtc/systohc.c           |  61
-rw-r--r--  include/linux/rtc.h             |  69
-rw-r--r--  include/linux/timex.h           |   1
-rw-r--r--  kernel/time/ntp.c               | 229
-rw-r--r--  kernel/time/ntp_internal.h      |   7
-rw-r--r--  kernel/time/tick-sched.c        |  74
10 files changed, 273 insertions(+), 251 deletions(-)
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index bfb57464118d..bb8f319b09fb 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -6,7 +6,6 @@
ccflags-$(CONFIG_RTC_DEBUG) := -DDEBUG
obj-$(CONFIG_RTC_LIB) += lib.o
-obj-$(CONFIG_RTC_SYSTOHC) += systohc.o
obj-$(CONFIG_RTC_CLASS) += rtc-core.o
obj-$(CONFIG_RTC_MC146818_LIB) += rtc-mc146818-lib.o
rtc-core-y := class.o interface.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 7c88d190c51f..5855aa2eef62 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -200,8 +200,13 @@ static struct rtc_device *rtc_allocate_device(void)
device_initialize(&rtc->dev);
- /* Drivers can revise this default after allocating the device. */
- rtc->set_offset_nsec = NSEC_PER_SEC / 2;
+ /*
+ * Drivers can revise this default after allocating the device.
+ * The default is what most RTCs do: Increment seconds exactly one
+ * second after the write happened. This adds a default transport
+ * time of 5 ms, which is at least halfway close to reality.
+ */
+ rtc->set_offset_nsec = NSEC_PER_SEC + 5 * NSEC_PER_MSEC;
rtc->irq_freq = 1;
rtc->max_user_freq = 64;
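A quick numeric check of the new default (not part of this patch; plain integers instead of ktime_t): with set_offset_nsec = NSEC_PER_SEC + 5 ms, scheduling the write at tsched = t2 - set_offset_nsec makes the RTC's first seconds increment land exactly on the full second t2.

    /* Illustrative only: verifies the tsched/newval arithmetic for the
     * new NSEC_PER_SEC + 5ms default set in rtc_allocate_device(). */
    #include <stdio.h>
    #include <stdint.h>

    #define NSEC_PER_SEC	1000000000ULL
    #define NSEC_PER_MSEC	1000000ULL

    int main(void)
    {
    	uint64_t set_offset_nsec = NSEC_PER_SEC + 5 * NSEC_PER_MSEC;
    	uint64_t t2 = 100 * NSEC_PER_SEC;	/* RTC increments seconds here */
    	uint64_t tsched = t2 - set_offset_nsec;	/* when the write is scheduled */
    	uint64_t newval = t2 - NSEC_PER_SEC;	/* seconds value to write */

    	printf("tsched = %llu ns, write sec = %llu\n",
    	       (unsigned long long)tsched,
    	       (unsigned long long)(newval / NSEC_PER_SEC));
    	/* Invariant documented in include/linux/rtc.h: */
    	printf("aligned: %d\n",
    	       (tsched + set_offset_nsec) % NSEC_PER_SEC == 0);
    	return 0;
    }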
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index c633319cdb91..c5bcd2adc9fe 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -868,6 +868,9 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
if (retval)
goto cleanup2;
+ /* Set the sync offset correctly for the periodic 11 minute update */
+ cmos_rtc.rtc->set_offset_nsec = NSEC_PER_SEC / 2;
+
/* export at least the first block of NVRAM */
nvmem_cfg.size = address_space - NVRAM_OFFSET;
if (rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg))
diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
index 2ecd8752b088..972a5b9a629d 100644
--- a/drivers/rtc/rtc-mc146818-lib.c
+++ b/drivers/rtc/rtc-mc146818-lib.c
@@ -8,41 +8,41 @@
#include <linux/acpi.h>
#endif
-/*
- * Returns true if a clock update is in progress
- */
-static inline unsigned char mc146818_is_updating(void)
-{
- unsigned char uip;
- unsigned long flags;
-
- spin_lock_irqsave(&rtc_lock, flags);
- uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP);
- spin_unlock_irqrestore(&rtc_lock, flags);
- return uip;
-}
-
unsigned int mc146818_get_time(struct rtc_time *time)
{
unsigned char ctrl;
unsigned long flags;
unsigned char century = 0;
+ bool retry;
#ifdef CONFIG_MACH_DECSTATION
unsigned int real_year;
#endif
+again:
+ spin_lock_irqsave(&rtc_lock, flags);
/*
- * read RTC once any update in progress is done. The update
- * can take just over 2ms. We wait 20ms. There is no need to
- * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP.
- * If you need to know *exactly* when a second has started, enable
- * periodic update complete interrupts, (via ioctl) and then
- * immediately read /dev/rtc which will block until you get the IRQ.
- * Once the read clears, read the RTC time (again via ioctl). Easy.
+ * Check whether there is an update in progress during which the
+ * readout is unspecified. The maximum update time is ~2ms. Poll
+ * every msec for completion.
+ *
+ * Store the second value before checking UIP so a long lasting NMI
+ * which happens to hit after the UIP check cannot make an update
+ * cycle invisible.
*/
- if (mc146818_is_updating())
- mdelay(20);
+ time->tm_sec = CMOS_READ(RTC_SECONDS);
+
+ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ mdelay(1);
+ goto again;
+ }
+
+ /* Revalidate the above readout */
+ if (time->tm_sec != CMOS_READ(RTC_SECONDS)) {
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ goto again;
+ }
/*
* Only the values that we read from the RTC are set. We leave
@@ -50,8 +50,6 @@ unsigned int mc146818_get_time(struct rtc_time *time)
* RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated
* by the RTC when initially set to a non-zero value.
*/
- spin_lock_irqsave(&rtc_lock, flags);
- time->tm_sec = CMOS_READ(RTC_SECONDS);
time->tm_min = CMOS_READ(RTC_MINUTES);
time->tm_hour = CMOS_READ(RTC_HOURS);
time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH);
@@ -66,8 +64,24 @@ unsigned int mc146818_get_time(struct rtc_time *time)
century = CMOS_READ(acpi_gbl_FADT.century);
#endif
ctrl = CMOS_READ(RTC_CONTROL);
+ /*
+ * Check for the UIP bit again. If it is set now then
+ * the above values may contain garbage.
+ */
+ retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP;
+ /*
+ * An NMI might have interrupted the above sequence, so check whether
+ * the seconds value has changed, which indicates that the NMI took
+ * longer than the time the UIP bit was set. Unlikely, but possible,
+ * and there is also virt...
+ */
+ retry |= time->tm_sec != CMOS_READ(RTC_SECONDS);
+
spin_unlock_irqrestore(&rtc_lock, flags);
+ if (retry)
+ goto again;
+
if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
{
time->tm_sec = bcd2bin(time->tm_sec);
@@ -121,7 +135,6 @@ int mc146818_set_time(struct rtc_time *time)
if (yrs > 255) /* They are unsigned */
return -EINVAL;
- spin_lock_irqsave(&rtc_lock, flags);
#ifdef CONFIG_MACH_DECSTATION
real_yrs = yrs;
leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) ||
@@ -150,10 +163,8 @@ int mc146818_set_time(struct rtc_time *time)
/* These limits and adjustments are independent of
* whether the chip is in binary mode or not.
*/
- if (yrs > 169) {
- spin_unlock_irqrestore(&rtc_lock, flags);
+ if (yrs > 169)
return -EINVAL;
- }
if (yrs >= 100)
yrs -= 100;
@@ -169,6 +180,7 @@ int mc146818_set_time(struct rtc_time *time)
century = bin2bcd(century);
}
+ spin_lock_irqsave(&rtc_lock, flags);
save_control = CMOS_READ(RTC_CONTROL);
CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
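The new readout sequence boils down to a generic sample / check / revalidate pattern. A standalone sketch against a fake device (hypothetical fake_* helpers, not kernel API; the 1 ms poll delay is omitted) shows its shape: the seconds value is sampled before the update-in-progress check and revalidated afterwards, so an update cycle hidden by a long interruption is still caught.

    #include <stdbool.h>
    #include <stdio.h>

    /* Fake device standing in for the CMOS RTC registers. */
    struct fake_rtc {
    	unsigned int sec;
    	bool uip;	/* update in progress */
    };

    static unsigned int fake_read_seconds(struct fake_rtc *rtc)
    {
    	return rtc->sec;
    }

    static bool fake_read_uip(struct fake_rtc *rtc)
    {
    	return rtc->uip;
    }

    static unsigned int fake_rtc_get_seconds(struct fake_rtc *rtc)
    {
    	unsigned int sec;
    again:
    	sec = fake_read_seconds(rtc);	/* sample before the UIP check */
    	if (fake_read_uip(rtc))		/* update in progress: retry */
    		goto again;
    	if (sec != fake_read_seconds(rtc))	/* readout raced an update */
    		goto again;
    	return sec;			/* stable, consistent value */
    }

    int main(void)
    {
    	struct fake_rtc rtc = { .sec = 42, .uip = false };

    	printf("seconds: %u\n", fake_rtc_get_seconds(&rtc));
    	return 0;
    }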
diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c
deleted file mode 100644
index 8b70f0520e13..000000000000
--- a/drivers/rtc/systohc.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/rtc.h>
-#include <linux/time.h>
-
-/**
- * rtc_set_ntp_time - Save NTP synchronized time to the RTC
- * @now: Current time of day
- * @target_nsec: pointer for desired now->tv_nsec value
- *
- * Replacement for the NTP platform function update_persistent_clock64
- * that stores time for later retrieval by rtc_hctosys.
- *
- * Returns 0 on successful RTC update, -ENODEV if a RTC update is not
- * possible at all, and various other -errno for specific temporary failure
- * cases.
- *
- * -EPROTO is returned if now.tv_nsec is not close enough to *target_nsec.
- *
- * If temporary failure is indicated the caller should try again 'soon'
- */
-int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec)
-{
- struct rtc_device *rtc;
- struct rtc_time tm;
- struct timespec64 to_set;
- int err = -ENODEV;
- bool ok;
-
- rtc = rtc_class_open(CONFIG_RTC_SYSTOHC_DEVICE);
- if (!rtc)
- goto out_err;
-
- if (!rtc->ops || !rtc->ops->set_time)
- goto out_close;
-
- /* Compute the value of tv_nsec we require the caller to supply in
- * now.tv_nsec. This is the value such that (now +
- * set_offset_nsec).tv_nsec == 0.
- */
- set_normalized_timespec64(&to_set, 0, -rtc->set_offset_nsec);
- *target_nsec = to_set.tv_nsec;
-
- /* The ntp code must call this with the correct value in tv_nsec, if
- * it does not we update target_nsec and return EPROTO to make the ntp
- * code try again later.
- */
- ok = rtc_tv_nsec_ok(rtc->set_offset_nsec, &to_set, &now);
- if (!ok) {
- err = -EPROTO;
- goto out_close;
- }
-
- rtc_time64_to_tm(to_set.tv_sec, &tm);
-
- err = rtc_set_time(rtc, &tm);
-
-out_close:
- rtc_class_close(rtc);
-out_err:
- return err;
-}
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 22d1575e4991..b829382de6c3 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -110,13 +110,36 @@ struct rtc_device {
/* Some hardware can't support UIE mode */
int uie_unsupported;
- /* Number of nsec it takes to set the RTC clock. This influences when
- * the set ops are called. An offset:
- * - of 0.5 s will call RTC set for wall clock time 10.0 s at 9.5 s
- * - of 1.5 s will call RTC set for wall clock time 10.0 s at 8.5 s
- * - of -0.5 s will call RTC set for wall clock time 10.0 s at 10.5 s
+ /*
+ * This offset specifies the update timing of the RTC.
+ *
+ * tsched t1 write(t2.tv_sec - 1sec) t2 RTC increments seconds
+ *
+ * The offset defines how tsched is computed so that the write to
+ * the RTC (t2.tv_sec - 1sec) is correct versus the time required
+ * for the transport of the write and the time which the RTC needs
+ * to increment seconds the first time after the write (t2).
+ *
+ * For directly accessible RTCs tsched ~= t1 because the write time
+ * is negligible. For RTCs behind slow buses the transport time is
+ * significant and has to be taken into account.
+ *
+ * The time between the write (t1) and the first increment after
+ * the write (t2) is RTC specific. For a MC146818 RTC it's 500ms,
+ * for many others it's exactly 1 second. Consult the datasheet.
+ *
+ * The value of this offset is also used to calculate the value to
+ * be written (t2.tv_sec - 1sec) at tsched.
+ *
+ * The default value for this is NSEC_PER_SEC plus a 5 msec default
+ * transport time, matching the default set in rtc_allocate_device().
+ * The offset can be adjusted by drivers so that the calculation of
+ * the value to be written at tsched becomes correct:
+ *
+ * newval = tsched + set_offset_nsec - NSEC_PER_SEC
+ * and (tsched + set_offset_nsec) % NSEC_PER_SEC == 0
*/
- long set_offset_nsec;
+ unsigned long set_offset_nsec;
bool registered;
@@ -165,7 +188,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc);
extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
-extern int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec);
int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
extern int rtc_read_alarm(struct rtc_device *rtc,
struct rtc_wkalrm *alrm);
@@ -205,39 +227,6 @@ static inline bool is_leap_year(unsigned int year)
return (!(year % 4) && (year % 100)) || !(year % 400);
}
-/* Determine if we can call to driver to set the time. Drivers can only be
- * called to set a second aligned time value, and the field set_offset_nsec
- * specifies how far away from the second aligned time to call the driver.
- *
- * This also computes 'to_set' which is the time we are trying to set, and has
- * a zero in tv_nsecs, such that:
- * to_set - set_delay_nsec == now +/- FUZZ
- *
- */
-static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec,
- struct timespec64 *to_set,
- const struct timespec64 *now)
-{
- /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */
- const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5;
- struct timespec64 delay = {.tv_sec = 0,
- .tv_nsec = set_offset_nsec};
-
- *to_set = timespec64_add(*now, delay);
-
- if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) {
- to_set->tv_nsec = 0;
- return true;
- }
-
- if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) {
- to_set->tv_sec++;
- to_set->tv_nsec = 0;
- return true;
- }
- return false;
-}
-
#define rtc_register_device(device) \
__rtc_register_device(THIS_MODULE, device)
diff --git a/include/linux/timex.h b/include/linux/timex.h
index ce0859763670..9c2e54faf9b7 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -157,7 +157,6 @@ extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex *
extern void hardpps(const struct timespec64 *, const struct timespec64 *);
int read_current_timer(unsigned long *timer_val);
-void ntp_notify_cmos_timer(void);
/* The clock frequency of the i8253/i8254 PIT */
#define PIT_TICK_RATE 1193182ul
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 069ca78fb0bf..7404d3831527 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -494,65 +494,74 @@ out:
return leap;
}
+#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
static void sync_hw_clock(struct work_struct *work);
-static DECLARE_DELAYED_WORK(sync_work, sync_hw_clock);
-
-static void sched_sync_hw_clock(struct timespec64 now,
- unsigned long target_nsec, bool fail)
+static DECLARE_WORK(sync_work, sync_hw_clock);
+static struct hrtimer sync_hrtimer;
+#define SYNC_PERIOD_NS (11UL * 60 * NSEC_PER_SEC)
+static enum hrtimer_restart sync_timer_callback(struct hrtimer *timer)
{
- struct timespec64 next;
-
- ktime_get_real_ts64(&next);
- if (!fail)
- next.tv_sec = 659;
- else {
- /*
- * Try again as soon as possible. Delaying long periods
- * decreases the accuracy of the work queue timer. Due to this
- * the algorithm is very likely to require a short-sleep retry
- * after the above long sleep to synchronize ts_nsec.
- */
- next.tv_sec = 0;
- }
-
- /* Compute the needed delay that will get to tv_nsec == target_nsec */
- next.tv_nsec = target_nsec - next.tv_nsec;
- if (next.tv_nsec <= 0)
- next.tv_nsec += NSEC_PER_SEC;
- if (next.tv_nsec >= NSEC_PER_SEC) {
- next.tv_sec++;
- next.tv_nsec -= NSEC_PER_SEC;
- }
+ queue_work(system_power_efficient_wq, &sync_work);
- queue_delayed_work(system_power_efficient_wq, &sync_work,
- timespec64_to_jiffies(&next));
+ return HRTIMER_NORESTART;
}
-static void sync_rtc_clock(void)
+static void sched_sync_hw_clock(unsigned long offset_nsec, bool retry)
{
- unsigned long target_nsec;
- struct timespec64 adjust, now;
- int rc;
+ ktime_t exp = ktime_set(ktime_get_real_seconds(), 0);
- if (!IS_ENABLED(CONFIG_RTC_SYSTOHC))
- return;
+ if (retry)
+ exp = ktime_add_ns(exp, 2 * NSEC_PER_SEC - offset_nsec);
+ else
+ exp = ktime_add_ns(exp, SYNC_PERIOD_NS - offset_nsec);
- ktime_get_real_ts64(&now);
+ hrtimer_start(&sync_hrtimer, exp, HRTIMER_MODE_ABS);
+}
- adjust = now;
- if (persistent_clock_is_local)
- adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
+/*
+ * Check whether @now is correct versus the required time to update the RTC
+ * and calculate the value which needs to be written to the RTC so that the
+ * next seconds increment of the RTC after the write is aligned with the next
+ * seconds increment of clock REALTIME.
+ *
+ * tsched t1 write(t2.tv_sec - 1sec) t2 RTC increments seconds
+ *
+ * t2.tv_nsec == 0
+ * tsched = t2 - set_offset_nsec
+ * newval = t2 - NSEC_PER_SEC
+ *
+ * ==> newval = tsched + set_offset_nsec - NSEC_PER_SEC
+ *
+ * As the execution of this code is not guaranteed to happen exactly at
+ * tsched, this allows it to happen within a fuzzy region:
+ *
+ * abs(now - tsched) < FUZZ
+ *
+ * If @now is not inside the allowed window the function returns false.
+ */
+static inline bool rtc_tv_nsec_ok(unsigned long set_offset_nsec,
+ struct timespec64 *to_set,
+ const struct timespec64 *now)
+{
+ /* Allowed error in tv_nsec, arbitrarily set to 5 jiffies in ns. */
+ const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5;
+ struct timespec64 delay = {.tv_sec = -1,
+ .tv_nsec = set_offset_nsec};
- /*
- * The current RTC in use will provide the target_nsec it wants to be
- * called at, and does rtc_tv_nsec_ok internally.
- */
- rc = rtc_set_ntp_time(adjust, &target_nsec);
- if (rc == -ENODEV)
- return;
+ *to_set = timespec64_add(*now, delay);
+
+ if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) {
+ to_set->tv_nsec = 0;
+ return true;
+ }
- sched_sync_hw_clock(now, target_nsec, rc);
+ if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) {
+ to_set->tv_sec++;
+ to_set->tv_nsec = 0;
+ return true;
+ }
+ return false;
}
#ifdef CONFIG_GENERIC_CMOS_UPDATE
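Plugging concrete numbers into rtc_tv_nsec_ok() makes the fuzzy window tangible. The sketch below re-implements the check in userspace (not the kernel function itself; TICK_NSEC is assumed to be 1 ms, i.e. HZ=1000) and probes one point inside and one point outside the window.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC	1000000000LL
    #define TICK_NSEC	1000000LL	/* assumption: HZ=1000 */
    #define FUZZ		(5 * TICK_NSEC)	/* 5 jiffies, as above */

    struct ts { int64_t sec; int64_t nsec; };

    /* Userspace re-implementation of the window check above. */
    static bool tv_nsec_ok(int64_t offset_nsec, struct ts *to_set,
    		       const struct ts *now)
    {
    	int64_t nsec = now->nsec + offset_nsec;	/* now + {-1s, offset} */

    	to_set->sec = now->sec - 1 + nsec / NSEC_PER_SEC;
    	to_set->nsec = nsec % NSEC_PER_SEC;

    	if (to_set->nsec < FUZZ) {
    		to_set->nsec = 0;
    		return true;
    	}
    	if (to_set->nsec > NSEC_PER_SEC - FUZZ) {
    		to_set->sec++;
    		to_set->nsec = 0;
    		return true;
    	}
    	return false;	/* not at tsched: caller reschedules */
    }

    int main(void)
    {
    	int64_t offset = NSEC_PER_SEC / 2;	/* MC146818 style: 500ms */
    	struct ts now = { .sec = 100, .nsec = NSEC_PER_SEC / 2 }, to_set;

    	/* Exactly at tsched: write 100, RTC increments to 101 at 101.0s */
    	printf("at tsched: ok=%d to_set=%lld\n",
    	       tv_nsec_ok(offset, &to_set, &now), (long long)to_set.sec);

    	now.nsec += 100 * TICK_NSEC;		/* 100ms late: outside FUZZ */
    	printf("late:      ok=%d\n", tv_nsec_ok(offset, &to_set, &now));
    	return 0;
    }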
@@ -560,48 +569,47 @@ int __weak update_persistent_clock64(struct timespec64 now64)
{
return -ENODEV;
}
+#else
+static inline int update_persistent_clock64(struct timespec64 now64)
+{
+ return -ENODEV;
+}
#endif
-static bool sync_cmos_clock(void)
+#ifdef CONFIG_RTC_SYSTOHC
+/* Save NTP synchronized time to the RTC */
+static int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec)
{
- static bool no_cmos;
- struct timespec64 now;
- struct timespec64 adjust;
- int rc = -EPROTO;
- long target_nsec = NSEC_PER_SEC / 2;
+ struct rtc_device *rtc;
+ struct rtc_time tm;
+ int err = -ENODEV;
- if (!IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE))
- return false;
+ rtc = rtc_class_open(CONFIG_RTC_SYSTOHC_DEVICE);
+ if (!rtc)
+ return -ENODEV;
- if (no_cmos)
- return false;
+ if (!rtc->ops || !rtc->ops->set_time)
+ goto out_close;
- /*
- * Historically update_persistent_clock64() has followed x86
- * semantics, which match the MC146818A/etc RTC. This RTC will store
- * 'adjust' and then in .5s it will advance once second.
- *
- * Architectures are strongly encouraged to use rtclib and not
- * implement this legacy API.
- */
- ktime_get_real_ts64(&now);
- if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) {
- if (persistent_clock_is_local)
- adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
- rc = update_persistent_clock64(adjust);
- /*
- * The machine does not support update_persistent_clock64 even
- * though it defines CONFIG_GENERIC_CMOS_UPDATE.
- */
- if (rc == -ENODEV) {
- no_cmos = true;
- return false;
- }
+ /* First call might not have the correct offset */
+ if (*offset_nsec == rtc->set_offset_nsec) {
+ rtc_time64_to_tm(to_set->tv_sec, &tm);
+ err = rtc_set_time(rtc, &tm);
+ } else {
+ /* Store the update offset and let the caller try again */
+ *offset_nsec = rtc->set_offset_nsec;
+ err = -EAGAIN;
}
-
- sched_sync_hw_clock(now, target_nsec, rc);
- return true;
+out_close:
+ rtc_class_close(rtc);
+ return err;
+}
+#else
+static inline int update_rtc(struct timespec64 *to_set, unsigned long *offset_nsec)
+{
+ return -ENODEV;
}
+#endif
/*
* If we have an externally synchronized Linux clock, then update RTC clock
@@ -613,24 +621,64 @@ static bool sync_cmos_clock(void)
*/
static void sync_hw_clock(struct work_struct *work)
{
- if (!ntp_synced())
- return;
+ /*
+ * The default synchronization offset is 500ms for the deprecated
+ * update_persistent_clock64() under the assumption that it uses
+ * the infamous CMOS clock (MC146818).
+ */
+ static unsigned long offset_nsec = NSEC_PER_SEC / 2;
+ struct timespec64 now, to_set;
+ int res = -EAGAIN;
- if (sync_cmos_clock())
+ /*
+ * Don't update if STA_UNSYNC is set, or if ntp_notify_cmos_timer()
+ * managed to schedule the work between the timer firing and the
+ * work being able to rearm the timer. Wait for the timer to expire.
+ */
+ if (!ntp_synced() || hrtimer_is_queued(&sync_hrtimer))
return;
- sync_rtc_clock();
+ ktime_get_real_ts64(&now);
+ /* If @now is not in the allowed window, try again */
+ if (!rtc_tv_nsec_ok(offset_nsec, &to_set, &now))
+ goto rearm;
+
+ /* Take timezone adjusted RTCs into account */
+ if (persistent_clock_is_local)
+ to_set.tv_sec -= (sys_tz.tz_minuteswest * 60);
+
+ /* Try the legacy RTC first. */
+ res = update_persistent_clock64(to_set);
+ if (res != -ENODEV)
+ goto rearm;
+
+ /* Try the RTC class */
+ res = update_rtc(&to_set, &offset_nsec);
+ if (res == -ENODEV)
+ return;
+rearm:
+ sched_sync_hw_clock(offset_nsec, res != 0);
}
void ntp_notify_cmos_timer(void)
{
- if (!ntp_synced())
- return;
+ /*
+ * If the work is currently executing but has not yet rearmed the
+ * timer, this queues the work immediately again. That's not a big
+ * issue, just one pointless work item scheduled.
+ */
+ if (ntp_synced() && !hrtimer_is_queued(&sync_hrtimer))
+ queue_work(system_power_efficient_wq, &sync_work);
+}
- if (IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE) ||
- IS_ENABLED(CONFIG_RTC_SYSTOHC))
- queue_delayed_work(system_power_efficient_wq, &sync_work, 0);
+static void __init ntp_init_cmos_sync(void)
+{
+ hrtimer_init(&sync_hrtimer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ sync_hrtimer.function = sync_timer_callback;
}
+#else /* defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */
+static inline void __init ntp_init_cmos_sync(void) { }
+#endif /* defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) */
/*
* Propagate a new txc->status value into the NTP state:
@@ -1044,4 +1092,5 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
ntp_clear();
+ ntp_init_cmos_sync();
}
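The rearm arithmetic in sched_sync_hw_clock() above replaces the old 659 second magic number: the absolute expiry is placed offset_nsec before a full second boundary, which is exactly tsched for the next update cycle. A standalone sketch of that computation (plain integers instead of ktime_t, not part of this patch):

    #include <stdio.h>
    #include <stdint.h>

    #define NSEC_PER_SEC	1000000000ULL
    #define SYNC_PERIOD_NS	(11ULL * 60 * NSEC_PER_SEC)

    /* Mirrors sched_sync_hw_clock(): expiry relative to CLOCK_REALTIME. */
    static uint64_t next_expiry(uint64_t real_seconds, uint64_t offset_nsec,
    			    int retry)
    {
    	uint64_t exp = real_seconds * NSEC_PER_SEC;

    	/* Retry two seconds out, otherwise after the 11 minute period */
    	exp += (retry ? 2 * NSEC_PER_SEC : SYNC_PERIOD_NS) - offset_nsec;
    	return exp;
    }

    int main(void)
    {
    	uint64_t offset = NSEC_PER_SEC / 2;	/* 500ms, MC146818 style */
    	uint64_t exp = next_expiry(1000, offset, 0);

    	/* 1000s + 660s - 0.5s: half a second before the full second */
    	printf("periodic: %llu.%09llu\n",
    	       (unsigned long long)(exp / NSEC_PER_SEC),
    	       (unsigned long long)(exp % NSEC_PER_SEC));

    	exp = next_expiry(1000, offset, 1);	/* failed attempt: retry */
    	printf("retry:    %llu.%09llu\n",
    	       (unsigned long long)(exp / NSEC_PER_SEC),
    	       (unsigned long long)(exp % NSEC_PER_SEC));
    	return 0;
    }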
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 908ecaa65fc3..23d1b74c3065 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -12,4 +12,11 @@ extern int __do_adjtimex(struct __kernel_timex *txc,
const struct timespec64 *ts,
s32 *time_tai, struct audit_ntp_data *ad);
extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
+
+#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
+extern void ntp_notify_cmos_timer(void);
+#else
+static inline void ntp_notify_cmos_timer(void) { }
+#endif
+
#endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index cc7cba20382e..a9e68936822d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -57,36 +57,42 @@ static ktime_t last_jiffies_update;
static void tick_do_update_jiffies64(ktime_t now)
{
unsigned long ticks = 1;
- ktime_t delta;
+ ktime_t delta, nextp;
/*
- * Do a quick check without holding jiffies_lock. The READ_ONCE()
+ * 64bit can do a quick check without holding jiffies lock and
+ * without looking at the sequence count. The smp_load_acquire()
* pairs with the update done later in this function.
*
- * This is also an intentional data race which is even safe on
- * 32bit in theory. If there is a concurrent update then the check
- * might give a random answer. It does not matter because if it
- * returns then the concurrent update is already taking care, if it
- * falls through then it will pointlessly contend on jiffies_lock.
- *
- * Though there is one nasty case on 32bit due to store tearing of
- * the 64bit value. If the first 32bit store makes the quick check
- * return on all other CPUs and the writing CPU context gets
- * delayed to complete the second store (scheduled out on virt)
- * then jiffies can become stale for up to ~2^32 nanoseconds
- * without noticing. After that point all CPUs will wait for
- * jiffies lock.
- *
- * OTOH, this is not any different than the situation with NOHZ=off
- * where one CPU is responsible for updating jiffies and
- * timekeeping. If that CPU goes out for lunch then all other CPUs
- * will operate on stale jiffies until it decides to come back.
+ * 32bit cannot do that because the store of tick_next_period
+ * consists of two 32bit stores and the first store could move it
+ * to a random point in the future.
*/
- if (ktime_before(now, READ_ONCE(tick_next_period)))
- return;
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ if (ktime_before(now, smp_load_acquire(&tick_next_period)))
+ return;
+ } else {
+ unsigned int seq;
- /* Reevaluate with jiffies_lock held */
+ /*
+ * Avoid contention on jiffies_lock and protect the quick
+ * check with the sequence count.
+ */
+ do {
+ seq = read_seqcount_begin(&jiffies_seq);
+ nextp = tick_next_period;
+ } while (read_seqcount_retry(&jiffies_seq, seq));
+
+ if (ktime_before(now, nextp))
+ return;
+ }
+
+ /* Quick check failed, i.e. update is required. */
raw_spin_lock(&jiffies_lock);
+ /*
+ * Reevaluate with the lock held. Another CPU might have done the
+ * update already.
+ */
if (ktime_before(now, tick_next_period)) {
raw_spin_unlock(&jiffies_lock);
return;
@@ -112,11 +118,25 @@ static void tick_do_update_jiffies64(ktime_t now)
jiffies_64 += ticks;
/*
- * Keep the tick_next_period variable up to date. WRITE_ONCE()
- * pairs with the READ_ONCE() in the lockless quick check above.
+ * Keep the tick_next_period variable up to date.
*/
- WRITE_ONCE(tick_next_period,
- ktime_add_ns(last_jiffies_update, TICK_NSEC));
+ nextp = ktime_add_ns(last_jiffies_update, TICK_NSEC);
+
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ /*
+ * Pairs with smp_load_acquire() in the lockless quick
+ * check above and ensures that the update to jiffies_64 is
+ * not reordered vs. the store to tick_next_period, neither
+ * by the compiler nor by the CPU.
+ */
+ smp_store_release(&tick_next_period, nextp);
+ } else {
+ /*
+ * A plain store is good enough on 32bit as the quick check
+ * above is protected by the sequence count.
+ */
+ tick_next_period = nextp;
+ }
/*
* Release the sequence count. calc_global_load() below is not